From 60fdadfc40e5986beac2108f81e3af7f486cfd0e Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 11 Apr 2017 16:12:31 -0400 Subject: [PATCH 02/10] FIXME: WIP on combining mallocs --- gcc/heap-optimizations.c | 381 ++++++++++++++++++++++++++++++- gcc/testsuite/gcc.dg/heap-optimization.c | 36 +++ 2 files changed, 410 insertions(+), 7 deletions(-) diff --git a/gcc/heap-optimizations.c b/gcc/heap-optimizations.c index a33bb36..a037225 100644 --- a/gcc/heap-optimizations.c +++ b/gcc/heap-optimizations.c @@ -17,14 +17,125 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ +// FIXME: purge this list: #include "config.h" #include "system.h" #include "coretypes.h" -#include "diagnostic-core.h" -#include "function.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" #include "tree-pass.h" +#include "ssa.h" +#include "gimple-fold.h" +#include "gimple-pretty-print.h" +#include "diagnostic-core.h" +#include "fold-const.h" +#include "gimple-iterator.h" +#include "tree-ssa.h" +#include "tree-object-size.h" +#include "params.h" +#include "tree-cfg.h" +#include "print-tree.h" + +//namespace { + +/* A matching pair of allocation/release calls. */ + +struct alloc_pair +{ + alloc_pair (gcall *acquire, gcall *release) + : m_acquire (acquire), m_release (release) + {} + + gcall *m_acquire; + gcall *m_release; +}; + +/* Combining mallocs. + + Given: + + ptr0 = malloc (sz0); + ptr1 = malloc (sz1); + ... + ptrN = malloc (szN); + + where all are within the same BB, and all are freed + within (a potentially different) same BB, convert it to: + + tot1 = sz0 + sz1; + ... + totN = szN_1 + szN; + ptr_combined = malloc (totN); + ptr0 = ptr_combined; + ptr1 = ptr_combined + sz0; (as a byte offset) + ... + ptrN = ptr_combined + (sz0 + sz1 + ... 
+ szN_1); + + and eliminate all of the free calls, apart from a: + + free (ptr_combined); + + Combine it based on frees within BBs: for each set of frees in a BB, + figure out the mallocs per BB. + + What about the aliasing implications, though? + */ + +/* A combinable group of acquire/release calls. + All calls within the group are of the same kind, + all acquire calls are within one BB, and all release + calls are within one BB. */ + +class allocation_group +{ + public: + allocation_group (basic_block acquire_bb, basic_block release_bb) + : m_acquire_bb (acquire_bb), m_release_bb (release_bb) + {} + + basic_block get_acquire_bb () const { return m_acquire_bb; } + basic_block get_release_bb () const { return m_release_bb; } + + void add_alloc_pair (gcall *alloc, gcall *release) + { + m_alloc_pairs.safe_push (alloc_pair (alloc, release)); + } + + void combine_calls (); + + private: + void sort_by_alloc_order (); + +#if 0 + gcall *get_first_alloc () const; +#endif -namespace { + private: + basic_block m_acquire_bb; + basic_block m_release_bb; + auto_vec m_alloc_pairs; +}; + +/* FIXME. */ + +class function_state +{ + public: + void add_alloc_pair (gcall *alloc, gcall *release); + + void combine_calls (); + + private: + allocation_group &get_allocation_group (basic_block bb_alloc, + basic_block bb_release); + + private: + // FIXME: probably need a more efficient data structure here. + auto_vec m_allocation_groups; +}; + +/* FIXME. 
*/ const pass_data pass_data_heap_optimizations = { @@ -41,25 +152,281 @@ const pass_data pass_data_heap_optimizations = class pass_heap_optimizations : public gimple_opt_pass { -public: + public: pass_heap_optimizations (gcc::context *ctxt) : gimple_opt_pass (pass_data_heap_optimizations, ctxt) {} /* opt_pass methods: */ - virtual unsigned int execute (function *); + unsigned int execute (function *) OVERRIDE FINAL; + private: + void handle_malloc (gcall *call); + void handle_free (function_state &state, gcall *call); }; // class pass_heap_optimizations +void +allocation_group::combine_calls () +{ + warning_at (0, 0, "alloc BB: %i free BB: %i", + m_acquire_bb->index, m_release_bb->index); + + alloc_pair *pair; + unsigned i; + FOR_EACH_VEC_ELT (m_alloc_pairs, i, pair) + { + warning_at (pair->m_acquire->location, 0, "combining alloc here"); + warning_at (pair->m_release->location, 0, "combining release here"); + } + +#if 0 + gcall *first_alloc = get_first_alloc (); + /* FIXME: how to insert a stmt (need to sum the sizes) */ +#endif + + /* m_alloc_pairs is in order of the frees, but we need to process + the allocs in the order they occur in. */ + sort_by_alloc_order (); + + FOR_EACH_VEC_ELT (m_alloc_pairs, i, pair) + { + warning_at (pair->m_acquire->location, 0, "combining alloc here"); + warning_at (pair->m_release->location, 0, "combining release here"); + } + + gimple_seq seq = NULL; + + /* TODO: + test cases: + - involving the need for a cast on the sizeof + - different orders of malloc vs free + - use of ptrs before the allocs are used. */ + + /* Sum the sizes of the allocations. */ + // FIXME: this assumes that all sizes are available before any ptrs are used. 
+ tree sum = size_zero_node; + FOR_EACH_VEC_ELT (m_alloc_pairs, i, pair) + { + tree alloc_size = gimple_call_arg (pair->m_acquire, 0); + tree cast = gimple_build (&seq, pair->m_acquire->location, + NOP_EXPR, size_type_node, alloc_size); + tree next_sum = gimple_build (&seq, pair->m_acquire->location, + PLUS_EXPR, size_type_node, sum, cast); + sum = next_sum; + } + +#if 1 + for (int j = 0; j < 5; j++) + { + gimple *stmt = gimple_build_assign (make_ssa_name (size_type_node), + PLUS_EXPR, size_zero_node, // FIXME + size_zero_node); // FIXME + gimple_seq_add_stmt (&seq, stmt); + //; + //gsi_replace (iter, g, false); + + //stmt = gimple_build_assign (dest, src); + //gimple_set_location (stmt, loc); + //gimple_set_vuse (stmt, new_vuse); + } +#endif + + // FIXME: all we're doing so far is summing the sizes + + gimple *last_alloc = m_alloc_pairs[m_alloc_pairs.length () - 1].m_acquire; + + gimple_stmt_iterator last_gsi = gsi_for_stmt (last_alloc); + gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT); +} + +/* FIXME. */ + +static int +sort_by_alloc_uid (const void *x, const void *y) +{ + const alloc_pair *tmp = (const alloc_pair *) x; + const alloc_pair *tmp2 = (const alloc_pair *) y; + + return ((int)tmp->m_acquire->uid - (int)tmp2->m_acquire->uid); +} + +/* FIXME. */ + +void +allocation_group::sort_by_alloc_order () +{ + /* Number the stmts within the BB. */ + unsigned int idx = 0; + for (gimple_stmt_iterator gsi = gsi_start_bb (m_acquire_bb); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + gsi_stmt (gsi)->uid = idx; + idx++; + } + + /* Sort the alloc_pairs based on the ordering of the alloc stmts. 
*/ + m_alloc_pairs.qsort (sort_by_alloc_uid); +} + + +void +function_state::add_alloc_pair (gcall *alloc, gcall *release) +{ + gcc_assert (alloc); + gcc_assert (release); + +#if 0 + warning_at (alloc->location, 0, "alloc pair: alloc here..."); + warning_at (release->location, 0, "alloc pair: ...release here"); +#endif + + allocation_group &group = get_allocation_group (alloc->bb, release->bb); + group.add_alloc_pair (alloc, release); +} + +void +function_state::combine_calls () +{ + allocation_group *group; + unsigned i; + FOR_EACH_VEC_ELT (m_allocation_groups, i, group) + group->combine_calls (); +} + +allocation_group & +function_state::get_allocation_group (basic_block bb_alloc, + basic_block bb_release) +{ + /* FIXME: for now, we inefficiently do a linear search (giving O(n^2) + on the # of malloc calls in one function). */ + + allocation_group *group; + unsigned i; + FOR_EACH_VEC_ELT (m_allocation_groups, i, group) + if (group->get_acquire_bb () == bb_alloc + && group->get_release_bb () == bb_release) + return *group; + + /* Not found? Add one. */ + m_allocation_groups.safe_push (allocation_group (bb_alloc, bb_release)); + return m_allocation_groups[m_allocation_groups.length () - 1]; +} + + unsigned int pass_heap_optimizations::execute (function *fun) { warning_at (fun->function_start_locus, 0, - "pass_heap_optimizations::execute"); + "pass_heap_optimizations::execute: %qD", fun->decl); + + // FIXME: maybe examine callgraph for calls to malloc and free? + // does this give us the BB? + + //auto_vec per_bb_allocs (n_basic_blocks_for_fn); + + function_state state; + + /* Per-BB, look for free calls; locate matching malloc calls, and + group them by *their* BB. */ + + basic_block bb; + FOR_EACH_BB_FN (bb, fun) + { + for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); ) + { + /* Iterate over statements, looking for function calls. 
 */ + gimple *stmt = gsi_stmt (si); + +#if 0 + if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)) + handle_malloc (as_a (stmt)); +#endif + + if (gimple_call_builtin_p (stmt, BUILT_IN_FREE)) + handle_free (state, as_a (stmt)); + + gsi_next (&si); + } + } + + state.combine_calls (); + + { + push_cfun (fun); + dump_function_to_file (fun->decl, stderr, 0); + pop_cfun (); + } + return 0; // FIXME } -} // anon namespace +/* Merging mallocs/frees. */ +/* If we have a set of malloc in one BB and a set of frees in one BB... */ + +void +pass_heap_optimizations::handle_malloc (gcall *call) +{ + warning_at (call->location, 0, "found malloc"); + + tree size = gimple_call_arg (call, 0); + debug_tree (size); + //warning_at (call->location, 0, "arg0 size: %qE", size); +} + +void +pass_heap_optimizations::handle_free (function_state &state, gcall *call) +{ + //warning_at (call->location, 0, "found free"); + + tree ptr = gimple_call_arg (call, 0); + //warning_at (call->location, 0, "arg0 ptr: %qE", ptr); + + /* Try to match the ptr up with a malloc call. */ + if (TREE_CODE (ptr) != SSA_NAME) + return; + + gimple *def_stmt = SSA_NAME_DEF_STMT (ptr); + if (!def_stmt) + return; + + if (gimple_call_builtin_p (def_stmt, BUILT_IN_MALLOC)) + { + //warning_at (def_stmt->location, 0, "found matching malloc"); + state.add_alloc_pair (as_a (def_stmt), call); + } +} + + +/* FIXME. */ + +#if 0 + /* FIXME: + + Look for calls to: + - "malloc"/"free" + - "operator new"/"operator delete" + - "operator new[]"/"operator delete[]" + + Attempt to convert malloc to something more specialized. + + Is the allocation in a loop? + Can the allocation escape? + + if size is known to be small enough, and not in a loop, convert to: + __builtin_alloca (size) + If not known to be small enough, convert to: + if (size < limit) + ptr = __builtin_alloca (size) + else + ptr = + Otherwise... + + We probably want to work with alloc/free pairs, and locate them. 
+ */ +#endif + + +//} // anon namespace gimple_opt_pass * make_pass_heap_optimizations (gcc::context *ctxt) diff --git a/gcc/testsuite/gcc.dg/heap-optimization.c b/gcc/testsuite/gcc.dg/heap-optimization.c index 7be1f59..b73c65d 100644 --- a/gcc/testsuite/gcc.dg/heap-optimization.c +++ b/gcc/testsuite/gcc.dg/heap-optimization.c @@ -3,6 +3,7 @@ #include +#if 1 typedef unsigned int edit_distance_t; #define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) @@ -51,3 +52,38 @@ levenshtein_distance (const char *s, int len_s, free(v1); return result; } +#endif + +#if 1 +/* This already gets optimized well. + + fre1 turns: + result = *ptr_a + *ptr_b; + into: + result = x + y; + + Then dse1 eliminates the "*ptr_a = x;" and for b and y. + + Then cddce1 eliminates the malloc/free, leading to just: + result = x + y; + */ + +int unboxing (int x, int y) +{ + int *ptr_a, *ptr_b; + int result; + + ptr_a = malloc (sizeof(int)); + *ptr_a = x; + + ptr_b = malloc (sizeof(int)); + *ptr_b = y; + + result = *ptr_a + *ptr_b; + + free (ptr_a); + free (ptr_b); + + return result; +} +#endif -- 1.8.5.3