diff options
-rw-r--r-- | ChangeLog | 66 | ||||
-rw-r--r-- | elaborate.cxx | 457 | ||||
-rw-r--r-- | elaborate.h | 3 | ||||
-rw-r--r-- | parse.cxx | 303 | ||||
-rw-r--r-- | parse.h | 2 | ||||
-rw-r--r-- | staptree.cxx | 601 | ||||
-rw-r--r-- | staptree.h | 172 | ||||
-rwxr-xr-x | testsuite/buildok/printf.stp | 29 | ||||
-rw-r--r-- | translate.cxx | 791 |
9 files changed, 2011 insertions, 413 deletions
@@ -1,3 +1,69 @@ +2005-11-13 Graydon Hoare <graydon@redhat.com> + + * staptree.h (struct indexable): New struct. + (classify_indexable): New function. + (classify_const_indexable): New function. + (struct symbol): Implement indexable. + (struct arrayindex): Take indexable as base. + (struct foreach_loop): Take indexable as base. + (struct print_format): New struct. + (enum stat_component_type): New enum. + (struct stat_op): New struct. + (enum histogram_type): New enum. + (struct hist_op): New struct. + (struct visitor) + (struct traversing_visitor) + (struct throwing_visitor) + (struct deep_copy_visitor): Add new visitor methods. + (require): Specialize for indexable*. + + * staptree.cxx (print_format::*) + (stat_op::*) + (hist_op::*) + (indexable::*) + (traversing_visitor::*) + (throwing_visitor::*) + (deep_copy_visitor::*) + (classify_indexable) + (classify_const_indexable): Implement. + (deep_copy_visitor::*): Update to use indexables. + + * parse.h (parser::parse_indexable): New method. + (parser::parse_hist_op_or_bare_name): New method. + + * parse.cxx (lexer::scan): Accept @ in identifiers. + (parser::parse_array_in) + (parser::parse_foreach_loop): Call parse_indexable. + (parser::parse_hist_op_or_bare_name): Implement. + (parser::parse_indexable): Implement. + (parser::parse_symbol): Accept printf, stat_ops, hist_ops. + + * elaborate.h (struct typeresolution_info): Add methods for + visiting print_format, stat_op, hist_op. + + * elaborate.cxx (symbol_fetcher): New class. + (get_symbol_within_expression): New function. + (get_symbol_within_indexable): New function. + (mutated_var_collector): Replace mutated_map_collector. + (no_var_mutation_during_iteration_check): Replace + no_map_mutation_during_iteration_check. + (semantic_pass_vars): Replace semantic_pass_maps. + (semantic_pass): Update call accordingly. + (symresolution_info::*): Add new visitors, teach about indexables. + (typeresolution_info::*): Likewise. 
+ + * translate.cxx + (c_unparser::getiter): Take symbol, not foreach_loop. + (c_unparser::*) Add new visitors, teach about indexables. + (c_tmpcounter::*) + (delete_statement_operand_visitor::visit_arrayindex) + (c_tmpcounter_assignment::*) + (c_unparser_assignment::*): Likewise. + (hist_op_downcaster): New struct. + (expression_is_hist_op): New function. + + * testsuite/buildok/printf.stp: New test for print_format. + 2005-11-10 Frank Ch. Eigler <fche@elastic.org> * translate.cxx (c_unparser::visit_array_in, visit_arrayindex): diff --git a/elaborate.cxx b/elaborate.cxx index 890e9dc0..280fd92f 100644 --- a/elaborate.cxx +++ b/elaborate.cxx @@ -452,51 +452,121 @@ derive_probes (systemtap_session& s, // ------------------------------------------------------------------------ // -// Map usage checks +// Indexable usage checks // -struct mutated_map_collector - : public traversing_visitor +struct symbol_fetcher + : virtual public throwing_visitor { - set<vardecl *> * mutated_maps; + symbol *&sym; - mutated_map_collector(set<vardecl *> * mm) - : mutated_maps (mm) + symbol_fetcher (symbol *&sym) + : sym(sym) {} + void visit_symbol (symbol* e) + { + sym = e; + } + + void visit_arrayindex (arrayindex* e) + { + e->base->visit_indexable (this); + } + + void throwone (const token* t) + { + throw semantic_error ("Expecting symbol or array index expression", t); + } +}; + +static symbol * +get_symbol_within_expression (expression *e) +{ + symbol *sym = NULL; + symbol_fetcher fetcher(sym); + e->visit (&fetcher); + if (!sym) + throw semantic_error("Unable to find symbol in expression", e->tok); + return sym; +} + +static symbol * +get_symbol_within_indexable (indexable *ix) +{ + symbol *array = NULL; + hist_op *hist = NULL; + classify_indexable(ix, array, hist); + if (array) + return array; + else + return get_symbol_within_expression (hist->stat); +} + +struct mutated_var_collector + : virtual public traversing_visitor +{ + set<vardecl *> * mutated_vars; + + 
mutated_var_collector (set<vardecl *> * mm) + : mutated_vars (mm) + {} + + void visit_assignment(assignment* e) + { + if (e->type == pe_stats && e->op == "<<<") + { + vardecl *vd = get_symbol_within_expression (e->left)->referent; + if (vd) + mutated_vars->insert (vd); + } + e->left->visit (this); + e->right->visit (this); + } + void visit_arrayindex (arrayindex *e) { - if (is_active_lvalue(e)) - mutated_maps->insert(e->referent); + if (is_active_lvalue (e)) + { + symbol *sym; + if (e->base->is_symbol (sym)) + mutated_vars->insert (sym->referent); + else + throw semantic_error("Assignment to read-only histogram bucket", e->tok); + } } }; -struct no_map_mutation_during_iteration_check - : public traversing_visitor +struct no_var_mutation_during_iteration_check + : virtual public traversing_visitor { systemtap_session & session; - map<functiondecl *,set<vardecl *> *> & function_mutates_maps; - vector<vardecl *> maps_being_iterated; + map<functiondecl *,set<vardecl *> *> & function_mutates_vars; + vector<vardecl *> vars_being_iterated; - no_map_mutation_during_iteration_check + no_var_mutation_during_iteration_check (systemtap_session & sess, - map<functiondecl *,set<vardecl *> *> & fmm) - : session(sess), function_mutates_maps (fmm) + map<functiondecl *,set<vardecl *> *> & fmv) + : session(sess), function_mutates_vars (fmv) {} void visit_arrayindex (arrayindex *e) { if (is_active_lvalue(e)) { - for (unsigned i = 0; i < maps_being_iterated.size(); ++i) + vardecl *vd = get_symbol_within_indexable (e->base)->referent; + if (vd) { - vardecl *m = maps_being_iterated[i]; - if (m == e->referent) + for (unsigned i = 0; i < vars_being_iterated.size(); ++i) { - string err = ("map '" + m->name + - "' modified during 'foreach' iteration"); - session.print_error (semantic_error (err, e->tok)); + vardecl *v = vars_being_iterated[i]; + if (v == vd) + { + string err = ("variable '" + v->name + + "' modified during 'foreach' iteration"); + session.print_error (semantic_error (err, 
e->tok)); + } } } } @@ -505,16 +575,16 @@ struct no_map_mutation_during_iteration_check void visit_functioncall (functioncall* e) { map<functiondecl *,set<vardecl *> *>::const_iterator i - = function_mutates_maps.find (e->referent); + = function_mutates_vars.find (e->referent); - if (i != function_mutates_maps.end()) + if (i != function_mutates_vars.end()) { - for (unsigned j = 0; j < maps_being_iterated.size(); ++j) + for (unsigned j = 0; j < vars_being_iterated.size(); ++j) { - vardecl *m = maps_being_iterated[j]; + vardecl *m = vars_being_iterated[j]; if (i->second->find (m) != i->second->end()) { - string err = ("function call modifies map '" + m->name + + string err = ("function call modifies var '" + m->name + "' during 'foreach' iteration"); session.print_error (semantic_error (err, e->tok)); } @@ -527,21 +597,27 @@ struct no_map_mutation_during_iteration_check void visit_foreach_loop(foreach_loop* s) { - maps_being_iterated.push_back (s->base_referent); + vardecl *vd = get_symbol_within_indexable (s->base)->referent; + + if (vd) + vars_being_iterated.push_back (vd); + for (unsigned i=0; i<s->indexes.size(); i++) s->indexes[i]->visit (this); s->block->visit (this); - maps_being_iterated.pop_back(); + + if (vd) + vars_being_iterated.pop_back(); } }; static int -semantic_pass_maps (systemtap_session & sess) +semantic_pass_vars (systemtap_session & sess) { - map<functiondecl *, set<vardecl *> *> fmm; - no_map_mutation_during_iteration_check chk(sess, fmm); + map<functiondecl *, set<vardecl *> *> fmv; + no_var_mutation_during_iteration_check chk(sess, fmv); for (unsigned i = 0; i < sess.functions.size(); ++i) { @@ -549,9 +625,9 @@ semantic_pass_maps (systemtap_session & sess) if (fn->body) { set<vardecl *> * m = new set<vardecl *>(); - mutated_map_collector mc (m); + mutated_var_collector mc (m); fn->body->visit (&mc); - fmm[fn] = m; + fmv[fn] = m; } } @@ -575,7 +651,7 @@ semantic_pass_maps (systemtap_session & sess) static int semantic_pass_symbols 
(systemtap_session&); static int semantic_pass_types (systemtap_session&); -static int semantic_pass_maps (systemtap_session&); +static int semantic_pass_vars (systemtap_session&); @@ -691,7 +767,7 @@ semantic_pass (systemtap_session& s) rc = semantic_pass_symbols (s); if (rc == 0) rc = semantic_pass_types (s); - if (rc == 0) rc = semantic_pass_maps (s); + if (rc == 0) rc = semantic_pass_vars (s); } catch (const semantic_error& e) { @@ -759,18 +835,31 @@ symresolution_info::visit_foreach_loop (foreach_loop* e) for (unsigned i=0; i<e->indexes.size(); i++) e->indexes[i]->visit (this); - if (e->base_referent) - return; + symbol *array = NULL; + hist_op *hist = NULL; + classify_indexable (e->base, array, hist); - vardecl* d = find_var (e->base, e->indexes.size ()); - if (d) - e->base_referent = d; - else - throw semantic_error ("unresolved global array " + e->base, e->tok); + if (array) + { + if (!array->referent) + { + vardecl* d = find_var (array->name, e->indexes.size ()); + if (d) + array->referent = d; + else + throw semantic_error ("unresolved global array " + array->name, e->tok); + } + } + else + { + assert (hist); + hist->visit (this); + } e->block->visit (this); } + struct delete_statement_symresolution_info: public traversing_visitor @@ -844,27 +933,39 @@ symresolution_info::visit_arrayindex (arrayindex* e) for (unsigned i=0; i<e->indexes.size(); i++) e->indexes[i]->visit (this); - if (e->referent) - return; + symbol *array = NULL; + hist_op *hist = NULL; + classify_indexable(e->base, array, hist); - vardecl* d = find_var (e->base, e->indexes.size ()); - if (d) - e->referent = d; - else + if (array) { - // new local - vardecl* v = new vardecl; - v->set_arity(e->indexes.size()); - v->name = e->base; - v->tok = e->tok; - if (current_function) - current_function->locals.push_back (v); - else if (current_probe) - current_probe->locals.push_back (v); + if (array->referent) + return; + + vardecl* d = find_var (array->name, e->indexes.size ()); + if (d) + 
array->referent = d; else - // must not happen - throw semantic_error ("no current probe/function", e->tok); - e->referent = v; + { + // new local + vardecl* v = new vardecl; + v->set_arity(e->indexes.size()); + v->name = array->name; + v->tok = array->tok; + if (current_function) + current_function->locals.push_back (v); + else if (current_probe) + current_probe->locals.push_back (v); + else + // must not happen + throw semantic_error ("no current probe/function", e->tok); + array->referent = v; + } + } + else + { + assert (hist); + hist->visit (this); } } @@ -1224,6 +1325,7 @@ typeresolution_info::visit_assignment (assignment *e) e->right->type != pe_unknown && e->left->type != e->right->type) mismatch (e->tok, e->left->type, e->right->type); + } else throw semantic_error ("unsupported assignment operator " + e->op); @@ -1373,11 +1475,7 @@ void typeresolution_info::visit_symbol (symbol* e) { assert (e->referent != 0); - - if (e->referent->arity > 0) - unresolved (e->tok); // symbol resolution should not permit this - else - resolve_2types (e, e->referent, this, t); + resolve_2types (e, e->referent, this, t); } @@ -1391,9 +1489,32 @@ typeresolution_info::visit_target_symbol (target_symbol* e) void typeresolution_info::visit_arrayindex (arrayindex* e) { - assert (e->referent != 0); - resolve_2types (e, e->referent, this, t); + symbol *array = NULL; + hist_op *hist = NULL; + classify_indexable(e->base, array, hist); + + // Every hist_op has type [int]:int, that is to say, every hist_op + // is a pseudo-one-dimensional integer array type indexed by + // integers (bucket numbers). + + if (hist) + { + if (e->indexes.size() != 1) + unresolved (e->tok); + t = pe_long; + e->indexes[0]->visit (this); + if (e->indexes[0]->type != pe_long) + unresolved (e->tok); + hist->stat->visit (this); + return; + } + + // Now we are left with "normal" map inference and index checking. 
+ + assert (array); + assert (array->referent != 0); + resolve_2types (e, array->referent, this, t); // now resolve the array indexes @@ -1401,12 +1522,12 @@ typeresolution_info::visit_arrayindex (arrayindex* e) // // redesignate referent as array // e->referent->set_arity (e->indexes.size ()); - if (e->indexes.size() != e->referent->index_types.size()) + if (e->indexes.size() != array->referent->index_types.size()) unresolved (e->tok); // symbol resolution should prevent this else for (unsigned i=0; i<e->indexes.size(); i++) { expression* ee = e->indexes[i]; - exp_type& ft = e->referent->index_types [i]; + exp_type& ft = array->referent->index_types [i]; t = ft; ee->visit (this); exp_type at = ee->type; @@ -1415,7 +1536,7 @@ typeresolution_info::visit_arrayindex (arrayindex* e) { // propagate to formal type ft = at; - resolved (e->referent->tok, ft); + resolved (array->referent->tok, ft); // uses array decl as there is no token for "formal type" } if (at == pe_stats) @@ -1539,31 +1660,49 @@ typeresolution_info::visit_foreach_loop (foreach_loop* e) // // redesignate referent as array // e->referent->set_arity (e->indexes.size ()); - if (e->indexes.size() != e->base_referent->index_types.size()) - unresolved (e->tok); // symbol resolution should prevent this - else for (unsigned i=0; i<e->indexes.size(); i++) - { - expression* ee = e->indexes[i]; - exp_type& ft = e->base_referent->index_types [i]; - t = ft; - ee->visit (this); - exp_type at = ee->type; + symbol *array = NULL; + hist_op *hist = NULL; + classify_indexable(e->base, array, hist); - if ((at == pe_string || at == pe_long) && ft == pe_unknown) - { - // propagate to formal type - ft = at; - resolved (e->base_referent->tok, ft); - // uses array decl as there is no token for "formal type" - } - if (at == pe_stats) - invalid (ee->tok, at); - if (ft == pe_stats) - invalid (ee->tok, ft); - if (at != pe_unknown && ft != pe_unknown && ft != at) - mismatch (e->tok, at, ft); - if (at == pe_unknown) - unresolved 
(ee->tok); + if (hist) + { + if (e->indexes.size() != 1) + unresolved (e->tok); + t = pe_long; + e->indexes[0]->visit (this); + if (e->indexes[0]->type != pe_long) + unresolved (e->tok); + hist->stat->visit (this); + } + else + { + assert (array); + if (e->indexes.size() != array->referent->index_types.size()) + unresolved (e->tok); // symbol resolution should prevent this + else for (unsigned i=0; i<e->indexes.size(); i++) + { + expression* ee = e->indexes[i]; + exp_type& ft = array->referent->index_types [i]; + t = ft; + ee->visit (this); + exp_type at = ee->type; + + if ((at == pe_string || at == pe_long) && ft == pe_unknown) + { + // propagate to formal type + ft = at; + resolved (array->referent->tok, ft); + // uses array decl as there is no token for "formal type" + } + if (at == pe_stats) + invalid (ee->tok, at); + if (ft == pe_stats) + invalid (ee->tok, ft); + if (at != pe_unknown && ft != pe_unknown && ft != at) + mismatch (e->tok, at, ft); + if (at == pe_unknown) + unresolved (ee->tok); + } } t = pe_unknown; @@ -1683,6 +1822,136 @@ typeresolution_info::visit_return_statement (return_statement* e) invalid (e->value->tok, e->value->type); } +void +typeresolution_info::visit_print_format (print_format* e) +{ + size_t unresolved_args = 0; + + if (e->print_with_format) + { + // If there's a format string, we can do both inference *and* + // checking. + + // First we extract the subsequence of formatting components + // which are conversions (not just literal string components) + + std::vector<print_format::format_component> components; + for (size_t i = 0; i < e->components.size(); ++i) + { + if (e->components[i].type == print_format::conv_unspecified) + throw semantic_error ("Unspecified conversion in print operator format string", + e->tok); + else if (e->components[i].type == print_format::conv_literal) + continue; + components.push_back(e->components[i]); + } + + // Then we check that the number of conversions and the number + // of args agree. 
+ + if (components.size() != e->args.size()) + throw semantic_error ("Wrong number of args to formatted print operator", + e->tok); + + // Then we check that the types of the conversions match the types + // of the args. + for (size_t i = 0; i < components.size(); ++i) + { + exp_type wanted = pe_unknown; + + switch (components[i].type) + { + + case print_format::conv_unspecified: + case print_format::conv_literal: + assert (false); + break; + + case print_format::conv_signed_decimal: + case print_format::conv_unsigned_decimal: + case print_format::conv_unsigned_octal: + case print_format::conv_unsigned_uppercase_hex: + case print_format::conv_unsigned_lowercase_hex: + wanted = pe_long; + break; + + case print_format::conv_string: + wanted = pe_string; + break; + } + + assert (wanted != pe_unknown); + + t = wanted; + e->args[i]->visit (this); + + if (e->args[i]->type == pe_unknown) + { + e->args[i]->type = wanted; + resolved (e->args[i]->tok, wanted); + } + else if (e->args[i]->type != wanted) + { + mismatch (e->args[i]->tok, e->args[i]->type, wanted); + } + } + } + else + { + // Without a format string, the best we can do is require that + // each argument resolve to a concrete type. 
+ for (size_t i = 0; i < e->args.size(); ++i) + { + t = pe_unknown; + e->args[i]->visit (this); + if (e->args[i]->type == pe_unknown) + { + unresolved (e->args[i]->tok); + ++unresolved_args; + } + } + } + + if (unresolved_args == 0) + { + if (e->type == pe_unknown) + { + if (e->print_to_stream) + e->type = pe_long; + else + e->type = pe_string; + resolved (e->tok, e->type); + } + } + else + { + e->type = pe_unknown; + unresolved (e->tok); + } +} + + +void +typeresolution_info::visit_stat_op (stat_op* e) +{ + t = pe_stats; + e->stat->visit (this); + if (e->type == pe_unknown) + { + e->type = pe_long; + resolved (e->tok, e->type); + } + else + mismatch (e->tok, e->type, pe_long); +} + +void +typeresolution_info::visit_hist_op (hist_op* e) +{ + t = pe_stats; + e->stat->visit (this); +} + void typeresolution_info::unresolved (const token* tok) diff --git a/elaborate.h b/elaborate.h index df021076..0d2da63b 100644 --- a/elaborate.h +++ b/elaborate.h @@ -90,6 +90,9 @@ struct typeresolution_info: public visitor void visit_target_symbol (target_symbol* e); void visit_arrayindex (arrayindex* e); void visit_functioncall (functioncall* e); + void visit_print_format (print_format* e); + void visit_stat_op (stat_op* e); + void visit_hist_op (hist_op* e); }; @@ -431,7 +431,7 @@ lexer::scan () if (isspace (c)) goto skip; - else if (isalpha (c) || c == '$' || c == '_') + else if (isalpha (c) || c == '$' || c == '@' || c == '_') { n->type = tok_identifier; n->content = (char) c; @@ -1407,11 +1407,8 @@ parser::parse_foreach_loop () t = next (); if (! 
(t->type == tok_identifier && t->content == "in")) throw parse_error ("expected 'in'"); - - t = next (); - if (t->type != tok_identifier) - throw parse_error ("expected identifier"); - s->base = t->content; + + s->base = parse_indexable(); t = peek (); if (t && t->type == tok_operator && @@ -1662,13 +1659,8 @@ parser::parse_array_in () arrayindex* a = new arrayindex; a->indexes = indexes; - - t = next (); - if (t->type != tok_identifier) - throw parse_error ("expected identifier"); - a->tok = t; - a->base = t->content; - + a->base = parse_indexable(); + a->tok = a->base->get_tok(); e->operand = a; return e; } @@ -1880,50 +1872,231 @@ parser::parse_value () } -// var, var[index], func(parms), thread->var, process->var +const token * +parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name) +{ + hop = NULL; + const token* t = expect_ident (name); + if (name == "@hist_linear" || name == "@hist_log") + { + hop = new hist_op; + if (name == "@hist_linear") + hop->htype = hist_linear; + else if (name == "@hist_log") + hop->htype = hist_log; + hop->tok = t; + expect_op("("); + hop->stat = parse_expression (); + int64_t tnum; + if (hop->htype == hist_linear) + { + for (size_t i = 0; i < 3; ++i) + { + expect_op (","); + expect_number (tnum); + hop->params.push_back (tnum); + } + } + else + { + assert(hop->htype == hist_log); + if (peek_op (",")) + { + expect_op (","); + expect_number (tnum); + hop->params.push_back (tnum); + } + else + { + // FIXME (magic value): Logarithmic histograms get 64 + // buckets by default. 
+ hop->params.push_back (64); + } + } + expect_op(")"); + } + return t; +} + + +indexable* +parser::parse_indexable () +{ + hist_op *hop = NULL; + string name; + const token *tok = parse_hist_op_or_bare_name(hop, name); + if (hop) + return hop; + else + { + symbol* sym = new symbol; + sym->name = name; + sym->tok = tok; + return sym; + } +} + + +// var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat) expression* parser::parse_symbol () { + hist_op *hop = NULL; + symbol *sym = NULL; string name; - const token* t = expect_ident (name); - const token* t2 = t; - - if (name.size() > 0 && name[0] == '$') + const token *t = parse_hist_op_or_bare_name(hop, name); + + if (!hop) { - // target_symbol time - target_symbol *tsym = new target_symbol; - tsym->tok = t; - tsym->base_name = name; - while (true) + // If we didn't get a hist_op, then we did get an identifier. We can + // now scrutinize this identifier for the various magic forms of identifier + // (printf, @stat_op, and $var...) 
+ + if (name.size() > 0 && name[0] == '@') { - string c; - if (peek_op ("->")) - { - next(); - expect_ident (c); - tsym->components.push_back - (make_pair (target_symbol::comp_struct_member, c)); + stat_op *sop = new stat_op; + if (name == "@avg") + sop->ctype = sc_average; + else if (name == "@count") + sop->ctype = sc_count; + else if (name == "@sum") + sop->ctype = sc_sum; + else if (name == "@min") + sop->ctype = sc_min; + else if (name == "@max") + sop->ctype = sc_max; + else + throw parse_error("unknown statistic operator " + name); + expect_op("("); + sop->tok = t; + sop->stat = parse_expression (); + expect_op(")"); + return sop; + } + + else if (name.size() > 0 && (name == "print" + || name == "sprint" + || name == "printf" + || name == "sprintf")) + { + print_format *fmt = new print_format; + fmt->tok = t; + fmt->print_with_format = (name[name.size() - 1] == 'f'); + fmt->print_to_stream = (name[0] == 'p'); + expect_op("("); + if (fmt->print_with_format) + { + // Consume and convert a format string, and any subsequent + // arguments. Agreement between the format string and the + // arguments is postponed to the typechecking phase. + string tmp; + expect_unknown (tok_string, tmp); + fmt->components = print_format::string_to_components (tmp); + while (!peek_op (")")) + { + expect_op(","); + expression *e = parse_expression (); + fmt->args.push_back(e); + } } - else if (peek_op ("[")) - { - next(); - expect_unknown (tok_number, c); - expect_op ("]"); - tsym->components.push_back - (make_pair (target_symbol::comp_literal_array_index, c)); - } else - break; + { + // If we are not printing with a format string, we permit + // exactly one argument (of any type). 
+ expression *e = parse_expression (); + fmt->args.push_back(e); + } + expect_op(")"); + return fmt; + } + + else if (name.size() > 0 && name[0] == '$') + { + // target_symbol time + target_symbol *tsym = new target_symbol; + tsym->tok = t; + tsym->base_name = name; + while (true) + { + string c; + if (peek_op ("->")) + { + next(); + expect_ident (c); + tsym->components.push_back + (make_pair (target_symbol::comp_struct_member, c)); + } + else if (peek_op ("[")) + { + next(); + expect_unknown (tok_number, c); + expect_op ("]"); + tsym->components.push_back + (make_pair (target_symbol::comp_literal_array_index, c)); + } + else + break; + } + return tsym; + } + + else if (peek_op ("(")) // function call + { + next (); + struct functioncall* f = new functioncall; + f->tok = t; + f->function = name; + // Allow empty actual parameter list + if (peek_op (")")) + { + next (); + return f; + } + while (1) + { + f->args.push_back (parse_expression ()); + if (peek_op (")")) + { + next(); + break; + } + else if (peek_op (",")) + { + next(); + continue; + } + else + throw parse_error ("expected ',' or ')'"); + } + return f; + } + + else + { + sym = new symbol; + sym->name = name; + sym->tok = t; } - return tsym; } + // By now, either we had a hist_op in the first place, or else + // we had a plain word and it was converted to a symbol. 
+ + assert (hop || sym); + + // All that remains is to check for array indexing + if (peek_op ("[")) // array { next (); struct arrayindex* ai = new arrayindex; - ai->tok = t2; - ai->base = name; + ai->tok = t; + + if (hop) + ai->base = hop; + else + ai->base = sym; + while (1) { ai->indexes.push_back (parse_expression ()); @@ -1942,42 +2115,14 @@ parser::parse_symbol () } return ai; } - else if (peek_op ("(")) // function call - { - next (); - struct functioncall* f = new functioncall; - f->tok = t2; - f->function = name; - // Allow empty actual parameter list - if (peek_op (")")) - { - next (); - return f; - } - while (1) - { - f->args.push_back (parse_expression ()); - if (peek_op (")")) - { - next(); - break; - } - else if (peek_op (",")) - { - next(); - continue; - } - else - throw parse_error ("expected ',' or ')'"); - } - return f; - } - else - { - symbol* sym = new symbol; - sym->name = name; - sym->tok = t2; - return sym; - } + + // If we got to here, we *should* have a symbol; if we have + // a hist_op on its own, it doesn't count as an expression, + // so we throw a parse error. 
+ + if (hop) + throw parse_error("base histogram operator where expression expected", t); + + return sym; } @@ -136,6 +136,8 @@ private: // nonterminals next_statement* parse_next_statement (); break_statement* parse_break_statement (); continue_statement* parse_continue_statement (); + indexable* parse_indexable (); + const token *parse_hist_op_or_bare_name (hist_op *&hop, std::string &name); expression* parse_expression (); expression* parse_assignment (); expression* parse_ternary (); diff --git a/staptree.cxx b/staptree.cxx index 94fd540d..6ea1fc70 100644 --- a/staptree.cxx +++ b/staptree.cxx @@ -11,6 +11,7 @@ #include "parse.h" #include <iostream> #include <typeinfo> +#include <sstream> #include <cassert> using namespace std; @@ -45,7 +46,7 @@ symbol::symbol (): arrayindex::arrayindex (): - referent (0) + base (0) { } @@ -316,6 +317,377 @@ void functioncall::print (ostream& o) const } +string +print_format::components_to_string(vector<format_component> const & components) +{ + ostringstream oss; + + for (vector<format_component>::const_iterator i = components.begin(); + i != components.end(); ++i) + { + + assert (i->type != conv_unspecified); + + if (i->type == conv_literal) + { + assert(!i->literal_string.empty()); + for (string::const_iterator j = i->literal_string.begin(); + j != i->literal_string.end(); ++j) + { + if (*j == '%') + oss << '%'; + oss << *j; + } + } + else + { + oss << '%'; + + if (i->flags & static_cast<unsigned long>(fmt_flag_zeropad)) + oss << '0'; + + if (i->flags & static_cast<unsigned long>(fmt_flag_plus)) + oss << '+'; + + if (i->flags & static_cast<unsigned long>(fmt_flag_space)) + oss << ' '; + + if (i->flags & static_cast<unsigned long>(fmt_flag_left)) + oss << '-'; + + if (i->flags & static_cast<unsigned long>(fmt_flag_special)) + oss << '#'; + + if (i->width > 0) + oss << i->width; + + if (i->precision > 0) + oss << '.' 
<< i->precision; + + switch (i->type) + { + case conv_signed_decimal: + oss << "lld"; + break; + + case conv_unsigned_decimal: + oss << "llu"; + break; + + case conv_unsigned_octal: + oss << "llo"; + break; + + case conv_unsigned_uppercase_hex: + oss << "llX"; + break; + + case conv_unsigned_lowercase_hex: + oss << "llx"; + break; + + case conv_string: + oss << 's'; + break; + + default: + break; + } + } + } + return oss.str (); +} + +vector<print_format::format_component> +print_format::string_to_components(string const & str) +{ + format_component curr; + vector<format_component> res; + + enum + { + parsing_plain_data, + parsing_flags, + parsing_width, + parsing_precision, + parsing_conversion_specifier + } + state = parsing_plain_data; + + curr.clear(); + + string::const_iterator i = str.begin(); + + while (i != str.end()) + { + switch (state) + { + case parsing_plain_data: + + if (*i != '%') + { + assert (curr.type == conv_unspecified || curr.type == conv_literal); + curr.type = conv_literal; + curr.literal_string += *i; + } + else if (i+1 == str.end() || *(i+1) == '%') + { + assert(*i == '%'); + // *i == '%' and *(i+1) == '%'; append only one '%' to the literal string + assert (curr.type == conv_unspecified || curr.type == conv_literal); + curr.type = conv_literal; + curr.literal_string += '%'; + } + else + { + assert(*i == '%'); + state = parsing_flags; + if (curr.type != conv_unspecified) + { + assert (curr.type == conv_literal); + res.push_back(curr); + curr.clear(); + } + } + ++i; + break; + + case parsing_flags: + switch (*i) + { + case '0': + curr.flags |= static_cast<unsigned long>(fmt_flag_zeropad); + ++i; + break; + + case '+': + curr.flags |= static_cast<unsigned long>(fmt_flag_plus); + ++i; + break; + + case '-': + curr.flags |= static_cast<unsigned long>(fmt_flag_left); + ++i; + break; + + case ' ': + curr.flags |= static_cast<unsigned long>(fmt_flag_space); + ++i; + break; + + case '#': + curr.flags |= static_cast<unsigned long>(fmt_flag_special); 
+ ++i; + break; + + default: + state = parsing_width; + break; + } + break; + + case parsing_width: + while (isdigit(*i)) + { + curr.width *= 10; + curr.width += (*i - '0'); + ++i; + } + state = parsing_precision; + break; + + case parsing_precision: + if (*i == '.') + { + ++i; + while (isdigit(*i)) + { + curr.precision *= 10; + curr.precision += (*i - '0'); + ++i; + } + } + state = parsing_conversion_specifier; + break; + + case parsing_conversion_specifier: + switch (*i) + { + + default: + if (curr.type == conv_unspecified) + throw semantic_error("no conversion specifier provided"); + + res.push_back(curr); + curr.clear(); + state = parsing_plain_data; + break; + + // Valid conversion types + case 's': + if (curr.type != conv_unspecified) + throw semantic_error("multiple conversion types supplied"); + curr.type = conv_string; + ++i; + break; + + case 'd': + case 'i': + if (curr.type != conv_unspecified) + throw semantic_error("multiple conversion types supplied"); + curr.type = conv_signed_decimal; + ++i; + break; + + case 'o': + if (curr.type != conv_unspecified) + throw semantic_error("multiple conversion types supplied"); + curr.type = conv_unsigned_octal; + ++i; + break; + + case 'u': + if (curr.type != conv_unspecified) + throw semantic_error("multiple conversion types supplied"); + curr.type = conv_unsigned_decimal; + ++i; + break; + + case 'X': + if (curr.type != conv_unspecified) + throw semantic_error("multiple conversion types supplied"); + curr.type = conv_unsigned_uppercase_hex; + ++i; + + case 'x': + if (curr.type != conv_unspecified) + throw semantic_error("multiple conversion types supplied"); + curr.type = conv_unsigned_lowercase_hex; + ++i; + break; + + // We prohibit users passing any funny stuff through which might + // make linux's printf function do naughty things. 
+ case 'p': + case 'n': + case 'c': + case 'q': + case 'j': + case 't': + + case ',': + case '.': + case '*': + + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'h': + case 'H': + case 'I': + case 'l': + case 'L': + case 'z': + case 'Z': + string err("prohibited conversion character '"); + err += *i; + err += '"'; + throw parse_error(err); + } + break; + } + } + + // Flush final component + if (curr.type != conv_unspecified) + res.push_back(curr); + + return res; +} + + +void print_format::print (ostream& o) const +{ + string name = (string(print_to_stream ? "" : "s") + + string("print") + + string(print_with_format ? "f" : "")); + o << name << "("; + if (print_with_format) + { + o << '"' << components_to_string(components) << "\", "; + } + for (vector<expression*>::const_iterator i = args.begin(); + i != args.end(); ++i) + { + if (i != args.begin()) + o << ", "; + (*i)->print(o); + } + o << ")"; +} + +void stat_op::print (ostream& o) const +{ + o << '@'; + switch (ctype) + { + case sc_average: + o << "avg("; + break; + + case sc_count: + o << "count("; + break; + + case sc_sum: + o << "sum("; + break; + + case sc_min: + o << "min("; + break; + + case sc_max: + o << "max("; + break; + } + stat->print(o); + o << ")"; +} + +void +hist_op::print (ostream& o) const +{ + o << '@'; + switch (htype) + { + case hist_linear: + assert(params.size() == 3); + o << "hist_linear("; + stat->print(o); + for (size_t i = 0; i < params.size(); ++i) + { + o << ", " << params[i]; + } + o << ")"; + break; + + case hist_log: + assert(params.size() == 1); + o << "hist_log("; + stat->print(o); + for (size_t i = 0; i < params.size(); ++i) + { + o << ", " << params[i]; + } + o << ")"; + break; + } +} + ostream& operator << (ostream& o, const statement& k) { k.print (o); @@ -706,6 +1078,147 @@ functioncall::visit (visitor* u) u->visit_functioncall (this); } +void +print_format::visit (visitor *u) +{ + u->visit_print_format (this); +} + +void +stat_op::visit 
(visitor *u) +{ + u->visit_stat_op (this); +} + +void +hist_op::visit (visitor *u) +{ + u->visit_hist_op (this); +} + +void +indexable::print_indexable (std::ostream& o) const +{ + const symbol *sym; + const hist_op *hist; + classify_const_indexable(this, sym, hist); + if (sym) + sym->print (o); + else + { + assert (hist); + hist->print (o); + } +} + +void +indexable::visit_indexable (visitor* u) +{ + symbol *sym; + hist_op *hist; + classify_indexable(this, sym, hist); + if (sym) + sym->visit (u); + else + { + assert (hist); + hist->visit (u); + } +} + + +bool +indexable::is_symbol(symbol *& sym_out) +{ + sym_out = NULL; + return false; +} + +bool +indexable::is_hist_op(hist_op *& hist_out) +{ + hist_out = NULL; + return false; +} + +bool +indexable::is_const_symbol(const symbol *& sym_out) const +{ + sym_out = NULL; + return false; +} + +bool +indexable::is_const_hist_op(const hist_op *& hist_out) const +{ + hist_out = NULL; + return false; +} + +bool +symbol::is_symbol(symbol *& sym_out) +{ + sym_out = this; + return true; +} + +bool +symbol::is_const_symbol(const symbol *& sym_out) const +{ + sym_out = this; + return true; +} + +const token * +symbol::get_tok() const +{ + return tok; +} + +bool +hist_op::is_hist_op(hist_op *& hist_out) +{ + hist_out = this; + return true; +} + +bool +hist_op::is_const_hist_op(const hist_op *& hist_out) const +{ + hist_out = this; + return true; +} + +const token * +hist_op::get_tok() const +{ + return tok; +} + +void +classify_indexable(indexable* ix, + symbol *& array_out, + hist_op *& hist_out) +{ + array_out = NULL; + hist_out = NULL; + if (!(ix->is_symbol (array_out) || ix->is_hist_op (hist_out))) + throw semantic_error("Expecting symbol or histogram operator", ix->get_tok()); + if (ix && !(hist_out || array_out)) + throw semantic_error("Failed to classify indexable", ix->get_tok()); +} + +void +classify_const_indexable(const indexable* ix, + const symbol *& array_out, + const hist_op *& hist_out) +{ + array_out = NULL; + 
hist_out = NULL; + if (!(ix->is_const_symbol(array_out) || ix->is_const_hist_op(hist_out))) + throw semantic_error("Expecting symbol or histogram operator", ix->get_tok()); +} + // ------------------------------------------------------------------------ bool @@ -921,6 +1434,25 @@ traversing_visitor::visit_functioncall (functioncall* e) e->args[i]->visit (this); } +void +traversing_visitor::visit_print_format (print_format* e) +{ + for (unsigned i=0; i<e->args.size(); i++) + e->args[i]->visit (this); +} + +void +traversing_visitor::visit_stat_op (stat_op* e) +{ + e->stat->visit (this); +} + +void +traversing_visitor::visit_hist_op (hist_op* e) +{ + e->stat->visit (this); +} + // ------------------------------------------------------------------------ @@ -1110,6 +1642,24 @@ throwing_visitor::visit_functioncall (functioncall* e) throwone (e->tok); } +void +throwing_visitor::visit_print_format (print_format* e) +{ + throwone (e->tok); +} + +void +throwing_visitor::visit_stat_op (stat_op* e) +{ + throwone (e->tok); +} + +void +throwing_visitor::visit_hist_op (hist_op* e) +{ + throwone (e->tok); +} + // ------------------------------------------------------------------------ @@ -1188,10 +1738,12 @@ deep_copy_visitor::visit_foreach_loop (foreach_loop* s) require <symbol*> (this, &sym, s->indexes[i]); n->indexes.push_back(sym); } - n->base = s->base; - n->base_referent = NULL; + + require <indexable*> (this, &(n->base), s->base); + n->sort_direction = s->sort_direction; n->sort_column = s->sort_column; + require <statement*> (this, &(n->block), s->block); provide <foreach_loop*> (this, n); } @@ -1396,8 +1948,9 @@ deep_copy_visitor::visit_arrayindex (arrayindex* e) { arrayindex* n = new arrayindex; n->tok = e->tok; - n->base = e->base; - n->referent = NULL; + + require <indexable*> (this, &(n->base), e->base); + for (unsigned i = 0; i < e->indexes.size(); ++i) { expression* ne; @@ -1423,6 +1976,44 @@ deep_copy_visitor::visit_functioncall (functioncall* e) provide 
<functioncall*> (this, n); } +void +deep_copy_visitor::visit_print_format (print_format* e) +{ + print_format* n = new print_format; + n->tok = e->tok; + n->print_with_format = e->print_with_format; + n->print_to_stream = e->print_to_stream; + n->components = e->components; + for (unsigned i = 0; i < e->args.size(); ++i) + { + expression* na; + require <expression*> (this, &na, e->args[i]); + n->args.push_back(na); + } + provide <print_format*> (this, n); +} + +void +deep_copy_visitor::visit_stat_op (stat_op* e) +{ + stat_op* n = new stat_op; + n->tok = e->tok; + n->ctype = e->ctype; + require <expression*> (this, &(n->stat), e->stat); + provide <stat_op*> (this, n); +} + +void +deep_copy_visitor::visit_hist_op (hist_op* e) +{ + hist_op* n = new hist_op; + n->tok = e->tok; + n->htype = e->htype; + n->params = e->params; + require <expression*> (this, &(n->stat), e->stat); + provide <hist_op*> (this, n); +} + block* deep_copy_visitor::deep_copy (block* b) { @@ -172,15 +172,50 @@ struct assignment: public binary_expression void visit (visitor* u); }; +struct symbol; +struct hist_op; +struct indexable +{ + // This is a helper class which, type-wise, acts as a disjoint union + // of symbols and histograms. You can ask it whether it's a + // histogram or a symbol, and downcast accordingly. + void print_indexable (std::ostream& o) const; + void visit_indexable (visitor* u); + virtual bool is_symbol(symbol *& sym_out); + virtual bool is_hist_op(hist_op *& hist_out); + virtual bool is_const_symbol(const symbol *& sym_out) const; + virtual bool is_const_hist_op(const hist_op *& hist_out) const; + virtual const token *get_tok() const = 0; + virtual ~indexable() {} +}; + +// Perform a downcast to one out-value and NULL the other, throwing an +// exception if neither downcast succeeds. This is (sadly) about the +// best we can accomplish in C++. 
+void +classify_indexable(indexable* ix, + symbol *& array_out, + hist_op *& hist_out); + +void +classify_const_indexable(const indexable* ix, + symbol const *& array_out, + hist_op const *& hist_out); class vardecl; -struct symbol: public expression +struct symbol: + public expression, + public indexable { std::string name; vardecl *referent; symbol (); void print (std::ostream& o) const; void visit (visitor* u); + // overrides of type 'indexable' + const token *get_tok() const; + bool is_const_symbol(const symbol *& sym_out) const; + bool is_symbol(symbol *& sym_out); }; @@ -200,9 +235,8 @@ struct target_symbol : public expression struct arrayindex: public expression { - std::string base; std::vector<expression*> indexes; - vardecl *referent; + indexable *base; arrayindex (); void print (std::ostream& o) const; void visit (visitor* u); @@ -221,6 +255,97 @@ struct functioncall: public expression }; +struct print_format: public expression +{ + bool print_with_format; + bool print_to_stream; + + enum format_flag + { + fmt_flag_zeropad = 1, + fmt_flag_plus = 2, + fmt_flag_space = 4, + fmt_flag_left = 8, + fmt_flag_special = 16 + }; + + enum conversion_type + { + conv_unspecified, + conv_signed_decimal, + conv_unsigned_decimal, + conv_unsigned_octal, + conv_unsigned_uppercase_hex, + conv_unsigned_lowercase_hex, + conv_string, + conv_literal + }; + + struct format_component + { + unsigned long flags; + unsigned width; + unsigned precision; + conversion_type type; + std::string literal_string; + void clear() + { + flags = 0; + width = 0; + precision = 0; + type = conv_unspecified; + literal_string.clear(); + } + }; + + std::vector<format_component> components; + std::vector<expression*> args; + + static std::string components_to_string(std::vector<format_component> const & components); + static std::vector<format_component> string_to_components(std::string const & str); + + void print (std::ostream& o) const; + void visit (visitor* u); +}; + + +enum stat_component_type 
+ { + sc_average, + sc_count, + sc_sum, + sc_min, + sc_max, + }; + +struct stat_op: public expression +{ + stat_component_type ctype; + expression* stat; + void print (std::ostream& o) const; + void visit (visitor* u); +}; + +enum histogram_type + { + hist_linear, + hist_log + }; + +struct hist_op: public indexable +{ + const token* tok; + histogram_type htype; + expression* stat; + std::vector<int64_t> params; + void print (std::ostream& o) const; + void visit (visitor* u); + // overrides of type 'indexable' + const token *get_tok() const; + bool is_const_hist_op(const hist_op *& hist_out) const; + bool is_hist_op(hist_op *& hist_out); +}; + // ------------------------------------------------------------------------ @@ -316,8 +441,7 @@ struct foreach_loop: public statement { // this part is a specialization of arrayindex std::vector<symbol*> indexes; - std::string base; - vardecl* base_referent; + indexable *base; int sort_direction; // -1: decreasing, 0: none, 1: increasing unsigned sort_column; // 0: value, 1..N: index @@ -486,6 +610,9 @@ struct visitor virtual void visit_target_symbol (target_symbol* e) = 0; virtual void visit_arrayindex (arrayindex* e) = 0; virtual void visit_functioncall (functioncall* e) = 0; + virtual void visit_print_format (print_format* e) = 0; + virtual void visit_stat_op (stat_op* e) = 0; + virtual void visit_hist_op (hist_op* e) = 0; }; @@ -523,6 +650,9 @@ struct traversing_visitor: public visitor void visit_target_symbol (target_symbol* e); void visit_arrayindex (arrayindex* e); void visit_functioncall (functioncall* e); + void visit_print_format (print_format* e); + void visit_stat_op (stat_op* e); + void visit_hist_op (hist_op* e); }; @@ -565,6 +695,9 @@ struct throwing_visitor: public visitor void visit_target_symbol (target_symbol* e); void visit_arrayindex (arrayindex* e); void visit_functioncall (functioncall* e); + void visit_print_format (print_format* e); + void visit_stat_op (stat_op* e); + void visit_hist_op (hist_op* e); 
}; // A visitor which performs a deep copy of the root node it's applied @@ -609,6 +742,9 @@ struct deep_copy_visitor: public visitor virtual void visit_target_symbol (target_symbol* e); virtual void visit_arrayindex (arrayindex* e); virtual void visit_functioncall (functioncall* e); + virtual void visit_print_format (print_format* e); + virtual void visit_stat_op (stat_op* e); + virtual void visit_hist_op (hist_op* e); }; template <typename T> static void @@ -624,6 +760,32 @@ require (deep_copy_visitor* v, T* dst, T src) } } +template <> static void +require <indexable *> (deep_copy_visitor* v, indexable** dst, indexable* src) +{ + if (src != NULL) + { + symbol *array_src=NULL, *array_dst=NULL; + hist_op *hist_src=NULL, *hist_dst=NULL; + + classify_indexable(src, array_src, hist_src); + + *dst = NULL; + + if (array_src) + { + require <symbol*> (v, &array_dst, array_src); + *dst = array_dst; + } + else + { + require <hist_op*> (v, &hist_dst, hist_src); + *dst = hist_dst; + } + assert (*dst); + } +} + template <typename T> static void provide (deep_copy_visitor* v, T src) { diff --git a/testsuite/buildok/printf.stp b/testsuite/buildok/printf.stp new file mode 100755 index 00000000..4fd14ad6 --- /dev/null +++ b/testsuite/buildok/printf.stp @@ -0,0 +1,29 @@ +#! stap -p4 + +# test the translatability of the formatted printing operators + +function foo() { + return 10 +} + +probe begin +{ + x = sprintf("take %d steps forward, %d steps back\n", 3, 2) + printf("take %d steps forward, %d steps back\n", 3, 2) + printf("take %d steps forward, %d steps back\n", 3+1, 2*2) + + bob = "bob" + alice = "alice" + print(bob) + print(alice) + print("hello") + print(10) + printf("%s phoned %s %4.4x times\n", bob, alice, 3456) + printf("%s phoned %s %+4d times\n", bob . alice, alice, 3456) + printf("%s phoned %s %.4x times\n", bob, alice . 
bob, 3456) + printf("%s phoned %s %-i times\n", sprintf("%s%s", bob, bob), sprint(alice), 3456) + printf("%s except after %s\n", + sprintf("%s before %s", + sprint(1), sprint(3)), + sprint("C")) +} diff --git a/translate.cxx b/translate.cxx index 42ce3e45..ef401c76 100644 --- a/translate.cxx +++ b/translate.cxx @@ -107,7 +107,7 @@ struct c_unparser: public unparser, public visitor tmpvar gensym(exp_type ty); var getvar(vardecl* v, token const* tok = NULL); - itervar getiter(foreach_loop* f); + itervar getiter(symbol* s); mapvar getmap(vardecl* v, token const* tok = NULL); void load_map_indices(arrayindex* e, @@ -147,6 +147,9 @@ struct c_unparser: public unparser, public visitor void visit_target_symbol (target_symbol* e); void visit_arrayindex (arrayindex* e); void visit_functioncall (functioncall* e); + void visit_print_format (print_format* e); + void visit_stat_op (stat_op* e); + void visit_hist_op (hist_op* e); }; // A shadow visitor, meant to generate temporary variable declarations @@ -180,6 +183,7 @@ struct c_tmpcounter: void visit_assignment (assignment* e); void visit_arrayindex (arrayindex* e); void visit_functioncall (functioncall* e); + void visit_print_format (print_format* e); }; struct c_unparser_assignment: @@ -558,8 +562,8 @@ class itervar public: - itervar (foreach_loop* e, unsigned & counter) - : referent_ty(e->base_referent->type), + itervar (symbol* e, unsigned & counter) + : referent_ty(e->referent->type), name("__tmp" + stringify(counter++)) { if (referent_ty != pe_long && referent_ty != pe_string) @@ -1462,9 +1466,9 @@ c_unparser::getmap(vardecl *v, token const *tok) itervar -c_unparser::getiter(foreach_loop *f) +c_unparser::getiter(symbol *s) { - return itervar (f, tmpvar_counter); + return itervar (s, tmpvar_counter); } @@ -1625,75 +1629,99 @@ c_unparser::visit_for_loop (for_loop *s) void c_tmpcounter::visit_foreach_loop (foreach_loop *s) { - itervar iv = parent->getiter (s); - parent->o->newline() << iv.declare(); - s->block->visit 
(this); + symbol *array; + hist_op *hist; + classify_indexable (s->base, array, hist); + + if (array) + { + itervar iv = parent->getiter (array); + parent->o->newline() << iv.declare(); + s->block->visit (this); + } + else + { + // FIXME: fill in some logic here! + assert(false); + } } void c_unparser::visit_foreach_loop (foreach_loop *s) { - visit_statement (s, 1); + symbol *array; + hist_op *hist; + classify_indexable (s->base, array, hist); - mapvar mv = getmap (s->base_referent, s->tok); - itervar iv = getiter (s); - vector<var> keys; - - string ctr = stringify (label_counter++); - string toplabel = "top_" + ctr; - string contlabel = "continue_" + ctr; - string breaklabel = "break_" + ctr; - - // NB: structure parallels for_loop - - // initialization - - // sort array if desired - if (s->sort_direction) + if (array) { - varlock_w sort_guard (*this, mv); - o->newline() << "_stp_map_sort (" << mv.qname() << ", " - << s->sort_column << ", " << - s->sort_direction << ");"; + visit_statement (s, 1); + + mapvar mv = getmap (array->referent, s->tok); + itervar iv = getiter (array); + vector<var> keys; + + string ctr = stringify (label_counter++); + string toplabel = "top_" + ctr; + string contlabel = "continue_" + ctr; + string breaklabel = "break_" + ctr; + + // NB: structure parallels for_loop + + // initialization + + // sort array if desired + if (s->sort_direction) + { + varlock_w sort_guard (*this, mv); + o->newline() << "_stp_map_sort (" << mv.qname() << ", " + << s->sort_column << ", " << - s->sort_direction << ");"; + } + // NB: sort direction sense is opposite in runtime, thus the negation + + // XXX: There is a race condition here. Since we can't convert a + // write lock to a read lock, it is possible that another sort or update + // may get sandwiched between the release of sort_guard and the + // acquisition of guard. 
+ + varlock_r guard (*this, mv); + o->newline() << iv << " = " << iv.start (mv) << ";"; + + // condition + o->newline(-1) << toplabel << ":"; + o->newline(1) << "if (! (" << iv << ")) goto " << breaklabel << ";"; + + // body + loop_break_labels.push_back (breaklabel); + loop_continue_labels.push_back (contlabel); + o->newline() << "{"; + o->indent (1); + for (unsigned i = 0; i < s->indexes.size(); ++i) + { + // copy the iter values into the specified locals + var v = getvar (s->indexes[i]->referent); + c_assign (v, iv.get_key (v.type(), i), s->tok); + } + s->block->visit (this); + o->newline(-1) << "}"; + loop_break_labels.pop_back (); + loop_continue_labels.pop_back (); + + // iteration + o->newline(-1) << contlabel << ":"; + o->newline(1) << iv << " = " << iv.next (mv) << ";"; + o->newline() << "goto " << toplabel << ";"; + + // exit + o->newline(-1) << breaklabel << ":"; + o->newline(1) << "; /* dummy statement */"; + // varlock dtor will show up here } - // NB: sort direction sense is opposite in runtime, thus the negation - - // XXX: There is a race condition here. Since we can't convert a - // write lock to a read lock, it is possible that another sort or update - // may get sandwiched between the release of sort_guard and the - // acquisition of guard. - - varlock_r guard (*this, mv); - o->newline() << iv << " = " << iv.start (mv) << ";"; - - // condition - o->newline(-1) << toplabel << ":"; - o->newline(1) << "if (! (" << iv << ")) goto " << breaklabel << ";"; - - // body - loop_break_labels.push_back (breaklabel); - loop_continue_labels.push_back (contlabel); - o->newline() << "{"; - o->indent (1); - for (unsigned i = 0; i < s->indexes.size(); ++i) + else { - // copy the iter values into the specified locals - var v = getvar (s->indexes[i]->referent); - c_assign (v, iv.get_key (v.type(), i), s->tok); + // FIXME: fill in some logic here! 
+ assert(false); } - s->block->visit (this); - o->newline(-1) << "}"; - loop_break_labels.pop_back (); - loop_continue_labels.pop_back (); - - // iteration - o->newline(-1) << contlabel << ":"; - o->newline(1) << iv << " = " << iv.next (mv) << ";"; - o->newline() << "goto " << toplabel << ";"; - - // exit - o->newline(-1) << breaklabel << ":"; - o->newline(1) << "; /* dummy statement */"; - // varlock dtor will show up here } @@ -1756,14 +1784,26 @@ delete_statement_operand_visitor::visit_symbol (symbol* e) void delete_statement_operand_visitor::visit_arrayindex (arrayindex* e) { - vector<tmpvar> idx; - parent->load_map_indices (e, idx); + symbol *array; + hist_op *hist; + classify_indexable (e->base, array, hist); - { - mapvar mvar = parent->getmap (e->referent, e->tok); - varlock_w guard (*parent, mvar); - parent->o->newline() << mvar.del (idx) << ";"; - } + if (array) + { + vector<tmpvar> idx; + parent->load_map_indices (e, idx); + + { + mapvar mvar = parent->getmap (array->referent, e->tok); + varlock_w guard (*parent, mvar); + parent->o->newline() << mvar.del (idx) << ";"; + } + } + else + { + // FIXME: fill in some logic here! + assert(false); + } } @@ -1939,40 +1979,64 @@ c_unparser::visit_logical_and_expr (logical_and_expr* e) void c_tmpcounter::visit_array_in (array_in* e) { - vardecl* r = e->operand->referent; + symbol *array; + hist_op *hist; + classify_indexable (e->operand->base, array, hist); + + if (array) + { + vardecl* r = array->referent; - // One temporary per index dimension. - for (unsigned i=0; i<r->index_types.size(); i++) + // One temporary per index dimension. + for (unsigned i=0; i<r->index_types.size(); i++) + { + tmpvar ix = parent->gensym (r->index_types[i]); + ix.declare (*parent); + e->operand->indexes[i]->visit(this); + } + + // A boolean result. 
+ tmpvar res = parent->gensym (e->type); + res.declare (*parent); + } + else { - tmpvar ix = parent->gensym (r->index_types[i]); - ix.declare (*parent); - e->operand->indexes[i]->visit(this); + // FIXME: fill in some logic here! + assert(false); } - - // A boolean result. - tmpvar res = parent->gensym (e->type); - res.declare (*parent); } void c_unparser::visit_array_in (array_in* e) { - stmt_expr block(*this); - - vector<tmpvar> idx; - load_map_indices (e->operand, idx); - o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; - - tmpvar res = gensym (pe_long); - - { // block used to control varlock_r lifespan - mapvar mvar = getmap (e->operand->referent, e->tok); - varlock_r guard (*this, mvar); - c_assign (res, mvar.exists(idx), e->tok); - } + symbol *array; + hist_op *hist; + classify_indexable (e->operand->base, array, hist); + + if (array) + { + stmt_expr block(*this); + + vector<tmpvar> idx; + load_map_indices (e->operand, idx); + o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; + + tmpvar res = gensym (pe_long); + + { // block used to control varlock_r lifespan + mapvar mvar = getmap (array->referent, e->tok); + varlock_r guard (*this, mvar); + c_assign (res, mvar.exists(idx), e->tok); + } - o->newline() << res << ";"; + o->newline() << res << ";"; + } + else + { + // FIXME: fill in some logic here! + assert(false); + } } @@ -2248,19 +2312,31 @@ c_unparser::visit_target_symbol (target_symbol* e) void c_tmpcounter::visit_arrayindex (arrayindex *e) { - vardecl* r = e->referent; + symbol *array; + hist_op *hist; + classify_indexable (e->base, array, hist); - // One temporary per index dimension. - for (unsigned i=0; i<r->index_types.size(); i++) + if (array) { - tmpvar ix = parent->gensym (r->index_types[i]); - ix.declare (*parent); - e->indexes[i]->visit(this); + vardecl* r = array->referent; + + // One temporary per index dimension. 
+ for (unsigned i=0; i<r->index_types.size(); i++) + { + tmpvar ix = parent->gensym (r->index_types[i]); + ix.declare (*parent); + e->indexes[i]->visit(this); + } + + // The index-expression result. + tmpvar res = parent->gensym (e->type); + res.declare (*parent); + } + else + { + // FIXME: fill in some logic here! + assert(false); } - - // The index-expression result. - tmpvar res = parent->gensym (e->type); - res.declare (*parent); } @@ -2268,174 +2344,224 @@ void c_unparser::load_map_indices(arrayindex *e, vector<tmpvar> & idx) { - idx.clear(); + symbol *array; + hist_op *hist; + classify_indexable (e->base, array, hist); - vardecl* r = e->referent; - - if (r->index_types.size() == 0 || - r->index_types.size() != e->indexes.size()) - throw semantic_error ("invalid array reference", e->tok); - - for (unsigned i=0; i<r->index_types.size(); i++) + if (array) { - if (r->index_types[i] != e->indexes[i]->type) - throw semantic_error ("array index type mismatch", e->indexes[i]->tok); + idx.clear(); - tmpvar ix = gensym (r->index_types[i]); - o->newline() << "c->last_stmt = " - << lex_cast_qstring(*e->indexes[i]->tok) << ";"; - c_assign (ix.qname(), e->indexes[i], "array index copy"); - idx.push_back (ix); + vardecl* r = array->referent; + + if (r->index_types.size() == 0 || + r->index_types.size() != e->indexes.size()) + throw semantic_error ("invalid array reference", e->tok); + + for (unsigned i=0; i<r->index_types.size(); i++) + { + if (r->index_types[i] != e->indexes[i]->type) + throw semantic_error ("array index type mismatch", e->indexes[i]->tok); + + tmpvar ix = gensym (r->index_types[i]); + o->newline() << "c->last_stmt = " + << lex_cast_qstring(*e->indexes[i]->tok) << ";"; + c_assign (ix.qname(), e->indexes[i], "array index copy"); + idx.push_back (ix); + } } + else + { + // FIXME: fill in some logic here! 
+ assert(false); + } } void c_unparser::visit_arrayindex (arrayindex* e) { + symbol *array; + hist_op *hist; + classify_indexable (e->base, array, hist); - // Visiting an statistic-valued array in a non-lvalue context is prohibited. - if (e->referent->type == pe_stats) + if (array) + { + + // Visiting an statistic-valued array in a non-lvalue context is prohibited. + if (array->referent->type == pe_stats) throw semantic_error ("statistic-valued array in rvalue context", e->tok); - stmt_expr block(*this); + stmt_expr block(*this); - // NB: Do not adjust the order of the next few lines; the tmpvar - // allocation order must remain the same between - // c_unparser::visit_arrayindex and c_tmpcounter::visit_arrayindex + // NB: Do not adjust the order of the next few lines; the tmpvar + // allocation order must remain the same between + // c_unparser::visit_arrayindex and c_tmpcounter::visit_arrayindex - vector<tmpvar> idx; - load_map_indices (e, idx); - tmpvar res = gensym (e->type); + vector<tmpvar> idx; + load_map_indices (e, idx); + tmpvar res = gensym (e->type); - // NB: because these expressions are nestable, emit this construct - // thusly: - // ({ tmp0=(idx0); ... tmpN=(idxN); - // lock (array); - // res = fetch (array, idx0...N); - // unlock (array); - // res; }) - // - // we store all indices in temporary variables to avoid nasty - // reentrancy issues that pop up with nested expressions: - // e.g. a[a[c]=5] could deadlock + // NB: because these expressions are nestable, emit this construct + // thusly: + // ({ tmp0=(idx0); ... tmpN=(idxN); + // lock (array); + // res = fetch (array, idx0...N); + // unlock (array); + // res; }) + // + // we store all indices in temporary variables to avoid nasty + // reentrancy issues that pop up with nested expressions: + // e.g. 
a[a[c]=5] could deadlock - { // block used to control varlock_r lifespan - mapvar mvar = getmap (e->referent, e->tok); - o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; - varlock_r guard (*this, mvar); - c_assign (res, mvar.get(idx), e->tok); - } + { // block used to control varlock_r lifespan + mapvar mvar = getmap (array->referent, e->tok); + o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; + varlock_r guard (*this, mvar); + c_assign (res, mvar.get(idx), e->tok); + } - o->newline() << res << ";"; + o->newline() << res << ";"; + } + else + { + // FIXME: fill in some logic here! + assert(false); + } } void c_tmpcounter_assignment::visit_arrayindex (arrayindex *e) { - vardecl* r = e->referent; + symbol *array; + hist_op *hist; + classify_indexable (e->base, array, hist); - // One temporary per index dimension. - for (unsigned i=0; i<r->index_types.size(); i++) + if (array) { - tmpvar ix = parent->parent->gensym (r->index_types[i]); - ix.declare (*(parent->parent)); - e->indexes[i]->visit(parent); - } + + vardecl* r = array->referent; + + // One temporary per index dimension. + for (unsigned i=0; i<r->index_types.size(); i++) + { + tmpvar ix = parent->parent->gensym (r->index_types[i]); + ix.declare (*(parent->parent)); + e->indexes[i]->visit(parent); + } - // The expression rval, lval, and result. - exp_type ty = rvalue ? rvalue->type : e->type; - tmpvar rval = parent->parent->gensym (ty); - rval.declare (*(parent->parent)); + // The expression rval, lval, and result. + exp_type ty = rvalue ? 
rvalue->type : e->type; + tmpvar rval = parent->parent->gensym (ty); + rval.declare (*(parent->parent)); - tmpvar lval = parent->parent->gensym (ty); - lval.declare (*(parent->parent)); + tmpvar lval = parent->parent->gensym (ty); + lval.declare (*(parent->parent)); - tmpvar res = parent->parent->gensym (ty); - res.declare (*(parent->parent)); + tmpvar res = parent->parent->gensym (ty); + res.declare (*(parent->parent)); - if (rvalue) - rvalue->visit (parent); + if (rvalue) + rvalue->visit (parent); + } + else + { + // FIXME: fill in some logic here! + assert(false); + } } void c_unparser_assignment::visit_arrayindex (arrayindex *e) { - stmt_expr block(*parent); + symbol *array; + hist_op *hist; + classify_indexable (e->base, array, hist); - translator_output *o = parent->o; + if (array) + { - if (e->referent->index_types.size() == 0) - throw semantic_error ("unexpected reference to scalar", e->tok); + stmt_expr block(*parent); - // nb: Do not adjust the order of the next few lines; the tmpvar - // allocation order must remain the same between - // c_unparser_assignment::visit_arrayindex and - // c_tmpcounter_assignment::visit_arrayindex - - vector<tmpvar> idx; - parent->load_map_indices (e, idx); - exp_type ty = rvalue ? rvalue->type : e->type; - tmpvar rvar = parent->gensym (ty); - tmpvar lvar = parent->gensym (ty); - tmpvar res = parent->gensym (ty); - - // NB: because these expressions are nestable, emit this construct - // thusly: - // ({ tmp0=(idx0); ... tmpN=(idxN); rvar=(rhs); lvar; res; - // lock (array); - // lvar = get (array,idx0...N); // if necessary - // assignop (res, lvar, rvar); - // set (array, idx0...N, lvar); - // unlock (array); - // res; }) - // - // we store all indices in temporary variables to avoid nasty - // reentrancy issues that pop up with nested expressions: - // e.g. 
++a[a[c]=5] could deadlock - // - // - // There is an exception to the above form: if we're doign a <<< assigment to - // a statistic-valued map, there's a special form we follow: - // - // ({ tmp0=(idx0); ... tmpN=(idxN); rvar=(rhs); lvar; res; - // lock (array); - // _stp_map_add_stat (array, idx0...N, rvar); - // unlock (array); - // rvar; }) - // - // To simplify variable-allocation rules, we assign rvar to lvar and - // res in this block as well, even though they are technically - // superfluous. + translator_output *o = parent->o; - prepare_rvalue (op, rvar, e->tok); + if (array->referent->index_types.size() == 0) + throw semantic_error ("unexpected reference to scalar", e->tok); - if (op == "<<<") - { - assert (e->type == pe_stats); - assert (rvalue->type == pe_long); + // nb: Do not adjust the order of the next few lines; the tmpvar + // allocation order must remain the same between + // c_unparser_assignment::visit_arrayindex and + // c_tmpcounter_assignment::visit_arrayindex + + vector<tmpvar> idx; + parent->load_map_indices (e, idx); + exp_type ty = rvalue ? rvalue->type : e->type; + tmpvar rvar = parent->gensym (ty); + tmpvar lvar = parent->gensym (ty); + tmpvar res = parent->gensym (ty); + + // NB: because these expressions are nestable, emit this construct + // thusly: + // ({ tmp0=(idx0); ... tmpN=(idxN); rvar=(rhs); lvar; res; + // lock (array); + // lvar = get (array,idx0...N); // if necessary + // assignop (res, lvar, rvar); + // set (array, idx0...N, lvar); + // unlock (array); + // res; }) + // + // we store all indices in temporary variables to avoid nasty + // reentrancy issues that pop up with nested expressions: + // e.g. ++a[a[c]=5] could deadlock + // + // + // There is an exception to the above form: if we're doign a <<< assigment to + // a statistic-valued map, there's a special form we follow: + // + // ({ tmp0=(idx0); ... 
tmpN=(idxN); rvar=(rhs); lvar; res; + // lock (array); + // _stp_map_add_stat (array, idx0...N, rvar); + // unlock (array); + // rvar; }) + // + // To simplify variable-allocation rules, we assign rvar to lvar and + // res in this block as well, even though they are technically + // superfluous. + + prepare_rvalue (op, rvar, e->tok); + + if (op == "<<<") + { + assert (e->type == pe_stats); + assert (rvalue->type == pe_long); + + mapvar mvar = parent->getmap (array->referent, e->tok); + o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; + varlock_w guard (*parent, mvar); + o->newline() << mvar.add (idx, rvar) << ";"; + // dummy assignments + o->newline() << lvar << " = " << rvar << ";"; + o->newline() << res << " = " << rvar << ";"; + } + else + { // block used to control varlock_w lifespan + mapvar mvar = parent->getmap (array->referent, e->tok); + o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; + varlock_w guard (*parent, mvar); + if (op != "=") // don't bother fetch slot if we will just overwrite it + parent->c_assign (lvar, mvar.get(idx), e->tok); + c_assignop (res, lvar, rvar, e->tok); + o->newline() << mvar.set (idx, lvar) << ";"; + } - mapvar mvar = parent->getmap (e->referent, e->tok); - o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; - varlock_w guard (*parent, mvar); - o->newline() << mvar.add (idx, rvar) << ";"; - // dummy assignments - o->newline() << lvar << " = " << rvar << ";"; - o->newline() << res << " = " << rvar << ";"; - } + o->newline() << res << ";"; + } else - { // block used to control varlock_w lifespan - mapvar mvar = parent->getmap (e->referent, e->tok); - o->newline() << "c->last_stmt = " << lex_cast_qstring(*e->tok) << ";"; - varlock_w guard (*parent, mvar); - if (op != "=") // don't bother fetch slot if we will just overwrite it - parent->c_assign (lvar, mvar.get(idx), e->tok); - c_assignop (res, lvar, rvar, e->tok); - o->newline() << mvar.set (idx, lvar) << ";"; + { + // 
FIXME: fill in some logic here! + assert(false); } - - o->newline() << res << ";"; } @@ -2530,6 +2656,211 @@ c_unparser::visit_functioncall (functioncall* e) << ".__retvalue;"; } +struct hist_op_downcaster + : virtual public traversing_visitor +{ + hist_op *& hist; + + hist_op_downcaster (hist_op *& hist) + : hist(hist) + {} + + void visit_hist_op (hist_op* e) + { + hist = e; + } +}; + +static bool +expression_is_hist_op (expression *e, + hist_op *& hist) +{ + hist_op *h = NULL; + hist_op_downcaster d(h); + e->visit (&d); + if (static_cast<void*>(h) == static_cast<void*>(e)) + { + hist = h; + return true; + } + return false; +} + + +void +c_tmpcounter::visit_print_format (print_format* e) +{ + hist_op *hist; + if ((!e->print_with_format) && + (e->args.size() == 1) && + expression_is_hist_op (e->args[0], hist)) + { + } + else + { + // One temporary per argument + for (unsigned i=0; i < e->args.size(); i++) + { + tmpvar t = parent->gensym (e->args[i]->type); + if (e->args[i]->type == pe_unknown) + { + throw semantic_error("unknown type of arg to print operator", + e->args[i]->tok); + } + + t.declare (*parent); + e->args[i]->visit (this); + } + + // And the result + exp_type ty = e->print_to_stream ? pe_long : pe_string; + tmpvar res = parent->gensym (ty); + res.declare (*parent); + } +} + + +void +c_unparser::visit_print_format (print_format* e) +{ + // Print formats can contain a general argument list *or* a special + // type of argument which gets its own processing: a single, + // non-format-string'ed, histogram-type stat_op expression. + + hist_op *hist; + if ((!e->print_with_format) && + (e->args.size() == 1) && + expression_is_hist_op (e->args[0], hist)) + { + // FIXME: fill in some logic here! 
+ assert(false); + } + else + { + stmt_expr block(*this); + + // Compute actual arguments + vector<tmpvar> tmp; + + for (unsigned i=0; i<e->args.size(); i++) + { + tmpvar t = gensym(e->args[i]->type); + tmp.push_back(t); + + o->newline() << "c->last_stmt = " + << lex_cast_qstring(*e->args[i]->tok) << ";"; + c_assign (t.qname(), e->args[i], "print format actual argument evaluation"); + } + + std::vector<print_format::format_component> components; + + if (e->print_with_format) + { + components = e->components; + } + else + { + // Synthesize a print-format string if the user didn't + // provide one; the synthetic string simply contains one + // directive for each argument. + for (unsigned i = 0; i < e->args.size(); ++i) + { + print_format::format_component curr; + curr.clear(); + switch (e->args[i]->type) + { + case pe_unknown: + throw semantic_error("Cannot print unknown expression type", e->args[i]->tok); + case pe_stats: + throw semantic_error("Cannot print a raw stats object", e->args[i]->tok); + case pe_long: + curr.type = print_format::conv_signed_decimal; + break; + case pe_string: + curr.type = print_format::conv_string; + break; + } + components.push_back (curr); + } + } + + + // Allocate the result + exp_type ty = e->print_to_stream ? pe_long : pe_string; + tmpvar res = gensym (ty); + + // Make the [s]printf call + if (e->print_to_stream) + { + o->newline() << res.qname() << " = 0;"; + o->newline() << "_stp_printf ("; + } + else + o->newline() << "snprintf (" << res.qname() << ", MAXSTRINGLEN, "; + + o->line() << "\"" << print_format::components_to_string(components) << "\""; + + for (unsigned i = 0; i < tmp.size(); ++i) + { + o->line() << ", " << tmp[i].qname(); + } + o->line() << ");"; + o->newline() << res.qname() << ";"; + } +} + +void +c_unparser::visit_stat_op (stat_op* e) +{ + // + // Stat ops can be *applied* to two types of expression: + // + // 1. An arrayindex expression on a pe_stats-valued array. + // + // 2. A symbol of type pe_stats. 
+ // + // Stat ops can only *occur* in a limited set of circumstances: + // + // 1. Inside an arrayindex expression, as the base referent, when + // the stat_component_type is a histogram type. See + // c_unparser::visit_arrayindex for handling of this case. + // + // 2. Inside a foreach statement, as the base referent, when the + // stat_component_type is a histogram type. See + // c_unparser::visit_foreach_loop for handling this case. + // + // 3. Inside a print_format expression, as the sole argument, when + // the stat_component_type is a histogram type. See + // c_unparser::visit_print_format for handling this case. + // + // 4. Inside a normal rvalue context, when the stat_component_type + // is a scalar. That's this case. + // + + // FIXME: classify the expression the stat_op is being applied to, + // call appropriate stp_get_stat() / stp_pmap_get_stat() helper, + // then reach into resultant struct stat_data. + + switch (e->type) + { + case sc_average: + case sc_count: + case sc_sum: + case sc_min: + case sc_max: + + default: + assert(false); + break; + } +} + + +void +c_unparser::visit_hist_op (hist_op* e) +{ + assert(false); +} int |