Diffstat (limited to 'lib/puppet/parser/lexer.rb')
-rw-r--r--  lib/puppet/parser/lexer.rb  152
1 file changed, 91 insertions, 61 deletions
diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index bb4fdf9c9..26e6b60f5 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -11,11 +11,14 @@ end
 module Puppet::Parser; end

 class Puppet::Parser::Lexer
-    attr_reader :last, :file, :lexing_context
+    attr_reader :last, :file, :lexing_context, :token_queue

     attr_accessor :line, :indefine

-    # Our base token class.
+    def lex_error msg
+        raise Puppet::LexError.new(msg)
+    end
+
     class Token
         attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate

@@ -28,6 +31,7 @@ class Puppet::Parser::Lexer
             end
         end

+        # MQR: Why not just alias?
         %w{skip accumulate}.each do |method|
             define_method(method+"?") do
                 self.send(method)
@@ -142,10 +146,13 @@ class Puppet::Parser::Lexer
         '=~' => :MATCH,
         '!~' => :NOMATCH,
         %r{([a-z][-\w]*)?(::[a-z][-\w]*)+} => :CLASSNAME, # Require '::' in the class name, else we'd compete with NAME
-        %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
-    )
-
-    TOKENS.add_tokens "Whatever" => :DQTEXT, "Nomatter" => :SQTEXT, "alsonomatter" => :BOOLEAN
+        %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF,
+        "<string>" => :STRING,
+        "<dqstring up to first interpolation>" => :DQPRE,
+        "<dqstring between two interpolations>" => :DQMID,
+        "<dqstring after final interpolation>" => :DQPOST,
+        "<boolean>" => :BOOLEAN
+    )

     TOKENS.add_token :NUMBER, %r{\b(?:0[xX][0-9A-Fa-f]+|0?\d+(?:\.\d+)?(?:[eE]-?\d+)?)\b} do |lexer, value|
         [TOKENS[:NAME], value]
@@ -163,6 +170,9 @@ class Puppet::Parser::Lexer
         end
         [string_token, value]
     end
+    def (TOKENS[:NAME]).acceptable?(context={})
+        ![:DQPRE,:DQMID].include? context[:after]
+    end

     TOKENS.add_token :COMMENT, %r{#.*}, :accumulate => true, :skip => true do |lexer,value|
         value.sub!(/# ?/,'')
@@ -176,7 +186,7 @@ class Puppet::Parser::Lexer
         [self,value]
     end

-    regex_token = TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, value|
+    TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, value|
         # Make sure we haven't matched an escaped /
         while value[-2..-2] == '\\'
             other = lexer.scan_until(%r{/})
@@ -186,27 +196,40 @@ class Puppet::Parser::Lexer
         [self, Regexp.new(regex)]
     end

-    def regex_token.acceptable?(context={})
+    def (TOKENS[:REGEX]).acceptable?(context={})
         [:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH,:COMMA].include? context[:after]
     end

     TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true

     TOKENS.add_token :SQUOTE, "'" do |lexer, value|
-        value = lexer.slurpstring(value)
-        [TOKENS[:SQTEXT], value]
+        [TOKENS[:STRING], lexer.slurpstring(value).first ]
     end

-    TOKENS.add_token :DQUOTE, '"' do |lexer, value|
-        value = lexer.slurpstring(value)
-        [TOKENS[:DQTEXT], value]
+    DQ_initial_token_types      = {'$' => :DQPRE,'"' => :STRING}
+    DQ_continuation_token_types = {'$' => :DQMID,'"' => :DQPOST}
+
+    TOKENS.add_token :DQUOTE, /"/ do |lexer, value|
+        lexer.tokenize_interpolated_string(DQ_initial_token_types)
     end

-    TOKENS.add_token :VARIABLE, %r{\$(\w*::)*\w+} do |lexer, value|
-        value = value.sub(/^\$/, '')
-        [self, value]
+    TOKENS.add_token :DQCONT, /\}/ do |lexer, value|
+        lexer.tokenize_interpolated_string(DQ_continuation_token_types)
+    end
+    def (TOKENS[:DQCONT]).acceptable?(context={})
+        context[:string_interpolation_depth] > 0
     end

+    TOKENS.add_token :DOLLAR_VAR, %r{\$(\w*::)*\w+} do |lexer, value|
+        [TOKENS[:VARIABLE],value[1..-1]]
+    end
+
+    TOKENS.add_token :VARIABLE, %r{(\w*::)*\w+}
+    def (TOKENS[:VARIABLE]).acceptable?(context={})
+        [:DQPRE,:DQMID].include? context[:after]
+    end
+
+    TOKENS.sort_tokens

     @@pairs = {
@@ -244,9 +267,7 @@ class Puppet::Parser::Lexer
     def expected
         return nil if @expected.empty?
         name = @expected[-1]
-        raise "Could not find expected token %s" % name unless token = TOKENS.lookup(name)
-
-        return token
+        TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
     end

     # scan the whole file
@@ -274,22 +295,19 @@ class Puppet::Parser::Lexer
         }
     end

-    def find_string_token
-        matched_token = value = nil
+    def shift_token
+        @token_queue.shift
+    end

+    def find_string_token
         # We know our longest string token is three chars, so try each size in turn
         # until we either match or run out of chars.  This way our worst-case is three
-        # tries, where it is otherwise the number of string chars we have.  Also,
+        # tries, where it is otherwise the number of string tokens we have.  Also,
         # the lookups are optimized hash lookups, instead of regex scans.
-        [3, 2, 1].each do |i|
-            str = @scanner.peek(i)
-            if matched_token = TOKENS.lookup(str)
-                value = @scanner.scan(matched_token.regex)
-                break
-            end
-        end
-
-        return matched_token, value
+        #
+        s = @scanner.peek(3)
+        token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
+        [ token, token && @scanner.scan(token.regex) ]
     end

     # Find the next token that matches a regex.  We look for these first.
@@ -316,7 +334,7 @@ class Puppet::Parser::Lexer
     # Find the next token, returning the string and the token.
     def find_token
         @find += 1
-        find_regex_token || find_string_token
+        shift_token || find_regex_token || find_string_token
     end

     def indefine?
@@ -343,10 +361,15 @@ class Puppet::Parser::Lexer
         @skip = %r{[ \t]+}

         @namestack = []
+        @token_queue = []
         @indefine = false
         @expected = []
         @commentstack = [ ['', @line] ]
-        @lexing_context = {:after => nil, :start_of_line => true}
+        @lexing_context = {
+            :after => nil,
+            :start_of_line => true,
+            :string_interpolation_depth => 0
+        }
     end

     # Make any necessary changes to the token and/or value.
@@ -396,28 +419,17 @@ class Puppet::Parser::Lexer

     # this is the heart of the lexer
     def scan
         #Puppet.debug("entering scan")
-        raise Puppet::LexError.new("Invalid or empty string") unless @scanner
+        lex_error "Invalid or empty string" unless @scanner

         # Skip any initial whitespace.
         skip()

-        until @scanner.eos? do
+        until token_queue.empty? and @scanner.eos? do
             yielded = false
             matched_token, value = find_token

             # error out if we didn't match anything at all
-            if matched_token.nil?
-                nword = nil
-                # Try to pull a 'word' out of the remaining string.
-                if @scanner.rest =~ /^(\S+)/
-                    nword = $1
-                elsif @scanner.rest =~ /^(\s+)/
-                    nword = $1
-                else
-                    nword = @scanner.rest
-                end
-                raise "Could not match '%s'" % nword
-            end
+            lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

             newline = matched_token.name == :RETURN
@@ -433,6 +445,8 @@ class Puppet::Parser::Lexer
             end

             lexing_context[:after] = final_token.name unless newline
+            lexing_context[:string_interpolation_depth] += 1 if final_token.name == :DQPRE
+            lexing_context[:string_interpolation_depth] -= 1 if final_token.name == :DQPOST

             value = token_value[:value]
@@ -481,24 +495,40 @@ class Puppet::Parser::Lexer
         @scanner.scan_until(regex)
     end

-    # we've encountered an opening quote...
+    # we've encountered the start of a string...
     # slurp in the rest of the string and return it
-    def slurpstring(quote)
+    Valid_escapes_in_strings = %w{ \\ $ ' " n t s }+["\n"]
+    def slurpstring(terminators)
         # we search for the next quote that isn't preceded by a
         # backslash; the caret is there to match empty strings
-        str = @scanner.scan_until(/([^\\]|^)#{quote}/)
-        if str.nil?
-            raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
-                [self.last,self.rest])
-        else
-            str.sub!(/#{quote}\Z/,"")
-            str.gsub!(/\\#{quote}/,quote)
-        end
-
-        # Add to our line count for every carriage return in multi-line strings.
-        @line += str.count("\n")
+        str = @scanner.scan_until(/([^\\]|^)[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
+        @line += str.count("\n") # literal carriage returns add to the line count.
+        str.gsub!(/\\(.)/) {
+            case ch=$1
+            when 'n'; "\n"
+            when 't'; "\t"
+            when 's'; " "
+            else
+                if Valid_escapes_in_strings.include? ch
+                    ch
+                else
+                    Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}"
+                    "\\#{ch}"
+                end
+            end
+        }
+        [ str[0..-2],str[-1,1] ]
+    end

-        return str
+    def tokenize_interpolated_string(token_type)
+        value,terminator = slurpstring('"$')
+        token_queue << [TOKENS[token_type[terminator]],value]
+        while terminator == '$' and not @scanner.scan(/\{/)
+            token_queue << [TOKENS[:VARIABLE],@scanner.scan(%r{(\w*::)*\w+|[0-9]})]
+            value,terminator = slurpstring('"$')
+            token_queue << [TOKENS[DQ_continuation_token_types[terminator]],value]
+        end
+        token_queue.shift
     end

     # just parse a string, not a whole file
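
The heart of the change is that a double-quoted string is no longer lexed as
a single DQTEXT blob. tokenize_interpolated_string slices it into a DQPRE /
VARIABLE / DQMID / ... / DQPOST stream, parks the pieces on @token_queue, and
find_token drains that queue (via shift_token) before consulting the scanner
again, which is why scan's loop now runs until both the queue and the scanner
are empty. Below is a minimal standalone sketch of the slicing, assuming only
$name and simple ${name} interpolations; the committed code instead stops at
"${" and lets ordinary lexing run until DQCONT sees the closing brace:

    require 'strscan'

    # Split the tail of a double-quoted string -- the scanner sits just
    # past the opening quote -- into the token stream the diff produces.
    def tokenize_dq(scanner)
      tokens = []
      label = :DQPRE                      # token names taken from the diff
      loop do
        chunk = scanner.scan_until(/["$]/) or raise "unclosed quote"
        text, terminator = chunk[0..-2], chunk[-1, 1]
        if terminator == '"'              # no (further) interpolation
          tokens << [(label == :DQPRE ? :STRING : :DQPOST), text]
          return tokens
        end
        tokens << [label, text]
        braced = scanner.scan(/\{/)       # "${name}" as opposed to "$name"
        tokens << [:VARIABLE, scanner.scan(/(\w*::)*\w+/)]
        scanner.scan(/\}/) if braced
        label = :DQMID
      end
    end

    p tokenize_dq(StringScanner.new('foo ${bar} baz"'))
    # => [[:DQPRE, "foo "], [:VARIABLE, "bar"], [:DQPOST, " baz"]]
    p tokenize_dq(StringScanner.new('no variables here"'))
    # => [[:STRING, "no variables here"]]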
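Several tokens now carry a context predicate, acceptable?(context), consulted
during token matching (the call site is outside the visible hunks) against
lexing_context: REGEX is only legal after tokens such as NODE, LBRACE or
MATCH, DQCONT only while string_interpolation_depth is positive, NAME is
barred directly after DQPRE or DQMID, and the new bare-word VARIABLE is legal
only there. A sketch of the mechanism with a stand-in token object; in the
committed code the predicates are singleton methods on entries of TOKENS:

    # Default: a token is always acceptable.
    Token = Struct.new(:name) do
      def acceptable?(context = {})
        true
      end
    end

    variable = Token.new(:VARIABLE)
    # Bare words lex as VARIABLE only directly inside an interpolation,
    # i.e. immediately after a DQPRE or DQMID token.
    def variable.acceptable?(context = {})
      [:DQPRE, :DQMID].include? context[:after]
    end

    p variable.acceptable?(:after => :DQPRE)   # => true
    p variable.acceptable?(:after => :LBRACE)  # => false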
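find_string_token is also reworked: instead of looping over [3, 2, 1] it
peeks three characters once and tries a 3-, 2-, then 1-character hash lookup,
so the worst case is three optimized lookups and a longer operator still wins
over its prefixes. A hedged illustration with a hypothetical three-token
table (the names below are not Puppet's actual token set):

    STRING_TOKENS = { '<' => :LT, '<<' => :LSHIFT, '<<|' => :LCOLLECT }

    # Longest match first: try 3 chars, then 2, then 1.
    def lookup_longest(s)
      STRING_TOKENS[s[0, 3]] || STRING_TOKENS[s[0, 2]] || STRING_TOKENS[s[0, 1]]
    end

    p lookup_longest('<<| nodes')  # => :LCOLLECT, not :LT
    p lookup_longest('< 5')        # => :LT, after two misses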
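Finally, slurpstring changes contract: it takes a set of terminator
characters rather than a single quote, returns a [string, terminator] pair,
and translates escape sequences inline -- \n, \t and \s become newline, tab
and space, escapes listed in Valid_escapes_in_strings pass through
unescaped, and anything else is kept verbatim after a Puppet.warning. A
minimal sketch of just that gsub!, using the same escape list as the diff
but without the warning:

    VALID = %w{ \\ $ ' " n t s } + ["\n"]

    def unescape(str)
      str.gsub(/\\(.)/m) do
        case ch = $1
        when 'n' then "\n"
        when 't' then "\t"
        when 's' then ' '
        else VALID.include?(ch) ? ch : "\\#{ch}"   # unknown: keep backslash
        end
      end
    end

    p unescape('a\tb\$c\qd')  # => "a\tb$c\\qd" -- \q survives untranslated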