Diffstat (limited to 'lib/puppet/parser/lexer.rb')
-rw-r--r--  lib/puppet/parser/lexer.rb  152
1 file changed, 91 insertions(+), 61 deletions(-)
diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index bb4fdf9c9..26e6b60f5 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -11,11 +11,14 @@ end
module Puppet::Parser; end
class Puppet::Parser::Lexer
- attr_reader :last, :file, :lexing_context
+ attr_reader :last, :file, :lexing_context, :token_queue
attr_accessor :line, :indefine
- # Our base token class.
+ def lex_error msg
+ raise Puppet::LexError.new(msg)
+ end
+
class Token
attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate
@@ -28,6 +31,7 @@ class Puppet::Parser::Lexer
end
end
+ # MQR: Why not just alias?
%w{skip accumulate}.each do |method|
define_method(method+"?") do
self.send(method)
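A note on the question above: alias_method would indeed be the more direct spelling. A minimal sketch of that alternative (not what the commit does):

    class Token
      # Expose the skip/accumulate flags under their predicate names.
      alias_method :skip?, :skip
      alias_method :accumulate?, :accumulate
    end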
@@ -142,10 +146,13 @@ class Puppet::Parser::Lexer
'=~' => :MATCH,
'!~' => :NOMATCH,
%r{([a-z][-\w]*)?(::[a-z][-\w]*)+} => :CLASSNAME, # Require '::' in the class name, else we'd compete with NAME
- %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
- )
-
- TOKENS.add_tokens "Whatever" => :DQTEXT, "Nomatter" => :SQTEXT, "alsonomatter" => :BOOLEAN
+ %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF,
+ "<string>" => :STRING,
+ "<dqstring up to first interpolation>" => :DQPRE,
+ "<dqstring between two interpolations>" => :DQMID,
+ "<dqstring after final interpolation>" => :DQPOST,
+ "<boolean>" => :BOOLEAN
+ )
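With these token types a double-quoted string is no longer one DQTEXT blob; it is emitted as segments around each interpolation. A rough sketch of the intended streams (inferred from the tokenizer further down, not verbatim lexer output):

    # '"grault"'      => [:STRING, 'grault']
    # '"a${x}b"'      => [:DQPRE, 'a'], [:VARIABLE, 'x'], [:DQPOST, 'b']
    # '"a${x}b${y}c"' => [:DQPRE, 'a'], [:VARIABLE, 'x'], [:DQMID, 'b'],
    #                    [:VARIABLE, 'y'], [:DQPOST, 'c']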
TOKENS.add_token :NUMBER, %r{\b(?:0[xX][0-9A-Fa-f]+|0?\d+(?:\.\d+)?(?:[eE]-?\d+)?)\b} do |lexer, value|
[TOKENS[:NAME], value]
@@ -163,6 +170,9 @@ class Puppet::Parser::Lexer
end
[string_token, value]
end
+ def (TOKENS[:NAME]).acceptable?(context={})
+ ![:DQPRE,:DQMID].include? context[:after]
+ end
TOKENS.add_token :COMMENT, %r{#.*}, :accumulate => true, :skip => true do |lexer,value|
value.sub!(/# ?/,'')
@@ -176,7 +186,7 @@ class Puppet::Parser::Lexer
[self,value]
end
- regex_token = TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, value|
+ TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, value|
# Make sure we haven't matched an escaped /
while value[-2..-2] == '\\'
other = lexer.scan_until(%r{/})
@@ -186,27 +196,40 @@ class Puppet::Parser::Lexer
[self, Regexp.new(regex)]
end
- def regex_token.acceptable?(context={})
+ def (TOKENS[:REGEX]).acceptable?(context={})
[:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH,:COMMA].include? context[:after]
end
TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true
TOKENS.add_token :SQUOTE, "'" do |lexer, value|
- value = lexer.slurpstring(value)
- [TOKENS[:SQTEXT], value]
+ [TOKENS[:STRING], lexer.slurpstring(value).first ]
end
- TOKENS.add_token :DQUOTE, '"' do |lexer, value|
- value = lexer.slurpstring(value)
- [TOKENS[:DQTEXT], value]
+ DQ_initial_token_types = {'$' => :DQPRE,'"' => :STRING}
+ DQ_continuation_token_types = {'$' => :DQMID,'"' => :DQPOST}
+
+ TOKENS.add_token :DQUOTE, /"/ do |lexer, value|
+ lexer.tokenize_interpolated_string(DQ_initial_token_types)
end
- TOKENS.add_token :VARIABLE, %r{\$(\w*::)*\w+} do |lexer, value|
- value = value.sub(/^\$/, '')
- [self, value]
+ TOKENS.add_token :DQCONT, /\}/ do |lexer, value|
+ lexer.tokenize_interpolated_string(DQ_continuation_token_types)
+ end
+ def (TOKENS[:DQCONT]).acceptable?(context={})
+ context[:string_interpolation_depth] > 0
end
+ TOKENS.add_token :DOLLAR_VAR, %r{\$(\w*::)*\w+} do |lexer, value|
+ [TOKENS[:VARIABLE],value[1..-1]]
+ end
+
+ TOKENS.add_token :VARIABLE, %r{(\w*::)*\w+}
+ def (TOKENS[:VARIABLE]).acceptable?(context={})
+ [:DQPRE,:DQMID].include? context[:after]
+ end
+
+
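The acceptable? hooks make matching context-sensitive: a token is only considered when the previous token, recorded in lexing_context[:after], permits it. That is how a bare word lexes as :VARIABLE only directly inside an interpolation, and how :REGEX is confined to positions where a regex can begin. A minimal sketch of the pattern, using a hypothetical :EXAMPLE token (add_token returns the token object, as the old regex_token assignment shows):

    t = TOKENS.add_token :EXAMPLE, %r{example}
    # Hypothetical: only match :EXAMPLE when not directly after a bareword.
    def t.acceptable?(context = {})
      context[:after] != :NAME
    end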
TOKENS.sort_tokens
@@pairs = {
@@ -244,9 +267,7 @@ class Puppet::Parser::Lexer
def expected
return nil if @expected.empty?
name = @expected[-1]
- raise "Could not find expected token %s" % name unless token = TOKENS.lookup(name)
-
- return token
+ TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
end
# scan the whole file
@@ -274,22 +295,19 @@ class Puppet::Parser::Lexer
}
end
- def find_string_token
- matched_token = value = nil
+ def shift_token
+ @token_queue.shift
+ end
+ def find_string_token
# We know our longest string token is three chars, so try each size in turn
# until we either match or run out of chars. This way our worst-case is three
- # tries, where it is otherwise the number of string chars we have. Also,
+ # tries, where it is otherwise the number of string tokens we have. Also,
# the lookups are optimized hash lookups, instead of regex scans.
- [3, 2, 1].each do |i|
- str = @scanner.peek(i)
- if matched_token = TOKENS.lookup(str)
- value = @scanner.scan(matched_token.regex)
- break
- end
- end
-
- return matched_token, value
+ #
+ s = @scanner.peek(3)
+ token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
+ [ token, token && @scanner.scan(token.regex) ]
end
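The rewritten find_string_token replaces the descending loop with three explicit hash probes, longest first, so the worst case stays at three lookups. A hedged trace (TOKENS.lookup returning nil on a miss is assumed from its use here):

    # @scanner.peek(3) => '=~ '
    # TOKENS.lookup('=~ ') => nil, the three-char probe misses
    # TOKENS.lookup('=~')  => the :MATCH token, so the result is
    # [token, @scanner.scan(token.regex)]  => [TOKENS[:MATCH], '=~']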
# Find the next token that matches a regex. We look for these first.
@@ -316,7 +334,7 @@ class Puppet::Parser::Lexer
# Find the next token, returning the string and the token.
def find_token
@find += 1
- find_regex_token || find_string_token
+ shift_token || find_regex_token || find_string_token
end
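With the queue in place, find_token drains anything tokenize_interpolated_string has queued before it touches the scanner:

    # 1. shift_token       - pending tokens from string interpolation
    # 2. find_regex_token  - longest regex match at the cursor
    # 3. find_string_token - 3/2/1-char literal lookup, as above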
def indefine?
@@ -343,10 +361,15 @@ class Puppet::Parser::Lexer
@skip = %r{[ \t]+}
@namestack = []
+ @token_queue = []
@indefine = false
@expected = []
@commentstack = [ ['', @line] ]
- @lexing_context = {:after => nil, :start_of_line => true}
+ @lexing_context = {
+ :after => nil,
+ :start_of_line => true,
+ :string_interpolation_depth => 0
+ }
end
# Make any necessary changes to the token and/or value.
@@ -396,28 +419,17 @@ class Puppet::Parser::Lexer
# this is the heart of the lexer
def scan
#Puppet.debug("entering scan")
- raise Puppet::LexError.new("Invalid or empty string") unless @scanner
+ lex_error "Invalid or empty string" unless @scanner
# Skip any initial whitespace.
skip()
- until @scanner.eos? do
+ until token_queue.empty? and @scanner.eos? do
yielded = false
matched_token, value = find_token
# error out if we didn't match anything at all
- if matched_token.nil?
- nword = nil
- # Try to pull a 'word' out of the remaining string.
- if @scanner.rest =~ /^(\S+)/
- nword = $1
- elsif @scanner.rest =~ /^(\s+)/
- nword = $1
- else
- nword = @scanner.rest
- end
- raise "Could not match '%s'" % nword
- end
+ lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token
newline = matched_token.name == :RETURN
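The one-line error path folds the old word-extraction cascade into a single regex: \S+ grabs the next word, \s+ falls back to whitespace, and .* catches an empty remainder. For example:

    "foo bar"[/^(\S+|\s+|.*)/]  # => "foo"
    "  \tbar"[/^(\S+|\s+|.*)/]  # => "  \t"
    ""[/^(\S+|\s+|.*)/]         # => ""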
@@ -433,6 +445,8 @@ class Puppet::Parser::Lexer
end
lexing_context[:after] = final_token.name unless newline
+ lexing_context[:string_interpolation_depth] += 1 if final_token.name == :DQPRE
+ lexing_context[:string_interpolation_depth] -= 1 if final_token.name == :DQPOST
value = token_value[:value]
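Together with the DQCONT guard earlier, this depth counter is what lets a bare } resume an interpolated string only while one is actually open. A rough trace for "a${x}b" (assumed from the code above):

    # [:DQPRE, 'a'] emitted  -> depth 1; '}' may now lex as :DQCONT
    # [:DQPOST, 'b'] emitted -> depth 0; '}' is an ordinary brace again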
@@ -481,24 +495,40 @@ class Puppet::Parser::Lexer
@scanner.scan_until(regex)
end
- # we've encountered an opening quote...
+ # we've encountered the start of a string...
# slurp in the rest of the string and return it
- def slurpstring(quote)
+ Valid_escapes_in_strings = %w{ \\ $ ' " n t s }+["\n"]
+ def slurpstring(terminators)
# we search for the next quote that isn't preceded by a
# backslash; the caret is there to match empty strings
- str = @scanner.scan_until(/([^\\]|^)#{quote}/)
- if str.nil?
- raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
- [self.last,self.rest])
- else
- str.sub!(/#{quote}\Z/,"")
- str.gsub!(/\\#{quote}/,quote)
- end
-
- # Add to our line count for every carriage return in multi-line strings.
- @line += str.count("\n")
+ str = @scanner.scan_until(/([^\\]|^)[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
+ @line += str.count("\n") # literal newlines add to the line count.
+ str.gsub!(/\\(.)/) {
+ case ch=$1
+ when 'n'; "\n"
+ when 't'; "\t"
+ when 's'; " "
+ else
+ if Valid_escapes_in_strings.include? ch
+ ch
+ else
+ Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}"
+ "\\#{ch}"
+ end
+ end
+ }
+ [ str[0..-2],str[-1,1] ]
+ end
- return str
+ def tokenize_interpolated_string(token_type)
+ value,terminator = slurpstring('"$')
+ token_queue << [TOKENS[token_type[terminator]],value]
+ while terminator == '$' and not @scanner.scan(/\{/)
+ token_queue << [TOKENS[:VARIABLE],@scanner.scan(%r{(\w*::)*\w+|[0-9]})]
+ value,terminator = slurpstring('"$')
+ token_queue << [TOKENS[DQ_continuation_token_types[terminator]],value]
+ end
+ token_queue.shift
end
# just parse a string, not a whole file
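A worked example of the queue discipline, assuming the tokens above. For the input "a${x}b", with the cursor just past the opening quote, tokenize_interpolated_string proceeds roughly as:

    # slurpstring('"$')  => ['a', '$']    stopped at the interpolation
    # token_queue        << [TOKENS[:DQPRE], 'a']
    # @scanner.scan(/\{/) succeeds for the ${...} form, so the loop exits
    # token_queue.shift  => [TOKENS[:DQPRE], 'a'] as the current token
    # ...the 'x' then lexes as :VARIABLE, and the '}' as :DQCONT, which
    # re-enters this method with DQ_continuation_token_types and queues
    # [TOKENS[:DQPOST], 'b'].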