summary refs log tree commit diff stats
path: root/lib/puppet/parser/lexer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/puppet/parser/lexer.rb')
-rw-r--r--  lib/puppet/parser/lexer.rb  673
1 files changed, 391 insertions, 282 deletions
diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index 086d82c09..51026ea1b 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -1,4 +1,3 @@
-
# the scanner/lexer
require 'strscan'
@@ -7,326 +6,436 @@ require 'puppet'
module Puppet
class LexError < RuntimeError; end
- module Parser
- #---------------------------------------------------------------
- class Lexer
- attr_reader :line, :last, :file
-
- attr_accessor :indefine
-
- #%r{\w+} => :WORD,
- @@tokens = {
- %r{#.*} => :COMMENT,
- %r{\[} => :LBRACK,
- %r{\]} => :RBRACK,
- %r{\{} => :LBRACE,
- %r{\}} => :RBRACE,
- %r{\(} => :LPAREN,
- %r{\)} => :RPAREN,
- %r{\"} => :DQUOTE,
- %r{\n} => :RETURN,
- %r{\'} => :SQUOTE,
- %r{=} => :EQUALS,
- %r{==} => :ISEQUAL,
- %r{>=} => :GREATEREQUAL,
- %r{>} => :GREATERTHAN,
- %r{<} => :LESSTHAN,
- %r{<=} => :LESSEQUAL,
- %r{!=} => :NOTEQUAL,
- %r{!} => :NOT,
- %r{,} => :COMMA,
- %r{\.} => :DOT,
- %r{:} => :COLON,
- %r{@} => :AT,
- %r{<<\|} => :LLCOLLECT,
- %r{\|>>} => :RRCOLLECT,
- %r{<\|} => :LCOLLECT,
- %r{\|>} => :RCOLLECT,
- %r{;} => :SEMIC,
- %r{\?} => :QMARK,
- %r{\\} => :BACKSLASH,
- %r{=>} => :FARROW,
- %r{\+>} => :PARROW,
- %r{[a-z][-\w]*} => :NAME,
- %r{([a-z][-\w]*::)+[a-z][-\w]*} => :CLASSNAME,
- %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF,
- %r{[0-9]+} => :NUMBER,
- %r{\$(\w*::)*\w+} => :VARIABLE
- }
-
- @@pairs = {
- "{" => "}",
- "(" => ")",
- "[" => "]",
- "<|" => "|>",
- "<<|" => "|>>"
- }
-
- @@reverse_pairs = @@pairs.inject({}) { |hash, pair| hash[pair[1]] = pair[0]; hash }
-
- @@keywords = {
- "case" => :CASE,
- "class" => :CLASS,
- "default" => :DEFAULT,
- "define" => :DEFINE,
- "false" => :BOOLEAN,
- "import" => :IMPORT,
- "if" => :IF,
- "elsif" => :ELSIF,
- "else" => :ELSE,
- "inherits" => :INHERITS,
- "node" => :NODE,
- "true" => :BOOLEAN,
- "and" => :AND,
- "or" => :OR,
- "undef" => :UNDEF
- }
-
- def clear
- initvars
- end
+end
- def expected
- if @expected.empty?
- nil
- else
- token = @expected[-1]
- @@tokens.each do |value, name|
- if token == name
- return value
- end
- end
- return token
- end
- end
+module Puppet::Parser; end
- # scan the whole file
- # basically just used for testing
- def fullscan
- array = []
-
- self.scan { |token,str|
- # Ignore any definition nesting problems
- @indefine = false
- #Puppet.debug("got token '%s' => '%s'" % [token,str])
- if token.nil?
- return array
- else
- array.push([token,str])
- end
- }
- return array
+class Puppet::Parser::Lexer
+ attr_reader :last, :file
+
+ attr_accessor :line, :indefine
+
+ # Our base token class.
+ class Token
+ attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text
+
+ def initialize(regex, name)
+ if regex.is_a?(String)
+ @name, @string = name, regex
+ @regex = Regexp.new(Regexp.escape(@string))
+ else
+ @name, @regex = name, regex
end
+ end
- # this is probably pretty damned inefficient...
- # it'd be nice not to have to load the whole file first...
- def file=(file)
- @file = file
- @line = 1
- File.open(file) { |of|
- str = ""
- of.each { |line| str += line }
- @scanner = StringScanner.new(str)
- }
+ def skip?
+ self.skip
+ end
+
+ def to_s
+ if self.string
+ @string
+ else
+ @name.to_s
end
+ end
+ end
- def indefine?
- if defined? @indefine
- @indefine
- else
- false
- end
+ # Maintain a list of tokens.
+ class TokenList
+ attr_reader :regex_tokens, :string_tokens
+
+ def [](name)
+ @tokens[name]
+ end
+
+ # Create a new token.
+ def add_token(name, regex, options = {}, &block)
+ token = Token.new(regex, name)
+ raise(ArgumentError, "Token %s already exists" % name) if @tokens.include?(name)
+ @tokens[token.name] = token
+ if token.string
+ @string_tokens << token
+ @tokens_by_string[token.string] = token
+ else
+ @regex_tokens << token
end
- def initialize
- initvars()
+ options.each do |name, option|
+ token.send(name.to_s + "=", option)
end
- def initvars
- @line = 1
- @last = ""
- @lasttoken = nil
- @scanner = nil
- @file = nil
- # AAARRGGGG! okay, regexes in ruby are bloody annoying
- # no one else has "\n" =~ /\s/
- @skip = %r{[ \t]+}
+ token.meta_def(:convert, &block) if block_given?
+
+ token
+ end
- @namestack = []
- @indefine = false
+ def initialize
+ @tokens = {}
+ @regex_tokens = []
+ @string_tokens = []
+ @tokens_by_string = {}
+ end
- @expected = []
- end
+ # Look up a token by its value, rather than name.
+ def lookup(string)
+ @tokens_by_string[string]
+ end
- # Go up one in the namespace.
- def namepop
- @namestack.pop
+ # Define more tokens.
+ def add_tokens(hash)
+ hash.each do |regex, name|
+ add_token(name, regex)
end
+ end
- # Collect the current namespace.
- def namespace
- @namestack.join("::")
+ # Sort our tokens by length, so we know once we match, we're done.
+ # This helps us avoid the O(n^2) nature of token matching.
+ def sort_tokens
+ @string_tokens.sort! { |a, b| b.string.length <=> a.string.length }
+ end
+ end
+
+ TOKENS = TokenList.new
+ TOKENS.add_tokens(
+ '[' => :LBRACK,
+ ']' => :RBRACK,
+ '{' => :LBRACE,
+ '}' => :RBRACE,
+ '(' => :LPAREN,
+ ')' => :RPAREN,
+ '=' => :EQUALS,
+ '==' => :ISEQUAL,
+ '>=' => :GREATEREQUAL,
+ '>' => :GREATERTHAN,
+ '<' => :LESSTHAN,
+ '<=' => :LESSEQUAL,
+ '!=' => :NOTEQUAL,
+ '!' => :NOT,
+ ',' => :COMMA,
+ '.' => :DOT,
+ ':' => :COLON,
+ '@' => :AT,
+ '<<|' => :LLCOLLECT,
+ '|>>' => :RRCOLLECT,
+ '<|' => :LCOLLECT,
+ '|>' => :RCOLLECT,
+ ';' => :SEMIC,
+ '?' => :QMARK,
+ '\\' => :BACKSLASH,
+ '=>' => :FARROW,
+ '+>' => :PARROW,
+ %r{([a-z][-\w]*::)+[a-z][-\w]*} => :CLASSNAME,
+ %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
+ )
+
+ TOKENS.add_tokens "Whatever" => :DQTEXT, "Nomatter" => :SQTEXT, "alsonomatter" => :BOOLEAN
+
+ TOKENS.add_token :NAME, %r{[a-z][-\w]*} do |lexer, value|
+ string_token = self
+ # we're looking for keywords here
+ if tmp = KEYWORDS.lookup(value)
+ string_token = tmp
+ if [:TRUE, :FALSE].include?(string_token.name)
+ value = eval(value)
+ string_token = TOKENS[:BOOLEAN]
end
+ end
+ [string_token, value]
+ end
+
+ TOKENS.add_token :NUMBER, %r{[0-9]+} do |lexer, value|
+ [TOKENS[:NAME], value]
+ end
+
+ TOKENS.add_token :COMMENT, %r{#.*}, :skip => true
+
+ TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true
+
+ TOKENS.add_token :SQUOTE, "'" do |lexer, value|
+ value = lexer.slurpstring(value)
+ [TOKENS[:SQTEXT], value]
+ end
+
+ TOKENS.add_token :DQUOTE, '"' do |lexer, value|
+ value = lexer.slurpstring(value)
+ [TOKENS[:DQTEXT], value]
+ end
+
+ TOKENS.add_token :VARIABLE, %r{\$(\w*::)*\w+} do |lexer, value|
+ value = value.sub(/^\$/, '')
+ [self, value]
+ end
+
+ TOKENS.sort_tokens
+
+ @@pairs = {
+ "{" => "}",
+ "(" => ")",
+ "[" => "]",
+ "<|" => "|>",
+ "<<|" => "|>>"
+ }
+
+ KEYWORDS = TokenList.new
+
+ KEYWORDS.add_tokens(
+ "case" => :CASE,
+ "class" => :CLASS,
+ "default" => :DEFAULT,
+ "define" => :DEFINE,
+ "import" => :IMPORT,
+ "if" => :IF,
+ "elsif" => :ELSIF,
+ "else" => :ELSE,
+ "inherits" => :INHERITS,
+ "node" => :NODE,
+ "and" => :AND,
+ "or" => :OR,
+ "undef" => :UNDEF,
+ "false" => :FALSE,
+ "true" => :TRUE
+ )
+
+ def clear
+ initvars
+ end
+
+ def expected
+ return nil if @expected.empty?
+ name = @expected[-1]
+ raise "Could not find expected token %s" % name unless token = TOKENS.lookup(name)
+
+ return token
+ end
+
+ # scan the whole file
+ # basically just used for testing
+ def fullscan
+ array = []
+
+ self.scan { |token, str|
+ # Ignore any definition nesting problems
+ @indefine = false
+ array.push([token,str])
+ }
+ return array
+ end
+
+ # this is probably pretty damned inefficient...
+ # it'd be nice not to have to load the whole file first...
+ def file=(file)
+ @file = file
+ @line = 1
+ File.open(file) { |of|
+ str = ""
+ of.each { |line| str += line }
+ @scanner = StringScanner.new(str)
+ }
+ end
- # This value might have :: in it, but we don't care -- it'll be
- # handled normally when joining, and when popping we want to pop
- # this full value, however long the namespace is.
- def namestack(value)
- @namestack << value
+ def find_string_token
+ matched_token = value = nil
+
+ # We know our longest string token is three chars, so try each size in turn
+ # until we either match or run out of chars. This way our worst-case is three
+ # tries, where it is otherwise the number of string chars we have. Also,
+ # the lookups are optimized hash lookups, instead of regex scans.
+ [3, 2, 1].each do |i|
+ str = @scanner.peek(i)
+ if matched_token = TOKENS.lookup(str)
+ value = @scanner.scan(matched_token.regex)
+ break
end
+ end
+
+ return matched_token, value
+ end
- def rest
- @scanner.rest
+ # Find the next token that matches a regex. We look for these first.
+ def find_regex_token
+ @regex += 1
+ matched_token = nil
+ value = ""
+ length = 0
+
+ # I tried optimizing based on the first char, but it had
+ # a slightly negative affect and was a good bit more complicated.
+ TOKENS.regex_tokens.each do |token|
+ next unless match_length = @scanner.match?(token.regex)
+
+ # We've found a longer match
+ if match_length > length
+ value = @scanner.scan(token.regex)
+ length = value.length
+ matched_token = token
end
+ end
- # this is the heart of the lexer
- def scan
- #Puppet.debug("entering scan")
- if @scanner.nil?
- raise TypeError.new("Invalid or empty string")
- end
+ return matched_token, value
+ end
- @scanner.skip(@skip)
- until @scanner.eos? do
- yielded = false
- sendbreak = false # gah, this is a nasty hack
- stoken = nil
- sregex = nil
- value = ""
-
- # first find out which type of token we've got
- @@tokens.each { |regex,token|
- # we're just checking, which doesn't advance the scan
- # pointer
- tmp = @scanner.check(regex)
- if tmp.nil?
- #puppet.debug("did not match %s to '%s'" %
- # [regex,@scanner.rest])
- next
- end
-
- # find the longest match
- if tmp.length > value.length
- value = tmp
- stoken = token
- sregex = regex
- else
- # we've already got a longer match
- next
- end
- }
-
- # error out if we didn't match anything at all
- if stoken.nil?
- nword = nil
- if @scanner.rest =~ /^(\S+)/
- nword = $1
- elsif@scanner.rest =~ /^(\s+)/
- nword = $1
- else
- nword = @scanner.rest
- end
- raise "Could not match '%s'" % nword
- end
+ # Find the next token, returning the string and the token.
+ def find_token
+ @find += 1
+ matched_token, value = find_regex_token
- value = @scanner.scan(sregex)
+ unless matched_token
+ matched_token, value = find_string_token
+ end
- if value == ""
- raise "Didn't match regex on token %s" % stoken
- end
+ return matched_token, value
+ end
- # token-specific operations
- # if this gets much more complicated, it should
- # be moved up to where the tokens themselves are defined
- # which will get me about 75% of the way to a lexer generator
- ptoken = stoken
- case stoken
- when :NAME then
- wtoken = stoken
- # we're looking for keywords here
- if @@keywords.include?(value)
- wtoken = @@keywords[value]
- #Puppet.debug("token '%s'" % wtoken)
- if wtoken == :BOOLEAN
- value = eval(value)
- end
- end
- ptoken = wtoken
- when :NUMBER then
- ptoken = :NAME
- when :COMMENT then
- # just throw comments away
- next
- when :RETURN then
- @line += 1
- @scanner.skip(@skip)
- next
- when :SQUOTE then
- #Puppet.debug("searching '%s' after '%s'" % [self.rest,value])
- value = self.slurpstring(value)
- ptoken = :SQTEXT
- #Puppet.debug("got string '%s' => '%s'" % [:DQTEXT,value])
- when :DQUOTE then
- value = self.slurpstring(value)
- ptoken = :DQTEXT
- when :VARIABLE then
- value = value.sub(/^\$/, '')
- end
+ def indefine?
+ if defined? @indefine
+ @indefine
+ else
+ false
+ end
+ end
- if match = @@pairs[value] and ptoken != :DQUOTE and ptoken != :SQUOTE
- @expected << match
- elsif exp = @expected[-1] and exp == value and ptoken != :DQUOTE and ptoken != :SQUOTE
- @expected.pop
- end
+ def initialize
+ @find = 0
+ @regex = 0
+ initvars()
+ end
- yield [ptoken, value]
+ def initvars
+ @line = 1
+ @previous_token = nil
+ @scanner = nil
+ @file = nil
+ # AAARRGGGG! okay, regexes in ruby are bloody annoying
+ # no one else has "\n" =~ /\s/
+ @skip = %r{[ \t]+}
+
+ @namestack = []
+ @indefine = false
+ @expected = []
+ end
- if @lasttoken == :CLASS
- namestack(value)
- end
+ # Make any necessary changes to the token and/or value.
+ def munge_token(token, value)
+ @line += 1 if token.incr_line
- if @lasttoken == :DEFINE
- if indefine?
- msg = "Cannot nest definition %s inside %s" % [value, @indefine]
- self.indefine = false
- raise Puppet::ParseError, msg
- end
+ skip() if token.skip_text
- @indefine = value
- end
+ return if token.skip
- @last = value
- @lasttoken = ptoken
+ token, value = token.convert(self, value) if token.respond_to?(:convert)
- @scanner.skip(@skip)
- end
- @scanner = nil
- yield [false,false]
- end
+ return unless token
+
+ return token, value
+ end
+
+ # Go up one in the namespace.
+ def namepop
+ @namestack.pop
+ end
- # we've encountered an opening quote...
- # slurp in the rest of the string and return it
- def slurpstring(quote)
- # we search for the next quote that isn't preceded by a
- # backslash; the caret is there to match empty strings
- str = @scanner.scan_until(/([^\\]|^)#{quote}/)
- if str.nil?
- raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
- [self.last,self.rest])
+ # Collect the current namespace.
+ def namespace
+ @namestack.join("::")
+ end
+
+ # This value might have :: in it, but we don't care -- it'll be
+ # handled normally when joining, and when popping we want to pop
+ # this full value, however long the namespace is.
+ def namestack(value)
+ @namestack << value
+ end
+
+ def rest
+ @scanner.rest
+ end
+
+ # this is the heart of the lexer
+ def scan
+ #Puppet.debug("entering scan")
+ raise Puppet::LexError.new("Invalid or empty string") unless @scanner
+
+ # Skip any initial whitespace.
+ skip()
+
+ until @scanner.eos? do
+ yielded = false
+ matched_token, value = find_token
+
+ # error out if we didn't match anything at all
+ if matched_token.nil?
+ nword = nil
+ # Try to pull a 'word' out of the remaining string.
+ if @scanner.rest =~ /^(\S+)/
+ nword = $1
+ elsif @scanner.rest =~ /^(\s+)/
+ nword = $1
else
- str.sub!(/#{quote}\Z/,"")
- str.gsub!(/\\#{quote}/,quote)
+ nword = @scanner.rest
end
+ raise "Could not match '%s'" % nword
+ end
- return str
+ final_token, value = munge_token(matched_token, value)
+
+ next unless final_token
+
+ if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
+ @expected << match
+ elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
+ @expected.pop
end
- # just parse a string, not a whole file
- def string=(string)
- @scanner = StringScanner.new(string)
+ yield [final_token.name, value]
+
+ if @previous_token
+ namestack(value) if @previous_token.name == :CLASS
+
+ if @previous_token.name == :DEFINE
+ if indefine?
+ msg = "Cannot nest definition %s inside %s" % [value, @indefine]
+ self.indefine = false
+ raise Puppet::ParseError, msg
+ end
+
+ @indefine = value
+ end
end
+
+ @previous_token = final_token
+ skip()
end
- #---------------------------------------------------------------
+ @scanner = nil
+
+ # This indicates that we're done parsing.
+ yield [false,false]
+ end
+
+ # Skip any skipchars in our remaining string.
+ def skip
+ @scanner.skip(@skip)
end
-end
+ # we've encountered an opening quote...
+ # slurp in the rest of the string and return it
+ def slurpstring(quote)
+ # we search for the next quote that isn't preceded by a
+ # backslash; the caret is there to match empty strings
+ str = @scanner.scan_until(/([^\\]|^)#{quote}/)
+ if str.nil?
+ raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
+ [self.last,self.rest])
+ else
+ str.sub!(/#{quote}\Z/,"")
+ str.gsub!(/\\#{quote}/,quote)
+ end
+
+ return str
+ end
+
+ # just parse a string, not a whole file
+ def string=(string)
+ @scanner = StringScanner.new(string)
+ end
+end