-rwxr-xr-x  ext/puppet-test                     |   4
-rw-r--r--  lib/puppet/parser/lexer.rb          | 671
-rw-r--r--  lib/puppet/parser/parser_support.rb |   7
-rwxr-xr-x  spec/unit/parser/lexer.rb           | 465
-rwxr-xr-x  test/language/lexer.rb              | 276
5 files changed, 857 insertions(+), 566 deletions(-)
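For orientation before reading the diff: the commit replaces the lexer's single @@tokens regex hash with Token objects held in a TokenList, where string tokens are matched by hash lookup, regex tokens by longest match, and each token may carry a conversion block. The following is a minimal, self-contained sketch of that pattern; the names (Token struct, TinyTokenTable, next_token) are illustrative, not Puppet's actual API, which appears in the diff below.

require 'strscan'

# Sketch of the token-table pattern: exact-string tokens live in a hash,
# regex tokens are scanned longest-match-first, and a token may carry a
# conversion block that rewrites (name, value) after a match.
Token = Struct.new(:name, :string, :regex, :convert)

class TinyTokenTable
    def initialize
        @by_string = {}   # exact-string tokens, looked up by peeked text
        @regexes = []     # regex tokens, tried in order, longest match wins
    end

    def add(name, matcher, &convert)
        if matcher.is_a?(String)
            token = Token.new(name, matcher, Regexp.new(Regexp.escape(matcher)), convert)
            @by_string[matcher] = token
        else
            token = Token.new(name, nil, matcher, convert)
            @regexes << token
        end
        token
    end

    # Try regex tokens first (keeping the longest match), then fall back to
    # exact-string lookup -- the same order the patched lexer uses.  The real
    # patch peeks at lengths 3, 2, 1 so multi-char operators like '<<|' work;
    # this sketch only peeks one char for brevity.
    def next_token(scanner)
        best, value = nil, ""
        @regexes.each do |t|
            if (len = scanner.match?(t.regex)) && len > value.length
                best, value = t, scanner.peek(len)
            end
        end
        best ||= @by_string[scanner.peek(1)]
        return nil unless best
        value = scanner.scan(best.regex)
        best.convert ? best.convert.call(value) : [best.name, value]
    end
end

table = TinyTokenTable.new
table.add(:LBRACE, "{")
table.add(:NAME, %r{[a-z][-\w]*}) { |v| [v == "true" ? :BOOLEAN : :NAME, v] }

p table.next_token(StringScanner.new("foo"))   # => [:NAME, "foo"]
p table.next_token(StringScanner.new("{"))     # => [:LBRACE, "{"]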
diff --git a/ext/puppet-test b/ext/puppet-test
index 0f33e0cbb..e18dd1c57 100755
--- a/ext/puppet-test
+++ b/ext/puppet-test
@@ -211,11 +211,11 @@ end
 
 Suite.new :parser, "Manifest parsing" do
     def prepare
-        @parser = Puppet::Parser::Parser.new(:environment => Puppet[:environment])
-        @parser.file = Puppet[:manifest]
     end
 
     newtest :parse, "Parsed files" do
+        @parser = Puppet::Parser::Parser.new(:environment => Puppet[:environment])
+        @parser.file = Puppet[:manifest]
         @parser.parse
     end
 end
diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index 086d82c09..6661650ba 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -1,4 +1,3 @@
-
 # the scanner/lexer
 
 require 'strscan'
@@ -7,326 +6,432 @@ require 'puppet'
 module Puppet
     class LexError < RuntimeError; end
-    module Parser
-        #---------------------------------------------------------------
-        class Lexer
-            attr_reader :line, :last, :file
-
-            attr_accessor :indefine
-
-            #%r{\w+} => :WORD,
-            @@tokens = {
-                %r{#.*} => :COMMENT,
-                %r{\[} => :LBRACK,
-                %r{\]} => :RBRACK,
-                %r{\{} => :LBRACE,
-                %r{\}} => :RBRACE,
-                %r{\(} => :LPAREN,
-                %r{\)} => :RPAREN,
-                %r{\"} => :DQUOTE,
-                %r{\n} => :RETURN,
-                %r{\'} => :SQUOTE,
-                %r{=} => :EQUALS,
-                %r{==} => :ISEQUAL,
-                %r{>=} => :GREATEREQUAL,
-                %r{>} => :GREATERTHAN,
-                %r{<} => :LESSTHAN,
-                %r{<=} => :LESSEQUAL,
-                %r{!=} => :NOTEQUAL,
-                %r{!} => :NOT,
-                %r{,} => :COMMA,
-                %r{\.} => :DOT,
-                %r{:} => :COLON,
-                %r{@} => :AT,
-                %r{<<\|} => :LLCOLLECT,
-                %r{\|>>} => :RRCOLLECT,
-                %r{<\|} => :LCOLLECT,
-                %r{\|>} => :RCOLLECT,
-                %r{;} => :SEMIC,
-                %r{\?} => :QMARK,
-                %r{\\} => :BACKSLASH,
-                %r{=>} => :FARROW,
-                %r{\+>} => :PARROW,
-                %r{[a-z][-\w]*} => :NAME,
-                %r{([a-z][-\w]*::)+[a-z][-\w]*} => :CLASSNAME,
-                %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF,
-                %r{[0-9]+} => :NUMBER,
-                %r{\$(\w*::)*\w+} => :VARIABLE
-            }
-
-            @@pairs = {
-                "{" => "}",
-                "(" => ")",
-                "[" => "]",
-                "<|" => "|>",
-                "<<|" => "|>>"
-            }
-
-            @@reverse_pairs = @@pairs.inject({}) { |hash, pair| hash[pair[1]] = pair[0]; hash }
-
-            @@keywords = {
-                "case" => :CASE,
-                "class" => :CLASS,
-                "default" => :DEFAULT,
-                "define" => :DEFINE,
-                "false" => :BOOLEAN,
-                "import" => :IMPORT,
-                "if" => :IF,
-                "elsif" => :ELSIF,
-                "else" => :ELSE,
-                "inherits" => :INHERITS,
-                "node" => :NODE,
-                "true" => :BOOLEAN,
-                "and" => :AND,
-                "or" => :OR,
-                "undef" => :UNDEF
-            }
-
-            def clear
-                initvars
-            end
-
-            def expected
-                if @expected.empty?
-                    nil
-                else
-                    token = @expected[-1]
-                    @@tokens.each do |value, name|
-                        if token == name
-                            return value
-                        end
-                    end
-                    return token
-                end
-            end
-
-            # scan the whole file
-            # basically just used for testing
-            def fullscan
-                array = []
-
-                self.scan { |token,str|
-                    # Ignore any definition nesting problems
-                    @indefine = false
-                    #Puppet.debug("got token '%s' => '%s'" % [token,str])
-                    if token.nil?
-                        return array
-                    else
-                        array.push([token,str])
-                    end
-                }
-                return array
-            end
-
-            # this is probably pretty damned inefficient...
-            # it'd be nice not to have to load the whole file first...
-            def file=(file)
-                @file = file
-                @line = 1
-                File.open(file) { |of|
-                    str = ""
-                    of.each { |line| str += line }
-                    @scanner = StringScanner.new(str)
-                }
-            end
-
-            def indefine?
-                if defined? @indefine
-                    @indefine
-                else
-                    false
-                end
-            end
-
-            def initialize
-                initvars()
-            end
-
-            def initvars
-                @line = 1
-                @last = ""
-                @lasttoken = nil
-                @scanner = nil
-                @file = nil
-                # AAARRGGGG! okay, regexes in ruby are bloody annoying
-                # no one else has "\n" =~ /\s/
-                @skip = %r{[ \t]+}
-
-                @namestack = []
-                @indefine = false
-
-                @expected = []
-            end
-
-            # Go up one in the namespace.
-            def namepop
-                @namestack.pop
-            end
-
-            # Collect the current namespace.
-            def namespace
-                @namestack.join("::")
-            end
-
-            # This value might have :: in it, but we don't care -- it'll be
-            # handled normally when joining, and when popping we want to pop
-            # this full value, however long the namespace is.
-            def namestack(value)
-                @namestack << value
-            end
-
-            def rest
-                @scanner.rest
-            end
-
-            # this is the heart of the lexer
-            def scan
-                #Puppet.debug("entering scan")
-                if @scanner.nil?
-                    raise TypeError.new("Invalid or empty string")
-                end
-
-                @scanner.skip(@skip)
-                until @scanner.eos? do
-                    yielded = false
-                    sendbreak = false # gah, this is a nasty hack
-                    stoken = nil
-                    sregex = nil
-                    value = ""
-
-                    # first find out which type of token we've got
-                    @@tokens.each { |regex,token|
-                        # we're just checking, which doesn't advance the scan
-                        # pointer
-                        tmp = @scanner.check(regex)
-                        if tmp.nil?
-                            #puppet.debug("did not match %s to '%s'" %
-                            #    [regex,@scanner.rest])
-                            next
-                        end
-
-                        # find the longest match
-                        if tmp.length > value.length
-                            value = tmp
-                            stoken = token
-                            sregex = regex
-                        else
-                            # we've already got a longer match
-                            next
-                        end
-                    }
-
-                    # error out if we didn't match anything at all
-                    if stoken.nil?
-                        nword = nil
-                        if @scanner.rest =~ /^(\S+)/
-                            nword = $1
-                        elsif @scanner.rest =~ /^(\s+)/
-                            nword = $1
-                        else
-                            nword = @scanner.rest
-                        end
-                        raise "Could not match '%s'" % nword
-                    end
-
-                    value = @scanner.scan(sregex)
-
-                    if value == ""
-                        raise "Didn't match regex on token %s" % stoken
-                    end
-
-                    # token-specific operations
-                    # if this gets much more complicated, it should
-                    # be moved up to where the tokens themselves are defined
-                    # which will get me about 75% of the way to a lexer generator
-                    ptoken = stoken
-                    case stoken
-                    when :NAME then
-                        wtoken = stoken
-                        # we're looking for keywords here
-                        if @@keywords.include?(value)
-                            wtoken = @@keywords[value]
-                            #Puppet.debug("token '%s'" % wtoken)
-                            if wtoken == :BOOLEAN
-                                value = eval(value)
-                            end
-                        end
-                        ptoken = wtoken
-                    when :NUMBER then
-                        ptoken = :NAME
-                    when :COMMENT then
-                        # just throw comments away
-                        next
-                    when :RETURN then
-                        @line += 1
-                        @scanner.skip(@skip)
-                        next
-                    when :SQUOTE then
-                        #Puppet.debug("searching '%s' after '%s'" % [self.rest,value])
-                        value = self.slurpstring(value)
-                        ptoken = :SQTEXT
-                        #Puppet.debug("got string '%s' => '%s'" % [:DQTEXT,value])
-                    when :DQUOTE then
-                        value = self.slurpstring(value)
-                        ptoken = :DQTEXT
-                    when :VARIABLE then
-                        value = value.sub(/^\$/, '')
-                    end
-
-                    if match = @@pairs[value] and ptoken != :DQUOTE and ptoken != :SQUOTE
-                        @expected << match
-                    elsif exp = @expected[-1] and exp == value and ptoken != :DQUOTE and ptoken != :SQUOTE
-                        @expected.pop
-                    end
-
-                    yield [ptoken, value]
-
-                    if @lasttoken == :CLASS
-                        namestack(value)
-                    end
-
-                    if @lasttoken == :DEFINE
-                        if indefine?
-                            msg = "Cannot nest definition %s inside %s" % [value, @indefine]
-                            self.indefine = false
-                            raise Puppet::ParseError, msg
-                        end
-
-                        @indefine = value
-                    end
-
-                    @last = value
-                    @lasttoken = ptoken
-
-                    @scanner.skip(@skip)
-                end
-                @scanner = nil
-                yield [false,false]
-            end
-
-            # we've encountered an opening quote...
-            # slurp in the rest of the string and return it
-            def slurpstring(quote)
-                # we search for the next quote that isn't preceded by a
-                # backslash; the caret is there to match empty strings
-                str = @scanner.scan_until(/([^\\]|^)#{quote}/)
-                if str.nil?
-                    raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
-                        [self.last,self.rest])
-                else
-                    str.sub!(/#{quote}\Z/,"")
-                    str.gsub!(/\\#{quote}/,quote)
-                end
-
-                return str
-            end
-
-            # just parse a string, not a whole file
-            def string=(string)
-                @scanner = StringScanner.new(string)
-            end
-        end
-        #---------------------------------------------------------------
-    end
-end
+end
+
+module Puppet::Parser; end
+
+class Puppet::Parser::Lexer
+    attr_reader :last, :file
+
+    attr_accessor :line, :indefine
+
+    # Our base token class.
+    class Token
+        attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text
+
+        def initialize(regex, name)
+            if regex.is_a?(String)
+                @name, @string = name, regex
+                @regex = Regexp.new(Regexp.escape(@string))
+            else
+                @name, @regex = name, regex
+            end
+        end
+
+        def skip?
+            self.skip
+        end
+
+        def to_s
+            "Lexer token %s" % @name.to_s
+        end
+    end
+
+    # Maintain a list of tokens.
+    class TokenList
+        attr_reader :regex_tokens, :string_tokens
+
+        def [](name)
+            @tokens[name]
+        end
+
+        # Create a new token.
+        def add_token(name, regex, options = {}, &block)
+            token = Token.new(regex, name)
+            raise(ArgumentError, "Token %s already exists" % name) if @tokens.include?(name)
+            @tokens[token.name] = token
+            if token.string
+                @string_tokens << token
+                @tokens_by_string[token.string] = token
+            else
+                @regex_tokens << token
+            end
+
+            options.each do |name, option|
+                token.send(name.to_s + "=", option)
+            end
+
+            token.meta_def(:convert, &block) if block_given?
+
+            token
+        end
+
+        def initialize
+            @tokens = {}
+            @regex_tokens = []
+            @string_tokens = []
+            @tokens_by_string = {}
+        end
+
+        # Look up a token by its value, rather than name.
+        def lookup(string)
+            @tokens_by_string[string]
+        end
+
+        # Define more tokens.
+        def add_tokens(hash)
+            hash.each do |regex, name|
+                add_token(name, regex)
+            end
+        end
+
+        # Sort our tokens by length, so we know once we match, we're done.
+        # This helps us avoid the O(n^2) nature of token matching.
+        def sort_tokens
+            @string_tokens.sort! { |a, b| b.string.length <=> a.string.length }
+        end
+    end
+
+    TOKENS = TokenList.new
+    TOKENS.add_tokens(
+        '[' => :LBRACK,
+        ']' => :RBRACK,
+        '{' => :LBRACE,
+        '}' => :RBRACE,
+        '(' => :LPAREN,
+        ')' => :RPAREN,
+        '=' => :EQUALS,
+        '==' => :ISEQUAL,
+        '>=' => :GREATEREQUAL,
+        '>' => :GREATERTHAN,
+        '<' => :LESSTHAN,
+        '<=' => :LESSEQUAL,
+        '!=' => :NOTEQUAL,
+        '!' => :NOT,
+        ',' => :COMMA,
+        '.' => :DOT,
+        ':' => :COLON,
+        '@' => :AT,
+        '<<|' => :LLCOLLECT,
+        '|>>' => :RRCOLLECT,
+        '<|' => :LCOLLECT,
+        '|>' => :RCOLLECT,
+        ';' => :SEMIC,
+        '?' => :QMARK,
+        '\\' => :BACKSLASH,
+        '=>' => :FARROW,
+        '+>' => :PARROW,
+        %r{([a-z][-\w]*::)+[a-z][-\w]*} => :CLASSNAME,
+        %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
+    )
+
+    TOKENS.add_tokens "Whatever" => :DQTEXT, "Nomatter" => :SQTEXT, "alsonomatter" => :BOOLEAN
+
+    TOKENS.add_token :NAME, %r{[a-z][-\w]*} do |lexer, value|
+        string_token = self
+        # we're looking for keywords here
+        if tmp = KEYWORDS.lookup(value)
+            string_token = tmp
+            if [:TRUE, :FALSE].include?(string_token.name)
+                value = eval(value)
+                string_token = TOKENS[:BOOLEAN]
+            end
+        end
+        [string_token, value]
+    end
+
+    TOKENS.add_token :NUMBER, %r{[0-9]+} do |lexer, value|
+        [TOKENS[:NAME], value]
+    end
+
+    TOKENS.add_token :COMMENT, %r{#.*}, :skip => true
+
+    TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true
+
+    TOKENS.add_token :SQUOTE, "'" do |lexer, value|
+        value = lexer.slurpstring(value)
+        [TOKENS[:SQTEXT], value]
+    end
+
+    TOKENS.add_token :DQUOTE, '"' do |lexer, value|
+        value = lexer.slurpstring(value)
+        [TOKENS[:DQTEXT], value]
+    end
+
+    TOKENS.add_token :VARIABLE, %r{\$(\w*::)*\w+} do |lexer, value|
+        value = value.sub(/^\$/, '')
+        [self, value]
+    end
+
+    TOKENS.sort_tokens
+
+    @@pairs = {
+        "{" => "}",
+        "(" => ")",
+        "[" => "]",
+        "<|" => "|>",
+        "<<|" => "|>>"
+    }
+
+    KEYWORDS = TokenList.new
+
+    KEYWORDS.add_tokens(
+        "case" => :CASE,
+        "class" => :CLASS,
+        "default" => :DEFAULT,
+        "define" => :DEFINE,
+        "import" => :IMPORT,
+        "if" => :IF,
+        "elsif" => :ELSIF,
+        "else" => :ELSE,
+        "inherits" => :INHERITS,
+        "node" => :NODE,
+        "and" => :AND,
+        "or" => :OR,
+        "undef" => :UNDEF,
+        "false" => :FALSE,
+        "true" => :TRUE
+    )
+
+    def clear
+        initvars
+    end
+
+    def expected
+        return nil if @expected.empty?
+        name = @expected[-1]
+        raise "Could not find expected token %s" % name unless token = TOKENS.lookup(name)
+
+        return token
+    end
+
+    # scan the whole file
+    # basically just used for testing
+    def fullscan
+        array = []
+
+        self.scan { |token, str|
+            # Ignore any definition nesting problems
+            @indefine = false
+            array.push([token,str])
+        }
+        return array
+    end
+
+    # this is probably pretty damned inefficient...
+    # it'd be nice not to have to load the whole file first...
+    def file=(file)
+        @file = file
+        @line = 1
+        File.open(file) { |of|
+            str = ""
+            of.each { |line| str += line }
+            @scanner = StringScanner.new(str)
+        }
+    end
+
+    def find_string_token
+        matched_token = value = nil
+
+        # We know our longest string token is three chars, so try each size
+        # in turn until we either match or run out of chars.  This way our
+        # worst-case is three tries, where it is otherwise the number of
+        # string chars we have.  Also, the lookups are optimized hash
+        # lookups, instead of regex scans.
+        [3, 2, 1].each do |i|
+            str = @scanner.peek(i)
+            if matched_token = TOKENS.lookup(str)
+                value = @scanner.scan(matched_token.regex)
+                break
+            end
+        end
+
+        return matched_token, value
+    end
+
+    # Find the next token that matches a regex.  We look for these first.
+    def find_regex_token
+        @regex += 1
+        matched_token = nil
+        value = ""
+        length = 0
+
+        # I tried optimizing based on the first char, but it had
+        # a slightly negative affect and was a good bit more complicated.
+        TOKENS.regex_tokens.each do |token|
+            next unless match_length = @scanner.match?(token.regex)
+
+            # We've found a longer match
+            if match_length > length
+                value = @scanner.scan(token.regex)
+                length = value.length
+                matched_token = token
+            end
+        end
+
+        return matched_token, value
+    end
+
+    # Find the next token, returning the string and the token.
+    def find_token
+        @find += 1
+        matched_token, value = find_regex_token
+
+        unless matched_token
+            matched_token, value = find_string_token
+        end
+
+        return matched_token, value
+    end
+
+    def indefine?
+        if defined? @indefine
+            @indefine
+        else
+            false
+        end
+    end
+
+    def initialize
+        @find = 0
+        @regex = 0
+        initvars()
+    end
+
+    def initvars
+        @line = 1
+        @previous_token = nil
+        @scanner = nil
+        @file = nil
+        # AAARRGGGG! okay, regexes in ruby are bloody annoying
+        # no one else has "\n" =~ /\s/
+        @skip = %r{[ \t]+}
+
+        @namestack = []
+        @indefine = false
+        @expected = []
+    end
+
+    # Make any necessary changes to the token and/or value.
+    def munge_token(token, value)
+        @line += 1 if token.incr_line
+
+        skip() if token.skip_text
+
+        return if token.skip
+
+        token, value = token.convert(self, value) if token.respond_to?(:convert)
+
+        return unless token
+
+        return token, value
+    end
+
+    # Go up one in the namespace.
+    def namepop
+        @namestack.pop
+    end
+
+    # Collect the current namespace.
+    def namespace
+        @namestack.join("::")
+    end
+
+    # This value might have :: in it, but we don't care -- it'll be
+    # handled normally when joining, and when popping we want to pop
+    # this full value, however long the namespace is.
+    def namestack(value)
+        @namestack << value
+    end
+
+    def rest
+        @scanner.rest
+    end
+
+    # this is the heart of the lexer
+    def scan
+        #Puppet.debug("entering scan")
+        raise Puppet::LexError.new("Invalid or empty string") unless @scanner
+
+        # Skip any initial whitespace.
+        skip()
+
+        until @scanner.eos? do
+            yielded = false
+            matched_token, value = find_token
+
+            # error out if we didn't match anything at all
+            if matched_token.nil?
+                nword = nil
+                # Try to pull a 'word' out of the remaining string.
+                if @scanner.rest =~ /^(\S+)/
+                    nword = $1
+                elsif @scanner.rest =~ /^(\s+)/
+                    nword = $1
+                else
+                    nword = @scanner.rest
+                end
+                raise "Could not match '%s'" % nword
+            end
+
+            final_token, value = munge_token(matched_token, value)
+
+            next unless final_token
+
+            if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
+                @expected << match
+            elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
+                @expected.pop
+            end
+
+            yield [final_token.name, value]
+
+            if @previous_token
+                namestack(value) if @previous_token.name == :CLASS
+
+                if @previous_token.name == :DEFINE
+                    if indefine?
+                        msg = "Cannot nest definition %s inside %s" % [value, @indefine]
+                        self.indefine = false
+                        raise Puppet::ParseError, msg
+                    end
+
+                    @indefine = value
+                end
+            end
+
+            @previous_token = final_token
+            skip()
+        end
+        @scanner = nil
+
+        # This indicates that we're done parsing.
+        yield [false,false]
+    end
+
+    # Skip any skipchars in our remaining string.
+    def skip
+        @scanner.skip(@skip)
+    end
+
+    # we've encountered an opening quote...
+    # slurp in the rest of the string and return it
+    def slurpstring(quote)
+        # we search for the next quote that isn't preceded by a
+        # backslash; the caret is there to match empty strings
+        str = @scanner.scan_until(/([^\\]|^)#{quote}/)
+        if str.nil?
+            raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
+                [self.last,self.rest])
+        else
+            str.sub!(/#{quote}\Z/,"")
+            str.gsub!(/\\#{quote}/,quote)
+        end
+
+        return str
+    end
+
+    # just parse a string, not a whole file
+    def string=(string)
+        @scanner = StringScanner.new(string)
+    end
+end
diff --git a/lib/puppet/parser/parser_support.rb b/lib/puppet/parser/parser_support.rb
index acf3c9f7c..b543cd3ec 100644
--- a/lib/puppet/parser/parser_support.rb
+++ b/lib/puppet/parser/parser_support.rb
@@ -47,11 +47,8 @@ class Puppet::Parser::Parser
 
     # Create an AST object, and automatically add the file and line information if
     # available.
-    def ast(klass, hash = nil)
-        hash ||= {}
-        unless hash.include?(:line)
-            hash[:line] = @lexer.line
-        end
+    def ast(klass, hash = {})
+        hash[:line] = @lexer.line unless hash.include?(:line)
 
         unless hash.include?(:file)
             if file = @lexer.file
diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
new file mode 100755
index 000000000..cddbef1ed
--- /dev/null
+++ b/spec/unit/parser/lexer.rb
@@ -0,0 +1,465 @@
+#!/usr/bin/env ruby
+
+require File.dirname(__FILE__) + '/../../spec_helper'
+
+require 'puppet/parser/lexer'
+
+describe Puppet::Parser::Lexer::Token do
+    before do
+        @token = Puppet::Parser::Lexer::Token.new(%r{something}, :NAME)
+    end
+
+    [:regex, :name, :string, :skip, :incr_line, :skip_text].each do |param|
+        it "should have a #{param.to_s} reader" do
+            @token.should respond_to?(param)
+        end
+
+        it "should have a #{param.to_s} writer" do
+            @token.should respond_to?(param.to_s + "=")
+        end
+    end
+end
+
+describe Puppet::Parser::Lexer::Token, "when initializing" do
+    it "should create a regex if the first argument is a string" do
+        Puppet::Parser::Lexer::Token.new("something", :NAME).regex.should == %r{something}
+    end
+
+    it "should set the string if the first argument is one" do
+        Puppet::Parser::Lexer::Token.new("something", :NAME).string.should == "something"
+    end
+
+    it "should set the regex if the first argument is one" do
+        Puppet::Parser::Lexer::Token.new(%r{something}, :NAME).regex.should == %r{something}
+    end
+end
+
+describe Puppet::Parser::Lexer::TokenList do
+    before do
+        @list = Puppet::Parser::Lexer::TokenList.new
+    end
+
+    it "should have a method for retrieving tokens by the name" do
+        token = @list.add_token :name, "whatever"
+        @list[:name].should equal(token)
+    end
+
+    it "should have a method for retrieving string tokens by the string" do
+        token = @list.add_token :name, "whatever"
+        @list.lookup("whatever").should equal(token)
+    end
+
+    it "should add tokens to the list when directed" do
+        token = @list.add_token :name, "whatever"
+        @list[:name].should equal(token)
+    end
+
+    it "should have a method for adding multiple tokens at once" do
+        @list.add_tokens "whatever" => :name, "foo" => :bar
+        @list[:name].should_not be_nil
+        @list[:bar].should_not be_nil
+    end
+
+    it "should fail to add tokens sharing a name with an existing token" do
+        @list.add_token :name, "whatever"
+        lambda { @list.add_token :name, "whatever" }.should raise_error(ArgumentError)
+    end
+
+    it "should set provided options on tokens being added" do
+        token = @list.add_token :name, "whatever", :skip_text => true
+        token.skip_text.should == true
+    end
+
+    it "should define any provided blocks as a :convert method" do
+        token = @list.add_token(:name, "whatever") do "foo" end
+        token.convert.should == "foo"
+    end
+
+    it "should store all string tokens in the :string_tokens list" do
+        one = @list.add_token(:name, "1")
+        @list.string_tokens.should be_include(one)
+    end
+
+    it "should store all regex tokens in the :regex_tokens list" do
+        one = @list.add_token(:name, %r{one})
+        @list.regex_tokens.should be_include(one)
+    end
+
+    it "should not store string tokens in the :regex_tokens list" do
+        one = @list.add_token(:name, "1")
+        @list.regex_tokens.should_not be_include(one)
+    end
+
+    it "should not store regex tokens in the :string_tokens list" do
+        one = @list.add_token(:name, %r{one})
+        @list.string_tokens.should_not be_include(one)
+    end
+
+    it "should sort the string tokens inversely by length when asked" do
+        one = @list.add_token(:name, "1")
+        two = @list.add_token(:other, "12")
+        @list.sort_tokens
+        @list.string_tokens.should == [two, one]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS do
+    before do
+        @lexer = Puppet::Parser::Lexer.new()
+    end
+
+    {
+        :LBRACK => '[',
+        :RBRACK => ']',
+        :LBRACE => '{',
+        :RBRACE => '}',
+        :LPAREN => '(',
+        :RPAREN => ')',
+        :EQUALS => '=',
+        :ISEQUAL => '==',
+        :GREATEREQUAL => '>=',
+        :GREATERTHAN => '>',
+        :LESSTHAN => '<',
+        :LESSEQUAL => '<=',
+        :NOTEQUAL => '!=',
+        :NOT => '!',
+        :COMMA => ',',
+        :DOT => '.',
+        :COLON => ':',
+        :AT => '@',
+        :LLCOLLECT => '<<|',
+        :RRCOLLECT => '|>>',
+        :LCOLLECT => '<|',
+        :RCOLLECT => '|>',
+        :SEMIC => ';',
+        :QMARK => '?',
+        :BACKSLASH => '\\',
+        :FARROW => '=>',
+        :PARROW => '+>'
+    }.each do |name, string|
+        it "should have a token named #{name.to_s}" do
+            Puppet::Parser::Lexer::TOKENS[name].should_not be_nil
+        end
+
+        it "should match '#{string}' for the token #{name.to_s}" do
+            Puppet::Parser::Lexer::TOKENS[name].string.should == string
+        end
+    end
+
+    {
+        "case" => :CASE,
+        "class" => :CLASS,
+        "default" => :DEFAULT,
+        "define" => :DEFINE,
+        "import" => :IMPORT,
+        "if" => :IF,
+        "elsif" => :ELSIF,
+        "else" => :ELSE,
+        "inherits" => :INHERITS,
+        "node" => :NODE,
+        "and" => :AND,
+        "or" => :OR,
+        "undef" => :UNDEF,
+        "false" => :FALSE,
+        "true" => :TRUE
+    }.each do |string, name|
+        it "should have a keyword named #{name.to_s}" do
+            Puppet::Parser::Lexer::KEYWORDS[name].should_not be_nil
+        end
+
+        it "should have the keyword for #{name.to_s} set to #{string}" do
+            Puppet::Parser::Lexer::KEYWORDS[name].string.should == string
+        end
+    end
+
+    # These tokens' strings don't matter, just that the tokens exist.
+    [:DQTEXT, :SQTEXT, :BOOLEAN, :NAME, :NUMBER, :COMMENT, :RETURN, :SQUOTE, :DQUOTE, :VARIABLE].each do |name|
+        it "should have a token named #{name.to_s}" do
+            Puppet::Parser::Lexer::TOKENS[name].should_not be_nil
+        end
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:NAME] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:NAME] }
+
+    it "should match against lower-case alpha-numeric terms" do
+        @token.regex.should =~ "one-two"
+    end
+
+    it "should return itself and the value if the matched term is not a keyword" do
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(nil)
+        @token.convert(stub("lexer"), "myval").should == [Puppet::Parser::Lexer::TOKENS[:NAME], "myval"]
+    end
+
+    it "should return the keyword token and the value if the matched term is a keyword" do
+        keyword = stub 'keyword', :name => :testing
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
+        @token.convert(stub("lexer"), "myval").should == [keyword, "myval"]
+    end
+
+    it "should return the BOOLEAN token and 'true' if the matched term is the string 'true'" do
+        keyword = stub 'keyword', :name => :TRUE
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
+        @token.convert(stub('lexer'), "true").should == [Puppet::Parser::Lexer::TOKENS[:BOOLEAN], true]
+    end
+
+    it "should return the BOOLEAN token and 'false' if the matched term is the string 'false'" do
+        keyword = stub 'keyword', :name => :FALSE
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
+        @token.convert(stub('lexer'), "false").should == [Puppet::Parser::Lexer::TOKENS[:BOOLEAN], false]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:NUMBER] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:NUMBER] }
+
+    it "should match against numeric terms" do
+        @token.regex.should =~ "2982383139"
+    end
+
+    it "should return the NAME token and the value" do
+        @token.convert(stub("lexer"), "myval").should == [Puppet::Parser::Lexer::TOKENS[:NAME], "myval"]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:COMMENT] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:COMMENT] }
+
+    it "should match against lines starting with '#'" do
+        @token.regex.should =~ "# this is a comment"
+    end
+
+    it "should be marked to get skipped" do
+        @token.skip?.should be_true
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:RETURN] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:RETURN] }
+
+    it "should match against carriage returns" do
+        @token.regex.should =~ "\n"
+    end
+
+    it "should be marked to initiate text skipping" do
+        @token.skip_text.should be_true
+    end
+
+    it "should be marked to increment the line" do
+        @token.incr_line.should be_true
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:SQUOTE] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:SQUOTE] }
+
+    it "should match against single quotes" do
+        @token.regex.should =~ "'"
+    end
+
+    it "should slurp the rest of the quoted string" do
+        lexer = stub("lexer")
+        lexer.expects(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval")
+    end
+
+    it "should return the SQTEXT token with the slurped string" do
+        lexer = stub("lexer")
+        lexer.stubs(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval").should == [Puppet::Parser::Lexer::TOKENS[:SQTEXT], "otherval"]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:DQUOTE] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:DQUOTE] }
+
+    it "should match against single quotes" do
+        @token.regex.should =~ '"'
+    end
+
+    it "should slurp the rest of the quoted string" do
+        lexer = stub("lexer")
+        lexer.expects(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval")
+    end
+
+    it "should return the DQTEXT token with the slurped string" do
+        lexer = stub("lexer")
+        lexer.stubs(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval").should == [Puppet::Parser::Lexer::TOKENS[:DQTEXT], "otherval"]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:VARIABLE] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:VARIABLE] }
+
+    it "should match against alpha words prefixed with '$'" do
+        @token.regex.should =~ '$this_var'
+    end
+
+    it "should return the VARIABLE token and the variable name stripped of the '$'" do
+        @token.convert(stub("lexer"), "$myval").should == [Puppet::Parser::Lexer::TOKENS[:VARIABLE], "myval"]
+    end
+end
+
+# FIXME: We need to rewrite all of these tests, but I just don't want to take the time right now.
+describe "Puppet::Parser::Lexer in the old tests" do
+    before { @lexer = Puppet::Parser::Lexer.new }
+
+    it "should do simple lexing" do
+        strings = {
+%q{\\} => [[:BACKSLASH,"\\"],[false,false]],
+%q{simplest scanner test} => [[:NAME,"simplest"],[:NAME,"scanner"],[:NAME,"test"],[false,false]],
+%q{returned scanner test
+} => [[:NAME,"returned"],[:NAME,"scanner"],[:NAME,"test"],[false,false]]
+        }
+        strings.each { |str,ary|
+            @lexer.string = str
+            @lexer.fullscan().should == ary
+        }
+    end
+
+    it "should correctly lex quoted strings" do
+        strings = {
+%q{a simple "scanner" test
+} => [[:NAME,"a"],[:NAME,"simple"],[:DQTEXT,"scanner"],[:NAME,"test"],[false,false]],
+%q{a simple 'single quote scanner' test
+} => [[:NAME,"a"],[:NAME,"simple"],[:SQTEXT,"single quote scanner"],[:NAME,"test"],[false,false]],
+%q{a harder 'a $b \c"'
+} => [[:NAME,"a"],[:NAME,"harder"],[:SQTEXT,'a $b \c"'],[false,false]],
+%q{a harder "scanner test"
+} => [[:NAME,"a"],[:NAME,"harder"],[:DQTEXT,"scanner test"],[false,false]],
+%q{a hardest "scanner \"test\""
+} => [[:NAME,"a"],[:NAME,"hardest"],[:DQTEXT,'scanner "test"'],[false,false]],
+%q{a hardestest "scanner \"test\"
+"
+} => [[:NAME,"a"],[:NAME,"hardestest"],[:DQTEXT,'scanner "test"
+'],[false,false]],
+%q{function("call")} => [[:NAME,"function"],[:LPAREN,"("],[:DQTEXT,'call'],[:RPAREN,")"],[false,false]]
+        }
+        strings.each { |str,array|
+            @lexer.string = str
+            @lexer.fullscan().should == array
+        }
+    end
+
+    it "should fail usefully" do
+        strings = %w{
+            ^
+        }
+        strings.each { |str|
+            @lexer.string = str
+            lambda { @lexer.fullscan() }.should raise_error(RuntimeError)
+        }
+    end
+
+    it "should fail if the string is not set" do
+        lambda { @lexer.fullscan() }.should raise_error(Puppet::LexError)
+    end
+
+    it "should correctly identify keywords" do
+        @lexer.string = "case"
+        @lexer.fullscan.should == [[:CASE, "case"], [false, false]]
+    end
+
+    it "should correctly match strings" do
+        names = %w{this is a bunch of names}
+        types = %w{Many Different Words A Word}
+        words = %w{differently Cased words A a}
+
+        names.each { |t|
+            @lexer.string = t
+            @lexer.fullscan.should == [[:NAME,t],[false,false]]
+        }
+        types.each { |t|
+            @lexer.string = t
+            @lexer.fullscan.should == [[:CLASSREF,t],[false,false]]
+        }
+    end
+
+    it "should correctly parse empty strings" do
+        bit = '$var = ""'
+
+        @lexer.string = bit
+
+        lambda { @lexer.fullscan }.should_not raise_error
+    end
+
+    it "should correctly parse virtual resources" do
+        string = "@type {"
+
+        @lexer.string = string
+
+        @lexer.fullscan.should == [[:AT, "@"], [:NAME, "type"], [:LBRACE, "{"], [false,false]]
+    end
+
+    it "should correctly deal with namespaces" do
+        @lexer.string = %{class myclass}
+
+        @lexer.fullscan
+
+        @lexer.namespace.should == "myclass"
+
+        @lexer.namepop
+
+        @lexer.namespace.should == ""
+
+        @lexer.string = "class base { class sub { class more"
+
+        @lexer.fullscan
+
+        @lexer.namespace.should == "base::sub::more"
+
+        @lexer.namepop
+
+        @lexer.namespace.should == "base::sub"
+    end
+
+    it "should correctly handle fully qualified names" do
+        @lexer.string = "class base { class sub::more {"
+
+        @lexer.fullscan
+
+        @lexer.namespace.should == "base::sub::more"
+
+        @lexer.namepop
+
+        @lexer.namespace.should == "base"
+    end
+
+    it "should correctly lex variables" do
+        ["$variable", "$::variable", "$qualified::variable", "$further::qualified::variable"].each do |string|
+            @lexer.string = string
+
+            @lexer.scan do |t, s|
+                t.should == :VARIABLE
+                string.sub(/^\$/, '').should == s
+                break
+            end
+        end
+    end
+
+    # #774
+    it "should correctly parse the CLASSREF token" do
+        string = ["Foo", "::Foo","Foo::Bar","::Foo::Bar"]
+
+        string.each do |foo|
+            @lexer.string = foo
+            @lexer.fullscan[0].should == [:CLASSREF, foo]
+        end
+    end
+end
+
+require 'puppettest/support/utils'
+describe "Puppet::Parser::Lexer in the old tests when lexing example files" do
+    extend PuppetTest
+    extend PuppetTest::Support::Utils
+    textfiles() do |file|
+        it "should correctly lex #{file}" do
+            lexer = Puppet::Parser::Lexer.new()
+            lexer.file = file
+            lambda { lexer.fullscan() }.should_not raise_error
+        end
+    end
+end
diff --git a/test/language/lexer.rb b/test/language/lexer.rb
deleted file mode 100755
index e09828d51..000000000
--- a/test/language/lexer.rb
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/usr/bin/env ruby
-
-require File.dirname(__FILE__) + '/../lib/puppettest'
-
-require 'puppet'
-require 'puppet/parser/lexer'
-require 'puppettest'
-
-#%q{service("telnet") = \{
-#    port => "23",
-#    protocol => "tcp",
-#    name => "telnet",
-#\}
-#} => [[:NAME, "service"], [:LPAREN, "("], [:DQUOTE, "\""], [:NAME, "telnet"], [:DQUOTE, "\""], [:RPAREN, ")"], [:EQUALS, "="], [:lbrace, "{"], [:NAME, "port"], [:FARROW, "=>"], [:DQUOTE, "\""], [:NAME, "23"], [:DQUOTE, "\""], [:COMMA, ","], [:NAME, "protocol"], [:FARROW, "=>"], [:DQUOTE, "\""], [:NAME, "tcp"], [:DQUOTE, "\""], [:COMMA, ","], [:NAME, "name"], [:FARROW, "=>"], [:DQUOTE, "\""], [:NAME, "telnet"], [:DQUOTE, "\""], [:COMMA, ","], [:RBRACE, "}"]]
-
-class TestLexer < Test::Unit::TestCase
-    include PuppetTest
-    def setup
-        super
-        mklexer
-    end
-
-    def mklexer
-        @lexer = Puppet::Parser::Lexer.new()
-    end
-
-    def test_simple_lex
-        strings = {
-%q{\\} => [[:BACKSLASH,"\\"],[false,false]],
-%q{simplest scanner test} => [[:NAME,"simplest"],[:NAME,"scanner"],[:NAME,"test"],[false,false]],
-%q{returned scanner test
-} => [[:NAME,"returned"],[:NAME,"scanner"],[:NAME,"test"],[false,false]]
-        }
-        strings.each { |str,ary|
-            @lexer.string = str
-            assert_equal(
-                ary,
-                @lexer.fullscan()
-            )
-        }
-    end
-
-    def test_quoted_strings
-        strings = {
-%q{a simple "scanner" test
-} => [[:NAME,"a"],[:NAME,"simple"],[:DQTEXT,"scanner"],[:NAME,"test"],[false,false]],
-%q{a simple 'single quote scanner' test
-} => [[:NAME,"a"],[:NAME,"simple"],[:SQTEXT,"single quote scanner"],[:NAME,"test"],[false,false]],
-%q{a harder 'a $b \c"'
-} => [[:NAME,"a"],[:NAME,"harder"],[:SQTEXT,'a $b \c"'],[false,false]],
-%q{a harder "scanner test"
-} => [[:NAME,"a"],[:NAME,"harder"],[:DQTEXT,"scanner test"],[false,false]],
-%q{a hardest "scanner \"test\""
-} => [[:NAME,"a"],[:NAME,"hardest"],[:DQTEXT,'scanner "test"'],[false,false]],
-%q{a hardestest "scanner \"test\"
-"
-} => [[:NAME,"a"],[:NAME,"hardestest"],[:DQTEXT,'scanner "test"
-'],[false,false]],
-%q{function("call")} => [[:NAME,"function"],[:LPAREN,"("],[:DQTEXT,'call'],[:RPAREN,")"],[false,false]]
-        }
-        strings.each { |str,array|
-            @lexer.string = str
-            assert_equal(
-                array,
-                @lexer.fullscan()
-            )
-        }
-    end
-
-    def test_errors
-        strings = %w{
-            ^
-        }
-        strings.each { |str|
-            @lexer.string = str
-            assert_raise(RuntimeError) {
-                @lexer.fullscan()
-            }
-        }
-    end
-
-    def test_more_error
-        assert_raise(TypeError) {
-            @lexer.fullscan()
-        }
-    end
-
-    def test_files
-        textfiles() { |file|
-            lexer = Puppet::Parser::Lexer.new()
-            lexer.file = file
-            assert_nothing_raised("Failed to lex %s" % file) {
-                lexer.fullscan()
-            }
-            Puppet::Type.allclear
-        }
-    end
-
-    def test_strings
-        names = %w{this is a bunch of names}
-        types = %w{Many Different Words A Word}
-        words = %w{differently Cased words A a}
-
-        names.each { |t|
-            @lexer.string = t
-            assert_equal(
-                [[:NAME,t],[false,false]],
-                @lexer.fullscan
-            )
-        }
-        types.each { |t|
-            @lexer.string = t
-            assert_equal(
-                [[:CLASSREF,t],[false,false]],
-                @lexer.fullscan
-            )
-        }
-    end
-
-    def test_emptystring
-        bit = '$var = ""'
-
-        assert_nothing_raised {
-            @lexer.string = bit
-        }
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-    end
-
-    def test_collectlexing
-        {"@" => :AT, "<|" => :LCOLLECT, "|>" => :RCOLLECT}.each do |string, token|
-            assert_nothing_raised {
-                @lexer.string = string
-            }
-
-            ret = nil
-            assert_nothing_raised {
-                ret = @lexer.fullscan
-            }
-
-            assert_equal([[token, string],[false, false]], ret)
-        end
-    end
-
-    def test_collectabletype
-        string = "@type {"
-
-        assert_nothing_raised {
-            @lexer.string = string
-        }
-
-        ret = nil
-        assert_nothing_raised {
-            ret = @lexer.fullscan
-        }
-
-        assert_equal([[:AT, "@"], [:NAME, "type"], [:LBRACE, "{"], [false,false]],ret)
-    end
-
-    def test_namespace
-        @lexer.string = %{class myclass}
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-
-        assert_equal("myclass", @lexer.namespace)
-
-        assert_nothing_raised do
-            @lexer.namepop
-        end
-
-        assert_equal("", @lexer.namespace)
-
-        @lexer.string = "class base { class sub { class more"
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-
-        assert_equal("base::sub::more", @lexer.namespace)
-
-        assert_nothing_raised do
-            @lexer.namepop
-        end
-
-        assert_equal("base::sub", @lexer.namespace)
-
-        # Now try it with some fq names
-        mklexer
-
-        @lexer.string = "class base { class sub::more {"
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-
-        assert_equal("base::sub::more", @lexer.namespace)
-
-        assert_nothing_raised do
-            @lexer.namepop
-        end
-
-        assert_equal("base", @lexer.namespace)
-    end
-
-    def test_indefine
-        @lexer.string = %{define me}
-
-        assert_nothing_raised {
-            @lexer.scan { |t,s| }
-        }
-
-        assert(@lexer.indefine?, "Lexer not considered in define")
-
-        # Now make sure we throw an error when trying to nest defines.
-        assert_raise(Puppet::ParseError) do
-            @lexer.string = %{define another}
-            @lexer.scan { |t,s| }
-        end
-
-        assert_nothing_raised do
-            @lexer.indefine = false
-        end
-
-        assert(! @lexer.indefine?, "Lexer still considered in define")
-    end
-
-    # Make sure the different qualified variables work.
-    def test_variable
-        ["$variable", "$::variable", "$qualified::variable", "$further::qualified::variable"].each do |string|
-            @lexer.string = string
-
-            assert_nothing_raised("Could not lex %s" % string) do
-                @lexer.scan do |t, s|
-                    assert_equal(:VARIABLE, t, "did not get variable as token")
-                    assert_equal(string.sub(/^\$/, ''), s, "did not get correct string back")
-                    break
-                end
-            end
-        end
-    end
-
-    # Make sure the expected stack works as it should
-    def test_expected
-        @lexer.string = "[a{"
-        expected = @lexer.instance_variable_get("@expected")
-        @lexer.scan {}
-        assert_equal("}", @lexer.expected, "expected value is wrong")
-
-        @lexer.string = "}"
-        @lexer.scan {}
-        assert_equal("]", @lexer.expected, "expected value is wrong after pop")
-    end
-
-    # #774
-    def test_classref_token
-        string = ["Foo", "::Foo","Foo::Bar","::Foo::Bar"]
-
-        string.each do |foo|
-            assert_nothing_raised {
-                @lexer.string = foo
-            }
-
-            ret = nil
-            assert_nothing_raised {
-                ret = @lexer.fullscan
-            }
-
-            assert_equal([:CLASSREF, foo],ret[0], "Did not correctly tokenize '%s'" % foo)
-        end
-    end
-end
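The new spec file doubles as usage documentation for the lexer's public API. A short driver, assuming the post-patch API shown in the diff above (run from a checkout of this era so puppet/parser/lexer loads); the expected output shape follows the specs, not a captured run:

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new

# fullscan returns [token_name, value] pairs, terminated by [false, false].
lexer.string = 'class base { $greeting = "hello" }'
lexer.fullscan.each do |name, value|
    puts "%-10s %s" % [name.inspect, value.inspect]
end
# Expected shape, per the specs above:
#   :CLASS     "class"
#   :NAME      "base"
#   :LBRACE    "{"
#   :VARIABLE  "greeting"
#   :EQUALS    "="
#   :DQTEXT    "hello"
#   :RBRACE    "}"
#   false      false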