-rwxr-xr-x  ext/puppet-test                     |   4
-rw-r--r--  lib/puppet/parser/lexer.rb          | 671
-rw-r--r--  lib/puppet/parser/parser_support.rb |   7
-rwxr-xr-x  spec/unit/parser/lexer.rb           | 465
-rwxr-xr-x  test/language/lexer.rb              | 276
5 files changed, 857 insertions(+), 566 deletions(-)
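For orientation before reading the diff: the commit replaces the lexer's single @@tokens regex hash with Token objects held in a TokenList, where string tokens are matched by hash lookup, regex tokens by longest match, and each token may carry a conversion block. The following is a minimal, self-contained sketch of that pattern; the names (Token struct, TinyTokenTable, next_token) are illustrative, not Puppet's actual API, which appears in the diff below.

require 'strscan'

# Sketch of the token-table pattern: exact-string tokens live in a hash,
# regex tokens are scanned longest-match-first, and a token may carry a
# conversion block that rewrites (name, value) after a match.
Token = Struct.new(:name, :string, :regex, :convert)

class TinyTokenTable
    def initialize
        @by_string = {}   # exact-string tokens, looked up by peeked text
        @regexes = []     # regex tokens, tried in order, longest match wins
    end

    def add(name, matcher, &convert)
        if matcher.is_a?(String)
            token = Token.new(name, matcher, Regexp.new(Regexp.escape(matcher)), convert)
            @by_string[matcher] = token
        else
            token = Token.new(name, nil, matcher, convert)
            @regexes << token
        end
        token
    end

    # Try regex tokens first (keeping the longest match), then fall back to
    # exact-string lookup -- the same order the patched lexer uses.  The real
    # patch peeks at lengths 3, 2, 1 so multi-char operators like '<<|' work;
    # this sketch only peeks one char for brevity.
    def next_token(scanner)
        best, value = nil, ""
        @regexes.each do |t|
            if (len = scanner.match?(t.regex)) && len > value.length
                best, value = t, scanner.peek(len)
            end
        end
        best ||= @by_string[scanner.peek(1)]
        return nil unless best
        value = scanner.scan(best.regex)
        best.convert ? best.convert.call(value) : [best.name, value]
    end
end

table = TinyTokenTable.new
table.add(:LBRACE, "{")
table.add(:NAME, %r{[a-z][-\w]*}) { |v| [v == "true" ? :BOOLEAN : :NAME, v] }

p table.next_token(StringScanner.new("foo"))   # => [:NAME, "foo"]
p table.next_token(StringScanner.new("{"))     # => [:LBRACE, "{"]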
diff --git a/ext/puppet-test b/ext/puppet-test
index 0f33e0cbb..e18dd1c57 100755
--- a/ext/puppet-test
+++ b/ext/puppet-test
@@ -211,11 +211,11 @@ end
 
 Suite.new :parser, "Manifest parsing" do
     def prepare
-        @parser = Puppet::Parser::Parser.new(:environment => Puppet[:environment])
-        @parser.file = Puppet[:manifest]
     end
 
     newtest :parse, "Parsed files" do
+        @parser = Puppet::Parser::Parser.new(:environment => Puppet[:environment])
+        @parser.file = Puppet[:manifest]
         @parser.parse
     end
 end
diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index 086d82c09..6661650ba 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -1,4 +1,3 @@
-
 # the scanner/lexer
 
 require 'strscan'
@@ -7,326 +6,432 @@ require 'puppet'
 module Puppet
     class LexError < RuntimeError; end
-    module Parser
-        #---------------------------------------------------------------
-        class Lexer
-            attr_reader :line, :last, :file
-
-            attr_accessor :indefine
-
-            #%r{\w+} => :WORD,
-            @@tokens = {
-                %r{#.*} => :COMMENT,
-                %r{\[} => :LBRACK,
-                %r{\]} => :RBRACK,
-                %r{\{} => :LBRACE,
-                %r{\}} => :RBRACE,
-                %r{\(} => :LPAREN,
-                %r{\)} => :RPAREN,
-                %r{\"} => :DQUOTE,
-                %r{\n} => :RETURN,
-                %r{\'} => :SQUOTE,
-                %r{=} => :EQUALS,
-                %r{==} => :ISEQUAL,
-                %r{>=} => :GREATEREQUAL,
-                %r{>} => :GREATERTHAN,
-                %r{<} => :LESSTHAN,
-                %r{<=} => :LESSEQUAL,
-                %r{!=} => :NOTEQUAL,
-                %r{!} => :NOT,
-                %r{,} => :COMMA,
-                %r{\.} => :DOT,
-                %r{:} => :COLON,
-                %r{@} => :AT,
-                %r{<<\|} => :LLCOLLECT,
-                %r{\|>>} => :RRCOLLECT,
-                %r{<\|} => :LCOLLECT,
-                %r{\|>} => :RCOLLECT,
-                %r{;} => :SEMIC,
-                %r{\?} => :QMARK,
-                %r{\\} => :BACKSLASH,
-                %r{=>} => :FARROW,
-                %r{\+>} => :PARROW,
-                %r{[a-z][-\w]*} => :NAME,
-                %r{([a-z][-\w]*::)+[a-z][-\w]*} => :CLASSNAME,
-                %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF,
-                %r{[0-9]+} => :NUMBER,
-                %r{\$(\w*::)*\w+} => :VARIABLE
-            }
-
-            @@pairs = {
-                "{" => "}",
-                "(" => ")",
-                "[" => "]",
-                "<|" => "|>",
-                "<<|" => "|>>"
-            }
-
-            @@reverse_pairs = @@pairs.inject({}) { |hash, pair| hash[pair[1]] = pair[0]; hash }
-
-            @@keywords = {
-                "case" => :CASE,
-                "class" => :CLASS,
-                "default" => :DEFAULT,
-                "define" => :DEFINE,
-                "false" => :BOOLEAN,
-                "import" => :IMPORT,
-                "if" => :IF,
-                "elsif" => :ELSIF,
-                "else" => :ELSE,
-                "inherits" => :INHERITS,
-                "node" => :NODE,
-                "true" => :BOOLEAN,
-                "and" => :AND,
-                "or" => :OR,
-                "undef" => :UNDEF
-            }
-
-            def clear
-                initvars
-            end
-
-            def expected
-                if @expected.empty?
-                    nil
-                else
-                    token = @expected[-1]
-                    @@tokens.each do |value, name|
-                        if token == name
-                            return value
-                        end
-                    end
-                    return token
-                end
-            end
-
-            # scan the whole file
-            # basically just used for testing
-            def fullscan
-                array = []
-
-                self.scan { |token,str|
-                    # Ignore any definition nesting problems
-                    @indefine = false
-                    #Puppet.debug("got token '%s' => '%s'" % [token,str])
-                    if token.nil?
-                        return array
-                    else
-                        array.push([token,str])
-                    end
-                }
-                return array
-            end
-
-            # this is probably pretty damned inefficient...
-            # it'd be nice not to have to load the whole file first...
-            def file=(file)
-                @file = file
-                @line = 1
-                File.open(file) { |of|
-                    str = ""
-                    of.each { |line| str += line }
-                    @scanner = StringScanner.new(str)
-                }
-            end
-
-            def indefine?
-                if defined? @indefine
-                    @indefine
-                else
-                    false
-                end
-            end
-
-            def initialize
-                initvars()
-            end
-
-            def initvars
-                @line = 1
-                @last = ""
-                @lasttoken = nil
-                @scanner = nil
-                @file = nil
-                # AAARRGGGG! okay, regexes in ruby are bloody annoying
-                # no one else has "\n" =~ /\s/
-                @skip = %r{[ \t]+}
-
-                @namestack = []
-                @indefine = false
-
-                @expected = []
-            end
-
-            # Go up one in the namespace.
-            def namepop
-                @namestack.pop
-            end
-
-            # Collect the current namespace.
-            def namespace
-                @namestack.join("::")
-            end
-
-            # This value might have :: in it, but we don't care -- it'll be
-            # handled normally when joining, and when popping we want to pop
-            # this full value, however long the namespace is.
-            def namestack(value)
-                @namestack << value
-            end
-
-            def rest
-                @scanner.rest
-            end
-
-            # this is the heart of the lexer
-            def scan
-                #Puppet.debug("entering scan")
-                if @scanner.nil?
-                    raise TypeError.new("Invalid or empty string")
-                end
-
-                @scanner.skip(@skip)
-                until @scanner.eos? do
-                    yielded = false
-                    sendbreak = false # gah, this is a nasty hack
-                    stoken = nil
-                    sregex = nil
-                    value = ""
-
-                    # first find out which type of token we've got
-                    @@tokens.each { |regex,token|
-                        # we're just checking, which doesn't advance the scan
-                        # pointer
-                        tmp = @scanner.check(regex)
-                        if tmp.nil?
-                            #puppet.debug("did not match %s to '%s'" %
-                            #    [regex,@scanner.rest])
-                            next
-                        end
-
-                        # find the longest match
-                        if tmp.length > value.length
-                            value = tmp
-                            stoken = token
-                            sregex = regex
-                        else
-                            # we've already got a longer match
-                            next
-                        end
-                    }
-
-                    # error out if we didn't match anything at all
-                    if stoken.nil?
-                        nword = nil
-                        if @scanner.rest =~ /^(\S+)/
-                            nword = $1
-                        elsif @scanner.rest =~ /^(\s+)/
-                            nword = $1
-                        else
-                            nword = @scanner.rest
-                        end
-                        raise "Could not match '%s'" % nword
-                    end
-
-                    value = @scanner.scan(sregex)
-
-                    if value == ""
-                        raise "Didn't match regex on token %s" % stoken
-                    end
-
-                    # token-specific operations
-                    # if this gets much more complicated, it should
-                    # be moved up to where the tokens themselves are defined
-                    # which will get me about 75% of the way to a lexer generator
-                    ptoken = stoken
-                    case stoken
-                    when :NAME then
-                        wtoken = stoken
-                        # we're looking for keywords here
-                        if @@keywords.include?(value)
-                            wtoken = @@keywords[value]
-                            #Puppet.debug("token '%s'" % wtoken)
-                            if wtoken == :BOOLEAN
-                                value = eval(value)
-                            end
-                        end
-                        ptoken = wtoken
-                    when :NUMBER then
-                        ptoken = :NAME
-                    when :COMMENT then
-                        # just throw comments away
-                        next
-                    when :RETURN then
-                        @line += 1
-                        @scanner.skip(@skip)
-                        next
-                    when :SQUOTE then
-                        #Puppet.debug("searching '%s' after '%s'" % [self.rest,value])
-                        value = self.slurpstring(value)
-                        ptoken = :SQTEXT
-                        #Puppet.debug("got string '%s' => '%s'" % [:DQTEXT,value])
-                    when :DQUOTE then
-                        value = self.slurpstring(value)
-                        ptoken = :DQTEXT
-                    when :VARIABLE then
-                        value = value.sub(/^\$/, '')
-                    end
-
-                    if match = @@pairs[value] and ptoken != :DQUOTE and ptoken != :SQUOTE
-                        @expected << match
-                    elsif exp = @expected[-1] and exp == value and ptoken != :DQUOTE and ptoken != :SQUOTE
-                        @expected.pop
-                    end
-
-                    yield [ptoken, value]
-
-                    if @lasttoken == :CLASS
-                        namestack(value)
-                    end
-
-                    if @lasttoken == :DEFINE
-                        if indefine?
-                            msg = "Cannot nest definition %s inside %s" % [value, @indefine]
-                            self.indefine = false
-                            raise Puppet::ParseError, msg
-                        end
-
-                        @indefine = value
-                    end
-
-                    @last = value
-                    @lasttoken = ptoken
-
-                    @scanner.skip(@skip)
-                end
-                @scanner = nil
-                yield [false,false]
-            end
-
-            # we've encountered an opening quote...
-            # slurp in the rest of the string and return it
-            def slurpstring(quote)
-                # we search for the next quote that isn't preceded by a
-                # backslash; the caret is there to match empty strings
-                str = @scanner.scan_until(/([^\\]|^)#{quote}/)
-                if str.nil?
-                    raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
-                        [self.last,self.rest])
-                else
-                    str.sub!(/#{quote}\Z/,"")
-                    str.gsub!(/\\#{quote}/,quote)
-                end
-
-                return str
-            end
-
-            # just parse a string, not a whole file
-            def string=(string)
-                @scanner = StringScanner.new(string)
-            end
-        end
-        #---------------------------------------------------------------
-    end
-end
+end
+
+module Puppet::Parser; end
+
+class Puppet::Parser::Lexer
+    attr_reader :last, :file
+
+    attr_accessor :line, :indefine
+
+    # Our base token class.
+    class Token
+        attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text
+
+        def initialize(regex, name)
+            if regex.is_a?(String)
+                @name, @string = name, regex
+                @regex = Regexp.new(Regexp.escape(@string))
+            else
+                @name, @regex = name, regex
+            end
+        end
+
+        def skip?
+            self.skip
+        end
+
+        def to_s
+            "Lexer token %s" % @name.to_s
+        end
+    end
+
+    # Maintain a list of tokens.
+    class TokenList
+        attr_reader :regex_tokens, :string_tokens
+
+        def [](name)
+            @tokens[name]
+        end
+
+        # Create a new token.
+        def add_token(name, regex, options = {}, &block)
+            token = Token.new(regex, name)
+            raise(ArgumentError, "Token %s already exists" % name) if @tokens.include?(name)
+            @tokens[token.name] = token
+            if token.string
+                @string_tokens << token
+                @tokens_by_string[token.string] = token
+            else
+                @regex_tokens << token
+            end
+
+            options.each do |name, option|
+                token.send(name.to_s + "=", option)
+            end
+
+            token.meta_def(:convert, &block) if block_given?
+
+            token
+        end
+
+        def initialize
+            @tokens = {}
+            @regex_tokens = []
+            @string_tokens = []
+            @tokens_by_string = {}
+        end
+
+        # Look up a token by its value, rather than name.
+        def lookup(string)
+            @tokens_by_string[string]
+        end
+
+        # Define more tokens.
+        def add_tokens(hash)
+            hash.each do |regex, name|
+                add_token(name, regex)
+            end
+        end
+
+        # Sort our tokens by length, so we know once we match, we're done.
+        # This helps us avoid the O(n^2) nature of token matching.
+        def sort_tokens
+            @string_tokens.sort! { |a, b| b.string.length <=> a.string.length }
+        end
+    end
+
+    TOKENS = TokenList.new
+    TOKENS.add_tokens(
+        '[' => :LBRACK,
+        ']' => :RBRACK,
+        '{' => :LBRACE,
+        '}' => :RBRACE,
+        '(' => :LPAREN,
+        ')' => :RPAREN,
+        '=' => :EQUALS,
+        '==' => :ISEQUAL,
+        '>=' => :GREATEREQUAL,
+        '>' => :GREATERTHAN,
+        '<' => :LESSTHAN,
+        '<=' => :LESSEQUAL,
+        '!=' => :NOTEQUAL,
+        '!' => :NOT,
+        ',' => :COMMA,
+        '.' => :DOT,
+        ':' => :COLON,
+        '@' => :AT,
+        '<<|' => :LLCOLLECT,
+        '|>>' => :RRCOLLECT,
+        '<|' => :LCOLLECT,
+        '|>' => :RCOLLECT,
+        ';' => :SEMIC,
+        '?' => :QMARK,
+        '\\' => :BACKSLASH,
+        '=>' => :FARROW,
+        '+>' => :PARROW,
+        %r{([a-z][-\w]*::)+[a-z][-\w]*} => :CLASSNAME,
+        %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
+    )
+
+    TOKENS.add_tokens "Whatever" => :DQTEXT, "Nomatter" => :SQTEXT, "alsonomatter" => :BOOLEAN
+
+    TOKENS.add_token :NAME, %r{[a-z][-\w]*} do |lexer, value|
+        string_token = self
+        # we're looking for keywords here
+        if tmp = KEYWORDS.lookup(value)
+            string_token = tmp
+            if [:TRUE, :FALSE].include?(string_token.name)
+                value = eval(value)
+                string_token = TOKENS[:BOOLEAN]
+            end
+        end
+        [string_token, value]
+    end
+
+    TOKENS.add_token :NUMBER, %r{[0-9]+} do |lexer, value|
+        [TOKENS[:NAME], value]
+    end
+
+    TOKENS.add_token :COMMENT, %r{#.*}, :skip => true
+
+    TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true
+
+    TOKENS.add_token :SQUOTE, "'" do |lexer, value|
+        value = lexer.slurpstring(value)
+        [TOKENS[:SQTEXT], value]
+    end
+
+    TOKENS.add_token :DQUOTE, '"' do |lexer, value|
+        value = lexer.slurpstring(value)
+        [TOKENS[:DQTEXT], value]
+    end
+
+    TOKENS.add_token :VARIABLE, %r{\$(\w*::)*\w+} do |lexer, value|
+        value = value.sub(/^\$/, '')
+        [self, value]
+    end
+
+    TOKENS.sort_tokens
+
+    @@pairs = {
+        "{" => "}",
+        "(" => ")",
+        "[" => "]",
+        "<|" => "|>",
+        "<<|" => "|>>"
+    }
+
+    KEYWORDS = TokenList.new
+
+    KEYWORDS.add_tokens(
+        "case" => :CASE,
+        "class" => :CLASS,
+        "default" => :DEFAULT,
+        "define" => :DEFINE,
+        "import" => :IMPORT,
+        "if" => :IF,
+        "elsif" => :ELSIF,
+        "else" => :ELSE,
+        "inherits" => :INHERITS,
+        "node" => :NODE,
+        "and" => :AND,
+        "or" => :OR,
+        "undef" => :UNDEF,
+        "false" => :FALSE,
+        "true" => :TRUE
+    )
+
+    def clear
+        initvars
+    end
+
+    def expected
+        return nil if @expected.empty?
+        name = @expected[-1]
+        raise "Could not find expected token %s" % name unless token = TOKENS.lookup(name)
+
+        return token
+    end
+
+    # scan the whole file
+    # basically just used for testing
+    def fullscan
+        array = []
+
+        self.scan { |token, str|
+            # Ignore any definition nesting problems
+            @indefine = false
+            array.push([token,str])
+        }
+        return array
+    end
+
+    # this is probably pretty damned inefficient...
+    # it'd be nice not to have to load the whole file first...
+    def file=(file)
+        @file = file
+        @line = 1
+        File.open(file) { |of|
+            str = ""
+            of.each { |line| str += line }
+            @scanner = StringScanner.new(str)
+        }
+    end
+
+    def find_string_token
+        matched_token = value = nil
+
+        # We know our longest string token is three chars, so try each size
+        # in turn until we either match or run out of chars.  This way our
+        # worst-case is three tries, where it is otherwise the number of
+        # string chars we have.  Also, the lookups are optimized hash
+        # lookups, instead of regex scans.
+        [3, 2, 1].each do |i|
+            str = @scanner.peek(i)
+            if matched_token = TOKENS.lookup(str)
+                value = @scanner.scan(matched_token.regex)
+                break
+            end
+        end
+
+        return matched_token, value
+    end
+
+    # Find the next token that matches a regex.  We look for these first.
+    def find_regex_token
+        @regex += 1
+        matched_token = nil
+        value = ""
+        length = 0
+
+        # I tried optimizing based on the first char, but it had
+        # a slightly negative affect and was a good bit more complicated.
+        TOKENS.regex_tokens.each do |token|
+            next unless match_length = @scanner.match?(token.regex)
+
+            # We've found a longer match
+            if match_length > length
+                value = @scanner.scan(token.regex)
+                length = value.length
+                matched_token = token
+            end
+        end
+
+        return matched_token, value
+    end
+
+    # Find the next token, returning the string and the token.
+    def find_token
+        @find += 1
+        matched_token, value = find_regex_token
+
+        unless matched_token
+            matched_token, value = find_string_token
+        end
+
+        return matched_token, value
+    end
+
+    def indefine?
+        if defined? @indefine
+            @indefine
+        else
+            false
+        end
+    end
+
+    def initialize
+        @find = 0
+        @regex = 0
+        initvars()
+    end
+
+    def initvars
+        @line = 1
+        @previous_token = nil
+        @scanner = nil
+        @file = nil
+        # AAARRGGGG! okay, regexes in ruby are bloody annoying
+        # no one else has "\n" =~ /\s/
+        @skip = %r{[ \t]+}
+
+        @namestack = []
+        @indefine = false
+        @expected = []
+    end
+
+    # Make any necessary changes to the token and/or value.
+    def munge_token(token, value)
+        @line += 1 if token.incr_line
+
+        skip() if token.skip_text
+
+        return if token.skip
+
+        token, value = token.convert(self, value) if token.respond_to?(:convert)
+
+        return unless token
+
+        return token, value
+    end
+
+    # Go up one in the namespace.
+    def namepop
+        @namestack.pop
+    end
+
+    # Collect the current namespace.
+    def namespace
+        @namestack.join("::")
+    end
+
+    # This value might have :: in it, but we don't care -- it'll be
+    # handled normally when joining, and when popping we want to pop
+    # this full value, however long the namespace is.
+    def namestack(value)
+        @namestack << value
+    end
+
+    def rest
+        @scanner.rest
+    end
+
+    # this is the heart of the lexer
+    def scan
+        #Puppet.debug("entering scan")
+        raise Puppet::LexError.new("Invalid or empty string") unless @scanner
+
+        # Skip any initial whitespace.
+        skip()
+
+        until @scanner.eos? do
+            yielded = false
+            matched_token, value = find_token
+
+            # error out if we didn't match anything at all
+            if matched_token.nil?
+                nword = nil
+                # Try to pull a 'word' out of the remaining string.
+                if @scanner.rest =~ /^(\S+)/
+                    nword = $1
+                elsif @scanner.rest =~ /^(\s+)/
+                    nword = $1
+                else
+                    nword = @scanner.rest
+                end
+                raise "Could not match '%s'" % nword
+            end
+
+            final_token, value = munge_token(matched_token, value)
+
+            next unless final_token
+
+            if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
+                @expected << match
+            elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
+                @expected.pop
+            end
+
+            yield [final_token.name, value]
+
+            if @previous_token
+                namestack(value) if @previous_token.name == :CLASS
+
+                if @previous_token.name == :DEFINE
+                    if indefine?
+                        msg = "Cannot nest definition %s inside %s" % [value, @indefine]
+                        self.indefine = false
+                        raise Puppet::ParseError, msg
+                    end
+
+                    @indefine = value
+                end
+            end
+
+            @previous_token = final_token
+            skip()
+        end
+        @scanner = nil
+
+        # This indicates that we're done parsing.
+        yield [false,false]
+    end
+
+    # Skip any skipchars in our remaining string.
+    def skip
+        @scanner.skip(@skip)
+    end
+
+    # we've encountered an opening quote...
+    # slurp in the rest of the string and return it
+    def slurpstring(quote)
+        # we search for the next quote that isn't preceded by a
+        # backslash; the caret is there to match empty strings
+        str = @scanner.scan_until(/([^\\]|^)#{quote}/)
+        if str.nil?
+            raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
+                [self.last,self.rest])
+        else
+            str.sub!(/#{quote}\Z/,"")
+            str.gsub!(/\\#{quote}/,quote)
+        end
+
+        return str
+    end
+
+    # just parse a string, not a whole file
+    def string=(string)
+        @scanner = StringScanner.new(string)
+    end
+end
diff --git a/lib/puppet/parser/parser_support.rb b/lib/puppet/parser/parser_support.rb
index acf3c9f7c..b543cd3ec 100644
--- a/lib/puppet/parser/parser_support.rb
+++ b/lib/puppet/parser/parser_support.rb
@@ -47,11 +47,8 @@ class Puppet::Parser::Parser
 
     # Create an AST object, and automatically add the file and line information if
     # available.
-    def ast(klass, hash = nil)
-        hash ||= {}
-        unless hash.include?(:line)
-            hash[:line] = @lexer.line
-        end
+    def ast(klass, hash = {})
+        hash[:line] = @lexer.line unless hash.include?(:line)
 
         unless hash.include?(:file)
             if file = @lexer.file
diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
new file mode 100755
index 000000000..cddbef1ed
--- /dev/null
+++ b/spec/unit/parser/lexer.rb
@@ -0,0 +1,465 @@
+#!/usr/bin/env ruby
+
+require File.dirname(__FILE__) + '/../../spec_helper'
+
+require 'puppet/parser/lexer'
+
+describe Puppet::Parser::Lexer::Token do
+    before do
+        @token = Puppet::Parser::Lexer::Token.new(%r{something}, :NAME)
+    end
+
+    [:regex, :name, :string, :skip, :incr_line, :skip_text].each do |param|
+        it "should have a #{param.to_s} reader" do
+            @token.should respond_to?(param)
+        end
+
+        it "should have a #{param.to_s} writer" do
+            @token.should respond_to?(param.to_s + "=")
+        end
+    end
+end
+
+describe Puppet::Parser::Lexer::Token, "when initializing" do
+    it "should create a regex if the first argument is a string" do
+        Puppet::Parser::Lexer::Token.new("something", :NAME).regex.should == %r{something}
+    end
+
+    it "should set the string if the first argument is one" do
+        Puppet::Parser::Lexer::Token.new("something", :NAME).string.should == "something"
+    end
+
+    it "should set the regex if the first argument is one" do
+        Puppet::Parser::Lexer::Token.new(%r{something}, :NAME).regex.should == %r{something}
+    end
+end
+
+describe Puppet::Parser::Lexer::TokenList do
+    before do
+        @list = Puppet::Parser::Lexer::TokenList.new
+    end
+
+    it "should have a method for retrieving tokens by the name" do
+        token = @list.add_token :name, "whatever"
+        @list[:name].should equal(token)
+    end
+
+    it "should have a method for retrieving string tokens by the string" do
+        token = @list.add_token :name, "whatever"
+        @list.lookup("whatever").should equal(token)
+    end
+
+    it "should add tokens to the list when directed" do
+        token = @list.add_token :name, "whatever"
+        @list[:name].should equal(token)
+    end
+
+    it "should have a method for adding multiple tokens at once" do
+        @list.add_tokens "whatever" => :name, "foo" => :bar
+        @list[:name].should_not be_nil
+        @list[:bar].should_not be_nil
+    end
+
+    it "should fail to add tokens sharing a name with an existing token" do
+        @list.add_token :name, "whatever"
+        lambda { @list.add_token :name, "whatever" }.should raise_error(ArgumentError)
+    end
+
+    it "should set provided options on tokens being added" do
+        token = @list.add_token :name, "whatever", :skip_text => true
+        token.skip_text.should == true
+    end
+
+    it "should define any provided blocks as a :convert method" do
+        token = @list.add_token(:name, "whatever") do "foo" end
+        token.convert.should == "foo"
+    end
+
+    it "should store all string tokens in the :string_tokens list" do
+        one = @list.add_token(:name, "1")
+        @list.string_tokens.should be_include(one)
+    end
+
+    it "should store all regex tokens in the :regex_tokens list" do
+        one = @list.add_token(:name, %r{one})
+        @list.regex_tokens.should be_include(one)
+    end
+
+    it "should not store string tokens in the :regex_tokens list" do
+        one = @list.add_token(:name, "1")
+        @list.regex_tokens.should_not be_include(one)
+    end
+
+    it "should not store regex tokens in the :string_tokens list" do
+        one = @list.add_token(:name, %r{one})
+        @list.string_tokens.should_not be_include(one)
+    end
+
+    it "should sort the string tokens inversely by length when asked" do
+        one = @list.add_token(:name, "1")
+        two = @list.add_token(:other, "12")
+        @list.sort_tokens
+        @list.string_tokens.should == [two, one]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS do
+    before do
+        @lexer = Puppet::Parser::Lexer.new()
+    end
+
+    {
+        :LBRACK => '[',
+        :RBRACK => ']',
+        :LBRACE => '{',
+        :RBRACE => '}',
+        :LPAREN => '(',
+        :RPAREN => ')',
+        :EQUALS => '=',
+        :ISEQUAL => '==',
+        :GREATEREQUAL => '>=',
+        :GREATERTHAN => '>',
+        :LESSTHAN => '<',
+        :LESSEQUAL => '<=',
+        :NOTEQUAL => '!=',
+        :NOT => '!',
+        :COMMA => ',',
+        :DOT => '.',
+        :COLON => ':',
+        :AT => '@',
+        :LLCOLLECT => '<<|',
+        :RRCOLLECT => '|>>',
+        :LCOLLECT => '<|',
+        :RCOLLECT => '|>',
+        :SEMIC => ';',
+        :QMARK => '?',
+        :BACKSLASH => '\\',
+        :FARROW => '=>',
+        :PARROW => '+>'
+    }.each do |name, string|
+        it "should have a token named #{name.to_s}" do
+            Puppet::Parser::Lexer::TOKENS[name].should_not be_nil
+        end
+
+        it "should match '#{string}' for the token #{name.to_s}" do
+            Puppet::Parser::Lexer::TOKENS[name].string.should == string
+        end
+    end
+
+    {
+        "case" => :CASE,
+        "class" => :CLASS,
+        "default" => :DEFAULT,
+        "define" => :DEFINE,
+        "import" => :IMPORT,
+        "if" => :IF,
+        "elsif" => :ELSIF,
+        "else" => :ELSE,
+        "inherits" => :INHERITS,
+        "node" => :NODE,
+        "and" => :AND,
+        "or" => :OR,
+        "undef" => :UNDEF,
+        "false" => :FALSE,
+        "true" => :TRUE
+    }.each do |string, name|
+        it "should have a keyword named #{name.to_s}" do
+            Puppet::Parser::Lexer::KEYWORDS[name].should_not be_nil
+        end
+
+        it "should have the keyword for #{name.to_s} set to #{string}" do
+            Puppet::Parser::Lexer::KEYWORDS[name].string.should == string
+        end
+    end
+
+    # These tokens' strings don't matter, just that the tokens exist.
+    [:DQTEXT, :SQTEXT, :BOOLEAN, :NAME, :NUMBER, :COMMENT, :RETURN, :SQUOTE, :DQUOTE, :VARIABLE].each do |name|
+        it "should have a token named #{name.to_s}" do
+            Puppet::Parser::Lexer::TOKENS[name].should_not be_nil
+        end
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:NAME] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:NAME] }
+
+    it "should match against lower-case alpha-numeric terms" do
+        @token.regex.should =~ "one-two"
+    end
+
+    it "should return itself and the value if the matched term is not a keyword" do
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(nil)
+        @token.convert(stub("lexer"), "myval").should == [Puppet::Parser::Lexer::TOKENS[:NAME], "myval"]
+    end
+
+    it "should return the keyword token and the value if the matched term is a keyword" do
+        keyword = stub 'keyword', :name => :testing
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
+        @token.convert(stub("lexer"), "myval").should == [keyword, "myval"]
+    end
+
+    it "should return the BOOLEAN token and 'true' if the matched term is the string 'true'" do
+        keyword = stub 'keyword', :name => :TRUE
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
+        @token.convert(stub('lexer'), "true").should == [Puppet::Parser::Lexer::TOKENS[:BOOLEAN], true]
+    end
+
+    it "should return the BOOLEAN token and 'false' if the matched term is the string 'false'" do
+        keyword = stub 'keyword', :name => :FALSE
+        Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
+        @token.convert(stub('lexer'), "false").should == [Puppet::Parser::Lexer::TOKENS[:BOOLEAN], false]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:NUMBER] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:NUMBER] }
+
+    it "should match against numeric terms" do
+        @token.regex.should =~ "2982383139"
+    end
+
+    it "should return the NAME token and the value" do
+        @token.convert(stub("lexer"), "myval").should == [Puppet::Parser::Lexer::TOKENS[:NAME], "myval"]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:COMMENT] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:COMMENT] }
+
+    it "should match against lines starting with '#'" do
+        @token.regex.should =~ "# this is a comment"
+    end
+
+    it "should be marked to get skipped" do
+        @token.skip?.should be_true
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:RETURN] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:RETURN] }
+
+    it "should match against carriage returns" do
+        @token.regex.should =~ "\n"
+    end
+
+    it "should be marked to initiate text skipping" do
+        @token.skip_text.should be_true
+    end
+
+    it "should be marked to increment the line" do
+        @token.incr_line.should be_true
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:SQUOTE] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:SQUOTE] }
+
+    it "should match against single quotes" do
+        @token.regex.should =~ "'"
+    end
+
+    it "should slurp the rest of the quoted string" do
+        lexer = stub("lexer")
+        lexer.expects(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval")
+    end
+
+    it "should return the SQTEXT token with the slurped string" do
+        lexer = stub("lexer")
+        lexer.stubs(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval").should == [Puppet::Parser::Lexer::TOKENS[:SQTEXT], "otherval"]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:DQUOTE] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:DQUOTE] }
+
+    it "should match against single quotes" do
+        @token.regex.should =~ '"'
+    end
+
+    it "should slurp the rest of the quoted string" do
+        lexer = stub("lexer")
+        lexer.expects(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval")
+    end
+
+    it "should return the DQTEXT token with the slurped string" do
+        lexer = stub("lexer")
+        lexer.stubs(:slurpstring).with("myval").returns("otherval")
+        @token.convert(lexer, "myval").should == [Puppet::Parser::Lexer::TOKENS[:DQTEXT], "otherval"]
+    end
+end
+
+describe Puppet::Parser::Lexer::TOKENS[:VARIABLE] do
+    before { @token = Puppet::Parser::Lexer::TOKENS[:VARIABLE] }
+
+    it "should match against alpha words prefixed with '$'" do
+        @token.regex.should =~ '$this_var'
+    end
+
+    it "should return the VARIABLE token and the variable name stripped of the '$'" do
+        @token.convert(stub("lexer"), "$myval").should == [Puppet::Parser::Lexer::TOKENS[:VARIABLE], "myval"]
+    end
+end
+
+# FIXME: We need to rewrite all of these tests, but I just don't want to take the time right now.
+describe "Puppet::Parser::Lexer in the old tests" do
+    before { @lexer = Puppet::Parser::Lexer.new }
+
+    it "should do simple lexing" do
+        strings = {
+%q{\\} => [[:BACKSLASH,"\\"],[false,false]],
+%q{simplest scanner test} => [[:NAME,"simplest"],[:NAME,"scanner"],[:NAME,"test"],[false,false]],
+%q{returned scanner test
+} => [[:NAME,"returned"],[:NAME,"scanner"],[:NAME,"test"],[false,false]]
+        }
+        strings.each { |str,ary|
+            @lexer.string = str
+            @lexer.fullscan().should == ary
+        }
+    end
+
+    it "should correctly lex quoted strings" do
+        strings = {
+%q{a simple "scanner" test
+} => [[:NAME,"a"],[:NAME,"simple"],[:DQTEXT,"scanner"],[:NAME,"test"],[false,false]],
+%q{a simple 'single quote scanner' test
+} => [[:NAME,"a"],[:NAME,"simple"],[:SQTEXT,"single quote scanner"],[:NAME,"test"],[false,false]],
+%q{a harder 'a $b \c"'
+} => [[:NAME,"a"],[:NAME,"harder"],[:SQTEXT,'a $b \c"'],[false,false]],
+%q{a harder "scanner test"
+} => [[:NAME,"a"],[:NAME,"harder"],[:DQTEXT,"scanner test"],[false,false]],
+%q{a hardest "scanner \"test\""
+} => [[:NAME,"a"],[:NAME,"hardest"],[:DQTEXT,'scanner "test"'],[false,false]],
+%q{a hardestest "scanner \"test\"
+"
+} => [[:NAME,"a"],[:NAME,"hardestest"],[:DQTEXT,'scanner "test"
+'],[false,false]],
+%q{function("call")} => [[:NAME,"function"],[:LPAREN,"("],[:DQTEXT,'call'],[:RPAREN,")"],[false,false]]
+        }
+        strings.each { |str,array|
+            @lexer.string = str
+            @lexer.fullscan().should == array
+        }
+    end
+
+    it "should fail usefully" do
+        strings = %w{
+            ^
+        }
+        strings.each { |str|
+            @lexer.string = str
+            lambda { @lexer.fullscan() }.should raise_error(RuntimeError)
+        }
+    end
+
+    it "should fail if the string is not set" do
+        lambda { @lexer.fullscan() }.should raise_error(Puppet::LexError)
+    end
+
+    it "should correctly identify keywords" do
+        @lexer.string = "case"
+        @lexer.fullscan.should == [[:CASE, "case"], [false, false]]
+    end
+
+    it "should correctly match strings" do
+        names = %w{this is a bunch of names}
+        types = %w{Many Different Words A Word}
+        words = %w{differently Cased words A a}
+
+        names.each { |t|
+            @lexer.string = t
+            @lexer.fullscan.should == [[:NAME,t],[false,false]]
+        }
+        types.each { |t|
+            @lexer.string = t
+            @lexer.fullscan.should == [[:CLASSREF,t],[false,false]]
+        }
+    end
+
+    it "should correctly parse empty strings" do
+        bit = '$var = ""'
+
+        @lexer.string = bit
+
+        lambda { @lexer.fullscan }.should_not raise_error
+    end
+
+    it "should correctly parse virtual resources" do
+        string = "@type {"
+
+        @lexer.string = string
+
+        @lexer.fullscan.should == [[:AT, "@"], [:NAME, "type"], [:LBRACE, "{"], [false,false]]
+    end
+
+    it "should correctly deal with namespaces" do
+        @lexer.string = %{class myclass}
+
+        @lexer.fullscan
+
+        @lexer.namespace.should == "myclass"
+
+        @lexer.namepop
+
+        @lexer.namespace.should == ""
+
+        @lexer.string = "class base { class sub { class more"
+
+        @lexer.fullscan
+
+        @lexer.namespace.should == "base::sub::more"
+
+        @lexer.namepop
+
+        @lexer.namespace.should == "base::sub"
+    end
+
+    it "should correctly handle fully qualified names" do
+        @lexer.string = "class base { class sub::more {"
+
+        @lexer.fullscan
+
+        @lexer.namespace.should == "base::sub::more"
+
+        @lexer.namepop
+
+        @lexer.namespace.should == "base"
+    end
+
+    it "should correctly lex variables" do
+        ["$variable", "$::variable", "$qualified::variable", "$further::qualified::variable"].each do |string|
+            @lexer.string = string
+
+            @lexer.scan do |t, s|
+                t.should == :VARIABLE
+                string.sub(/^\$/, '').should == s
+                break
+            end
+        end
+    end
+
+    # #774
+    it "should correctly parse the CLASSREF token" do
+        string = ["Foo", "::Foo","Foo::Bar","::Foo::Bar"]
+
+        string.each do |foo|
+            @lexer.string = foo
+            @lexer.fullscan[0].should == [:CLASSREF, foo]
+        end
+    end
+end
+
+require 'puppettest/support/utils'
+describe "Puppet::Parser::Lexer in the old tests when lexing example files" do
+    extend PuppetTest
+    extend PuppetTest::Support::Utils
+    textfiles() do |file|
+        it "should correctly lex #{file}" do
+            lexer = Puppet::Parser::Lexer.new()
+            lexer.file = file
+            lambda { lexer.fullscan() }.should_not raise_error
+        end
+    end
+end
diff --git a/test/language/lexer.rb b/test/language/lexer.rb
deleted file mode 100755
index e09828d51..000000000
--- a/test/language/lexer.rb
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/usr/bin/env ruby
-
-require File.dirname(__FILE__) + '/../lib/puppettest'
-
-require 'puppet'
-require 'puppet/parser/lexer'
-require 'puppettest'
-
-#%q{service("telnet") = \{
-#    port => "23",
-#    protocol => "tcp",
-#    name => "telnet",
-#\}
-#} => [[:NAME, "service"], [:LPAREN, "("], [:DQUOTE, "\""], [:NAME, "telnet"], [:DQUOTE, "\""], [:RPAREN, ")"], [:EQUALS, "="], [:lbrace, "{"], [:NAME, "port"], [:FARROW, "=>"], [:DQUOTE, "\""], [:NAME, "23"], [:DQUOTE, "\""], [:COMMA, ","], [:NAME, "protocol"], [:FARROW, "=>"], [:DQUOTE, "\""], [:NAME, "tcp"], [:DQUOTE, "\""], [:COMMA, ","], [:NAME, "name"], [:FARROW, "=>"], [:DQUOTE, "\""], [:NAME, "telnet"], [:DQUOTE, "\""], [:COMMA, ","], [:RBRACE, "}"]]
-
-class TestLexer < Test::Unit::TestCase
-    include PuppetTest
-    def setup
-        super
-        mklexer
-    end
-
-    def mklexer
-        @lexer = Puppet::Parser::Lexer.new()
-    end
-
-    def test_simple_lex
-        strings = {
-%q{\\} => [[:BACKSLASH,"\\"],[false,false]],
-%q{simplest scanner test} => [[:NAME,"simplest"],[:NAME,"scanner"],[:NAME,"test"],[false,false]],
-%q{returned scanner test
-} => [[:NAME,"returned"],[:NAME,"scanner"],[:NAME,"test"],[false,false]]
-        }
-        strings.each { |str,ary|
-            @lexer.string = str
-            assert_equal(
-                ary,
-                @lexer.fullscan()
-            )
-        }
-    end
-
-    def test_quoted_strings
-        strings = {
-%q{a simple "scanner" test
-} => [[:NAME,"a"],[:NAME,"simple"],[:DQTEXT,"scanner"],[:NAME,"test"],[false,false]],
-%q{a simple 'single quote scanner' test
-} => [[:NAME,"a"],[:NAME,"simple"],[:SQTEXT,"single quote scanner"],[:NAME,"test"],[false,false]],
-%q{a harder 'a $b \c"'
-} => [[:NAME,"a"],[:NAME,"harder"],[:SQTEXT,'a $b \c"'],[false,false]],
-%q{a harder "scanner test"
-} => [[:NAME,"a"],[:NAME,"harder"],[:DQTEXT,"scanner test"],[false,false]],
-%q{a hardest "scanner \"test\""
-} => [[:NAME,"a"],[:NAME,"hardest"],[:DQTEXT,'scanner "test"'],[false,false]],
-%q{a hardestest "scanner \"test\"
-"
-} => [[:NAME,"a"],[:NAME,"hardestest"],[:DQTEXT,'scanner "test"
-'],[false,false]],
-%q{function("call")} => [[:NAME,"function"],[:LPAREN,"("],[:DQTEXT,'call'],[:RPAREN,")"],[false,false]]
-        }
-        strings.each { |str,array|
-            @lexer.string = str
-            assert_equal(
-                array,
-                @lexer.fullscan()
-            )
-        }
-    end
-
-    def test_errors
-        strings = %w{
-            ^
-        }
-        strings.each { |str|
-            @lexer.string = str
-            assert_raise(RuntimeError) {
-                @lexer.fullscan()
-            }
-        }
-    end
-
-    def test_more_error
-        assert_raise(TypeError) {
-            @lexer.fullscan()
-        }
-    end
-
-    def test_files
-        textfiles() { |file|
-            lexer = Puppet::Parser::Lexer.new()
-            lexer.file = file
-            assert_nothing_raised("Failed to lex %s" % file) {
-                lexer.fullscan()
-            }
-            Puppet::Type.allclear
-        }
-    end
-
-    def test_strings
-        names = %w{this is a bunch of names}
-        types = %w{Many Different Words A Word}
-        words = %w{differently Cased words A a}
-
-        names.each { |t|
-            @lexer.string = t
-            assert_equal(
-                [[:NAME,t],[false,false]],
-                @lexer.fullscan
-            )
-        }
-        types.each { |t|
-            @lexer.string = t
-            assert_equal(
-                [[:CLASSREF,t],[false,false]],
-                @lexer.fullscan
-            )
-        }
-    end
-
-    def test_emptystring
-        bit = '$var = ""'
-
-        assert_nothing_raised {
-            @lexer.string = bit
-        }
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-    end
-
-    def test_collectlexing
-        {"@" => :AT, "<|" => :LCOLLECT, "|>" => :RCOLLECT}.each do |string, token|
-            assert_nothing_raised {
-                @lexer.string = string
-            }
-
-            ret = nil
-            assert_nothing_raised {
-                ret = @lexer.fullscan
-            }
-
-            assert_equal([[token, string],[false, false]], ret)
-        end
-    end
-
-    def test_collectabletype
-        string = "@type {"
-
-        assert_nothing_raised {
-            @lexer.string = string
-        }
-
-        ret = nil
-        assert_nothing_raised {
-            ret = @lexer.fullscan
-        }
-
-        assert_equal([[:AT, "@"], [:NAME, "type"], [:LBRACE, "{"], [false,false]],ret)
-    end
-
-    def test_namespace
-        @lexer.string = %{class myclass}
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-
-        assert_equal("myclass", @lexer.namespace)
-
-        assert_nothing_raised do
-            @lexer.namepop
-        end
-
-        assert_equal("", @lexer.namespace)
-
-        @lexer.string = "class base { class sub { class more"
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-
-        assert_equal("base::sub::more", @lexer.namespace)
-
-        assert_nothing_raised do
-            @lexer.namepop
-        end
-
-        assert_equal("base::sub", @lexer.namespace)
-
-        # Now try it with some fq names
-        mklexer
-
-        @lexer.string = "class base { class sub::more {"
-
-        assert_nothing_raised {
-            @lexer.fullscan
-        }
-
-        assert_equal("base::sub::more", @lexer.namespace)
-
-        assert_nothing_raised do
-            @lexer.namepop
-        end
-
-        assert_equal("base", @lexer.namespace)
-    end
-
-    def test_indefine
-        @lexer.string = %{define me}
-
-        assert_nothing_raised {
-            @lexer.scan { |t,s| }
-        }
-
-        assert(@lexer.indefine?, "Lexer not considered in define")
-
-        # Now make sure we throw an error when trying to nest defines.
-        assert_raise(Puppet::ParseError) do
-            @lexer.string = %{define another}
-            @lexer.scan { |t,s| }
-        end
-
-        assert_nothing_raised do
-            @lexer.indefine = false
-        end
-
-        assert(! @lexer.indefine?, "Lexer still considered in define")
-    end
-
-    # Make sure the different qualified variables work.
-    def test_variable
-        ["$variable", "$::variable", "$qualified::variable", "$further::qualified::variable"].each do |string|
-            @lexer.string = string
-
-            assert_nothing_raised("Could not lex %s" % string) do
-                @lexer.scan do |t, s|
-                    assert_equal(:VARIABLE, t, "did not get variable as token")
-                    assert_equal(string.sub(/^\$/, ''), s, "did not get correct string back")
-                    break
-                end
-            end
-        end
-    end
-
-    # Make sure the expected stack works as it should
-    def test_expected
-        @lexer.string = "[a{"
-        expected = @lexer.instance_variable_get("@expected")
-        @lexer.scan {}
-        assert_equal("}", @lexer.expected, "expected value is wrong")
-
-        @lexer.string = "}"
-        @lexer.scan {}
-        assert_equal("]", @lexer.expected, "expected value is wrong after pop")
-    end
-
-    # #774
-    def test_classref_token
-        string = ["Foo", "::Foo","Foo::Bar","::Foo::Bar"]
-
-        string.each do |foo|
-            assert_nothing_raised {
-                @lexer.string = foo
-            }
-
-            ret = nil
-            assert_nothing_raised {
-                ret = @lexer.fullscan
-            }
-
-            assert_equal([:CLASSREF, foo],ret[0], "Did not correctly tokenize '%s'" % foo)
-        end
-    end
-end
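The new spec file doubles as usage documentation for the lexer's public API. A short driver, assuming the post-patch API shown in the diff above (run from a checkout of this era so puppet/parser/lexer loads); the expected output shape follows the specs, not a captured run:

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new

# fullscan returns [token_name, value] pairs, terminated by [false, false].
lexer.string = 'class base { $greeting = "hello" }'
lexer.fullscan.each do |name, value|
    puts "%-10s %s" % [name.inspect, value.inspect]
end
# Expected shape, per the specs above:
#   :CLASS     "class"
#   :NAME      "base"
#   :LBRACE    "{"
#   :VARIABLE  "greeting"
#   :EQUALS    "="
#   :DQTEXT    "hello"
#   :RBRACE    "}"
#   false      false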