# the scanner/lexer

require 'strscan'
require 'puppet'


module Puppet
    class LexError < RuntimeError; end
end

module Puppet::Parser; end

class Puppet::Parser::Lexer
    attr_reader :last, :file

    attr_accessor :line, :indefine

    # Our base token class.
    class Token
        attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate

        def initialize(regex, name)
            if regex.is_a?(String)
                @name, @string = name, regex
                @regex = Regexp.new(Regexp.escape(@string))
            else
                @name, @regex = name, regex
            end
        end

        %w{skip accumulate}.each do |method|
            define_method(method+"?") do
                self.send(method)
            end
        end

        def to_s
            if self.string
                @string
            else
                @name.to_s
            end
        end
    end

    # Maintain a list of tokens.
    class TokenList
        attr_reader :regex_tokens, :string_tokens

        def [](name)
            @tokens[name]
        end

        # Create a new token.
        def add_token(name, regex, options = {}, &block)
            token = Token.new(regex, name)
            raise(ArgumentError, "Token %s already exists" % name) if @tokens.include?(name)
            @tokens[token.name] = token
            if token.string
                @string_tokens << token
                @tokens_by_string[token.string] = token
            else
                @regex_tokens << token
            end

            options.each do |name, option|
                token.send(name.to_s + "=", option)
            end

            token.meta_def(:convert, &block) if block_given?

            token
        end

        def initialize
            @tokens = {}
            @regex_tokens = []
            @string_tokens = []
            @tokens_by_string = {}
        end

        # Look up a token by its value, rather than name.
        def lookup(string)
            @tokens_by_string[string]
        end

        # Define more tokens.
        def add_tokens(hash)
            hash.each do |regex, name|
                add_token(name, regex)
            end
        end

        # Sort our tokens by length, so we know once we match, we're done.
        # This helps us avoid the O(n^2) nature of token matching.
        def sort_tokens
            @string_tokens.sort! { |a, b| b.string.length <=> a.string.length }
        end
    end

    TOKENS = TokenList.new
    TOKENS.add_tokens(
        '[' => :LBRACK,
        ']' => :RBRACK,
        '{' => :LBRACE,
        '}' => :RBRACE,
        '(' => :LPAREN,
        ')' => :RPAREN,
        '=' => :EQUALS,
        '+=' => :APPENDS,
        '==' => :ISEQUAL,
        '>=' => :GREATEREQUAL,
        '>' => :GREATERTHAN,
        '<' => :LESSTHAN,
        '<=' => :LESSEQUAL,
        '!=' => :NOTEQUAL,
        '!' => :NOT,
        ',' => :COMMA,
        '.' => :DOT,
        ':' => :COLON,
        '@' => :AT,
        '<<|' => :LLCOLLECT,
        '|>>' => :RRCOLLECT,
        '<|' => :LCOLLECT,
        '|>' => :RCOLLECT,
        ';' => :SEMIC,
        '?' => :QMARK,
        '\\' => :BACKSLASH,
        '=>' => :FARROW,
        '+>' => :PARROW,
        '+' => :PLUS,
        '-' => :MINUS,
        '/' => :DIV,
        '*' => :TIMES,
        '<<' => :LSHIFT,
        '>>' => :RSHIFT,
        %r{([a-z][-\w]*)?(::[a-z][-\w]*)+} => :CLASSNAME, # Require '::' in the class name, else we'd compete with NAME
        %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
    )

    TOKENS.add_tokens "Whatever" => :DQTEXT, "Nomatter" => :SQTEXT, "alsonomatter" => :BOOLEAN

    TOKENS.add_token :NUMBER, %r{\b(?:0[xX][0-9A-Fa-f]+|0?\d+(?:\.\d+)?(?:[eE]-?\d+)?)\b} do |lexer, value|
        [TOKENS[:NAME], value]
    end

    TOKENS.add_token :NAME, %r{[a-z0-9][-\w]*} do |lexer, value|
        string_token = self
        # we're looking for keywords here
        if tmp = KEYWORDS.lookup(value)
            string_token = tmp
            if [:TRUE, :FALSE].include?(string_token.name)
                value = eval(value)
                string_token = TOKENS[:BOOLEAN]
            end
        end
        [string_token, value]
    end

    TOKENS.add_token :COMMENT, %r{#.*}, :accumulate => true, :skip => true do |lexer,value|
        value.sub!(/# ?/,'')
        [self, value]
    end

    TOKENS.add_token :MLCOMMENT, %r{/\*(.*?)\*/}m, :accumulate => true, :skip => true do |lexer, value|
        lexer.line += value.count("\n")
        value.sub!(/^\/\* ?/,'')
        value.sub!(/ ?\*\/$/,'')
        [self,value]
    end

    TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true

    TOKENS.add_token :SQUOTE, "'" do |lexer, value|
        value = lexer.slurpstring(value)
        [TOKENS[:SQTEXT], value]
    end

    TOKENS.add_token :DQUOTE, '"' do |lexer, value|
        value = lexer.slurpstring(value)
        [TOKENS[:DQTEXT], value]
    end

    TOKENS.add_token :VARIABLE, %r{\$(\w*::)*\w+} do |lexer, value|
        value = value.sub(/^\$/, '')
        [self, value]
    end

    TOKENS.sort_tokens

    @@pairs = {
        "{" => "}",
        "(" => ")",
        "[" => "]",
        "<|" => "|>",
        "<<|" => "|>>"
    }

    KEYWORDS = TokenList.new

    KEYWORDS.add_tokens(
        "case" => :CASE,
        "class" => :CLASS,
        "default" => :DEFAULT,
        "define" => :DEFINE,
        "import" => :IMPORT,
        "if" => :IF,
        "elsif" => :ELSIF,
        "else" => :ELSE,
        "inherits" => :INHERITS,
        "node" => :NODE,
        "and"  => :AND,
        "or"   => :OR,
        "undef"   => :UNDEF,
        "false" => :FALSE,
        "true" => :TRUE
    )

    def clear
        initvars
    end

    def expected
        return nil if @expected.empty?
        name = @expected[-1]
        raise "Could not find expected token %s" % name unless token = TOKENS.lookup(name)

        return token
    end

    # scan the whole file
    # basically just used for testing
    def fullscan
        array = []

        self.scan { |token, str|
            # Ignore any definition nesting problems
            @indefine = false
            array.push([token,str])
        }
        return array
    end

    # this is probably pretty damned inefficient...
    # it'd be nice not to have to load the whole file first...
    def file=(file)
        @file = file
        @line = 1
        File.open(file) { |of|
            str = ""
            of.each { |line| str += line }
            @scanner = StringScanner.new(str)
        }
    end

    def find_string_token
        matched_token = value = nil

        # We know our longest string token is three chars, so try each size in turn
        # until we either match or run out of chars.  This way our worst-case is three
        # tries, where it is otherwise the number of string chars we have.  Also,
        # the lookups are optimized hash lookups, instead of regex scans.
        [3, 2, 1].each do |i|
            str = @scanner.peek(i)
            if matched_token = TOKENS.lookup(str)
                value = @scanner.scan(matched_token.regex)
                break
            end
        end

        return matched_token, value
    end

    # Find the next token that matches a regex.  We look for these first.
    def find_regex_token
        @regex += 1
        matched_token = nil
        value = ""
        length = 0

        # I tried optimizing based on the first char, but it had
        # a slightly negative affect and was a good bit more complicated.
        TOKENS.regex_tokens.each do |token|
            next unless match_length = @scanner.match?(token.regex) 
            
            # We've found a longer match
            if match_length > length
                value = @scanner.scan(token.regex) 
                length = value.length
                matched_token = token
            end
        end

        return matched_token, value
    end

    # Find the next token, returning the string and the token.
    def find_token
        @find += 1
        matched_token, value = find_regex_token

        unless matched_token
            matched_token, value = find_string_token
        end

        return matched_token, value
    end

    def indefine?
        if defined? @indefine
            @indefine
        else
            false
        end
    end

    def initialize
        @find = 0
        @regex = 0
        initvars()
    end

    def initvars
        @line = 1
        @previous_token = nil
        @scanner = nil
        @file = nil
        # AAARRGGGG! okay, regexes in ruby are bloody annoying
        # no one else has "\n" =~ /\s/
        @skip = %r{[ \t]+}

        @namestack = []
        @indefine = false
        @expected = []
        @commentstack = ['']
    end

    # Make any necessary changes to the token and/or value.
    def munge_token(token, value)
        @line += 1 if token.incr_line

        skip() if token.skip_text

        return if token.skip and not token.accumulate?

        token, value = token.convert(self, value) if token.respond_to?(:convert)

        return unless token

        if token.accumulate?
            @commentstack.last << value + "\n"
        end

        return if token.skip

        return token, value
    end

    # Go up one in the namespace.
    def namepop
        @namestack.pop
    end

    # Collect the current namespace.
    def namespace
        @namestack.join("::")
    end

    # This value might have :: in it, but we don't care -- it'll be
    # handled normally when joining, and when popping we want to pop
    # this full value, however long the namespace is.
    def namestack(value)
        @namestack << value
    end

    def rest
        @scanner.rest
    end

    # this is the heart of the lexer
    def scan
        #Puppet.debug("entering scan")
        raise Puppet::LexError.new("Invalid or empty string") unless @scanner

        # Skip any initial whitespace.
        skip()

        until @scanner.eos? do
            yielded = false
            matched_token, value = find_token

            # error out if we didn't match anything at all
            if matched_token.nil?
                nword = nil
                # Try to pull a 'word' out of the remaining string.
                if @scanner.rest =~ /^(\S+)/
                    nword = $1
                elsif @scanner.rest =~ /^(\s+)/
                    nword = $1
                else
                    nword = @scanner.rest
                end
                raise "Could not match '%s'" % nword
            end

            if matched_token.name == :RETURN
                # this matches a blank line
                if @last_return
                    # eat the previously accumulated comments
                    getcomment
                end
                # since :RETURN skips, we won't survive to munge_token
                @last_return = true
            else
                @last_return = false
            end

            final_token, value = munge_token(matched_token, value)

            next unless final_token

            if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
                @expected << match
            elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
                @expected.pop
            end

            if final_token.name == :LBRACE
                commentpush
            end

            yield [final_token.name, value]

            if @previous_token
                namestack(value) if @previous_token.name == :CLASS

                if @previous_token.name == :DEFINE
                    if indefine?
                        msg = "Cannot nest definition %s inside %s" % [value, @indefine]
                        self.indefine = false
                        raise Puppet::ParseError, msg
                    end

                    @indefine = value
                end
            end
            @previous_token = final_token
            skip()
        end
        @scanner = nil

        # This indicates that we're done parsing.
        yield [false,false]
    end

    # Skip any skipchars in our remaining string.
    def skip
        @scanner.skip(@skip)
    end

    # we've encountered an opening quote...
    # slurp in the rest of the string and return it
    def slurpstring(quote)
        # we search for the next quote that isn't preceded by a
        # backslash; the caret is there to match empty strings
        str = @scanner.scan_until(/([^\\]|^)#{quote}/)
        if str.nil?
            raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
                [self.last,self.rest])
        else
            str.sub!(/#{quote}\Z/,"")
            str.gsub!(/\\#{quote}/,quote)
        end

        # Add to our line count for every carriage return in multi-line strings.
        @line += str.count("\n")

        return str
    end

    # just parse a string, not a whole file
    def string=(string)
        @scanner = StringScanner.new(string)
    end

    # returns the content of the currently accumulated content cache
    def commentpop
        return @commentstack.pop
    end

    def getcomment
        comment = @commentstack.pop
        @commentstack.push('')
        return comment
    end

    def commentpush
        @commentstack.push('')
    end
end