Add configuration parsing code

author: Casey Dahlin <cdahlin@redhat.com> 2009-01-19 14:58:22 -0500
committer: Casey Dahlin <cdahlin@redhat.com> 2009-01-19 14:58:22 -0500
commit: c18883426fe40456de3b14a56a52c7b554282d7b (patch)
tree: 08131cd6db4736560ff264734017d801f3d9f33b
parent: 6bd1ed26f737e514abbd889ea7d89981ccb0fd54 (diff)
download: upstate-c18883426fe40456de3b14a56a52c7b554282d7b.tar.gz
upstate-c18883426fe40456de3b14a56a52c7b554282d7b.tar.xz
upstate-c18883426fe40456de3b14a56a52c7b554282d7b.zip
1 files changed, 575 insertions, 0 deletions
diff --git a/confparse.py b/confparse.py
new file mode 100644
index 0000000..d141ae1
--- /dev/null
+++ b/confparse.py
@@ -0,0 +1,575 @@
+# -*- coding: utf-8 -*-
+class ParseError(Exception):
+    """
+    Raised when there is an error while parsing. Duh.
+    """
+    pass
+
+class HaltedError(Exception):
+    """
+    Raised when we try to add to a token list that we have already "closed"
+    (informed that we had reached the end of the token stream).
+    """
+    pass
+
+class Token:
+    """
+    A single token. This class is essentially abstract. We never instantiate a
+    generic token.
+    """
+
+    def __repr__(self):
+        """
+        The representation of a Token is, by default, the value of its
+        item variable.
+        """
+        return self.item
+
+    def isnewline(self):
+        """
+        Is this token a newline token?
+        """
+        return False
+
+    def iswhitespace(self):
+        """
+        Is this token a whitespace token?
+        """
+        return False
+
+    def isword(self, val=None):
+        """
+        Is this token a word token? Is the word the same as in the string `val`?
+        """
+        return False
+
+    def isoper(self, val=None):
+        """
+        Is this token an operator token? Is the word the same as in the string
+        `val`?
+        """
+        return False
+
+class WhitespaceToken(Token):
+    """
+    Whitespace tokens come from any number of unquoted space or tab characters.
+    """
+    def __init__(self):
+        self.item = "whitespace"
+
+    def iswhitespace(self):
+        """
+        Always True for this class.
+        """
+        return True
+
+class NewlineToken(Token):
+    """
+    Token yielded by a single newline character.
+    """
+    def __init__(self):
+        self.item = "newline"
+
+    def isnewline(self):
+        """
+        Always True for this class.
+        """
+        return True
+
+class OperToken(Token):
+    """
+    Token yielded by a single non-word character (not alphanumeric or _).
+    """
+    def __init__(self, item):
+        """
+        `item` specifies the operator character this token will represent.
+        """
+        self.item = item
+
+    def isoper(self, item = None):
+        """
+        Always True if item is None, otherwise true if `item` is the operator
+        character represented by this token.
+        """
+        if item == None:
+            return True
+        return self.item == item
+
+class WordToken(Token):
+    """
+    Represents a string of word characters (alphanumeric or _) or the contents
+    of a quoted string.
+    """
+    def __init__(self, word):
+        """
+        `word` is the text this token will represent.
+        """
+        self.word = word
+
+    def __repr__(self):
+        """
+        Our representation is a bit different, and consists of the word we
+        represent, quoted.
+        """
+        return "`%s`" % self.word
+
+    def isword(self, item = None):
+        """
+        Always True if item is None, otherwise true if `item` is the word
+        represented by this token.
+        """
+        if item == None:
+            return True
+        return self.word == item
+
+class TokenList:
+    """
+    This class contains a list of tokens. It is a list-like object, but is
+    read-only until it is `close`d. It is populated by passing characters to
+    the constructor or the `add` method. Calling `close` indicates all
+    characters have been passed and the TokenList should now behave as a
+    regular list.
+    """
+    class CloseSignal:
+        """
+        Passed to the current state instead of a character to indicate we're
+        done receiving characters.
+        """
+        pass
+
+    def __init__(self, data=None):
+        """
+        `data` is a set of initial characters which are passed to `add`
+        immediately after initialization.
+        """
+        self.tokens = []
+        self.state = self.s_expect_word
+        self.current_token = ""
+
+        self.__getitem__ = self.tokens.__getitem__
+        self.__len__ = self.tokens.__len__
+        if data != None:
+            self.add(data)
+
+    def __repr__(self):
+        """
+        Our representation is simply the representation of our processed
+        tokens joined by spaces. If we have an unfinalized token, it is
+        shown last in parentheses.
+        """
+        ret = " ".join([ str(x) for x in self.tokens])
+        if len(self.current_token) > 0:
+            ret += "(%s)" % self.current_token
+        return ret
+
+    def add(self, data):
+        """
+        Add characters to the token list. State is persistent between adds such
+        that calling:
+            tl.add(str1)
+            tl.add(str2)
+        is the same as calling:
+            tl.add(str1 + str2)
+        in all cases
+        """
+        for char in data:
+            self.state(char)
+
+    def s_expect_word(self, char):
+        """
+        Handle the next character of input while we are expecting to see
+        a word.
+        """
+        if char.__class__ == TokenList.CloseSignal:
+            self.close_token()
+            self.state = self.s_accept
+        elif char == "\n":
+            self.close_token()
+            self.tokens.append(NewlineToken())
+        elif char.isspace():
+            self.close_token()
+            self.tokens.append(WhitespaceToken())
+            self.state = self.s_skip_space
+        elif char == '"':
+            self.close_token()
+            self.state = self.s_in_quote
+        elif char == "#":
+            self.close_token()
+            self.state = self.s_in_comment
+        elif char.isalnum() or char == "_":
+            self.current_token += char
+        else:
+            self.close_token()
+            self.tokens.append(OperToken(char))
+
+    def s_skip_space(self, char):
+        """
+        Handle the next character while we are skipping a block of whitespace.
+        """
+        if char.__class__ == TokenList.CloseSignal:
+            self.state = self.s_accept
+        elif not char.isspace():
+            self.state = self.s_expect_word
+            self.state(char)
+
+    def s_in_quote(self, char):
+        """
+        Handle the next character while we are in a quoted string.
+        """
+        if char.__class__ == TokenList.CloseSignal:
+            raise ParseError('`"` expected')
+        elif char == "\\":
+            self.state = self.s_always_quote
+        elif char == '"':
+            self.close_token()
+            self.state = self.s_expect_word
+        else:
+            self.current_token += char
+
+    def s_always_quote(self, char):
+        """
+        Handle the next character, which the previous character has escaped.
+        """
+        if char.__class__ == TokenList.CloseSignal:
+            raise ParseError('`"` expected')
+        else:
+            self.current_token += char
+            self.state = self.s_in_quote
+
+    def s_in_comment(self, char):
+        """
+        Handle the next character while we are in a comment.
+        """
+        if char.__class__ == TokenList.CloseSignal:
+            self.state = self.s_accept
+        elif char == "\n":
+            self.state = self.s_expect_word
+
+    def s_accept(self, char):
+        """
+        Handle the next character when there aren't supposed to be any more
+        characters (this function getting called is an error).
+        """
+        raise HaltedError("Data given to TokenList when no more expected")
+
+    def assert_full(self):
+        """
+        Raise an error if there are no tokens in the list.
+        """
+        if len(self.tokens) == 0:
+            raise ParseError("Expected Token")
+
+    def trim_leading_whitespace(self):
+        """
+        Delete any whitespace at the beginning of the list, and raise an error
+        if this empties the list.
+        """
+        self.assert_full()
+        while self[0].iswhitespace():
+            del self[0]
+        self.assert_full()
+
+    def close(self):
+        """
+        Consider the last added character to be the final character of the
+        input. Further calls to `add` will yield an error. Items in the list
+        may now be changed and deleted.
+        """
+        self.__setitem__ = self.tokens.__setitem__
+        self.__delitem__ = self.tokens.__delitem__
+        self.state(TokenList.CloseSignal())
+
+    def close_token(self):
+        """
+        Mark the token currently being processed as complete and add it to the
+        final list.
+        """
+        if len(self.current_token) > 0:
+            self.tokens.append(WordToken(self.current_token))
+        self.current_token = ""
+
+class ParseTreeNode:
+    """
+    A node in the final parse tree we generate of a given set of config data.
+    """
+    def __repr__(self):
+        """
+        We represent ourselves as an S-expression
+        """
+        return "%s (%s)" % (self.__class__,
+                ") (".join([str(x) for x in self.children()]))
+
+class Conf(ParseTreeNode):
+    """
+    A set of configuration items.
+    """
+    def __init__(self, string):
+        """
+        `string` is the text configuration which defines the new Conf object.
+        """
+        toks = TokenList(string)
+        toks.close()
+        self.propspecs = []
+        while len(toks) > 0:
+            self.propspecs.append(PropMatch(toks))
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        return self.propspecs
+
+    def __repr__(self):
+        """
+        Since we're ostensibly the top of the tree, we get an extra set of
+        parens.
+        """
+        return "(%s)" % ParseTreeNode.__repr__(self)
+
+class PropMatch(ParseTreeNode):
+    """
+    A property match block. These blocks specify, textually, a category, and
+    several properties which apply to it. The text form is:
+        name property
+    or
+        name() property
+    or
+        name(args)
+            property
+    or
+        name(args): property
+    etc.
+    """
+    def __init__(self, toks):
+        """
+        `toks` is a token list. The tokens at the beginning thereof which
+        define a property match block will be removed.
+        """
+        self.properties = []
+        while True:
+            toks.trim_leading_whitespace()
+            if not toks[0].isnewline():
+                break
+            del toks[0]
+        self.pattern = CatSpec(toks)
+        if toks[0].isoper(":"):
+            del toks[0]
+        while len(toks) > 0:
+            if toks[0].iswhitespace():
+                del toks[0]
+            elif toks[0].isnewline():
+                del toks[0]
+                if len(toks) == 0 or not toks[0].iswhitespace():
+                    break
+            else:
+                self.properties.append(PropSpec(toks))
+        if len(self.properties) == 0:
+            raise ParseError("Property expected")
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        return [self.pattern] + self.properties
+
+class CatSpec(ParseTreeNode):
+    """
+    A category specification. Parsed from a textual specification of a category
+    of the form:
+        name
+    or
+        name()
+    or
+        name(args)
+    """
+    def __init__(self, toks):
+        """
+        `toks` is a token stream. The tokens at the beginning thereof which
+        define a CatSpec will be removed.
+        """
+        while len(toks) > 0 and toks[0].iswhitespace():
+            del toks[0]
+        toks.assert_full()
+        if not toks[0].isword():
+            raise ParseError("State name expected")
+        self.name = toks[0]
+        del toks[0]
+        self.args = ArgSpec(toks)
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        if len(self.args.items) == 0:
+            return [self.name]
+        return [self.name, self.args]
+
+class ArgSpec(ParseTreeNode):
+    """
+    Argument specification. Represents a textual argument list of the form:
+    (key: value, key2: value2, ...)
+    """
+    def __init__(self, toks):
+        """
+        `toks` is a token stream. The tokens at the beginning thereof which
+        define an ArgSpec will be removed. NOTE: if there is no ArgSpec at the
+        start of the stream, no error is yielded and the created ArgSpec is
+        empty (equivalent to "()")
+        """
+        self.items = []
+        self.is_specifier = True
+        toks.assert_full()
+        if not toks[0].isoper('('):
+            return
+        del toks[0]
+        expect_arg = True
+        while len(toks) > 0:
+            if toks[0].iswhitespace():
+                del toks[0]
+            elif toks[0].isoper(")"):
+                del toks[0]
+                return
+            elif expect_arg:
+                if toks[0].isoper("*"):
+                    self.is_specifier = False
+                    del toks[0]
+                else:
+                    self.items.append(KVPair(toks))
+                expect_arg = False
+            elif toks[0].isoper(","):
+                del toks[0]
+                expect_arg = True
+            else:
+                raise ParseError("Unexpected %s token" % toks[0])
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        if self.is_specifier:
+            return self.items
+        else:
+            return self.items + ["NOSPEC"]
+
+class KVPair(ParseTreeNode):
+    """
+    A key value pair, specified in the form:
+        key: value
+    """
+    def __init__(self, toks):
+        """
+        `toks` is a token stream. The tokens at the beginning thereof which
+        define a KVPair will be removed.
+        """
+        toks.trim_leading_whitespace()
+        if not toks[0].isword():
+            raise ParseError("Expected name before %s token" % toks[0])
+        self.name = toks[0]
+        del toks[0]
+        toks.assert_full()
+        if not toks[0].isoper(":"):
+            raise ParseError("Expected `:` before %s token" % toks[0])
+        del toks[0]
+        toks.trim_leading_whitespace()
+        if not toks[0].isword():
+            raise ParseError("Expected value before %s token" % toks[0])
+        self.value = toks[0]
+        del toks[0]
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        return [self.name, self.value]
+
+class PropSpec(ParseTreeNode):
+    """
+    A property specification, specified textually as one of:
+        on someevent
+        when somestate
+        auto
+        exec somecommand
+    """
+    def __init__(self, toks):
+        """
+        `toks` is a token stream. The tokens at the beginning thereof which
+        define a PropSpec will be removed.
+        """
+        toks.trim_leading_whitespace()
+        if toks[0].isword("when"):
+            self.spec = WhenSpec(toks)
+        elif toks[0].isword("on"):
+            self.spec = OnSpec(toks)
+        elif toks[0].isword("auto"):
+            toks[0:1] = [WordToken("on"), WhitespaceToken(), WordToken("ε")]
+            self.spec = OnSpec(toks)
+        elif toks[0].isword("exec"):
+            self.spec = ExecSpec(toks)
+        else:
+            raise ParseError("Expected keyword before %s token" % toks[0])
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        return [self.spec]
+
+class WhenSpec(ParseTreeNode):
+    """
+    A specification of a when relationship. Defined textually as:
+        when CatSpec
+    """
+    def __init__(self, toks):
+        if not toks[0].isword("when"):
+            raise Exception("Impossible condition in parser")
+        del toks[0]
+        toks.trim_leading_whitespace()
+        if not toks[0].isword():
+            raise Exception("Expected name before %s token" % toks[0])
+        self.waiting_for = CatSpec(toks)
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        return [self.waiting_for]
+
+class OnSpec(ParseTreeNode):
+    """
+    A specification of an event trigger. Defined textually as:
+        on CatSpec
+    """
+    def __init__(self, toks):
+        if not toks[0].isword("on"):
+            raise Exception("Impossible condition in parser")
+        del toks[0]
+        toks.trim_leading_whitespace()
+        if not toks[0].isword():
+            raise Exception("Expected name before %s token" % toks[0])
+        self.waiting_for = CatSpec(toks)
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        return [self.waiting_for]
+
+class ExecSpec(ParseTreeNode):
+    """
+    A specification of a service to run. Defined textually as:
+        exec script
+    """
+    def __init__(self, toks):
+        if not toks[0].isword("exec"):
+            raise Exception("Impossible condition in parser")
+        del toks[0]
+        self.script_toks = []
+        while not toks[0].isnewline():
+            self.script_toks.append(toks[0])
+            del toks[0]
+
+    def children(self):
+        """
+        List of child nodes of this parse tree node
+        """
+        return [self.script_toks]
author	Casey Dahlin <cdahlin@redhat.com>	2009-01-19 14:58:22 -0500
committer	Casey Dahlin <cdahlin@redhat.com>	2009-01-19 14:58:22 -0500
commit	c18883426fe40456de3b14a56a52c7b554282d7b (patch)
tree	08131cd6db4736560ff264734017d801f3d9f33b
parent	6bd1ed26f737e514abbd889ea7d89981ccb0fd54 (diff)
download	upstate-c18883426fe40456de3b14a56a52c7b554282d7b.tar.gz upstate-c18883426fe40456de3b14a56a52c7b554282d7b.tar.xz upstate-c18883426fe40456de3b14a56a52c7b554282d7b.zip