diff options
author | Casey Dahlin <cdahlin@redhat.com> | 2009-01-19 14:58:22 -0500 |
---|---|---|
committer | Casey Dahlin <cdahlin@redhat.com> | 2009-01-19 14:58:22 -0500 |
commit | c18883426fe40456de3b14a56a52c7b554282d7b (patch) | |
tree | 08131cd6db4736560ff264734017d801f3d9f33b | |
parent | 6bd1ed26f737e514abbd889ea7d89981ccb0fd54 (diff) | |
download | upstate-c18883426fe40456de3b14a56a52c7b554282d7b.tar.gz upstate-c18883426fe40456de3b14a56a52c7b554282d7b.tar.xz upstate-c18883426fe40456de3b14a56a52c7b554282d7b.zip |
Add configuration parsing code
-rw-r--r-- | confparse.py | 575 |
1 files changed, 575 insertions, 0 deletions
# -*- coding: utf-8 -*-
# confparse.py -- reconstructed from the hunk "diff --git a/confparse.py
# b/confparse.py" (new file mode 100644, index 0000000..d141ae1).
"""
Tokenizer and recursive-descent parser for configuration text.

`TokenList` turns a stream of characters into tokens; `Conf` and the other
`ParseTreeNode` subclasses consume tokens from the front of a closed
`TokenList` and build a parse tree.
"""


class ParseError(Exception):
    """
    Raised when there is an error while parsing.
    """
    pass


class HaltedError(Exception):
    """
    Raised when we try to add to a token list that we have already "closed"
    (informed that we had reached the end of the token stream).
    """
    pass


class Token:
    """
    A single token. This class is essentially abstract. We never instantiate a
    generic token.
    """

    def __repr__(self):
        """
        The representation of a Token is, by default, the value of its
        `item` attribute (subclasses are expected to set it or override this).
        """
        return self.item

    def isnewline(self):
        """
        Is this token a newline token? Always False in the base class.
        """
        return False

    def iswhitespace(self):
        """
        Is this token a whitespace token? Always False in the base class.
        """
        return False

    def isword(self, val=None):
        """
        Is this token a word token (and, if `val` is given, is the word equal
        to `val`)? Always False in the base class.
        """
        return False

    def isoper(self, val=None):
        """
        Is this token an operator token (and, if `val` is given, is the
        operator equal to `val`)? Always False in the base class.
        """
        return False


class WhitespaceToken(Token):
    """
    Whitespace tokens come from any number of unquoted space or tab characters.
    """

    def __init__(self):
        self.item = "whitespace"

    def iswhitespace(self):
        """
        Always True for this class.
        """
        return True


class NewlineToken(Token):
    """
    Token yielded by a single newline character.
    """

    def __init__(self):
        self.item = "newline"

    def isnewline(self):
        """
        Always True for this class.
        """
        return True


class OperToken(Token):
    """
    Token yielded by a single non-word character (word characters being
    alphanumerics and `_`).
    """

    def __init__(self, item):
        """
        `item` specifies the operator character this token will represent.
        """
        self.item = item

    def isoper(self, item=None):
        """
        Always True if `item` is None, otherwise True only if `item` is the
        operator character represented by this token.
        """
        if item is None:
            return True
        return self.item == item


class WordToken(Token):
    """
    Represents a string of word characters (alphanumeric or _) or the contents
    of a quoted string.
    """

    def __init__(self, word):
        """
        `word` is the text this token will represent.
        """
        self.word = word

    def __repr__(self):
        """
        Our representation is a bit different, and consists of the word we
        represent, quoted with backticks.
        """
        return "`%s`" % self.word

    def isword(self, item=None):
        """
        Always True if `item` is None, otherwise True only if `item` is the
        word represented by this token.
        """
        if item is None:
            return True
        return self.word == item


class TokenList:
    """
    This class contains a list of tokens. It is a list-like object, but is
    read-only until it is `close`d. It is populated by passing characters to
    the constructor or the `add` method. Calling `close` indicates all
    characters have been passed and the TokenList should now behave as a
    regular list.

    The tokenizer is a small state machine: `self.state` is the bound `s_*`
    method that will handle the next character.
    """

    class CloseSignal:
        """
        Passed to the current state instead of a character to indicate we're
        done receiving characters.
        """
        pass

    def __init__(self, data=None):
        """
        `data` is a set of initial characters which are passed to `add`
        immediately after initialization.
        """
        self.tokens = []
        self.state = self.s_expect_word
        self.current_token = ""
        # Mutation through item assignment/deletion is only allowed once
        # `close` has been called.
        self.closed = False
        if data is not None:
            self.add(data)

    # The sequence protocol is implemented with real methods on the class:
    # implicit special-method lookup (len(), [], del) goes through the type,
    # not the instance, so binding these on `self` does not work for
    # new-style / Python 3 classes.

    def __getitem__(self, index):
        """
        Return the finished token (or slice of tokens) at `index`.
        """
        return self.tokens[index]

    def __len__(self):
        """
        Number of finished tokens (the token still being built is excluded).
        """
        return len(self.tokens)

    def __setitem__(self, index, value):
        """
        Replace the token (or slice) at `index`. Raises TypeError until the
        list has been `close`d.
        """
        self._require_closed()
        self.tokens[index] = value

    def __delitem__(self, index):
        """
        Delete the token (or slice) at `index`. Raises TypeError until the
        list has been `close`d.
        """
        self._require_closed()
        del self.tokens[index]

    def _require_closed(self):
        """
        Raise TypeError if the list is still accepting characters.
        """
        if not self.closed:
            raise TypeError("TokenList is read-only until it is closed")

    def __repr__(self):
        """
        Our representation is simply the representation of our processed
        tokens joined by spaces. If we have an unfinalized token, it is
        shown last in parentheses.
        """
        ret = " ".join(str(x) for x in self.tokens)
        if self.current_token:
            ret += "(%s)" % self.current_token
        return ret

    def add(self, data):
        """
        Add characters to the token list. State is persistent between adds such
        that calling:
            tl.add(str1)
            tl.add(str2)
        is the same as calling:
            tl.add(str1 + str2)
        in all cases. Raises HaltedError if the list was already closed.
        """
        for char in data:
            self.state(char)

    def s_expect_word(self, char):
        """
        Handle the next character of input while we are expecting to see
        a word.
        """
        if isinstance(char, TokenList.CloseSignal):
            self.close_token()
            self.state = self.s_accept
        elif char == "\n":
            self.close_token()
            self.tokens.append(NewlineToken())
        elif char.isspace():
            # A run of blanks collapses into one whitespace token.
            self.close_token()
            self.tokens.append(WhitespaceToken())
            self.state = self.s_skip_space
        elif char == '"':
            self.close_token()
            self.state = self.s_in_quote
        elif char == "#":
            self.close_token()
            self.state = self.s_in_comment
        elif char.isalnum() or char == "_":
            self.current_token += char
        else:
            # Anything else is a single-character operator.
            self.close_token()
            self.tokens.append(OperToken(char))

    def s_skip_space(self, char):
        """
        Handle the next character while we are skipping a block of whitespace.
        """
        if isinstance(char, TokenList.CloseSignal):
            self.state = self.s_accept
        elif not char.isspace():
            # First non-blank character: re-dispatch it as regular input.
            # Note "\n" is itself whitespace, so a newline directly after
            # blanks is skipped here rather than emitted as a token.
            self.state = self.s_expect_word
            self.state(char)

    def s_in_quote(self, char):
        """
        Handle the next character while we are in a quoted string. Raises
        ParseError if the input ends before the closing quote.
        """
        if isinstance(char, TokenList.CloseSignal):
            raise ParseError('`"` expected')
        elif char == "\\":
            self.state = self.s_always_quote
        elif char == '"':
            self.close_token()
            self.state = self.s_expect_word
        else:
            self.current_token += char

    def s_always_quote(self, char):
        """
        Handle the next character, which the previous character (a backslash
        inside a quoted string) has escaped. It is taken literally.
        """
        if isinstance(char, TokenList.CloseSignal):
            raise ParseError('`"` expected')
        else:
            self.current_token += char
            self.state = self.s_in_quote

    def s_in_comment(self, char):
        """
        Handle the next character while we are in a comment. Everything up to
        and including the next newline is discarded.
        """
        if isinstance(char, TokenList.CloseSignal):
            self.state = self.s_accept
        elif char == "\n":
            # NOTE(review): the terminating newline is swallowed, so no
            # NewlineToken is produced for a line that ends in a comment.
            # Confirm this is intended before changing it.
            self.state = self.s_expect_word

    def s_accept(self, char):
        """
        Handle the next character when there aren't supposed to be any more
        characters (this function getting called is an error).
        """
        raise HaltedError("Data given to TokenList when no more expected")

    def assert_full(self):
        """
        Raise ParseError if there are no tokens in the list.
        """
        if len(self.tokens) == 0:
            raise ParseError("Expected Token")

    def trim_leading_whitespace(self):
        """
        Delete any whitespace tokens at the beginning of the list, and raise
        ParseError if the list is empty before or after doing so.
        """
        self.assert_full()
        while self[0].iswhitespace():
            del self[0]
            self.assert_full()

    def close(self):
        """
        Consider the last added character to be the final character of the
        input. Further calls to `add` will yield an error. Items in the list
        may now be changed and deleted.
        """
        self.closed = True
        self.state(TokenList.CloseSignal())

    def close_token(self):
        """
        Mark the token currently being processed as complete and add it to the
        final list. Does nothing if no characters have been accumulated.
        """
        if len(self.current_token) > 0:
            self.tokens.append(WordToken(self.current_token))
            self.current_token = ""


class ParseTreeNode:
    """
    A node in the final parse tree we generate of a given set of config data.
    Subclasses provide `children()`.
    """

    def __repr__(self):
        """
        We represent ourselves as an S-expression: our class followed by the
        parenthesised representations of our children.
        """
        return "%s (%s)" % (self.__class__,
                ") (".join([str(x) for x in self.children()]))


class Conf(ParseTreeNode):
    """
    A set of configuration items.
    """

    def __init__(self, string):
        """
        `string` is the text configuration which defines the new Conf object.
        Raises ParseError if the text is malformed.
        """
        toks = TokenList(string)
        toks.close()
        self.propspecs = []
        while True:
            # Drop blank space between blocks here so that trailing blank
            # lines end the parse instead of starting an empty PropMatch.
            while len(toks) > 0 and (toks[0].iswhitespace()
                                     or toks[0].isnewline()):
                del toks[0]
            if len(toks) == 0:
                break
            self.propspecs.append(PropMatch(toks))

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        return self.propspecs

    def __repr__(self):
        """
        Since we're ostensibly the top of the tree, we get an extra set of
        parens.
        """
        return "(%s)" % ParseTreeNode.__repr__(self)


class PropMatch(ParseTreeNode):
    """
    A property match block. These blocks specify, textually, a category, and
    several properties which apply to it. The text form is:
        name property
    or
        name() property
    or
        name(args)
            property
    or
        name(args): property
    etc.
    """

    def __init__(self, toks):
        """
        `toks` is a token list. The tokens at the beginning thereof which
        define a property match block will be removed. Raises ParseError if
        no block can be parsed or the block has no properties.
        """
        self.properties = []
        # Skip leading blank lines.
        while True:
            toks.trim_leading_whitespace()
            if not toks[0].isnewline():
                break
            del toks[0]
        self.pattern = CatSpec(toks)
        # The `:` after the category is optional.
        if len(toks) > 0 and toks[0].isoper(":"):
            del toks[0]
        while len(toks) > 0:
            if toks[0].iswhitespace():
                del toks[0]
            elif toks[0].isnewline():
                del toks[0]
                # A line that does not start with whitespace is not a
                # continuation of this block.
                if len(toks) == 0 or not toks[0].iswhitespace():
                    break
            else:
                self.properties.append(PropSpec(toks))
        if len(self.properties) == 0:
            raise ParseError("Property expected")

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        return [self.pattern] + self.properties


class CatSpec(ParseTreeNode):
    """
    A category specification. Parsed from a textual specification of a category
    of the form:
        name
    or
        name()
    or
        name(args)
    """

    def __init__(self, toks):
        """
        `toks` is a token stream. The tokens at the beginning thereof which
        define a CatSpec will be removed. Raises ParseError if the stream is
        empty or does not start with a word.
        """
        while len(toks) > 0 and toks[0].iswhitespace():
            del toks[0]
        toks.assert_full()
        if not toks[0].isword():
            raise ParseError("State name expected")
        self.name = toks[0]
        del toks[0]
        self.args = ArgSpec(toks)

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        if len(self.args.items) == 0:
            return [self.name]
        return [self.name, self.args]


class ArgSpec(ParseTreeNode):
    """
    Argument specification. Represents a textual argument list of the form:
        (key: value, key2: value2, ...)
    """

    def __init__(self, toks):
        """
        `toks` is a token stream. The tokens at the beginning thereof which
        define an ArgSpec will be removed. NOTE: if there is no ArgSpec at the
        start of the stream (including when the stream is empty), no error is
        yielded and the created ArgSpec is empty (equivalent to "()").
        Raises ParseError on a malformed or unterminated argument list.
        """
        self.items = []
        self.is_specifier = True
        if len(toks) == 0 or not toks[0].isoper('('):
            return
        del toks[0]
        expect_arg = True
        while len(toks) > 0:
            if toks[0].iswhitespace():
                del toks[0]
            elif toks[0].isoper(")"):
                del toks[0]
                return
            elif expect_arg:
                if toks[0].isoper("*"):
                    # `*` in argument position clears is_specifier.
                    self.is_specifier = False
                    del toks[0]
                else:
                    self.items.append(KVPair(toks))
                expect_arg = False
            elif toks[0].isoper(","):
                del toks[0]
                expect_arg = True
            else:
                raise ParseError("Unexpected %s token" % toks[0])
        # Ran out of tokens without seeing the closing parenthesis.
        raise ParseError("`)` expected")

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        if self.is_specifier:
            return self.items
        else:
            return self.items + ["NOSPEC"]


class KVPair(ParseTreeNode):
    """
    A key value pair, specified in the form:
        key: value
    """

    def __init__(self, toks):
        """
        `toks` is a token stream. The tokens at the beginning thereof which
        define a KVPair will be removed. Raises ParseError if the stream does
        not start with `word : word`.
        """
        toks.trim_leading_whitespace()
        if not toks[0].isword():
            raise ParseError("Expected name before %s token" % toks[0])
        self.name = toks[0]
        del toks[0]
        toks.assert_full()
        if not toks[0].isoper(":"):
            raise ParseError("Expected `:` before %s token" % toks[0])
        del toks[0]
        toks.trim_leading_whitespace()
        if not toks[0].isword():
            raise ParseError("Expected value before %s token" % toks[0])
        self.value = toks[0]
        del toks[0]

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        return [self.name, self.value]


class PropSpec(ParseTreeNode):
    """
    A property specification, specified textually as one of:
        on someevent
        when somestate
        auto
        exec somecommand
    """

    def __init__(self, toks):
        """
        `toks` is a token stream. The tokens at the beginning thereof which
        define a PropSpec will be removed. Raises ParseError if the stream
        does not start with one of the keywords above.
        """
        toks.trim_leading_whitespace()
        if toks[0].isword("when"):
            self.spec = WhenSpec(toks)
        elif toks[0].isword("on"):
            self.spec = OnSpec(toks)
        elif toks[0].isword("auto"):
            # `auto` is rewritten in place to `on ε` and parsed as an OnSpec.
            toks[0:1] = [WordToken("on"), WhitespaceToken(), WordToken("ε")]
            self.spec = OnSpec(toks)
        elif toks[0].isword("exec"):
            self.spec = ExecSpec(toks)
        else:
            raise ParseError("Expected keyword before %s token" % toks[0])

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        return [self.spec]


class WhenSpec(ParseTreeNode):
    """
    A specification of a when relationship. Defined textually as:
        when CatSpec
    """

    def __init__(self, toks):
        """
        `toks` is a token stream whose first token must be the word `when`;
        the tokens defining the WhenSpec are removed. Raises ParseError if no
        category name follows the keyword.
        """
        if not toks[0].isword("when"):
            # Callers dispatch on the keyword, so this is an internal error.
            raise Exception("Impossible condition in parser")
        del toks[0]
        toks.trim_leading_whitespace()
        if not toks[0].isword():
            raise ParseError("Expected name before %s token" % toks[0])
        self.waiting_for = CatSpec(toks)

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        return [self.waiting_for]


class OnSpec(ParseTreeNode):
    """
    A specification of an event trigger. Defined textually as:
        on CatSpec
    """

    def __init__(self, toks):
        """
        `toks` is a token stream whose first token must be the word `on`;
        the tokens defining the OnSpec are removed. Raises ParseError if no
        category name follows the keyword.
        """
        if not toks[0].isword("on"):
            # Callers dispatch on the keyword, so this is an internal error.
            raise Exception("Impossible condition in parser")
        del toks[0]
        toks.trim_leading_whitespace()
        if not toks[0].isword():
            raise ParseError("Expected name before %s token" % toks[0])
        self.waiting_for = CatSpec(toks)

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        return [self.waiting_for]


class ExecSpec(ParseTreeNode):
    """
    A specification of a service to run. Defined textually as:
        exec script
    """

    def __init__(self, toks):
        """
        `toks` is a token stream whose first token must be the word `exec`.
        That token and every following token up to (but not including) the
        next newline token, or the end of the stream, are removed and stored
        in `script_toks`.
        """
        if not toks[0].isword("exec"):
            # Callers dispatch on the keyword, so this is an internal error.
            raise Exception("Impossible condition in parser")
        del toks[0]
        self.script_toks = []
        # The last line of the input may lack a trailing newline, so stop at
        # the end of the stream as well.
        while len(toks) > 0 and not toks[0].isnewline():
            self.script_toks.append(toks[0])
            del toks[0]

    def children(self):
        """
        List of child nodes of this parse tree node
        """
        return [self.script_toks]