diff options
author | Jan Pokorný <jpokorny@redhat.com> | 2013-01-29 18:20:13 +0100 |
---|---|---|
committer | Jan Pokorný <jpokorny@redhat.com> | 2013-01-29 18:25:12 +0100 |
commit | dbebf0be9cb077864aac9d6d60783b53ee50a2a5 (patch) | |
tree | 98eaed1a6bd19c0c8f925fec9a0655a5c9466b4d /rnctree.py | |
parent | f781e6e0c5ce428b18485d7d05df91962c04c007 (diff) | |
download | rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.tar.gz rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.tar.xz rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.zip |
Massive update so it works for corosync.rnc and several others
- continued style cleanup
- some files moved to "unused" dir
- added several files for testing and the ones like TODO and HACKING
Signed-off-by: Jan Pokorný <jpokorny@redhat.com>
Diffstat (limited to 'rnctree.py')
-rwxr-xr-x | rnctree.py | 483 |
1 files changed, 384 insertions, 99 deletions
@@ -1,49 +1,129 @@ #!/usr/bin/env python # Convert an RELAX NG compact syntax schema to a Node tree # This file released to the Public Domain by David Mertz -from __future__ import generators +# +# Extended under revised BSD license by Jan Pokorny (jpokorny@redhat.com) +# Copyright 2013 Red Hat, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the Red Hat, Inc. nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. + +# Differences when compared to trang output +# 1. comments placement +# 2. sometimes superfluous <group> +# 3. context-free dichotomy (diff conv08.rng.{expected,trang}) +# plenty of others (it's not the primary goal to achieve 1:1 trang match) + +# XXX: each AST node has its own subclass, knows how to XMLize itself, ...? + + import sys -from rnc_tokenize import token_list +from rnc_tokenize import tokens, pair_rules, keywords, token_list + +# ONE ... default cardinality of one +# DIRECT ... denotes that the usage of NAME is <name>, not <ref name=...> +quant_tokens_aux = tuple(''' + DIRECT + ONE + '''.split()) +# AST nodes not directly matching the tokens +parse_constructs = tuple(''' + ROOT + '''.split()) + tuple(r[2] for r in pair_rules) -class ParseError(SyntaxError): pass +for t in tokens + quant_tokens_aux + parse_constructs: + globals()[t] = t -for t in """ - ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL - NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT - DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE - """.split(): globals()[t] = t +keyword_list = keywords.values() -PAIRS = {'BEG_BODY': ('END_BODY', BODY), - 'BEG_PAREN': ('END_PAREN', GROUP), - 'BEG_ANNO': ('END_ANNO', ANNOTATION)} +PAIRS = {r[0]: tuple(r[1:]) for r in pair_rules} -TAGS = {ONE: 'group', - SOME: 'oneOrMore', - MAYBE: 'optional', - ANY: 'zeroOrMore'} +TAGS = { + ONE: 'group', + SOME: 'oneOrMore', + MAYBE: 'optional', + ANY: 'zeroOrMore', + ELEM: 'element', + ATTR: 'attribute', + NAME: 'ref', +} + +URI_DATATYPES = "http://www.w3.org/2001/XMLSchema-datatypes" +URI_ANNOTATIONS = "http://relaxng.org/ns/compatibility/annotations/1.0" DEFAULT_NAMESPACE = None -DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"'] +DATATYPE_LIB = [0, '"' + URI_DATATYPES + '"'] OTHER_NAMESPACE = {} CONTEXT_FREE = 0 -try: enumerate -except: enumerate = lambda seq: zip(range(len(seq)), seq) +# debugging +for i, n in enumerate(""" + D_NOTHING + D_TO_NODES + D_MATCH_PAIR + D_TYPE_BODIES + D_NEST_DEFINES + D_SCAN_NS +""".split()): + globals()[n] = i and 2 << (i - 1) or 0 +dlist = [] +#dlist.append(D_TO_NODES) +#dlist.append(D_MATCH_PAIR) +#dlist.append(D_TYPE_BODIES) +#dlist.append(D_NEST_DEFINES) +#dlist.append(D_SCAN_NS) +debug = reduce(lambda a, b: a | b, dlist, D_NOTHING) + + +def try_debug(what, nodes): + if debug & globals().get('D_' + what, D_NOTHING): + print what + for node in nodes: + print node.prettyprint() + + nodetypes = lambda nl: tuple(map(lambda n: n.type, nl)) toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks) +class ParseError(SyntaxError): + pass + + class Node(object): __slots__ = ('type', 'value', 'name', 'quant') - def __iter__(self): yield self + def __iter__(self): + yield self __len__ = lambda self: 1 - def __init__(self, type='', value=[], name=None, quant=ONE): - self.type = type - self.value = value - self.name = name + def __init__(self, type='', value=None, name=None, quant=ONE): + self.type = type + self.value = value if value is not None else [] + self.name = name self.quant = quant def format(self, indent=0): @@ -73,26 +153,58 @@ class Node(object): else: return self.add_ns(self.xmlnode()) + def collect_annot(self, x): + ret = {} + if isinstance(x.value, basestring): + return ret + + name, value = None, None + for node in x.value: + if node.type != NS_ANNOTATION: + break + for i, inner in enumerate(node.value): + if i % 3 == 0 and inner.type == NAME: + name = inner.value + elif i % 3 == 1 and inner.type == DEFINE: + name += ':' + inner.value + elif i % 3 == 2 and inner.type == LITERAL: + value = inner.value + if ret.setdefault(name, value) is not value: + assert 0, "redefinition of %s" % name + name, value = None, None + elif i % 3 == 0 and i > 0: + break + else: + assert 0, "NS_ANNOTATION body does not match" + return [n + '="' + v + '"' for n, v in ret.iteritems()] + def xmlnode(self, indent=0): out = [] write = out.append if self.type == ROOT: write('<?xml version="1.0" encoding="UTF-8"?>') - for x in self.value: + for i, x in enumerate(self.value): if not isinstance(x, Node): raise TypeError("Unhappy Node.value: " + repr(x)) - elif x.type == START: - startelem = '<start><ref name="%s"/></start>' % x.value - write(' ' * indent + startelem) + if x.type == START: + write(' ' * indent + '<start>') + if (x.name is not None): + write(' ' * (indent + 1) + '<ref name="%s"/>' % x.name) + else: + write(x.xmlnode(indent + 1)) + write(' ' * indent + '</start>') elif x.type == DEFINE: write(' ' * indent + '<define name="%s">' % x.name) write(x.xmlnode(indent + 1)) write(' ' * indent + '</define>') - elif x.type == NAME: - write(' ' * indent + '<ref name="%s"/>' % x.value) elif x.type == COMMENT: - write(' ' * indent + '<!-- %s -->' % x.value) + comments = x.value.split('\n') + if len(comments) == 1: + c = ' ' + comments[0] + ' ' + else: + c = ('\n' + ' ' * (indent + 1)).join([''] + comments + ['']) + write(' ' * indent + '<!--%s-->' % c) elif x.type == LITERAL: write(' ' * indent + '<value>%s</value>' % x.value) elif x.type == ANNOTATION: @@ -102,14 +214,14 @@ class Node(object): write(' ' * indent + '<interleave>') write(x.xmlnode(indent + 1)) write(' ' * indent + '</interleave>') - elif x.type == SEQ: - write(x.xmlnode(indent + 1)) elif x.type == CHOICE: write(' ' * indent + '<choice>') write(x.xmlnode(indent + 1)) write(' ' * indent + '</choice>') - elif x.type == GROUP: - write(x.xmlnode(indent)) + elif x.type in (GROUP, SEQ): + write(' ' * indent + '<group>') + write(x.xmlnode(indent + 1)) + write(' ' * indent + '</group>') elif x.type == TEXT: write(' ' * indent + '<text/>') elif x.type == EMPTY: @@ -123,24 +235,32 @@ class Node(object): p = '<param name="pattern">%s</param>' % x.value write(' ' * (indent + 1) + p) write(' ' * indent + '</data>') - elif x.type == ELEM: - if x.quant == ONE: - write(' ' * indent + '<element name="%s">' % x.name) - write(x.xmlnode(indent + 1)) - write(' ' * indent + '</element>') + elif x.type == INCLUDE: + write(' ' * indent + '<include href="%s"/>' % x.value) + elif x.type == NAME and x.quant == DIRECT: + assert x.type == NAME + write(' ' * indent + '<name>%s</name>' % x.value) + elif x.type in (ATTR, ELEM, NAME): + a = ('\n' + ' ' * (indent + 3)).join(self.collect_annot(x)) + name_n_annot = '%s' % (' ' + a).rstrip() + name = x.value if x.type == NAME else x.name + if name: + name_n_annot = ' name="%s"' % name + name_n_annot + + indent_inner = indent + if x.quant != ONE: + write(' ' * indent_inner + '<%s>' % TAGS[x.quant]) + indent_inner += 1 + tag, rest = TAGS[x.type], name_n_annot + if x.type == NAME or x.type == ATTR and x.value[0].type == TEXT: + write(' ' * indent_inner + '<%s%s/>' % (tag, rest)) else: - write(' ' * indent + '<%s>' % TAGS[x.quant]) - write(' ' * (indent + 1) + '<element name="%s">' % x.name) - write(x.xmlnode(indent + 2)) - write(' ' * (indent + 1) + '</element>') - write(' ' * indent + '</%s>' % TAGS[x.quant]) - elif x.type == ATTR: - if x.value[0].type == TEXT: - write(' ' * indent + '<attribute name="%s"/>' % x.name) - elif x.value[0].type == EMPTY: - write(' ' * indent + '<attribute name="%s">' % x.name) - write(' ' * (indent + 1) + '<empty/>') - write(' ' * indent + '</attribute>') + write(' ' * indent_inner + '<%s%s>' % (tag, rest)) + write(x.xmlnode(indent_inner + 1)) + write(' ' * indent_inner + '</%s>' % tag) + if x.quant != ONE: + indent_inner -= 1 + write(' ' * indent_inner + '</%s>' % TAGS[x.quant]) return '\n'.join(out) @@ -156,7 +276,11 @@ class Node(object): ltpos = line.find('<') if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'): # We've got an element tag, not PI or comment - new = line[:line.find('>')] + tail = '>' + new = line[:line.find(tail)] + if new.endswith('/'): + new = new[:-1] + tail = '/' + tail new += ' xmlns="http://relaxng.org/ns/structure/1.0"' if DEFAULT_NAMESPACE is not None: new += '\n ns=%s' % DEFAULT_NAMESPACE @@ -164,7 +288,7 @@ class Node(object): new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1] for ns, url in OTHER_NAMESPACE.items(): new += '\n xmlns:%s=%s' % (ns, url) - new += '>' + new += tail lines[i] = new break return '\n'.join(lines) @@ -188,21 +312,38 @@ def findmatch(beg, nodes, offset): level = 1 end = PAIRS[beg][0] for i, t in enumerate(nodes[offset:]): - if t.type == beg: level += 1 - elif t.type == end: level -= 1 + if t.type == beg: + level += 1 + elif t.type == end: + level -= 1 if level == 0: return i + offset raise EOFError("No closing token encountered for %s @ %d" - % (beg, offset)) + % (beg, offset)) + +# +# 1st pass in the pipe +# def match_pairs(nodes): + """<left paren., []> + <tokens> + <right paren., []> --> <ent., <tokens>> + + Other effects: + - merge comments/annotations + """ newnodes = [] i = 0 while 1: - if i >= len(nodes): break + if i >= len(nodes): + break node = nodes[i] if node.type in PAIRS.keys(): + # TOKEN, etc. -> NAME where suitable + # (keyword-like names do not need to be escaped in some cases) + if node.type == 'BEG_BODY' and newnodes[-1].type in keyword_list: + if newnodes[-2].type in (ELEM, ATTR): + newnodes[-1].type = NAME # Look for enclosing brackets match = findmatch(node.type, nodes, i + 1) matchtype = PAIRS[node.type][1] @@ -210,69 +351,139 @@ def match_pairs(nodes): node.value = match_pairs(node.value) newnodes.append(node) i = match + 1 + elif (node.type in (COMMENT, ANNOTATION) and i > 0 + and newnodes[-1].type == node.type): + # merge comments/annotations + newnodes[-1].value += "\n" + node.value + i += 1 else: newnodes.append(node) i += 1 - if i >= len(nodes): break + if i >= len(nodes): + break if nodes[i].type in (ANY, SOME, MAYBE): newnodes[-1].quant = nodes[i].type i += 1 + nodes[:] = newnodes return nodes +# +# 2nd pass in the pipe +# + def type_bodies(nodes): + """Another (main) de-linearization""" newnodes = [] i = 0 while 1: - if i >= len(nodes): break - if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \ - nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY): + if i >= len(nodes): + break + if (nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) + or nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY)): name, body = nodes[i + 1].value, nodes[i + 2] value, quant = type_bodies(body.value), body.quant node = Node(nodes[i].type, value, name, quant) newnodes.append(node) + if not name: + assert False i += 3 + # "element a|b" cases + elif (nodetypes(nodes[i:i + 3]) == (ELEM, NAME, CHOICE) + or nodetypes(nodes[i:i + 3]) == (ATTR, NAME, CHOICE)): + # see nameClass (choice of nameClass+) + # XXX: very simplified + if nodes[i].type == ATTR: + assert False + node_type = nodes[i].type + value = [nodes[i + 1]] + i += 2 + while nodetypes(nodes[i:i + 2]) == (CHOICE, NAME): + value.extend(type_bodies(nodes[i:i + 2])) + i += 2 + # re-mark quant as we do not want "ref" output here + for v in value: + if v.type == NAME: + v.quant = DIRECT + assert len(nodes) >= i and nodes[i].type == BODY + value.extend(type_bodies(nodes[i].value)) + node = Node(node_type, value, None, nodes[i].quant) + i += 1 + newnodes.append(node) elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN): node = Node(DATATAG, nodes[i + 1].value, nodes[i].value) newnodes.append(node) i += 2 - elif nodes[i] == DEFINE: - print nodes[i:] else: - if nodes[i].type == GROUP: # Recurse into groups - value = type_bodies(nodes[i].value) - nodes[i] = Node(GROUP, value, None, nodes[i].quant) - newnodes.append(nodes[i]) + n = nodes[i] + if n.type == GROUP: # Recurse into groups + value = type_bodies(n.value) + if len(value) > 1 and n.type: + n = Node(GROUP, value, None, n.quant) + newnodes.append(n) i += 1 nodes[:] = newnodes return nodes -def nest_defines(nodes): - "Attach groups to named patterns" +# +# 3rd pass in the pipe +# + +def _nest_annotations(nodes, mapping, delim=None): + """Helper to move comments/annotations down into attributes/elements + + Uses non-tail recursion to proceed the tree bottom-up as + otherwise there would be confusion if the annotations are + newly added (and thus should be kept) or the original ones + to be moved. + + Mapping is partially defined + token-type |-> accumulator-list for token-type + for token-types covering annotations (ANNOTATION, NS_ANNOTATION) + and is used to pass unconsumed annotations down the tree. + + Returns triplet: number of consumed nodes, filtered nodes, mapping. + + Note that mapping should contain empty lists only when the recursion + returns back to the initiator (XXX: little bit of sanity checking, + we cannot speak about proper validation here). + """ + # XXX: unclean, yes newnodes = [] - i = 0 - while 1: - if i >= len(nodes): break - node = nodes[i] - newnodes.append(node) - if node.type == DEFINE: - group = [] - while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE: - group.append(nodes[i + 1]) - i += 1 - node.name = node.value - node.value = Node(GROUP, group) - i += 1 + for i, n in enumerate(nodes): + if delim and n.type == delim: + break + + if not isinstance(n.value, str): # no recurse to terminal str + if n.type in (ELEM, ATTR): + mapping_rec = {n: [] for n in + (ANNOTATION, NS_ANNOTATION, COMMENT)} + else: + mapping_rec = mapping + _nest_annotations(n.value, mapping_rec) + + if n.type in (ELEM, ATTR): # annot. consumer (guarded in recursion) + n.value = (mapping['NS_ANNOTATION'] + mapping['ANNOTATION'] + + mapping['COMMENT'] + n.value) + mapping['NS_ANNOTATION'][:], mapping['ANNOTATION'][:] = [], [] + mapping['COMMENT'][:] = [] + elif i == len(nodes) - 1 and n.type == COMMENT and not delim: + # comment at the end of the nodelist, but only if not top-level + newnodes.append(n) + continue + + mapping.get(n.type, newnodes).append(n) + nodes[:] = newnodes - return nodes + return i, nodes, mapping -def intersperse(nodes): - "Look for interleaved, choice, or sequential nodes in groups/bodies" +def _intersperse(nodes): + """Look for interleaved, choice, or sequential nodes in groups/bodies""" for node in nodes: - if node.type in (ELEM, ATTR, GROUP, LITERAL): + if node.type in (ELEM, ATTR, GROUP, LITERAL): # XXX: literal? val = node.value ntypes = [n.type for n in val if not isinstance(val, str)] inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)] @@ -281,43 +492,117 @@ def intersperse(nodes): raise ParseError("Ambiguity in sequencing: %s" % node) if len(inters) > 0: intertype = inters.keys()[0] - items = [] + outer_items, last_ntype, internode = [], None, None + simplify = node.type == GROUP for pat in node.value: - if pat.type != intertype: - items.append(pat) - node.value = Node(intertype, items) + if pat.type == intertype: + if internode is None: + internode = Node(intertype, [outer_items.pop()]) + outer_items.append(internode) + # otherwise drop it + elif last_ntype == intertype: + internode.value.append(pat) + else: + outer_items.append(pat) + if pat.type in (COMMENT, ANNOTATION): + # these are not interesting wrt. last type + continue + elif pat.quant not in (ONE, MAYBE): + simplify = False + last_ntype = pat.type + + if (simplify and len(outer_items) == 1 + and outer_items[0] is internode): + node.type, node.value = internode.type, internode.value + else: + node.value = outer_items if not isinstance(node.value, str): # No recurse to terminal str - intersperse(node.value) + _intersperse(node.value) + return nodes + + +def nest_defines(nodes): + """Attach groups to named patterns + + Other effects: + - annotations are properly nested + - comments are nested + """ + newnodes = [] + i = 0 + group, annotations, ns_annotations, comments = [], [], [], [] + mapping = dict(ANNOTATION=annotations, NS_ANNOTATION=ns_annotations, + COMMENT=comments) + while i < len(nodes): + node = nodes[i] + newnodes.append(node) + group[:], annotations[:], ns_annotations[:], comments[:] = [], [], [], [] + if node.type == DEFINE: + j, group[:], mapping = _nest_annotations(nodes[i + 1:], mapping, DEFINE) + i += j + node.name = node.value + grp = _intersperse([Node(GROUP, group[:])])[0] + if len(grp.value) > 1 and grp.type != SEQ: + node.value = [grp] + else: + node.value = grp.value[:] + # when _nest_annotations returned *not* due to reaching DEFINE, + # but trailing comments are tolerated + if i + 1 > len(nodes) or nodes[i + 1].type not in (DEFINE, COMMENT): + break + elif node.type == ELEM: + # top-level element + _intersperse(Node(GROUP, [node])) + i += 1 + nodes[:] = newnodes return nodes +# +# 4th pass in the pipe +# + def scan_NS(nodes): - "Look for any namespace configuration lines" + """Look for any namespace configuration lines + + Other effects: + - DEFINE(start) --> START + """ global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE for node in nodes: if node.type == DEFAULT_NS: DEFAULT_NAMESPACE = node.value elif node.type == NS: - ns, url = map(str.strip, node.value.split('=')) + ns, url = map(str.strip, node.value.split('=', 1)) OTHER_NAMESPACE[ns] = url elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE: - OTHER_NAMESPACE['a'] =\ - '"http://relaxng.org/ns/compatibility/annotations/1.0"' + OTHER_NAMESPACE['a'] = '"' + URI_ANNOTATIONS + '"' elif node.type == DATATYPES: DATATYPE_LIB[:] = [1, node.value] - elif node.type == START: + elif not CONTEXT_FREE and node.type == DEFINE and node.name == 'start': CONTEXT_FREE = 1 + node.type = START + node.name = None def make_nodetree(tokens): + """Wraps the pipe of conversion passes""" nodes = toNodes(tokens) + try_debug('TO_NODES', nodes) + match_pairs(nodes) + try_debug('MATCH_PAIR', nodes) + type_bodies(nodes) + try_debug('TYPE_BODIES', nodes) + nest_defines(nodes) - intersperse(nodes) + try_debug('NEST_DEFINES', nodes) + scan_NS(nodes) - root = Node(ROOT, nodes) - return root + try_debug('SCAN_NS', nodes) + + return Node(ROOT, nodes) if __name__ == '__main__': |