Massive update so it works for corosync.rnc and several others

- continued style cleanup
- some files moved to "unused" dir
- added several files for testing and the ones like TODO and HACKING

Signed-off-by: Jan Pokorný <jpokorny@redhat.com>
author: Jan Pokorný <jpokorny@redhat.com> 2013-01-29 18:20:13 +0100
committer: Jan Pokorný <jpokorny@redhat.com> 2013-01-29 18:25:12 +0100
commit: dbebf0be9cb077864aac9d6d60783b53ee50a2a5 (patch)
tree: 98eaed1a6bd19c0c8f925fec9a0655a5c9466b4d /rnctree.py
parent: f781e6e0c5ce428b18485d7d05df91962c04c007 (diff)
download: rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.tar.gz
rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.tar.xz
rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.zip
1 files changed, 384 insertions, 99 deletions
diff --git a/rnctree.py b/rnctree.py
index ebb9433..eb2d714 100755
--- a/rnctree.py
+++ b/rnctree.py
@@ -1,49 +1,129 @@
 #!/usr/bin/env python
 # Convert an RELAX NG compact syntax schema to a Node tree
 # This file released to the Public Domain by David Mertz
-from __future__ import generators
+#
+# Extended under revised BSD license by Jan Pokorny (jpokorny@redhat.com)
+# Copyright 2013 Red Hat, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the Red Hat, Inc. nor the names of its
+#   contributors may be used to endorse or promote products derived from this
+#   software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+
+# Differences when compared to trang output
+#  1. comments placement
+#  2. sometimes superfluous <group>
+#  3. context-free dichotomy (diff conv08.rng.{expected,trang})
+#  plenty of others (it's not the primary goal to achieve 1:1 trang match)
+
+# XXX: each AST node has its own subclass, knows how to XMLize itself, ...?
+
+
 import sys
-from rnc_tokenize import token_list
+from rnc_tokenize import tokens, pair_rules, keywords, token_list
+
+# ONE    ... default cardinality of one
+# DIRECT ... denotes that the usage of NAME is <name>, not <ref name=...>
+quant_tokens_aux = tuple('''
+  DIRECT
+  ONE
+  '''.split())
 
+# AST nodes not directly matching the tokens
+parse_constructs = tuple('''
+  ROOT
+  '''.split()) + tuple(r[2] for r in pair_rules)
 
-class ParseError(SyntaxError): pass
+for t in tokens + quant_tokens_aux + parse_constructs:
+    globals()[t] = t
 
-for t in """
-  ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
-  NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
-  DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
-  """.split(): globals()[t] = t
+keyword_list = keywords.values()
 
-PAIRS = {'BEG_BODY':  ('END_BODY',  BODY),
-         'BEG_PAREN': ('END_PAREN', GROUP),
-         'BEG_ANNO':  ('END_ANNO',  ANNOTATION)}
+PAIRS = {r[0]: tuple(r[1:]) for r in pair_rules}
 
-TAGS = {ONE:   'group',
-        SOME:  'oneOrMore',
-        MAYBE: 'optional',
-        ANY:   'zeroOrMore'}
+TAGS = {
+    ONE:   'group',
+    SOME:  'oneOrMore',
+    MAYBE: 'optional',
+    ANY:   'zeroOrMore',
+    ELEM:  'element',
+    ATTR:  'attribute',
+    NAME:  'ref',
+}
+
+URI_DATATYPES = "http://www.w3.org/2001/XMLSchema-datatypes"
+URI_ANNOTATIONS = "http://relaxng.org/ns/compatibility/annotations/1.0"
 
 DEFAULT_NAMESPACE = None
-DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
+DATATYPE_LIB = [0, '"' + URI_DATATYPES + '"']
 OTHER_NAMESPACE = {}
 CONTEXT_FREE = 0
 
-try: enumerate
-except: enumerate = lambda seq: zip(range(len(seq)), seq)
+# debugging
+for i, n in enumerate("""
+    D_NOTHING
+    D_TO_NODES
+    D_MATCH_PAIR
+    D_TYPE_BODIES
+    D_NEST_DEFINES
+    D_SCAN_NS
+""".split()):
+    globals()[n] = i and 2 << (i - 1) or 0
+dlist = []
+#dlist.append(D_TO_NODES)
+#dlist.append(D_MATCH_PAIR)
+#dlist.append(D_TYPE_BODIES)
+#dlist.append(D_NEST_DEFINES)
+#dlist.append(D_SCAN_NS)
+debug = reduce(lambda a, b: a | b, dlist, D_NOTHING)
+
+
+def try_debug(what, nodes):
+    if debug & globals().get('D_' + what, D_NOTHING):
+        print what
+        for node in nodes:
+            print node.prettyprint()
+
+
 nodetypes = lambda nl: tuple(map(lambda n: n.type, nl))
 toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks)
 
 
+class ParseError(SyntaxError):
+    pass
+
+
 class Node(object):
     __slots__ = ('type', 'value', 'name', 'quant')
 
-    def __iter__(self): yield self
+    def __iter__(self):
+        yield self
     __len__ = lambda self: 1
 
-    def __init__(self, type='', value=[], name=None, quant=ONE):
-        self.type  = type
-        self.value = value
-        self.name  = name
+    def __init__(self, type='', value=None, name=None, quant=ONE):
+        self.type = type
+        self.value = value if value is not None else []
+        self.name = name
         self.quant = quant
 
     def format(self, indent=0):
@@ -73,26 +153,58 @@ class Node(object):
         else:
             return self.add_ns(self.xmlnode())
 
+    def collect_annot(self, x):
+        ret = {}
+        if isinstance(x.value, basestring):
+            return ret
+
+        name, value = None, None
+        for node in x.value:
+            if node.type != NS_ANNOTATION:
+                break
+            for i, inner in enumerate(node.value):
+                if i % 3 == 0 and inner.type == NAME:
+                    name = inner.value
+                elif i % 3 == 1 and inner.type == DEFINE:
+                    name += ':' + inner.value
+                elif i % 3 == 2 and inner.type == LITERAL:
+                    value = inner.value
+                    if ret.setdefault(name, value) is not value:
+                        assert 0, "redefinition of %s" % name
+                    name, value = None, None
+                elif i % 3 == 0 and i > 0:
+                    break
+                else:
+                    assert 0, "NS_ANNOTATION body does not match"
+        return [n + '="' + v + '"' for n, v in ret.iteritems()]
+
     def xmlnode(self, indent=0):
         out = []
         write = out.append
         if self.type == ROOT:
             write('<?xml version="1.0" encoding="UTF-8"?>')
 
-        for x in self.value:
+        for i, x in enumerate(self.value):
             if not isinstance(x, Node):
                 raise TypeError("Unhappy Node.value: " + repr(x))
-            elif x.type == START:
-                startelem = '<start><ref name="%s"/></start>' % x.value
-                write('  ' * indent + startelem)
+            if x.type == START:
+                write('  ' * indent + '<start>')
+                if (x.name is not None):
+                    write('  ' * (indent + 1) + '<ref name="%s"/>' % x.name)
+                else:
+                    write(x.xmlnode(indent + 1))
+                write('  ' * indent + '</start>')
             elif x.type == DEFINE:
                 write('  ' * indent + '<define name="%s">' % x.name)
                 write(x.xmlnode(indent + 1))
                 write('  ' * indent + '</define>')
-            elif x.type == NAME:
-                write('  ' * indent + '<ref name="%s"/>' % x.value)
             elif x.type == COMMENT:
-                write('  ' * indent + '<!-- %s -->' % x.value)
+                comments = x.value.split('\n')
+                if len(comments) == 1:
+                    c = ' ' + comments[0] + ' '
+                else:
+                    c = ('\n' + '  ' * (indent + 1)).join([''] + comments + [''])
+                write('  ' * indent + '<!--%s-->' % c)
             elif x.type == LITERAL:
                 write('  ' * indent + '<value>%s</value>' % x.value)
             elif x.type == ANNOTATION:
@@ -102,14 +214,14 @@ class Node(object):
                 write('  ' * indent + '<interleave>')
                 write(x.xmlnode(indent + 1))
                 write('  ' * indent + '</interleave>')
-            elif x.type == SEQ:
-                write(x.xmlnode(indent + 1))
             elif x.type == CHOICE:
                 write('  ' * indent + '<choice>')
                 write(x.xmlnode(indent + 1))
                 write('  ' * indent + '</choice>')
-            elif x.type == GROUP:
-                write(x.xmlnode(indent))
+            elif x.type in (GROUP, SEQ):
+                write('  ' * indent + '<group>')
+                write(x.xmlnode(indent + 1))
+                write('  ' * indent + '</group>')
             elif x.type == TEXT:
                 write('  ' * indent + '<text/>')
             elif x.type == EMPTY:
@@ -123,24 +235,32 @@ class Node(object):
                     p = '<param name="pattern">%s</param>' % x.value
                     write('  ' * (indent + 1) + p)
                     write('  ' * indent + '</data>')
-            elif x.type == ELEM:
-                if x.quant == ONE:
-                    write('  ' * indent + '<element name="%s">' % x.name)
-                    write(x.xmlnode(indent + 1))
-                    write('  ' * indent + '</element>')
+            elif x.type == INCLUDE:
+                write('  ' * indent + '<include href="%s"/>' % x.value)
+            elif x.type == NAME and x.quant == DIRECT:
+                    assert x.type == NAME
+                    write('  ' * indent + '<name>%s</name>' % x.value)
+            elif x.type in (ATTR, ELEM, NAME):
+                a = ('\n' + '  ' * (indent + 3)).join(self.collect_annot(x))
+                name_n_annot = '%s' % (' ' + a).rstrip()
+                name = x.value if x.type == NAME else x.name
+                if name:
+                    name_n_annot = ' name="%s"' % name + name_n_annot
+
+                indent_inner = indent
+                if x.quant != ONE:
+                    write('  ' * indent_inner + '<%s>' % TAGS[x.quant])
+                    indent_inner += 1
+                tag, rest = TAGS[x.type], name_n_annot
+                if x.type == NAME or x.type == ATTR and x.value[0].type == TEXT:
+                    write('  ' * indent_inner + '<%s%s/>' % (tag, rest))
                 else:
-                    write('  ' * indent + '<%s>' % TAGS[x.quant])
-                    write('  ' * (indent + 1) + '<element name="%s">' % x.name)
-                    write(x.xmlnode(indent + 2))
-                    write('  ' * (indent + 1) + '</element>')
-                    write('  ' * indent + '</%s>' % TAGS[x.quant])
-            elif x.type == ATTR:
-                if x.value[0].type == TEXT:
-                    write('  ' * indent + '<attribute name="%s"/>' % x.name)
-                elif x.value[0].type == EMPTY:
-                    write('  ' * indent + '<attribute name="%s">' % x.name)
-                    write('  ' * (indent + 1) + '<empty/>')
-                    write('  ' * indent + '</attribute>')
+                    write('  ' * indent_inner + '<%s%s>' % (tag, rest))
+                    write(x.xmlnode(indent_inner + 1))
+                    write('  ' * indent_inner + '</%s>' % tag)
+                if x.quant != ONE:
+                    indent_inner -= 1
+                    write('  ' * indent_inner + '</%s>' % TAGS[x.quant])
 
         return '\n'.join(out)
 
@@ -156,7 +276,11 @@ class Node(object):
             ltpos = line.find('<')
             if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
                 # We've got an element tag, not PI or comment
-                new = line[:line.find('>')]
+                tail = '>'
+                new = line[:line.find(tail)]
+                if new.endswith('/'):
+                    new = new[:-1]
+                    tail = '/' + tail
                 new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
                 if DEFAULT_NAMESPACE is not None:
                     new += '\n    ns=%s' % DEFAULT_NAMESPACE
@@ -164,7 +288,7 @@ class Node(object):
                     new += '\n    datatypeLibrary=%s' % DATATYPE_LIB[1]
                 for ns, url in OTHER_NAMESPACE.items():
                     new += '\n    xmlns:%s=%s' % (ns, url)
-                new += '>'
+                new += tail
                 lines[i] = new
                 break
         return '\n'.join(lines)
@@ -188,21 +312,38 @@ def findmatch(beg, nodes, offset):
     level = 1
     end = PAIRS[beg][0]
     for i, t in enumerate(nodes[offset:]):
-        if t.type == beg:   level += 1
-        elif t.type == end: level -= 1
+        if t.type == beg:
+            level += 1
+        elif t.type == end:
+            level -= 1
         if level == 0:
             return i + offset
     raise EOFError("No closing token encountered for %s @ %d"
-                    % (beg, offset))
+                   % (beg, offset))
+
 
+#
+# 1st pass in the pipe
+#
 
 def match_pairs(nodes):
+    """<left paren., []> + <tokens> + <right paren., []>  -->  <ent., <tokens>>
+
+    Other effects:
+        - merge comments/annotations
+    """
     newnodes = []
     i = 0
     while 1:
-        if i >= len(nodes): break
+        if i >= len(nodes):
+            break
         node = nodes[i]
         if node.type in PAIRS.keys():
+            # TOKEN, etc. -> NAME where suitable
+            # (keyword-like names do not need to be escaped in some cases)
+            if node.type == 'BEG_BODY' and newnodes[-1].type in keyword_list:
+                if newnodes[-2].type in (ELEM, ATTR):
+                    newnodes[-1].type = NAME
             # Look for enclosing brackets
             match = findmatch(node.type, nodes, i + 1)
             matchtype = PAIRS[node.type][1]
@@ -210,69 +351,139 @@ def match_pairs(nodes):
             node.value = match_pairs(node.value)
             newnodes.append(node)
             i = match + 1
+        elif (node.type in (COMMENT, ANNOTATION) and i > 0
+          and newnodes[-1].type == node.type):
+            # merge comments/annotations
+            newnodes[-1].value += "\n" + node.value
+            i += 1
         else:
             newnodes.append(node)
             i += 1
-        if i >= len(nodes): break
+        if i >= len(nodes):
+            break
         if nodes[i].type in (ANY, SOME, MAYBE):
             newnodes[-1].quant = nodes[i].type
             i += 1
+
     nodes[:] = newnodes
     return nodes
 
 
+#
+# 2nd pass in the pipe
+#
+
 def type_bodies(nodes):
+    """Another (main) de-linearization"""
     newnodes = []
     i = 0
     while 1:
-        if i >= len(nodes): break
-        if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \
-           nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY):
+        if i >= len(nodes):
+            break
+        if (nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY)
+          or nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY)):
             name, body = nodes[i + 1].value, nodes[i + 2]
             value, quant = type_bodies(body.value), body.quant
             node = Node(nodes[i].type, value, name, quant)
             newnodes.append(node)
+            if not name:
+                assert False
             i += 3
+        # "element a|b" cases
+        elif (nodetypes(nodes[i:i + 3]) == (ELEM, NAME, CHOICE)
+          or nodetypes(nodes[i:i + 3]) == (ATTR, NAME, CHOICE)):
+            # see nameClass (choice of nameClass+)
+            # XXX: very simplified
+            if nodes[i].type == ATTR:
+                assert False
+            node_type = nodes[i].type
+            value = [nodes[i + 1]]
+            i += 2
+            while nodetypes(nodes[i:i + 2]) == (CHOICE, NAME):
+                value.extend(type_bodies(nodes[i:i + 2]))
+                i += 2
+            # re-mark quant as we do not want "ref" output here
+            for v in value:
+                if v.type == NAME:
+                    v.quant = DIRECT
+            assert len(nodes) >= i and nodes[i].type == BODY
+            value.extend(type_bodies(nodes[i].value))
+            node = Node(node_type, value, None, nodes[i].quant)
+            i += 1
+            newnodes.append(node)
         elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN):
             node = Node(DATATAG, nodes[i + 1].value, nodes[i].value)
             newnodes.append(node)
             i += 2
-        elif nodes[i] == DEFINE:
-            print nodes[i:]
         else:
-            if nodes[i].type == GROUP:   # Recurse into groups
-                value = type_bodies(nodes[i].value)
-                nodes[i] = Node(GROUP, value, None, nodes[i].quant)
-            newnodes.append(nodes[i])
+            n = nodes[i]
+            if n.type == GROUP:   # Recurse into groups
+                value = type_bodies(n.value)
+                if len(value) > 1 and n.type:
+                    n = Node(GROUP, value, None, n.quant)
+            newnodes.append(n)
             i += 1
     nodes[:] = newnodes
     return nodes
 
 
-def nest_defines(nodes):
-    "Attach groups to named patterns"
+#
+# 3rd pass in the pipe
+#
+
+def _nest_annotations(nodes, mapping, delim=None):
+    """Helper to move comments/annotations down into attributes/elements
+
+    Uses non-tail recursion to process the tree bottom-up, as
+    otherwise it would be unclear whether the annotations are
+    newly added (and thus should be kept) or the original ones
+    to be moved.
+
+    Mapping is partially defined
+        token-type |-> accumulator-list for token-type
+    for token-types covering annotations (ANNOTATION, NS_ANNOTATION)
+    and is used to pass unconsumed annotations down the tree.
+
+    Returns triplet: number of consumed nodes, filtered nodes, mapping.
+
+    Note that mapping should contain empty lists only when the recursion
+    returns back to the initiator (XXX: little bit of sanity checking,
+    we cannot speak about proper validation here).
+    """
+    # XXX: unclean, yes
     newnodes = []
-    i = 0
-    while 1:
-        if i >= len(nodes): break
-        node = nodes[i]
-        newnodes.append(node)
-        if node.type == DEFINE:
-            group = []
-            while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE:
-                group.append(nodes[i + 1])
-                i += 1
-            node.name = node.value
-            node.value = Node(GROUP, group)
-        i += 1
+    for i, n in enumerate(nodes):
+        if delim and n.type == delim:
+            break
+
+        if not isinstance(n.value, str):  # no recurse to terminal str
+            if n.type in (ELEM, ATTR):
+                mapping_rec = {n: [] for n in
+                               (ANNOTATION, NS_ANNOTATION, COMMENT)}
+            else:
+                mapping_rec = mapping
+            _nest_annotations(n.value, mapping_rec)
+
+            if n.type in (ELEM, ATTR):  # annot. consumer (guarded in recursion)
+                n.value = (mapping['NS_ANNOTATION'] + mapping['ANNOTATION']
+                           + mapping['COMMENT'] + n.value)
+                mapping['NS_ANNOTATION'][:], mapping['ANNOTATION'][:] = [], []
+                mapping['COMMENT'][:] = []
+        elif i == len(nodes) - 1 and n.type == COMMENT and not delim:
+            # comment at the end of the nodelist, but only if not top-level
+            newnodes.append(n)
+            continue
+
+        mapping.get(n.type, newnodes).append(n)
+
     nodes[:] = newnodes
-    return nodes
+    return i, nodes, mapping
 
 
-def intersperse(nodes):
-    "Look for interleaved, choice, or sequential nodes in groups/bodies"
+def _intersperse(nodes):
+    """Look for interleaved, choice, or sequential nodes in groups/bodies"""
     for node in nodes:
-        if node.type in (ELEM, ATTR, GROUP, LITERAL):
+        if node.type in (ELEM, ATTR, GROUP, LITERAL):  # XXX: literal?
             val = node.value
             ntypes = [n.type for n in val if not isinstance(val, str)]
             inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)]
@@ -281,43 +492,117 @@ def intersperse(nodes):
                 raise ParseError("Ambiguity in sequencing: %s" % node)
             if len(inters) > 0:
                 intertype = inters.keys()[0]
-                items = []
+                outer_items, last_ntype, internode = [], None, None
+                simplify = node.type == GROUP
                 for pat in node.value:
-                    if pat.type != intertype:
-                        items.append(pat)
-                node.value = Node(intertype, items)
+                    if pat.type == intertype:
+                        if internode is None:
+                            internode = Node(intertype, [outer_items.pop()])
+                            outer_items.append(internode)
+                        # otherwise drop it
+                    elif last_ntype == intertype:
+                        internode.value.append(pat)
+                    else:
+                        outer_items.append(pat)
+                        if pat.type in (COMMENT, ANNOTATION):
+                            # these are not interesting wrt. last type
+                            continue
+                        elif pat.quant not in (ONE, MAYBE):
+                            simplify = False
+                    last_ntype = pat.type
+
+                if (simplify and len(outer_items) == 1
+                  and outer_items[0] is internode):
+                    node.type, node.value = internode.type, internode.value
+                else:
+                    node.value = outer_items
         if not isinstance(node.value, str):  # No recurse to terminal str
-            intersperse(node.value)
+            _intersperse(node.value)
+    return nodes
+
+
+def nest_defines(nodes):
+    """Attach groups to named patterns
+
+    Other effects:
+        - annotations are properly nested
+        - comments are nested
+    """
+    newnodes = []
+    i = 0
+    group, annotations, ns_annotations, comments = [], [], [], []
+    mapping = dict(ANNOTATION=annotations, NS_ANNOTATION=ns_annotations,
+                   COMMENT=comments)
+    while i < len(nodes):
+        node = nodes[i]
+        newnodes.append(node)
+        group[:], annotations[:], ns_annotations[:], comments[:] = [], [], [], []
+        if node.type == DEFINE:
+            j, group[:], mapping = _nest_annotations(nodes[i + 1:], mapping, DEFINE)
+            i += j
+            node.name = node.value
+            grp = _intersperse([Node(GROUP, group[:])])[0]
+            if len(grp.value) > 1 and grp.type != SEQ:
+                node.value = [grp]
+            else:
+                node.value = grp.value[:]
+            # stop if _nest_annotations returned for a reason other than
+            # reaching DEFINE (trailing comments are tolerated, though)
+            if i + 1 > len(nodes) or nodes[i + 1].type not in (DEFINE, COMMENT):
+                break
+        elif node.type == ELEM:
+            # top-level element
+            _intersperse(Node(GROUP, [node]))
+        i += 1
+    nodes[:] = newnodes
     return nodes
 
 
+#
+# 4th pass in the pipe
+#
+
 def scan_NS(nodes):
-    "Look for any namespace configuration lines"
+    """Look for any namespace configuration lines
+
+    Other effects:
+        - DEFINE(start) --> START
+    """
     global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
     for node in nodes:
         if node.type == DEFAULT_NS:
             DEFAULT_NAMESPACE = node.value
         elif node.type == NS:
-            ns, url = map(str.strip, node.value.split('='))
+            ns, url = map(str.strip, node.value.split('=', 1))
             OTHER_NAMESPACE[ns] = url
         elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
-            OTHER_NAMESPACE['a'] =\
-              '"http://relaxng.org/ns/compatibility/annotations/1.0"'
+            OTHER_NAMESPACE['a'] = '"' + URI_ANNOTATIONS + '"'
         elif node.type == DATATYPES:
             DATATYPE_LIB[:] = [1, node.value]
-        elif node.type == START:
+        elif not CONTEXT_FREE and node.type == DEFINE and node.name == 'start':
             CONTEXT_FREE = 1
+            node.type = START
+            node.name = None
 
 
 def make_nodetree(tokens):
+    """Wraps the pipe of conversion passes"""
     nodes = toNodes(tokens)
+    try_debug('TO_NODES', nodes)
+
     match_pairs(nodes)
+    try_debug('MATCH_PAIR', nodes)
+
     type_bodies(nodes)
+    try_debug('TYPE_BODIES', nodes)
+
     nest_defines(nodes)
-    intersperse(nodes)
+    try_debug('NEST_DEFINES', nodes)
+
     scan_NS(nodes)
-    root = Node(ROOT, nodes)
-    return root
+    try_debug('SCAN_NS', nodes)
+
+    return Node(ROOT, nodes)
 
 
 if __name__ == '__main__':
author	Jan Pokorný <jpokorny@redhat.com>	2013-01-29 18:20:13 +0100
committer	Jan Pokorný <jpokorny@redhat.com>	2013-01-29 18:25:12 +0100
commit	dbebf0be9cb077864aac9d6d60783b53ee50a2a5 (patch)
tree	98eaed1a6bd19c0c8f925fec9a0655a5c9466b4d /rnctree.py
parent	f781e6e0c5ce428b18485d7d05df91962c04c007 (diff)
download	rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.tar.gz rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.tar.xz rnc2rng-dbebf0be9cb077864aac9d6d60783b53ee50a2a5.zip