1 files changed, 324 insertions, 324 deletions
diff --git a/rnctree.py b/rnctree.py
index fb78d05..ebb9433 100755
--- a/rnctree.py
+++ b/rnctree.py
@@ -1,324 +1,324 @@
-#!/usr/bin/env python
-# Convert an RELAX NG compact syntax schema to a Node tree
-# This file released to the Public Domain by David Mertz
-from __future__ import generators
-import sys
-from rnc_tokenize import token_list
-
-
-class ParseError(SyntaxError): pass
-
-for t in """
-  ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
-  NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
-  DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
-  """.split(): globals()[t] = t
-
-PAIRS = {'BEG_BODY':  ('END_BODY',  BODY),
-         'BEG_PAREN': ('END_PAREN', GROUP),
-         'BEG_ANNO':  ('END_ANNO',  ANNOTATION)}
-
-TAGS = {ONE:   'group',
-        SOME:  'oneOrMore',
-        MAYBE: 'optional',
-        ANY:   'zeroOrMore'}
-
-DEFAULT_NAMESPACE = None
-DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
-OTHER_NAMESPACE = {}
-CONTEXT_FREE = 0
-
-try: enumerate
-except: enumerate = lambda seq: zip(range(len(seq)), seq)
-nodetypes = lambda nl: tuple(map(lambda n: n.type, nl))
-toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks)
-
-
-class Node(object):
-    __slots__ = ('type', 'value', 'name', 'quant')
-
-    def __iter__(self): yield self
-    __len__ = lambda self: 1
-
-    def __init__(self, type='', value=[], name=None, quant=ONE):
-        self.type  = type
-        self.value = value
-        self.name  = name
-        self.quant = quant
-
-    def format(self, indent=0):
-        out = ['  ' * indent + repr(self)]
-        write = out.append
-        if isinstance(self.value, str):
-            if self.type == COMMENT:
-                write('  ' * (1 + indent) + self.value)
-        else:
-            for node in self.value:
-                write(node.format(indent + 1))
-        return '\n'.join(out)
-
-    def prettyprint(self):
-        print self.format()
-
-    def toxml(self):
-        if CONTEXT_FREE:
-            out = []
-            write = out.append
-            write('<?xml version="1.0" encoding="UTF-8"?>')
-            write('<grammar>')
-            self.type = None
-            write(self.xmlnode(1))
-            write('</grammar>')
-            return self.add_ns('\n'.join(out))
-        else:
-            return self.add_ns(self.xmlnode())
-
-    def xmlnode(self, indent=0):
-        out = []
-        write = out.append
-        if self.type == ROOT:
-            write('<?xml version="1.0" encoding="UTF-8"?>')
-
-        for x in self.value:
-            if not isinstance(x, Node):
-                raise TypeError("Unhappy Node.value: " + repr(x))
-            elif x.type == START:
-                startelem = '<start><ref name="%s"/></start>' % x.value
-                write('  ' * indent + startelem)
-            elif x.type == DEFINE:
-                write('  ' * indent + '<define name="%s">' % x.name)
-                write(x.xmlnode(indent + 1))
-                write('  ' * indent + '</define>')
-            elif x.type == NAME:
-                write('  ' * indent + '<ref name="%s"/>' % x.value)
-            elif x.type == COMMENT:
-                write('  ' * indent + '<!-- %s -->' % x.value)
-            elif x.type == LITERAL:
-                write('  ' * indent + '<value>%s</value>' % x.value)
-            elif x.type == ANNOTATION:
-                write('  ' * indent
-                      + '<a:documentation>%s</a:documentation>' % x.value)
-            elif x.type == INTERLEAVE:
-                write('  ' * indent + '<interleave>')
-                write(x.xmlnode(indent + 1))
-                write('  ' * indent + '</interleave>')
-            elif x.type == SEQ:
-                write(x.xmlnode(indent + 1))
-            elif x.type == CHOICE:
-                write('  ' * indent + '<choice>')
-                write(x.xmlnode(indent + 1))
-                write('  ' * indent + '</choice>')
-            elif x.type == GROUP:
-                write(x.xmlnode(indent))
-            elif x.type == TEXT:
-                write('  ' * indent + '<text/>')
-            elif x.type == EMPTY:
-                write('  ' * indent + '<empty/>')
-            elif x.type == DATATAG:
-                DATATYPE_LIB[0] = 1     # Use datatypes
-                if x.name is None:      # no paramaters
-                    write('  ' * indent + '<data type="%s"/>' % x.value)
-                else:
-                    write('  ' * indent + '<data type="%s">' % x.name)
-                    p = '<param name="pattern">%s</param>' % x.value
-                    write('  ' * (indent + 1) + p)
-                    write('  ' * indent + '</data>')
-            elif x.type == ELEM:
-                if x.quant == ONE:
-                    write('  ' * indent + '<element name="%s">' % x.name)
-                    write(x.xmlnode(indent + 1))
-                    write('  ' * indent + '</element>')
-                else:
-                    write('  ' * indent + '<%s>' % TAGS[x.quant])
-                    write('  ' * (indent + 1) + '<element name="%s">' % x.name)
-                    write(x.xmlnode(indent + 2))
-                    write('  ' * (indent + 1) + '</element>')
-                    write('  ' * indent + '</%s>' % TAGS[x.quant])
-            elif x.type == ATTR:
-                if x.value[0].type == TEXT:
-                    write('  ' * indent + '<attribute name="%s"/>' % x.name)
-                elif x.value[0].type == EMPTY:
-                    write('  ' * indent + '<attribute name="%s">' % x.name)
-                    write('  ' * (indent + 1) + '<empty/>')
-                    write('  ' * indent + '</attribute>')
-
-        return '\n'.join(out)
-
-    def __repr__(self):
-        return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
-                                       self.quant, len(self.value))
-
-    def add_ns(self, xml):
-        "Add namespace attributes to top level element"
-        lines = xml.split('\n')
-        self.nest_annotations(lines)  # annots not allowed before root elem
-        for i, line in enumerate(lines):
-            ltpos = line.find('<')
-            if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
-                # We've got an element tag, not PI or comment
-                new = line[:line.find('>')]
-                new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
-                if DEFAULT_NAMESPACE is not None:
-                    new += '\n    ns=%s' % DEFAULT_NAMESPACE
-                if DATATYPE_LIB[0]:
-                    new += '\n    datatypeLibrary=%s' % DATATYPE_LIB[1]
-                for ns, url in OTHER_NAMESPACE.items():
-                    new += '\n    xmlns:%s=%s' % (ns, url)
-                new += '>'
-                lines[i] = new
-                break
-        return '\n'.join(lines)
-
-    def nest_annotations(self, lines):
-        "Nest any top annotation within first element"
-        top_annotations = []
-        for i, line in enumerate(lines[:]):
-            if line.find('<a:') >= 0:
-                top_annotations.append(line)
-                del lines[i]
-            else:
-                ltpos = line.find('<')
-                if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
-                    break
-        for line in top_annotations:
-            lines.insert(i, '  ' + line)
-
-
-def findmatch(beg, nodes, offset):
-    level = 1
-    end = PAIRS[beg][0]
-    for i, t in enumerate(nodes[offset:]):
-        if t.type == beg:   level += 1
-        elif t.type == end: level -= 1
-        if level == 0:
-            return i + offset
-    raise EOFError("No closing token encountered for %s @ %d"
-                    % (beg, offset))
-
-
-def match_pairs(nodes):
-    newnodes = []
-    i = 0
-    while 1:
-        if i >= len(nodes): break
-        node = nodes[i]
-        if node.type in PAIRS.keys():
-            # Look for enclosing brackets
-            match = findmatch(node.type, nodes, i + 1)
-            matchtype = PAIRS[node.type][1]
-            node = Node(type=matchtype, value=nodes[i + 1:match])
-            node.value = match_pairs(node.value)
-            newnodes.append(node)
-            i = match + 1
-        else:
-            newnodes.append(node)
-            i += 1
-        if i >= len(nodes): break
-        if nodes[i].type in (ANY, SOME, MAYBE):
-            newnodes[-1].quant = nodes[i].type
-            i += 1
-    nodes[:] = newnodes
-    return nodes
-
-
-def type_bodies(nodes):
-    newnodes = []
-    i = 0
-    while 1:
-        if i >= len(nodes): break
-        if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \
-           nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY):
-            name, body = nodes[i + 1].value, nodes[i + 2]
-            value, quant = type_bodies(body.value), body.quant
-            node = Node(nodes[i].type, value, name, quant)
-            newnodes.append(node)
-            i += 3
-        elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN):
-            node = Node(DATATAG, nodes[i + 1].value, nodes[i].value)
-            newnodes.append(node)
-            i += 2
-        elif nodes[i] == DEFINE:
-            print nodes[i:]
-        else:
-            if nodes[i].type == GROUP:   # Recurse into groups
-                value = type_bodies(nodes[i].value)
-                nodes[i] = Node(GROUP, value, None, nodes[i].quant)
-            newnodes.append(nodes[i])
-            i += 1
-    nodes[:] = newnodes
-    return nodes
-
-
-def nest_defines(nodes):
-    "Attach groups to named patterns"
-    newnodes = []
-    i = 0
-    while 1:
-        if i >= len(nodes): break
-        node = nodes[i]
-        newnodes.append(node)
-        if node.type == DEFINE:
-            group = []
-            while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE:
-                group.append(nodes[i + 1])
-                i += 1
-            node.name = node.value
-            node.value = Node(GROUP, group)
-        i += 1
-    nodes[:] = newnodes
-    return nodes
-
-
-def intersperse(nodes):
-    "Look for interleaved, choice, or sequential nodes in groups/bodies"
-    for node in nodes:
-        if node.type in (ELEM, ATTR, GROUP, LITERAL):
-            val = node.value
-            ntypes = [n.type for n in val if not isinstance(val, str)]
-            inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)]
-            inters = dict(zip(inters, [0] * len(inters)))
-            if len(inters) > 1:
-                raise ParseError("Ambiguity in sequencing: %s" % node)
-            if len(inters) > 0:
-                intertype = inters.keys()[0]
-                items = []
-                for pat in node.value:
-                    if pat.type != intertype:
-                        items.append(pat)
-                node.value = Node(intertype, items)
-        if not isinstance(node.value, str):  # No recurse to terminal str
-            intersperse(node.value)
-    return nodes
-
-
-def scan_NS(nodes):
-    "Look for any namespace configuration lines"
-    global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
-    for node in nodes:
-        if node.type == DEFAULT_NS:
-            DEFAULT_NAMESPACE = node.value
-        elif node.type == NS:
-            ns, url = map(str.strip, node.value.split('='))
-            OTHER_NAMESPACE[ns] = url
-        elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
-            OTHER_NAMESPACE['a'] =\
-              '"http://relaxng.org/ns/compatibility/annotations/1.0"'
-        elif node.type == DATATYPES:
-            DATATYPE_LIB[:] = [1, node.value]
-        elif node.type == START:
-            CONTEXT_FREE = 1
-
-
-def make_nodetree(tokens):
-    nodes = toNodes(tokens)
-    match_pairs(nodes)
-    type_bodies(nodes)
-    nest_defines(nodes)
-    intersperse(nodes)
-    scan_NS(nodes)
-    root = Node(ROOT, nodes)
-    return root
-
-
-if __name__ == '__main__':
-    make_nodetree(token_list(sys.stdin.read())).prettyprint()
+#!/usr/bin/env python
+# Convert a RELAX NG compact syntax schema to a Node tree
+# This file released to the Public Domain by David Mertz
+from __future__ import generators
+import sys
+from rnc_tokenize import token_list
+
+
+class ParseError(SyntaxError): "Raised when the token stream is ambiguous"
+
+# Each token-type name becomes a module global bound to its own name.
+for t in ('ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL '
+          'NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT '
+          'DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE').split():
+    globals()[t] = t
+
+# Opening token type -> (closing token type, type of the folded node)
+PAIRS = dict([('BEG_BODY',  ('END_BODY',  BODY)),
+              ('BEG_PAREN', ('END_PAREN', GROUP)),
+              ('BEG_ANNO',  ('END_ANNO',  ANNOTATION))])
+
+# Quantifier -> RELAX NG tag that xmlnode() wraps around an <element>
+TAGS = dict([(ONE, 'group'), (SOME, 'oneOrMore'),
+             (MAYBE, 'optional'), (ANY, 'zeroOrMore')])
+# DATATYPE_LIB is [in-use flag, datatype library URI text]
+DEFAULT_NAMESPACE = None    # scan_NS stores a DEFAULT_NS node's value here
+DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
+OTHER_NAMESPACE = {}        # prefix -> URI text, written as xmlns:prefix=URI
+CONTEXT_FREE = 0            # scan_NS sets this to 1 on a START node
+
+try: enumerate                 # shim for Pythons without the builtin
+except NameError: enumerate = lambda seq: zip(range(len(seq)), seq)
+nodetypes = lambda nl: tuple([n.type for n in nl])    # types of a node run
+toNodes = lambda toks: [Node(t.type, t.value) for t in toks]  # always a list
+
+
+class Node(object):
+    """A typed tree node whose `value` is text, a list of Nodes or a Node.
+
+    Iterating a Node yields just itself (length 1), like a 1-item list.
+    """
+    __slots__ = ('type', 'value', 'name', 'quant')
+
+    def __iter__(self): yield self
+    __len__ = lambda self: 1
+
+    def __init__(self, type='', value=None, name=None, quant=ONE):
+        # Fresh list per Node: a `[]` default is shared by all instances
+        if value is None: value = []
+        self.type  = type
+        self.value = value
+        self.name  = name
+        self.quant = quant
+
+    def format(self, indent=0):
+        "Return an indented, multi-line text dump of this subtree"
+        out = ['  ' * indent + repr(self)]
+        write = out.append
+        if isinstance(self.value, str):
+            if self.type == COMMENT:
+                write('  ' * (1 + indent) + self.value)
+        else:
+            for node in self.value:
+                write(node.format(indent + 1))
+        return '\n'.join(out)
+
+    def prettyprint(self):
+        "Print format() on stdout"
+        print(self.format())    # parenthesised: valid on Python 2 and 3
+
+    def toxml(self):
+        "Return RELAX NG XML, wrapped in <grammar> when CONTEXT_FREE is set"
+        if not CONTEXT_FREE:
+            return self.add_ns(self.xmlnode())
+        self.type = None    # so xmlnode() does not add a second <?xml?>
+        out = ['<?xml version="1.0" encoding="UTF-8"?>', '<grammar>',
+               self.xmlnode(1), '</grammar>']
+        return self.add_ns('\n'.join(out))
+
+    def xmlnode(self, indent=0):
+        "Return XML for the children in self.value, `indent` levels deep"
+        out = []
+        write = out.append
+        pad = '  ' * indent
+        if self.type == ROOT:
+            write('<?xml version="1.0" encoding="UTF-8"?>')
+        for x in self.value:
+            if not isinstance(x, Node):
+                raise TypeError("Unhappy Node.value: " + repr(x))
+            elif x.type == START:
+                write(pad + '<start><ref name="%s"/></start>' % x.value)
+            elif x.type == DEFINE:
+                write(pad + '<define name="%s">' % x.name)
+                write(x.xmlnode(indent + 1))
+                write(pad + '</define>')
+            elif x.type == NAME:
+                write(pad + '<ref name="%s"/>' % x.value)
+            elif x.type == COMMENT:
+                write(pad + '<!-- %s -->' % x.value)
+            elif x.type == LITERAL:
+                write(pad + '<value>%s</value>' % x.value)
+            elif x.type == ANNOTATION:
+                write(pad + '<a:documentation>%s</a:documentation>' % x.value)
+            elif x.type in (INTERLEAVE, CHOICE):
+                tag = x.type.lower()    # 'interleave' or 'choice'
+                write(pad + '<%s>' % tag)
+                write(x.xmlnode(indent + 1))
+                write(pad + '</%s>' % tag)
+            elif x.type == SEQ:
+                write(x.xmlnode(indent + 1))
+            elif x.type == GROUP:
+                write(x.xmlnode(indent))
+            elif x.type in (TEXT, EMPTY):
+                write(pad + '<%s/>' % x.type.lower())
+            elif x.type == DATATAG:
+                DATATYPE_LIB[0] = 1     # Use datatypes
+                if x.name is None:      # no parameters
+                    write(pad + '<data type="%s"/>' % x.value)
+                else:
+                    write(pad + '<data type="%s">' % x.name)
+                    p = '<param name="pattern">%s</param>' % x.value
+                    write('  ' * (indent + 1) + p)
+                    write(pad + '</data>')
+            elif x.type == ELEM:
+                if x.quant == ONE:
+                    write(pad + '<element name="%s">' % x.name)
+                    write(x.xmlnode(indent + 1))
+                    write(pad + '</element>')
+                else:
+                    write(pad + '<%s>' % TAGS[x.quant])
+                    write('  ' * (indent + 1) + '<element name="%s">' % x.name)
+                    write(x.xmlnode(indent + 2))
+                    write('  ' * (indent + 1) + '</element>')
+                    write(pad + '</%s>' % TAGS[x.quant])
+            elif x.type == ATTR:
+                if x.value[0].type == TEXT:
+                    write(pad + '<attribute name="%s"/>' % x.name)
+                elif x.value[0].type == EMPTY:
+                    write(pad + '<attribute name="%s">' % x.name)
+                    write('  ' * (indent + 1) + '<empty/>')
+                    write(pad + '</attribute>')
+
+        return '\n'.join(out)
+
+    def __repr__(self):
+        return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
+                                       self.quant, len(self.value))
+
+    def _is_element_line(self, line):
+        "True if the first '<' on line opens an element, not <!-- or <?"
+        ltpos = line.find('<')
+        # slicing (not indexing) tolerates a '<' that ends the line
+        return ltpos >= 0 and line[ltpos + 1:ltpos + 2] not in ('', '!', '?')
+
+    def add_ns(self, xml):
+        "Add namespace attributes to top level element"
+        lines = xml.split('\n')
+        self.nest_annotations(lines)  # annots not allowed before root elem
+        for i, line in enumerate(lines):
+            if self._is_element_line(line):
+                new = line[:line.find('>')]
+                new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
+                if DEFAULT_NAMESPACE is not None:
+                    new += '\n    ns=%s' % DEFAULT_NAMESPACE
+                if DATATYPE_LIB[0]:
+                    new += '\n    datatypeLibrary=%s' % DATATYPE_LIB[1]
+                for ns, url in OTHER_NAMESPACE.items():
+                    new += '\n    xmlns:%s=%s' % (ns, url)
+                new += '>'
+                lines[i] = new
+                break
+        return '\n'.join(lines)
+
+    def nest_annotations(self, lines):
+        "Move annotation lines that precede the first element just inside it"
+        moved, pos = [], 0
+        while pos < len(lines):
+            if lines[pos].find('<a:') >= 0:
+                moved.append('  ' + lines.pop(pos))
+                continue
+            pos += 1
+            if self._is_element_line(lines[pos - 1]):
+                break       # pos now sits just after the element's open tag
+        lines[pos:pos] = moved
+
+
+def findmatch(beg, nodes, offset):
+    "Return index of the token closing `beg`; scans from offset at depth 1"
+    closer, depth = PAIRS[beg][0], 1
+    for pos in range(offset, len(nodes)):
+        kind = nodes[pos].type
+        depth += (kind == beg) - (kind == closer)   # +1 opener, -1 closer
+        if depth == 0:
+            return pos
+    raise EOFError("No closing token encountered for %s @ %d"
+                    % (beg, offset))
+
+
+def match_pairs(nodes):
+    "Fold bracketed token runs into BODY/GROUP/ANNOTATION nodes, in place"
+    folded = []
+    pos = 0
+    total = len(nodes)
+    while pos < total:
+        current = nodes[pos]
+        if current.type in PAIRS:
+            # Everything up to the matching closer becomes the children
+            close = findmatch(current.type, nodes, pos + 1)
+            current = Node(type=PAIRS[current.type][1],
+                           value=nodes[pos + 1:close])
+            current.value = match_pairs(current.value)
+            pos = close + 1
+        else:
+            pos += 1
+        folded.append(current)
+        # A quantifier token that follows is absorbed into that node
+        if pos < total and nodes[pos].type in (ANY, SOME, MAYBE):
+            folded[-1].quant = nodes[pos].type
+            pos += 1
+    nodes[:] = folded
+    return nodes
+
+
+def type_bodies(nodes):
+    """Fold (ELEM|ATTR, NAME, BODY) and (DATATAG, PATTERN) runs, in place.
+
+    The former become one named ELEM/ATTR Node holding the BODY's folded
+    children and quantifier; the latter one DATATAG Node.  Returns nodes.
+    """
+    out, i = [], 0
+    while i < len(nodes):
+        # peek at the types of up to three tokens starting at i
+        head = nodetypes(nodes[i:i + 3])
+        if head in ((ELEM, NAME, BODY), (ATTR, NAME, BODY)):
+            name, body = nodes[i + 1].value, nodes[i + 2]
+            value, quant = type_bodies(body.value), body.quant
+            out.append(Node(nodes[i].type, value, name, quant))
+            i += 3
+        elif head[:2] == (DATATAG, PATTERN):
+            out.append(Node(DATATAG, nodes[i + 1].value, nodes[i].value))
+            i += 2
+        else:
+            if nodes[i].type == GROUP:   # Recurse into groups
+                nodes[i] = Node(GROUP, type_bodies(nodes[i].value),
+                                None, nodes[i].quant)
+            out.append(nodes[i])
+            i += 1
+    nodes[:] = out
+    return nodes
+
+
+def nest_defines(nodes):
+    "Nest every node that follows a DEFINE under it, up to the next DEFINE"
+    kept = []
+    members = None      # child list of the latest DEFINE; None before any
+    for node in nodes:
+        if node.type == DEFINE:
+            members = []
+            # the pattern name moves to .name; .value becomes its GROUP
+            node.name = node.value
+            node.value = Node(GROUP, members)
+            kept.append(node)
+        elif members is None:
+            kept.append(node)       # precedes the first DEFINE: stays put
+        else:
+            members.append(node)
+    # replace the contents in place so the caller's list sees the result
+    nodes[:] = kept
+    return nodes
+
+
+def intersperse(nodes):
+    "Regroup children joined by INTERLEAVE/CHOICE/SEQ tokens under one node"
+    joiners = (INTERLEAVE, CHOICE, SEQ)
+    for node in nodes:
+        kids = node.value
+        if node.type in (ELEM, ATTR, GROUP, LITERAL) \
+           and not isinstance(kids, str):
+            found = {}      # joiner types present among the children
+            for kid in kids:
+                if kid.type in joiners:
+                    found[kid.type] = 0
+            if len(found) > 1:
+                raise ParseError("Ambiguity in sequencing: %s" % node)
+            if found:
+                joiner = list(found)[0]
+                operands = [kid for kid in kids if kid.type != joiner]
+                node.value = Node(joiner, operands)
+        if not isinstance(node.value, str):  # No recurse to terminal str
+            intersperse(node.value)
+    return nodes
+
+
+def scan_NS(nodes):
+    "Set the namespace/datatype/grammar globals from the nodes given"
+    global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
+    for node in nodes:
+        if node.type == DEFAULT_NS:
+            DEFAULT_NAMESPACE = node.value
+        elif node.type == NS:   # split once: the URI may contain '='
+            ns, url = map(str.strip, node.value.split('=', 1))
+            OTHER_NAMESPACE[ns] = url
+        elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
+            OTHER_NAMESPACE['a'] =\
+              '"http://relaxng.org/ns/compatibility/annotations/1.0"'
+        elif node.type == DATATYPES:
+            DATATYPE_LIB[:] = [1, node.value]
+        elif node.type == START:
+            CONTEXT_FREE = 1
+
+
+def make_nodetree(tokens):
+    "Build the ROOT Node for a token list by running each pass in order"
+    nodes = toNodes(tokens)
+    # Each pass works on `nodes` in place (scan_NS only sets module
+    # globals), so none of the return values is needed here.
+    for step in (match_pairs, type_bodies, nest_defines,
+                 intersperse, scan_NS):
+        step(nodes)
+    return Node(ROOT, nodes)
+
+
+if __name__ == '__main__':     # tokenize stdin, then print the node tree
+    make_nodetree(token_list(sys.stdin.read())).prettyprint()