Initial commit, based on rnc.zip from gnosis.cx/download/relax

Complete URL: http://www.gnosis.cx/download/relax/rnc.zip Signed-off-by: David Mertz/www.gnosis.cx
author: Jan Pokorný <jpokorny@redhat.com> 2013-01-21 18:32:54 +0100
committer: Jan Pokorný <jpokorny@redhat.com> 2013-01-21 18:32:54 +0100
commit: 73e9f64af524be2fc3a34ee1c473b64d12efbef2 (patch)
tree: 736a4c64f3d0e913498dae501bbdf3fb79aa12cc /rnctree.py
download: rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.tar.gz
rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.tar.xz
rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.zip
1 files changed, 313 insertions, 0 deletions
diff --git a/rnctree.py b/rnctree.py
new file mode 100644
index 0000000..0735444
--- /dev/null
+++ b/rnctree.py
@@ -0,0 +1,313 @@
+#!/usr/bin/env python
+# Convert an RELAX NG compact syntax schema to a Node tree
+# This file released to the Public Domain by David Mertz
+from __future__ import generators
+import sys
+from rnc_tokenize import token_list
+
+class ParseError(SyntaxError): pass
+
+for t in """
+  ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
+  NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
+  DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
+  """.split(): globals()[t] = t
+
+PAIRS = {'BEG_BODY':('END_BODY',BODY),
+         'BEG_PAREN':('END_PAREN',GROUP),
+         'BEG_ANNO':('END_ANNO',ANNOTATION)}
+
+TAGS = { ONE:   'group',
+         SOME:  'oneOrMore',
+         MAYBE: 'optional',
+         ANY:   'zeroOrMore'}
+
+DEFAULT_NAMESPACE = None
+DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
+OTHER_NAMESPACE = {}
+CONTEXT_FREE = 0
+
+try: enumerate
+except: enumerate = lambda seq: zip(range(len(seq)),seq)
+nodetypes = lambda nl: tuple(map(lambda n: n.type, nl))
+toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks)
+
+class Node(object):
+    __slots__ = ('type','value','name','quant')
+    def __iter__(self): yield self
+    __len__ = lambda self: 1
+
+    def __init__(self, type='', value=[], name=None, quant=ONE):
+        self.type  = type
+        self.value = value
+        self.name  = name
+        self.quant = quant
+
+    def format(self, indent=0):
+        out = ['  '*indent+repr(self)]
+        write = out.append
+        if isinstance(self.value, str):
+            if self.type==COMMENT:
+                write('  '*(1+indent)+self.value)
+        else:
+            for node in self.value:
+                write(node.format(indent+1))
+        return '\n'.join(out)
+
+    def prettyprint(self):
+        print self.format()
+
+    def toxml(self):
+        if CONTEXT_FREE:
+            out = []
+            write = out.append
+            write('<?xml version="1.0" encoding="UTF-8"?>')
+            write('<grammar>')
+            self.type = None
+            write(self.xmlnode(1))
+            write('</grammar>')
+            return self.add_ns('\n'.join(out))
+        else:
+            return self.add_ns(self.xmlnode())
+
+
+    def xmlnode(self, indent=0):
+        out = []
+        write = out.append
+        if self.type == ROOT:
+            write('<?xml version="1.0" encoding="UTF-8"?>')
+
+        for x in self.value:
+            if not isinstance(x, Node):
+                raise TypeError, "Unhappy Node.value: "+repr(x)
+            elif x.type == START:
+                startelem = '<start><ref name="%s"/></start>' % x.value
+                write('  '*indent+startelem)
+            elif x.type == DEFINE:
+                write('  '*indent+'<define name="%s">' % x.name)
+                write(x.xmlnode(indent+1))
+                write('  '*indent+'</define>')
+            elif x.type == NAME:
+                write('  '*indent+ '<ref name="%s"/>' % x.value)
+            elif x.type == COMMENT:
+                write('  '*indent+'<!-- %s -->' % x.value)
+            elif x.type == LITERAL:
+                write('  '*indent+'<value>%s</value>' % x.value)
+            elif x.type == ANNOTATION:
+                write('  '*indent+\
+                      '<a:documentation>%s</a:documentation>' % x.value)
+            elif x.type == INTERLEAVE:
+                write('  '*indent+'<interleave>')
+                write(x.xmlnode(indent+1))
+                write('  '*indent+'</interleave>')
+            elif x.type == CHOICE:
+                write('  '*indent+'<choice>')
+                write(x.xmlnode(indent+1))
+                write('  '*indent+'</choice>')
+            elif x.type == GROUP:
+                write(x.xmlnode(indent))
+            elif x.type == TEXT:
+                write('  '*indent+'<text/>')
+            elif x.type == EMPTY:
+                write('  '*indent+'<empty/>')
+            elif x.type == DATATAG:
+                DATATYPE_LIB[0] = 1     # Use datatypes
+                if x.name is None:      # no paramaters
+                    write('  '*indent+'<data type="%s"/>' % x.value)
+                else:
+                    write('  '*indent+'<data type="%s">' % x.name)
+                    p = '<param name="pattern">%s</param>' % x.value
+                    write('  '*(indent+1)+p)
+                    write('  '*indent+'</data>')
+            elif x.type == ELEM:
+                if x.quant == ONE:
+                    write('  '*indent+'<element name="%s">' % x.name)
+                    write(x.xmlnode(indent+1))
+                    write('  '*indent+'</element>')
+                else:
+                    write('  '*indent+'<%s>' % TAGS[x.quant])
+                    write('  '*(indent+1)+'<element name="%s">' % x.name)
+                    write(x.xmlnode(indent+2))
+                    write('  '*(indent+1)+'</element>')
+                    write('  '*indent+'</%s>' % TAGS[x.quant])
+            elif x.type == ATTR:
+                if x.value[0].type == TEXT:
+                    write('  '*indent+'<attribute name="%s"/>' % x.name)
+                elif x.value[0].type == EMPTY:
+                    write('  '*indent+'<attribute name="%s">' % x.name)
+                    write('  '*(indent+1)+'<empty/>')
+                    write('  '*indent+'</attribute>')
+
+        return '\n'.join(out)
+
+    def __repr__(self):
+        return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
+                                       self.quant, len(self.value))
+
+    def add_ns(self, xml):
+        "Add namespace attributes to top level element"
+        lines = xml.split('\n')
+        self.nest_annotations(lines) # annots not allowed before root elem
+        for i, line in enumerate(lines):
+            ltpos = line.find('<')
+            if ltpos >= 0 and line[ltpos+1] not in ('!','?'):
+                # We've got an element tag, not PI or comment
+                new = line[:line.find('>')]
+                new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
+                if DEFAULT_NAMESPACE is not None:
+                    new += '\n    ns=%s' % DEFAULT_NAMESPACE
+                if DATATYPE_LIB[0]:
+                    new += '\n    datatypeLibrary=%s' % DATATYPE_LIB[1]
+                for ns, url in OTHER_NAMESPACE.items():
+                    new += '\n    xmlns:%s=%s' % (ns, url)
+                new += '>'
+                lines[i] = new
+                break
+        return '\n'.join(lines)
+
+    def nest_annotations(self, lines):
+        "Nest any top annotation within first element"
+        top_annotations = []
+        for i, line in enumerate(lines[:]):
+            if line.find('<a:') >= 0:
+                top_annotations.append(line)
+                del lines[i]
+            else:
+                ltpos = line.find('<')
+                if ltpos >= 0 and line[ltpos+1] not in ('!','?'):
+                    break
+        for line in top_annotations:
+            lines.insert(i, '  '+line)
+
+def findmatch(beg, nodes, offset):
+    level = 1
+    end = PAIRS[beg][0]
+    for i,t in enumerate(nodes[offset:]):
+        if t.type == beg:   level += 1
+        elif t.type == end: level -= 1
+        if level == 0:
+            return i+offset
+    raise EOFError, ("No closing token encountered for %s @ %d"
+                      % (beg,offset))
+
+def match_pairs(nodes):
+    newnodes = []
+    i = 0
+    while 1:
+        if i >= len(nodes): break
+        node = nodes[i]
+        if node.type in PAIRS.keys():
+            # Look for enclosing brackets
+            match = findmatch(node.type, nodes, i+1)
+            matchtype = PAIRS[node.type][1]
+            node = Node(type=matchtype, value=nodes[i+1:match])
+            node.value = match_pairs(node.value)
+            newnodes.append(node)
+            i = match+1
+        else:
+            newnodes.append(node)
+            i += 1
+        if i >= len(nodes): break
+        if nodes[i].type in (ANY, SOME, MAYBE):
+            newnodes[-1].quant = nodes[i].type
+            i += 1
+    nodes[:] = newnodes
+    return nodes
+
+def type_bodies(nodes):
+    newnodes = []
+    i = 0
+    while 1:
+        if i >= len(nodes): break
+        if nodetypes(nodes[i:i+3]) == (ELEM, NAME, BODY) or \
+           nodetypes(nodes[i:i+3]) == (ATTR, NAME, BODY):
+            name, body = nodes[i+1].value, nodes[i+2]
+            value, quant = type_bodies(body.value), body.quant
+            node = Node(nodes[i].type, value, name, quant)
+            newnodes.append(node)
+            i += 3
+        elif nodetypes(nodes[i:i+2]) == (DATATAG, PATTERN):
+            node = Node(DATATAG, nodes[i+1].value, nodes[i].value)
+            newnodes.append(node)
+            i += 2
+        elif nodes[i] == DEFINE:
+            print nodes[i:]
+        else:
+            if nodes[i].type == GROUP:   # Recurse into groups
+                value = type_bodies(nodes[i].value)
+                nodes[i] = Node(GROUP, value, None, nodes[i].quant)
+            newnodes.append(nodes[i])
+            i += 1
+    nodes[:] = newnodes
+    return nodes
+
+def nest_defines(nodes):
+    "Attach groups to named patterns"
+    newnodes = []
+    i = 0
+    while 1:
+        if i >= len(nodes): break
+        node = nodes[i]
+        newnodes.append(node)
+        if node.type == DEFINE:
+            group = []
+            while (i+1) < len(nodes) and nodes[i+1].type <> DEFINE:
+                group.append(nodes[i+1])
+                i += 1
+            node.name = node.value
+            node.value = Node(GROUP, group)
+        i += 1
+    nodes[:] = newnodes
+    return nodes
+
+def intersperse(nodes):
+    "Look for interleaved, choice, or sequential nodes in groups/bodies"
+    for node in nodes:
+        if node.type in (ELEM, ATTR, GROUP, LITERAL):
+            val = node.value
+            ntypes = [n.type for n in val if not isinstance(val,str)]
+            inters = [t for t in ntypes if t in (INTERLEAVE,CHOICE,SEQ)]
+            inters = dict(zip(inters,[0]*len(inters)))
+            if len(inters) > 1:
+                raise ParseError, "Ambiguity in sequencing: %s" % node
+            if len(inters) > 0:
+                intertype = inters.keys()[0]
+                items = []
+                for pat in node.value:
+                    if pat.type <> intertype:
+                        items.append(pat)
+                node.value = Node(intertype, items)
+        if not isinstance(node.value, str): # No recurse to terminal str
+            intersperse(node.value)
+    return nodes
+
+def scan_NS(nodes):
+    "Look for any namespace configuration lines"
+    global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
+    for node in nodes:
+        if node.type == DEFAULT_NS:
+            DEFAULT_NAMESPACE = node.value
+        elif node.type == NS:
+            ns, url = map(str.strip, node.value.split('='))
+            OTHER_NAMESPACE[ns] = url
+        elif node.type == ANNOTATION and not OTHER_NAMESPACE.has_key('a'):
+            OTHER_NAMESPACE['a'] =\
+              '"http://relaxng.org/ns/compatibility/annotations/1.0"'
+        elif node.type == DATATYPES:
+            DATATYPE_LIB[:] = [1, node.value]
+        elif node.type == START:
+            CONTEXT_FREE = 1
+
+def make_nodetree(tokens):
+    nodes = toNodes(tokens)
+    match_pairs(nodes)
+    type_bodies(nodes)
+    nest_defines(nodes)
+    intersperse(nodes)
+    scan_NS(nodes)
+    root = Node(ROOT, nodes)
+    return root
+
+if __name__=='__main__':
+    make_nodetree(token_list(sys.stdin.read())).prettyprint()
+
author	Jan Pokorný <jpokorny@redhat.com>	2013-01-21 18:32:54 +0100
committer	Jan Pokorný <jpokorny@redhat.com>	2013-01-21 18:32:54 +0100
commit	73e9f64af524be2fc3a34ee1c473b64d12efbef2 (patch)
tree	736a4c64f3d0e913498dae501bbdf3fb79aa12cc /rnctree.py
download	rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.tar.gz rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.tar.xz rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.zip