summaryrefslogtreecommitdiffstats
path: root/rnctree.py
diff options
context:
space:
mode:
authorJan Pokorný <jpokorny@redhat.com>2013-01-21 18:32:54 +0100
committerJan Pokorný <jpokorny@redhat.com>2013-01-21 18:32:54 +0100
commit73e9f64af524be2fc3a34ee1c473b64d12efbef2 (patch)
tree736a4c64f3d0e913498dae501bbdf3fb79aa12cc /rnctree.py
downloadrnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.tar.gz
rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.tar.xz
rnc2rng-73e9f64af524be2fc3a34ee1c473b64d12efbef2.zip
Initial commit, based on rnc.zip from gnosis.cx/download/relax
Complete URL: http://www.gnosis.cx/download/relax/rnc.zip Signed-off-by: David Mertz/www.gnosis.cx
Diffstat (limited to 'rnctree.py')
-rw-r--r--rnctree.py313
1 files changed, 313 insertions, 0 deletions
diff --git a/rnctree.py b/rnctree.py
new file mode 100644
index 0000000..0735444
--- /dev/null
+++ b/rnctree.py
@@ -0,0 +1,313 @@
+#!/usr/bin/env python
+# Convert an RELAX NG compact syntax schema to a Node tree
+# This file released to the Public Domain by David Mertz
+from __future__ import generators
+import sys
+from rnc_tokenize import token_list
+
+class ParseError(SyntaxError): pass
+
+for t in """
+ ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
+ NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
+ DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
+ """.split(): globals()[t] = t
+
+PAIRS = {'BEG_BODY':('END_BODY',BODY),
+ 'BEG_PAREN':('END_PAREN',GROUP),
+ 'BEG_ANNO':('END_ANNO',ANNOTATION)}
+
+TAGS = { ONE: 'group',
+ SOME: 'oneOrMore',
+ MAYBE: 'optional',
+ ANY: 'zeroOrMore'}
+
+DEFAULT_NAMESPACE = None
+DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
+OTHER_NAMESPACE = {}
+CONTEXT_FREE = 0
+
+try: enumerate
+except: enumerate = lambda seq: zip(range(len(seq)),seq)
+nodetypes = lambda nl: tuple(map(lambda n: n.type, nl))
+toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks)
+
+class Node(object):
+ __slots__ = ('type','value','name','quant')
+ def __iter__(self): yield self
+ __len__ = lambda self: 1
+
+ def __init__(self, type='', value=[], name=None, quant=ONE):
+ self.type = type
+ self.value = value
+ self.name = name
+ self.quant = quant
+
+ def format(self, indent=0):
+ out = [' '*indent+repr(self)]
+ write = out.append
+ if isinstance(self.value, str):
+ if self.type==COMMENT:
+ write(' '*(1+indent)+self.value)
+ else:
+ for node in self.value:
+ write(node.format(indent+1))
+ return '\n'.join(out)
+
+ def prettyprint(self):
+ print self.format()
+
+ def toxml(self):
+ if CONTEXT_FREE:
+ out = []
+ write = out.append
+ write('<?xml version="1.0" encoding="UTF-8"?>')
+ write('<grammar>')
+ self.type = None
+ write(self.xmlnode(1))
+ write('</grammar>')
+ return self.add_ns('\n'.join(out))
+ else:
+ return self.add_ns(self.xmlnode())
+
+
+ def xmlnode(self, indent=0):
+ out = []
+ write = out.append
+ if self.type == ROOT:
+ write('<?xml version="1.0" encoding="UTF-8"?>')
+
+ for x in self.value:
+ if not isinstance(x, Node):
+ raise TypeError, "Unhappy Node.value: "+repr(x)
+ elif x.type == START:
+ startelem = '<start><ref name="%s"/></start>' % x.value
+ write(' '*indent+startelem)
+ elif x.type == DEFINE:
+ write(' '*indent+'<define name="%s">' % x.name)
+ write(x.xmlnode(indent+1))
+ write(' '*indent+'</define>')
+ elif x.type == NAME:
+ write(' '*indent+ '<ref name="%s"/>' % x.value)
+ elif x.type == COMMENT:
+ write(' '*indent+'<!-- %s -->' % x.value)
+ elif x.type == LITERAL:
+ write(' '*indent+'<value>%s</value>' % x.value)
+ elif x.type == ANNOTATION:
+ write(' '*indent+\
+ '<a:documentation>%s</a:documentation>' % x.value)
+ elif x.type == INTERLEAVE:
+ write(' '*indent+'<interleave>')
+ write(x.xmlnode(indent+1))
+ write(' '*indent+'</interleave>')
+ elif x.type == CHOICE:
+ write(' '*indent+'<choice>')
+ write(x.xmlnode(indent+1))
+ write(' '*indent+'</choice>')
+ elif x.type == GROUP:
+ write(x.xmlnode(indent))
+ elif x.type == TEXT:
+ write(' '*indent+'<text/>')
+ elif x.type == EMPTY:
+ write(' '*indent+'<empty/>')
+ elif x.type == DATATAG:
+ DATATYPE_LIB[0] = 1 # Use datatypes
+ if x.name is None: # no paramaters
+ write(' '*indent+'<data type="%s"/>' % x.value)
+ else:
+ write(' '*indent+'<data type="%s">' % x.name)
+ p = '<param name="pattern">%s</param>' % x.value
+ write(' '*(indent+1)+p)
+ write(' '*indent+'</data>')
+ elif x.type == ELEM:
+ if x.quant == ONE:
+ write(' '*indent+'<element name="%s">' % x.name)
+ write(x.xmlnode(indent+1))
+ write(' '*indent+'</element>')
+ else:
+ write(' '*indent+'<%s>' % TAGS[x.quant])
+ write(' '*(indent+1)+'<element name="%s">' % x.name)
+ write(x.xmlnode(indent+2))
+ write(' '*(indent+1)+'</element>')
+ write(' '*indent+'</%s>' % TAGS[x.quant])
+ elif x.type == ATTR:
+ if x.value[0].type == TEXT:
+ write(' '*indent+'<attribute name="%s"/>' % x.name)
+ elif x.value[0].type == EMPTY:
+ write(' '*indent+'<attribute name="%s">' % x.name)
+ write(' '*(indent+1)+'<empty/>')
+ write(' '*indent+'</attribute>')
+
+ return '\n'.join(out)
+
+ def __repr__(self):
+ return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
+ self.quant, len(self.value))
+
+ def add_ns(self, xml):
+ "Add namespace attributes to top level element"
+ lines = xml.split('\n')
+ self.nest_annotations(lines) # annots not allowed before root elem
+ for i, line in enumerate(lines):
+ ltpos = line.find('<')
+ if ltpos >= 0 and line[ltpos+1] not in ('!','?'):
+ # We've got an element tag, not PI or comment
+ new = line[:line.find('>')]
+ new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
+ if DEFAULT_NAMESPACE is not None:
+ new += '\n ns=%s' % DEFAULT_NAMESPACE
+ if DATATYPE_LIB[0]:
+ new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1]
+ for ns, url in OTHER_NAMESPACE.items():
+ new += '\n xmlns:%s=%s' % (ns, url)
+ new += '>'
+ lines[i] = new
+ break
+ return '\n'.join(lines)
+
+ def nest_annotations(self, lines):
+ "Nest any top annotation within first element"
+ top_annotations = []
+ for i, line in enumerate(lines[:]):
+ if line.find('<a:') >= 0:
+ top_annotations.append(line)
+ del lines[i]
+ else:
+ ltpos = line.find('<')
+ if ltpos >= 0 and line[ltpos+1] not in ('!','?'):
+ break
+ for line in top_annotations:
+ lines.insert(i, ' '+line)
+
+def findmatch(beg, nodes, offset):
+ level = 1
+ end = PAIRS[beg][0]
+ for i,t in enumerate(nodes[offset:]):
+ if t.type == beg: level += 1
+ elif t.type == end: level -= 1
+ if level == 0:
+ return i+offset
+ raise EOFError, ("No closing token encountered for %s @ %d"
+ % (beg,offset))
+
+def match_pairs(nodes):
+ newnodes = []
+ i = 0
+ while 1:
+ if i >= len(nodes): break
+ node = nodes[i]
+ if node.type in PAIRS.keys():
+ # Look for enclosing brackets
+ match = findmatch(node.type, nodes, i+1)
+ matchtype = PAIRS[node.type][1]
+ node = Node(type=matchtype, value=nodes[i+1:match])
+ node.value = match_pairs(node.value)
+ newnodes.append(node)
+ i = match+1
+ else:
+ newnodes.append(node)
+ i += 1
+ if i >= len(nodes): break
+ if nodes[i].type in (ANY, SOME, MAYBE):
+ newnodes[-1].quant = nodes[i].type
+ i += 1
+ nodes[:] = newnodes
+ return nodes
+
+def type_bodies(nodes):
+ newnodes = []
+ i = 0
+ while 1:
+ if i >= len(nodes): break
+ if nodetypes(nodes[i:i+3]) == (ELEM, NAME, BODY) or \
+ nodetypes(nodes[i:i+3]) == (ATTR, NAME, BODY):
+ name, body = nodes[i+1].value, nodes[i+2]
+ value, quant = type_bodies(body.value), body.quant
+ node = Node(nodes[i].type, value, name, quant)
+ newnodes.append(node)
+ i += 3
+ elif nodetypes(nodes[i:i+2]) == (DATATAG, PATTERN):
+ node = Node(DATATAG, nodes[i+1].value, nodes[i].value)
+ newnodes.append(node)
+ i += 2
+ elif nodes[i] == DEFINE:
+ print nodes[i:]
+ else:
+ if nodes[i].type == GROUP: # Recurse into groups
+ value = type_bodies(nodes[i].value)
+ nodes[i] = Node(GROUP, value, None, nodes[i].quant)
+ newnodes.append(nodes[i])
+ i += 1
+ nodes[:] = newnodes
+ return nodes
+
+def nest_defines(nodes):
+ "Attach groups to named patterns"
+ newnodes = []
+ i = 0
+ while 1:
+ if i >= len(nodes): break
+ node = nodes[i]
+ newnodes.append(node)
+ if node.type == DEFINE:
+ group = []
+ while (i+1) < len(nodes) and nodes[i+1].type <> DEFINE:
+ group.append(nodes[i+1])
+ i += 1
+ node.name = node.value
+ node.value = Node(GROUP, group)
+ i += 1
+ nodes[:] = newnodes
+ return nodes
+
+def intersperse(nodes):
+ "Look for interleaved, choice, or sequential nodes in groups/bodies"
+ for node in nodes:
+ if node.type in (ELEM, ATTR, GROUP, LITERAL):
+ val = node.value
+ ntypes = [n.type for n in val if not isinstance(val,str)]
+ inters = [t for t in ntypes if t in (INTERLEAVE,CHOICE,SEQ)]
+ inters = dict(zip(inters,[0]*len(inters)))
+ if len(inters) > 1:
+ raise ParseError, "Ambiguity in sequencing: %s" % node
+ if len(inters) > 0:
+ intertype = inters.keys()[0]
+ items = []
+ for pat in node.value:
+ if pat.type <> intertype:
+ items.append(pat)
+ node.value = Node(intertype, items)
+ if not isinstance(node.value, str): # No recurse to terminal str
+ intersperse(node.value)
+ return nodes
+
+def scan_NS(nodes):
+ "Look for any namespace configuration lines"
+ global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
+ for node in nodes:
+ if node.type == DEFAULT_NS:
+ DEFAULT_NAMESPACE = node.value
+ elif node.type == NS:
+ ns, url = map(str.strip, node.value.split('='))
+ OTHER_NAMESPACE[ns] = url
+ elif node.type == ANNOTATION and not OTHER_NAMESPACE.has_key('a'):
+ OTHER_NAMESPACE['a'] =\
+ '"http://relaxng.org/ns/compatibility/annotations/1.0"'
+ elif node.type == DATATYPES:
+ DATATYPE_LIB[:] = [1, node.value]
+ elif node.type == START:
+ CONTEXT_FREE = 1
+
+def make_nodetree(tokens):
+ nodes = toNodes(tokens)
+ match_pairs(nodes)
+ type_bodies(nodes)
+ nest_defines(nodes)
+ intersperse(nodes)
+ scan_NS(nodes)
+ root = Node(ROOT, nodes)
+ return root
+
+if __name__=='__main__':
+ make_nodetree(token_list(sys.stdin.read())).prettyprint()
+