#!/usr/bin/env python # Convert an RELAX NG compact syntax schema to a Node tree # This file released to the Public Domain by David Mertz from __future__ import generators import sys from rnc_tokenize import token_list class ParseError(SyntaxError): pass for t in """ ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE """.split(): globals()[t] = t PAIRS = {'BEG_BODY': ('END_BODY', BODY), 'BEG_PAREN': ('END_PAREN', GROUP), 'BEG_ANNO': ('END_ANNO', ANNOTATION)} TAGS = {ONE: 'group', SOME: 'oneOrMore', MAYBE: 'optional', ANY: 'zeroOrMore'} DEFAULT_NAMESPACE = None DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"'] OTHER_NAMESPACE = {} CONTEXT_FREE = 0 try: enumerate except: enumerate = lambda seq: zip(range(len(seq)), seq) nodetypes = lambda nl: tuple(map(lambda n: n.type, nl)) toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks) class Node(object): __slots__ = ('type', 'value', 'name', 'quant') def __iter__(self): yield self __len__ = lambda self: 1 def __init__(self, type='', value=[], name=None, quant=ONE): self.type = type self.value = value self.name = name self.quant = quant def format(self, indent=0): out = [' ' * indent + repr(self)] write = out.append if isinstance(self.value, str): if self.type == COMMENT: write(' ' * (1 + indent) + self.value) else: for node in self.value: write(node.format(indent + 1)) return '\n'.join(out) def prettyprint(self): print self.format() def toxml(self): if CONTEXT_FREE: out = [] write = out.append write('') write('') self.type = None write(self.xmlnode(1)) write('') return self.add_ns('\n'.join(out)) else: return self.add_ns(self.xmlnode()) def xmlnode(self, indent=0): out = [] write = out.append if self.type == ROOT: write('') for x in self.value: if not isinstance(x, Node): raise TypeError("Unhappy Node.value: " + repr(x)) elif x.type == START: startelem = '' % x.value write(' ' * indent + startelem) elif x.type == DEFINE: write(' ' * indent + '' % x.name) write(x.xmlnode(indent + 1)) write(' ' * indent + '') elif x.type == NAME: write(' ' * indent + '' % x.value) elif x.type == COMMENT: write(' ' * indent + '' % x.value) elif x.type == LITERAL: write(' ' * indent + '%s' % x.value) elif x.type == ANNOTATION: write(' ' * indent + '%s' % x.value) elif x.type == INTERLEAVE: write(' ' * indent + '') write(x.xmlnode(indent + 1)) write(' ' * indent + '') elif x.type == SEQ: write(x.xmlnode(indent + 1)) elif x.type == CHOICE: write(' ' * indent + '') write(x.xmlnode(indent + 1)) write(' ' * indent + '') elif x.type == GROUP: write(x.xmlnode(indent)) elif x.type == TEXT: write(' ' * indent + '') elif x.type == EMPTY: write(' ' * indent + '') elif x.type == DATATAG: DATATYPE_LIB[0] = 1 # Use datatypes if x.name is None: # no paramaters write(' ' * indent + '' % x.value) else: write(' ' * indent + '' % x.name) p = '%s' % x.value write(' ' * (indent + 1) + p) write(' ' * indent + '') elif x.type == ELEM: if x.quant == ONE: write(' ' * indent + '' % x.name) write(x.xmlnode(indent + 1)) write(' ' * indent + '') else: write(' ' * indent + '<%s>' % TAGS[x.quant]) write(' ' * (indent + 1) + '' % x.name) write(x.xmlnode(indent + 2)) write(' ' * (indent + 1) + '') write(' ' * indent + '' % TAGS[x.quant]) elif x.type == ATTR: if x.value[0].type == TEXT: write(' ' * indent + '' % x.name) elif x.value[0].type == EMPTY: write(' ' * indent + '' % x.name) write(' ' * (indent + 1) + '') write(' ' * indent + '') return '\n'.join(out) def __repr__(self): return "Node(%s,%s,%s)[%d]" % (self.type, self.name, self.quant, len(self.value)) def add_ns(self, xml): "Add namespace attributes to top level element" lines = xml.split('\n') self.nest_annotations(lines) # annots not allowed before root elem for i, line in enumerate(lines): ltpos = line.find('<') if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'): # We've got an element tag, not PI or comment new = line[:line.find('>')] new += ' xmlns="http://relaxng.org/ns/structure/1.0"' if DEFAULT_NAMESPACE is not None: new += '\n ns=%s' % DEFAULT_NAMESPACE if DATATYPE_LIB[0]: new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1] for ns, url in OTHER_NAMESPACE.items(): new += '\n xmlns:%s=%s' % (ns, url) new += '>' lines[i] = new break return '\n'.join(lines) def nest_annotations(self, lines): "Nest any top annotation within first element" top_annotations = [] for i, line in enumerate(lines[:]): if line.find('= 0: top_annotations.append(line) del lines[i] else: ltpos = line.find('<') if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'): break for line in top_annotations: lines.insert(i, ' ' + line) def findmatch(beg, nodes, offset): level = 1 end = PAIRS[beg][0] for i, t in enumerate(nodes[offset:]): if t.type == beg: level += 1 elif t.type == end: level -= 1 if level == 0: return i + offset raise EOFError("No closing token encountered for %s @ %d" % (beg, offset)) def match_pairs(nodes): newnodes = [] i = 0 while 1: if i >= len(nodes): break node = nodes[i] if node.type in PAIRS.keys(): # Look for enclosing brackets match = findmatch(node.type, nodes, i + 1) matchtype = PAIRS[node.type][1] node = Node(type=matchtype, value=nodes[i + 1:match]) node.value = match_pairs(node.value) newnodes.append(node) i = match + 1 else: newnodes.append(node) i += 1 if i >= len(nodes): break if nodes[i].type in (ANY, SOME, MAYBE): newnodes[-1].quant = nodes[i].type i += 1 nodes[:] = newnodes return nodes def type_bodies(nodes): newnodes = [] i = 0 while 1: if i >= len(nodes): break if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \ nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY): name, body = nodes[i + 1].value, nodes[i + 2] value, quant = type_bodies(body.value), body.quant node = Node(nodes[i].type, value, name, quant) newnodes.append(node) i += 3 elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN): node = Node(DATATAG, nodes[i + 1].value, nodes[i].value) newnodes.append(node) i += 2 elif nodes[i] == DEFINE: print nodes[i:] else: if nodes[i].type == GROUP: # Recurse into groups value = type_bodies(nodes[i].value) nodes[i] = Node(GROUP, value, None, nodes[i].quant) newnodes.append(nodes[i]) i += 1 nodes[:] = newnodes return nodes def nest_defines(nodes): "Attach groups to named patterns" newnodes = [] i = 0 while 1: if i >= len(nodes): break node = nodes[i] newnodes.append(node) if node.type == DEFINE: group = [] while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE: group.append(nodes[i + 1]) i += 1 node.name = node.value node.value = Node(GROUP, group) i += 1 nodes[:] = newnodes return nodes def intersperse(nodes): "Look for interleaved, choice, or sequential nodes in groups/bodies" for node in nodes: if node.type in (ELEM, ATTR, GROUP, LITERAL): val = node.value ntypes = [n.type for n in val if not isinstance(val, str)] inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)] inters = dict(zip(inters, [0] * len(inters))) if len(inters) > 1: raise ParseError("Ambiguity in sequencing: %s" % node) if len(inters) > 0: intertype = inters.keys()[0] items = [] for pat in node.value: if pat.type != intertype: items.append(pat) node.value = Node(intertype, items) if not isinstance(node.value, str): # No recurse to terminal str intersperse(node.value) return nodes def scan_NS(nodes): "Look for any namespace configuration lines" global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE for node in nodes: if node.type == DEFAULT_NS: DEFAULT_NAMESPACE = node.value elif node.type == NS: ns, url = map(str.strip, node.value.split('=')) OTHER_NAMESPACE[ns] = url elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE: OTHER_NAMESPACE['a'] =\ '"http://relaxng.org/ns/compatibility/annotations/1.0"' elif node.type == DATATYPES: DATATYPE_LIB[:] = [1, node.value] elif node.type == START: CONTEXT_FREE = 1 def make_nodetree(tokens): nodes = toNodes(tokens) match_pairs(nodes) type_bodies(nodes) nest_defines(nodes) intersperse(nodes) scan_NS(nodes) root = Node(ROOT, nodes) return root if __name__ == '__main__': make_nodetree(token_list(sys.stdin.read())).prettyprint()