diff options
Diffstat (limited to 'rnctree.py')
-rwxr-xr-x | rnctree.py | 648 |
1 files changed, 324 insertions, 324 deletions
@@ -1,324 +1,324 @@ -#!/usr/bin/env python
-# Convert an RELAX NG compact syntax schema to a Node tree
-# This file released to the Public Domain by David Mertz
-from __future__ import generators
-import sys
-from rnc_tokenize import token_list
-
-
-class ParseError(SyntaxError): pass
-
-for t in """
- ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
- NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
- DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
- """.split(): globals()[t] = t
-
-PAIRS = {'BEG_BODY': ('END_BODY', BODY),
- 'BEG_PAREN': ('END_PAREN', GROUP),
- 'BEG_ANNO': ('END_ANNO', ANNOTATION)}
-
-TAGS = {ONE: 'group',
- SOME: 'oneOrMore',
- MAYBE: 'optional',
- ANY: 'zeroOrMore'}
-
-DEFAULT_NAMESPACE = None
-DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
-OTHER_NAMESPACE = {}
-CONTEXT_FREE = 0
-
-try: enumerate
-except: enumerate = lambda seq: zip(range(len(seq)), seq)
-nodetypes = lambda nl: tuple(map(lambda n: n.type, nl))
-toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks)
-
-
-class Node(object):
- __slots__ = ('type', 'value', 'name', 'quant')
-
- def __iter__(self): yield self
- __len__ = lambda self: 1
-
- def __init__(self, type='', value=[], name=None, quant=ONE):
- self.type = type
- self.value = value
- self.name = name
- self.quant = quant
-
- def format(self, indent=0):
- out = [' ' * indent + repr(self)]
- write = out.append
- if isinstance(self.value, str):
- if self.type == COMMENT:
- write(' ' * (1 + indent) + self.value)
- else:
- for node in self.value:
- write(node.format(indent + 1))
- return '\n'.join(out)
-
- def prettyprint(self):
- print self.format()
-
- def toxml(self):
- if CONTEXT_FREE:
- out = []
- write = out.append
- write('<?xml version="1.0" encoding="UTF-8"?>')
- write('<grammar>')
- self.type = None
- write(self.xmlnode(1))
- write('</grammar>')
- return self.add_ns('\n'.join(out))
- else:
- return self.add_ns(self.xmlnode())
-
- def xmlnode(self, indent=0):
- out = []
- write = out.append
- if self.type == ROOT:
- write('<?xml version="1.0" encoding="UTF-8"?>')
-
- for x in self.value:
- if not isinstance(x, Node):
- raise TypeError("Unhappy Node.value: " + repr(x))
- elif x.type == START:
- startelem = '<start><ref name="%s"/></start>' % x.value
- write(' ' * indent + startelem)
- elif x.type == DEFINE:
- write(' ' * indent + '<define name="%s">' % x.name)
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</define>')
- elif x.type == NAME:
- write(' ' * indent + '<ref name="%s"/>' % x.value)
- elif x.type == COMMENT:
- write(' ' * indent + '<!-- %s -->' % x.value)
- elif x.type == LITERAL:
- write(' ' * indent + '<value>%s</value>' % x.value)
- elif x.type == ANNOTATION:
- write(' ' * indent
- + '<a:documentation>%s</a:documentation>' % x.value)
- elif x.type == INTERLEAVE:
- write(' ' * indent + '<interleave>')
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</interleave>')
- elif x.type == SEQ:
- write(x.xmlnode(indent + 1))
- elif x.type == CHOICE:
- write(' ' * indent + '<choice>')
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</choice>')
- elif x.type == GROUP:
- write(x.xmlnode(indent))
- elif x.type == TEXT:
- write(' ' * indent + '<text/>')
- elif x.type == EMPTY:
- write(' ' * indent + '<empty/>')
- elif x.type == DATATAG:
- DATATYPE_LIB[0] = 1 # Use datatypes
- if x.name is None: # no paramaters
- write(' ' * indent + '<data type="%s"/>' % x.value)
- else:
- write(' ' * indent + '<data type="%s">' % x.name)
- p = '<param name="pattern">%s</param>' % x.value
- write(' ' * (indent + 1) + p)
- write(' ' * indent + '</data>')
- elif x.type == ELEM:
- if x.quant == ONE:
- write(' ' * indent + '<element name="%s">' % x.name)
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</element>')
- else:
- write(' ' * indent + '<%s>' % TAGS[x.quant])
- write(' ' * (indent + 1) + '<element name="%s">' % x.name)
- write(x.xmlnode(indent + 2))
- write(' ' * (indent + 1) + '</element>')
- write(' ' * indent + '</%s>' % TAGS[x.quant])
- elif x.type == ATTR:
- if x.value[0].type == TEXT:
- write(' ' * indent + '<attribute name="%s"/>' % x.name)
- elif x.value[0].type == EMPTY:
- write(' ' * indent + '<attribute name="%s">' % x.name)
- write(' ' * (indent + 1) + '<empty/>')
- write(' ' * indent + '</attribute>')
-
- return '\n'.join(out)
-
- def __repr__(self):
- return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
- self.quant, len(self.value))
-
- def add_ns(self, xml):
- "Add namespace attributes to top level element"
- lines = xml.split('\n')
- self.nest_annotations(lines) # annots not allowed before root elem
- for i, line in enumerate(lines):
- ltpos = line.find('<')
- if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
- # We've got an element tag, not PI or comment
- new = line[:line.find('>')]
- new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
- if DEFAULT_NAMESPACE is not None:
- new += '\n ns=%s' % DEFAULT_NAMESPACE
- if DATATYPE_LIB[0]:
- new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1]
- for ns, url in OTHER_NAMESPACE.items():
- new += '\n xmlns:%s=%s' % (ns, url)
- new += '>'
- lines[i] = new
- break
- return '\n'.join(lines)
-
- def nest_annotations(self, lines):
- "Nest any top annotation within first element"
- top_annotations = []
- for i, line in enumerate(lines[:]):
- if line.find('<a:') >= 0:
- top_annotations.append(line)
- del lines[i]
- else:
- ltpos = line.find('<')
- if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
- break
- for line in top_annotations:
- lines.insert(i, ' ' + line)
-
-
-def findmatch(beg, nodes, offset):
- level = 1
- end = PAIRS[beg][0]
- for i, t in enumerate(nodes[offset:]):
- if t.type == beg: level += 1
- elif t.type == end: level -= 1
- if level == 0:
- return i + offset
- raise EOFError("No closing token encountered for %s @ %d"
- % (beg, offset))
-
-
-def match_pairs(nodes):
- newnodes = []
- i = 0
- while 1:
- if i >= len(nodes): break
- node = nodes[i]
- if node.type in PAIRS.keys():
- # Look for enclosing brackets
- match = findmatch(node.type, nodes, i + 1)
- matchtype = PAIRS[node.type][1]
- node = Node(type=matchtype, value=nodes[i + 1:match])
- node.value = match_pairs(node.value)
- newnodes.append(node)
- i = match + 1
- else:
- newnodes.append(node)
- i += 1
- if i >= len(nodes): break
- if nodes[i].type in (ANY, SOME, MAYBE):
- newnodes[-1].quant = nodes[i].type
- i += 1
- nodes[:] = newnodes
- return nodes
-
-
-def type_bodies(nodes):
- newnodes = []
- i = 0
- while 1:
- if i >= len(nodes): break
- if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \
- nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY):
- name, body = nodes[i + 1].value, nodes[i + 2]
- value, quant = type_bodies(body.value), body.quant
- node = Node(nodes[i].type, value, name, quant)
- newnodes.append(node)
- i += 3
- elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN):
- node = Node(DATATAG, nodes[i + 1].value, nodes[i].value)
- newnodes.append(node)
- i += 2
- elif nodes[i] == DEFINE:
- print nodes[i:]
- else:
- if nodes[i].type == GROUP: # Recurse into groups
- value = type_bodies(nodes[i].value)
- nodes[i] = Node(GROUP, value, None, nodes[i].quant)
- newnodes.append(nodes[i])
- i += 1
- nodes[:] = newnodes
- return nodes
-
-
-def nest_defines(nodes):
- "Attach groups to named patterns"
- newnodes = []
- i = 0
- while 1:
- if i >= len(nodes): break
- node = nodes[i]
- newnodes.append(node)
- if node.type == DEFINE:
- group = []
- while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE:
- group.append(nodes[i + 1])
- i += 1
- node.name = node.value
- node.value = Node(GROUP, group)
- i += 1
- nodes[:] = newnodes
- return nodes
-
-
-def intersperse(nodes):
- "Look for interleaved, choice, or sequential nodes in groups/bodies"
- for node in nodes:
- if node.type in (ELEM, ATTR, GROUP, LITERAL):
- val = node.value
- ntypes = [n.type for n in val if not isinstance(val, str)]
- inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)]
- inters = dict(zip(inters, [0] * len(inters)))
- if len(inters) > 1:
- raise ParseError("Ambiguity in sequencing: %s" % node)
- if len(inters) > 0:
- intertype = inters.keys()[0]
- items = []
- for pat in node.value:
- if pat.type != intertype:
- items.append(pat)
- node.value = Node(intertype, items)
- if not isinstance(node.value, str): # No recurse to terminal str
- intersperse(node.value)
- return nodes
-
-
-def scan_NS(nodes):
- "Look for any namespace configuration lines"
- global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
- for node in nodes:
- if node.type == DEFAULT_NS:
- DEFAULT_NAMESPACE = node.value
- elif node.type == NS:
- ns, url = map(str.strip, node.value.split('='))
- OTHER_NAMESPACE[ns] = url
- elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
- OTHER_NAMESPACE['a'] =\
- '"http://relaxng.org/ns/compatibility/annotations/1.0"'
- elif node.type == DATATYPES:
- DATATYPE_LIB[:] = [1, node.value]
- elif node.type == START:
- CONTEXT_FREE = 1
-
-
-def make_nodetree(tokens):
- nodes = toNodes(tokens)
- match_pairs(nodes)
- type_bodies(nodes)
- nest_defines(nodes)
- intersperse(nodes)
- scan_NS(nodes)
- root = Node(ROOT, nodes)
- return root
-
-
-if __name__ == '__main__':
- make_nodetree(token_list(sys.stdin.read())).prettyprint()
+#!/usr/bin/env python +# Convert an RELAX NG compact syntax schema to a Node tree +# This file released to the Public Domain by David Mertz +from __future__ import generators +import sys +from rnc_tokenize import token_list + + +class ParseError(SyntaxError): pass + +for t in """ + ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL + NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT + DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE + """.split(): globals()[t] = t + +PAIRS = {'BEG_BODY': ('END_BODY', BODY), + 'BEG_PAREN': ('END_PAREN', GROUP), + 'BEG_ANNO': ('END_ANNO', ANNOTATION)} + +TAGS = {ONE: 'group', + SOME: 'oneOrMore', + MAYBE: 'optional', + ANY: 'zeroOrMore'} + +DEFAULT_NAMESPACE = None +DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"'] +OTHER_NAMESPACE = {} +CONTEXT_FREE = 0 + +try: enumerate +except: enumerate = lambda seq: zip(range(len(seq)), seq) +nodetypes = lambda nl: tuple(map(lambda n: n.type, nl)) +toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks) + + +class Node(object): + __slots__ = ('type', 'value', 'name', 'quant') + + def __iter__(self): yield self + __len__ = lambda self: 1 + + def __init__(self, type='', value=[], name=None, quant=ONE): + self.type = type + self.value = value + self.name = name + self.quant = quant + + def format(self, indent=0): + out = [' ' * indent + repr(self)] + write = out.append + if isinstance(self.value, str): + if self.type == COMMENT: + write(' ' * (1 + indent) + self.value) + else: + for node in self.value: + write(node.format(indent + 1)) + return '\n'.join(out) + + def prettyprint(self): + print self.format() + + def toxml(self): + if CONTEXT_FREE: + out = [] + write = out.append + write('<?xml version="1.0" encoding="UTF-8"?>') + write('<grammar>') + self.type = None + write(self.xmlnode(1)) + write('</grammar>') + return self.add_ns('\n'.join(out)) + else: + return self.add_ns(self.xmlnode()) + + def xmlnode(self, indent=0): + out = [] + write = out.append + if self.type == ROOT: + write('<?xml version="1.0" encoding="UTF-8"?>') + + for x in self.value: + if not isinstance(x, Node): + raise TypeError("Unhappy Node.value: " + repr(x)) + elif x.type == START: + startelem = '<start><ref name="%s"/></start>' % x.value + write(' ' * indent + startelem) + elif x.type == DEFINE: + write(' ' * indent + '<define name="%s">' % x.name) + write(x.xmlnode(indent + 1)) + write(' ' * indent + '</define>') + elif x.type == NAME: + write(' ' * indent + '<ref name="%s"/>' % x.value) + elif x.type == COMMENT: + write(' ' * indent + '<!-- %s -->' % x.value) + elif x.type == LITERAL: + write(' ' * indent + '<value>%s</value>' % x.value) + elif x.type == ANNOTATION: + write(' ' * indent + + '<a:documentation>%s</a:documentation>' % x.value) + elif x.type == INTERLEAVE: + write(' ' * indent + '<interleave>') + write(x.xmlnode(indent + 1)) + write(' ' * indent + '</interleave>') + elif x.type == SEQ: + write(x.xmlnode(indent + 1)) + elif x.type == CHOICE: + write(' ' * indent + '<choice>') + write(x.xmlnode(indent + 1)) + write(' ' * indent + '</choice>') + elif x.type == GROUP: + write(x.xmlnode(indent)) + elif x.type == TEXT: + write(' ' * indent + '<text/>') + elif x.type == EMPTY: + write(' ' * indent + '<empty/>') + elif x.type == DATATAG: + DATATYPE_LIB[0] = 1 # Use datatypes + if x.name is None: # no paramaters + write(' ' * indent + '<data type="%s"/>' % x.value) + else: + write(' ' * indent + '<data type="%s">' % x.name) + p = '<param name="pattern">%s</param>' % x.value + write(' ' * (indent + 1) + p) + write(' ' * indent + '</data>') + elif x.type == ELEM: + if x.quant == ONE: + write(' ' * indent + '<element name="%s">' % x.name) + write(x.xmlnode(indent + 1)) + write(' ' * indent + '</element>') + else: + write(' ' * indent + '<%s>' % TAGS[x.quant]) + write(' ' * (indent + 1) + '<element name="%s">' % x.name) + write(x.xmlnode(indent + 2)) + write(' ' * (indent + 1) + '</element>') + write(' ' * indent + '</%s>' % TAGS[x.quant]) + elif x.type == ATTR: + if x.value[0].type == TEXT: + write(' ' * indent + '<attribute name="%s"/>' % x.name) + elif x.value[0].type == EMPTY: + write(' ' * indent + '<attribute name="%s">' % x.name) + write(' ' * (indent + 1) + '<empty/>') + write(' ' * indent + '</attribute>') + + return '\n'.join(out) + + def __repr__(self): + return "Node(%s,%s,%s)[%d]" % (self.type, self.name, + self.quant, len(self.value)) + + def add_ns(self, xml): + "Add namespace attributes to top level element" + lines = xml.split('\n') + self.nest_annotations(lines) # annots not allowed before root elem + for i, line in enumerate(lines): + ltpos = line.find('<') + if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'): + # We've got an element tag, not PI or comment + new = line[:line.find('>')] + new += ' xmlns="http://relaxng.org/ns/structure/1.0"' + if DEFAULT_NAMESPACE is not None: + new += '\n ns=%s' % DEFAULT_NAMESPACE + if DATATYPE_LIB[0]: + new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1] + for ns, url in OTHER_NAMESPACE.items(): + new += '\n xmlns:%s=%s' % (ns, url) + new += '>' + lines[i] = new + break + return '\n'.join(lines) + + def nest_annotations(self, lines): + "Nest any top annotation within first element" + top_annotations = [] + for i, line in enumerate(lines[:]): + if line.find('<a:') >= 0: + top_annotations.append(line) + del lines[i] + else: + ltpos = line.find('<') + if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'): + break + for line in top_annotations: + lines.insert(i, ' ' + line) + + +def findmatch(beg, nodes, offset): + level = 1 + end = PAIRS[beg][0] + for i, t in enumerate(nodes[offset:]): + if t.type == beg: level += 1 + elif t.type == end: level -= 1 + if level == 0: + return i + offset + raise EOFError("No closing token encountered for %s @ %d" + % (beg, offset)) + + +def match_pairs(nodes): + newnodes = [] + i = 0 + while 1: + if i >= len(nodes): break + node = nodes[i] + if node.type in PAIRS.keys(): + # Look for enclosing brackets + match = findmatch(node.type, nodes, i + 1) + matchtype = PAIRS[node.type][1] + node = Node(type=matchtype, value=nodes[i + 1:match]) + node.value = match_pairs(node.value) + newnodes.append(node) + i = match + 1 + else: + newnodes.append(node) + i += 1 + if i >= len(nodes): break + if nodes[i].type in (ANY, SOME, MAYBE): + newnodes[-1].quant = nodes[i].type + i += 1 + nodes[:] = newnodes + return nodes + + +def type_bodies(nodes): + newnodes = [] + i = 0 + while 1: + if i >= len(nodes): break + if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \ + nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY): + name, body = nodes[i + 1].value, nodes[i + 2] + value, quant = type_bodies(body.value), body.quant + node = Node(nodes[i].type, value, name, quant) + newnodes.append(node) + i += 3 + elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN): + node = Node(DATATAG, nodes[i + 1].value, nodes[i].value) + newnodes.append(node) + i += 2 + elif nodes[i] == DEFINE: + print nodes[i:] + else: + if nodes[i].type == GROUP: # Recurse into groups + value = type_bodies(nodes[i].value) + nodes[i] = Node(GROUP, value, None, nodes[i].quant) + newnodes.append(nodes[i]) + i += 1 + nodes[:] = newnodes + return nodes + + +def nest_defines(nodes): + "Attach groups to named patterns" + newnodes = [] + i = 0 + while 1: + if i >= len(nodes): break + node = nodes[i] + newnodes.append(node) + if node.type == DEFINE: + group = [] + while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE: + group.append(nodes[i + 1]) + i += 1 + node.name = node.value + node.value = Node(GROUP, group) + i += 1 + nodes[:] = newnodes + return nodes + + +def intersperse(nodes): + "Look for interleaved, choice, or sequential nodes in groups/bodies" + for node in nodes: + if node.type in (ELEM, ATTR, GROUP, LITERAL): + val = node.value + ntypes = [n.type for n in val if not isinstance(val, str)] + inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)] + inters = dict(zip(inters, [0] * len(inters))) + if len(inters) > 1: + raise ParseError("Ambiguity in sequencing: %s" % node) + if len(inters) > 0: + intertype = inters.keys()[0] + items = [] + for pat in node.value: + if pat.type != intertype: + items.append(pat) + node.value = Node(intertype, items) + if not isinstance(node.value, str): # No recurse to terminal str + intersperse(node.value) + return nodes + + +def scan_NS(nodes): + "Look for any namespace configuration lines" + global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE + for node in nodes: + if node.type == DEFAULT_NS: + DEFAULT_NAMESPACE = node.value + elif node.type == NS: + ns, url = map(str.strip, node.value.split('=')) + OTHER_NAMESPACE[ns] = url + elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE: + OTHER_NAMESPACE['a'] =\ + '"http://relaxng.org/ns/compatibility/annotations/1.0"' + elif node.type == DATATYPES: + DATATYPE_LIB[:] = [1, node.value] + elif node.type == START: + CONTEXT_FREE = 1 + + +def make_nodetree(tokens): + nodes = toNodes(tokens) + match_pairs(nodes) + type_bodies(nodes) + nest_defines(nodes) + intersperse(nodes) + scan_NS(nodes) + root = Node(ROOT, nodes) + return root + + +if __name__ == '__main__': + make_nodetree(token_list(sys.stdin.read())).prettyprint() |