summaryrefslogtreecommitdiffstats
path: root/rnctree.py
diff options
context:
space:
mode:
Diffstat (limited to 'rnctree.py')
-rwxr-xr-xrnctree.py648
1 files changed, 324 insertions, 324 deletions
diff --git a/rnctree.py b/rnctree.py
index fb78d05..ebb9433 100755
--- a/rnctree.py
+++ b/rnctree.py
@@ -1,324 +1,324 @@
-#!/usr/bin/env python
-# Convert an RELAX NG compact syntax schema to a Node tree
-# This file released to the Public Domain by David Mertz
-from __future__ import generators
-import sys
-from rnc_tokenize import token_list
-
-
-class ParseError(SyntaxError): pass
-
-for t in """
- ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
- NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
- DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
- """.split(): globals()[t] = t
-
-PAIRS = {'BEG_BODY': ('END_BODY', BODY),
- 'BEG_PAREN': ('END_PAREN', GROUP),
- 'BEG_ANNO': ('END_ANNO', ANNOTATION)}
-
-TAGS = {ONE: 'group',
- SOME: 'oneOrMore',
- MAYBE: 'optional',
- ANY: 'zeroOrMore'}
-
-DEFAULT_NAMESPACE = None
-DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
-OTHER_NAMESPACE = {}
-CONTEXT_FREE = 0
-
-try: enumerate
-except: enumerate = lambda seq: zip(range(len(seq)), seq)
-nodetypes = lambda nl: tuple(map(lambda n: n.type, nl))
-toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks)
-
-
-class Node(object):
- __slots__ = ('type', 'value', 'name', 'quant')
-
- def __iter__(self): yield self
- __len__ = lambda self: 1
-
- def __init__(self, type='', value=[], name=None, quant=ONE):
- self.type = type
- self.value = value
- self.name = name
- self.quant = quant
-
- def format(self, indent=0):
- out = [' ' * indent + repr(self)]
- write = out.append
- if isinstance(self.value, str):
- if self.type == COMMENT:
- write(' ' * (1 + indent) + self.value)
- else:
- for node in self.value:
- write(node.format(indent + 1))
- return '\n'.join(out)
-
- def prettyprint(self):
- print self.format()
-
- def toxml(self):
- if CONTEXT_FREE:
- out = []
- write = out.append
- write('<?xml version="1.0" encoding="UTF-8"?>')
- write('<grammar>')
- self.type = None
- write(self.xmlnode(1))
- write('</grammar>')
- return self.add_ns('\n'.join(out))
- else:
- return self.add_ns(self.xmlnode())
-
- def xmlnode(self, indent=0):
- out = []
- write = out.append
- if self.type == ROOT:
- write('<?xml version="1.0" encoding="UTF-8"?>')
-
- for x in self.value:
- if not isinstance(x, Node):
- raise TypeError("Unhappy Node.value: " + repr(x))
- elif x.type == START:
- startelem = '<start><ref name="%s"/></start>' % x.value
- write(' ' * indent + startelem)
- elif x.type == DEFINE:
- write(' ' * indent + '<define name="%s">' % x.name)
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</define>')
- elif x.type == NAME:
- write(' ' * indent + '<ref name="%s"/>' % x.value)
- elif x.type == COMMENT:
- write(' ' * indent + '<!-- %s -->' % x.value)
- elif x.type == LITERAL:
- write(' ' * indent + '<value>%s</value>' % x.value)
- elif x.type == ANNOTATION:
- write(' ' * indent
- + '<a:documentation>%s</a:documentation>' % x.value)
- elif x.type == INTERLEAVE:
- write(' ' * indent + '<interleave>')
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</interleave>')
- elif x.type == SEQ:
- write(x.xmlnode(indent + 1))
- elif x.type == CHOICE:
- write(' ' * indent + '<choice>')
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</choice>')
- elif x.type == GROUP:
- write(x.xmlnode(indent))
- elif x.type == TEXT:
- write(' ' * indent + '<text/>')
- elif x.type == EMPTY:
- write(' ' * indent + '<empty/>')
- elif x.type == DATATAG:
- DATATYPE_LIB[0] = 1 # Use datatypes
- if x.name is None: # no paramaters
- write(' ' * indent + '<data type="%s"/>' % x.value)
- else:
- write(' ' * indent + '<data type="%s">' % x.name)
- p = '<param name="pattern">%s</param>' % x.value
- write(' ' * (indent + 1) + p)
- write(' ' * indent + '</data>')
- elif x.type == ELEM:
- if x.quant == ONE:
- write(' ' * indent + '<element name="%s">' % x.name)
- write(x.xmlnode(indent + 1))
- write(' ' * indent + '</element>')
- else:
- write(' ' * indent + '<%s>' % TAGS[x.quant])
- write(' ' * (indent + 1) + '<element name="%s">' % x.name)
- write(x.xmlnode(indent + 2))
- write(' ' * (indent + 1) + '</element>')
- write(' ' * indent + '</%s>' % TAGS[x.quant])
- elif x.type == ATTR:
- if x.value[0].type == TEXT:
- write(' ' * indent + '<attribute name="%s"/>' % x.name)
- elif x.value[0].type == EMPTY:
- write(' ' * indent + '<attribute name="%s">' % x.name)
- write(' ' * (indent + 1) + '<empty/>')
- write(' ' * indent + '</attribute>')
-
- return '\n'.join(out)
-
- def __repr__(self):
- return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
- self.quant, len(self.value))
-
- def add_ns(self, xml):
- "Add namespace attributes to top level element"
- lines = xml.split('\n')
- self.nest_annotations(lines) # annots not allowed before root elem
- for i, line in enumerate(lines):
- ltpos = line.find('<')
- if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
- # We've got an element tag, not PI or comment
- new = line[:line.find('>')]
- new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
- if DEFAULT_NAMESPACE is not None:
- new += '\n ns=%s' % DEFAULT_NAMESPACE
- if DATATYPE_LIB[0]:
- new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1]
- for ns, url in OTHER_NAMESPACE.items():
- new += '\n xmlns:%s=%s' % (ns, url)
- new += '>'
- lines[i] = new
- break
- return '\n'.join(lines)
-
- def nest_annotations(self, lines):
- "Nest any top annotation within first element"
- top_annotations = []
- for i, line in enumerate(lines[:]):
- if line.find('<a:') >= 0:
- top_annotations.append(line)
- del lines[i]
- else:
- ltpos = line.find('<')
- if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
- break
- for line in top_annotations:
- lines.insert(i, ' ' + line)
-
-
-def findmatch(beg, nodes, offset):
- level = 1
- end = PAIRS[beg][0]
- for i, t in enumerate(nodes[offset:]):
- if t.type == beg: level += 1
- elif t.type == end: level -= 1
- if level == 0:
- return i + offset
- raise EOFError("No closing token encountered for %s @ %d"
- % (beg, offset))
-
-
-def match_pairs(nodes):
- newnodes = []
- i = 0
- while 1:
- if i >= len(nodes): break
- node = nodes[i]
- if node.type in PAIRS.keys():
- # Look for enclosing brackets
- match = findmatch(node.type, nodes, i + 1)
- matchtype = PAIRS[node.type][1]
- node = Node(type=matchtype, value=nodes[i + 1:match])
- node.value = match_pairs(node.value)
- newnodes.append(node)
- i = match + 1
- else:
- newnodes.append(node)
- i += 1
- if i >= len(nodes): break
- if nodes[i].type in (ANY, SOME, MAYBE):
- newnodes[-1].quant = nodes[i].type
- i += 1
- nodes[:] = newnodes
- return nodes
-
-
-def type_bodies(nodes):
- newnodes = []
- i = 0
- while 1:
- if i >= len(nodes): break
- if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \
- nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY):
- name, body = nodes[i + 1].value, nodes[i + 2]
- value, quant = type_bodies(body.value), body.quant
- node = Node(nodes[i].type, value, name, quant)
- newnodes.append(node)
- i += 3
- elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN):
- node = Node(DATATAG, nodes[i + 1].value, nodes[i].value)
- newnodes.append(node)
- i += 2
- elif nodes[i] == DEFINE:
- print nodes[i:]
- else:
- if nodes[i].type == GROUP: # Recurse into groups
- value = type_bodies(nodes[i].value)
- nodes[i] = Node(GROUP, value, None, nodes[i].quant)
- newnodes.append(nodes[i])
- i += 1
- nodes[:] = newnodes
- return nodes
-
-
-def nest_defines(nodes):
- "Attach groups to named patterns"
- newnodes = []
- i = 0
- while 1:
- if i >= len(nodes): break
- node = nodes[i]
- newnodes.append(node)
- if node.type == DEFINE:
- group = []
- while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE:
- group.append(nodes[i + 1])
- i += 1
- node.name = node.value
- node.value = Node(GROUP, group)
- i += 1
- nodes[:] = newnodes
- return nodes
-
-
-def intersperse(nodes):
- "Look for interleaved, choice, or sequential nodes in groups/bodies"
- for node in nodes:
- if node.type in (ELEM, ATTR, GROUP, LITERAL):
- val = node.value
- ntypes = [n.type for n in val if not isinstance(val, str)]
- inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)]
- inters = dict(zip(inters, [0] * len(inters)))
- if len(inters) > 1:
- raise ParseError("Ambiguity in sequencing: %s" % node)
- if len(inters) > 0:
- intertype = inters.keys()[0]
- items = []
- for pat in node.value:
- if pat.type != intertype:
- items.append(pat)
- node.value = Node(intertype, items)
- if not isinstance(node.value, str): # No recurse to terminal str
- intersperse(node.value)
- return nodes
-
-
-def scan_NS(nodes):
- "Look for any namespace configuration lines"
- global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
- for node in nodes:
- if node.type == DEFAULT_NS:
- DEFAULT_NAMESPACE = node.value
- elif node.type == NS:
- ns, url = map(str.strip, node.value.split('='))
- OTHER_NAMESPACE[ns] = url
- elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
- OTHER_NAMESPACE['a'] =\
- '"http://relaxng.org/ns/compatibility/annotations/1.0"'
- elif node.type == DATATYPES:
- DATATYPE_LIB[:] = [1, node.value]
- elif node.type == START:
- CONTEXT_FREE = 1
-
-
-def make_nodetree(tokens):
- nodes = toNodes(tokens)
- match_pairs(nodes)
- type_bodies(nodes)
- nest_defines(nodes)
- intersperse(nodes)
- scan_NS(nodes)
- root = Node(ROOT, nodes)
- return root
-
-
-if __name__ == '__main__':
- make_nodetree(token_list(sys.stdin.read())).prettyprint()
+#!/usr/bin/env python
+# Convert an RELAX NG compact syntax schema to a Node tree
+# This file released to the Public Domain by David Mertz
+from __future__ import generators
+import sys
+from rnc_tokenize import token_list
+
+
+class ParseError(SyntaxError): pass
+
+for t in """
+ ANY SOME MAYBE ONE BODY ANNOTATION ELEM ATTR GROUP LITERAL
+ NAME COMMENT TEXT EMPTY INTERLEAVE CHOICE SEQ ROOT
+ DEFAULT_NS NS DATATYPES DATATAG PATTERN START DEFINE
+ """.split(): globals()[t] = t
+
+PAIRS = {'BEG_BODY': ('END_BODY', BODY),
+ 'BEG_PAREN': ('END_PAREN', GROUP),
+ 'BEG_ANNO': ('END_ANNO', ANNOTATION)}
+
+TAGS = {ONE: 'group',
+ SOME: 'oneOrMore',
+ MAYBE: 'optional',
+ ANY: 'zeroOrMore'}
+
+DEFAULT_NAMESPACE = None
+DATATYPE_LIB = [0, '"http://www.w3.org/2001/XMLSchema-datatypes"']
+OTHER_NAMESPACE = {}
+CONTEXT_FREE = 0
+
+try: enumerate
+except: enumerate = lambda seq: zip(range(len(seq)), seq)
+nodetypes = lambda nl: tuple(map(lambda n: n.type, nl))
+toNodes = lambda toks: map(lambda t: Node(t.type, t.value), toks)
+
+
+class Node(object):
+ __slots__ = ('type', 'value', 'name', 'quant')
+
+ def __iter__(self): yield self
+ __len__ = lambda self: 1
+
+ def __init__(self, type='', value=[], name=None, quant=ONE):
+ self.type = type
+ self.value = value
+ self.name = name
+ self.quant = quant
+
+ def format(self, indent=0):
+ out = [' ' * indent + repr(self)]
+ write = out.append
+ if isinstance(self.value, str):
+ if self.type == COMMENT:
+ write(' ' * (1 + indent) + self.value)
+ else:
+ for node in self.value:
+ write(node.format(indent + 1))
+ return '\n'.join(out)
+
+ def prettyprint(self):
+ print self.format()
+
+ def toxml(self):
+ if CONTEXT_FREE:
+ out = []
+ write = out.append
+ write('<?xml version="1.0" encoding="UTF-8"?>')
+ write('<grammar>')
+ self.type = None
+ write(self.xmlnode(1))
+ write('</grammar>')
+ return self.add_ns('\n'.join(out))
+ else:
+ return self.add_ns(self.xmlnode())
+
+ def xmlnode(self, indent=0):
+ out = []
+ write = out.append
+ if self.type == ROOT:
+ write('<?xml version="1.0" encoding="UTF-8"?>')
+
+ for x in self.value:
+ if not isinstance(x, Node):
+ raise TypeError("Unhappy Node.value: " + repr(x))
+ elif x.type == START:
+ startelem = '<start><ref name="%s"/></start>' % x.value
+ write(' ' * indent + startelem)
+ elif x.type == DEFINE:
+ write(' ' * indent + '<define name="%s">' % x.name)
+ write(x.xmlnode(indent + 1))
+ write(' ' * indent + '</define>')
+ elif x.type == NAME:
+ write(' ' * indent + '<ref name="%s"/>' % x.value)
+ elif x.type == COMMENT:
+ write(' ' * indent + '<!-- %s -->' % x.value)
+ elif x.type == LITERAL:
+ write(' ' * indent + '<value>%s</value>' % x.value)
+ elif x.type == ANNOTATION:
+ write(' ' * indent
+ + '<a:documentation>%s</a:documentation>' % x.value)
+ elif x.type == INTERLEAVE:
+ write(' ' * indent + '<interleave>')
+ write(x.xmlnode(indent + 1))
+ write(' ' * indent + '</interleave>')
+ elif x.type == SEQ:
+ write(x.xmlnode(indent + 1))
+ elif x.type == CHOICE:
+ write(' ' * indent + '<choice>')
+ write(x.xmlnode(indent + 1))
+ write(' ' * indent + '</choice>')
+ elif x.type == GROUP:
+ write(x.xmlnode(indent))
+ elif x.type == TEXT:
+ write(' ' * indent + '<text/>')
+ elif x.type == EMPTY:
+ write(' ' * indent + '<empty/>')
+ elif x.type == DATATAG:
+ DATATYPE_LIB[0] = 1 # Use datatypes
+ if x.name is None: # no paramaters
+ write(' ' * indent + '<data type="%s"/>' % x.value)
+ else:
+ write(' ' * indent + '<data type="%s">' % x.name)
+ p = '<param name="pattern">%s</param>' % x.value
+ write(' ' * (indent + 1) + p)
+ write(' ' * indent + '</data>')
+ elif x.type == ELEM:
+ if x.quant == ONE:
+ write(' ' * indent + '<element name="%s">' % x.name)
+ write(x.xmlnode(indent + 1))
+ write(' ' * indent + '</element>')
+ else:
+ write(' ' * indent + '<%s>' % TAGS[x.quant])
+ write(' ' * (indent + 1) + '<element name="%s">' % x.name)
+ write(x.xmlnode(indent + 2))
+ write(' ' * (indent + 1) + '</element>')
+ write(' ' * indent + '</%s>' % TAGS[x.quant])
+ elif x.type == ATTR:
+ if x.value[0].type == TEXT:
+ write(' ' * indent + '<attribute name="%s"/>' % x.name)
+ elif x.value[0].type == EMPTY:
+ write(' ' * indent + '<attribute name="%s">' % x.name)
+ write(' ' * (indent + 1) + '<empty/>')
+ write(' ' * indent + '</attribute>')
+
+ return '\n'.join(out)
+
+ def __repr__(self):
+ return "Node(%s,%s,%s)[%d]" % (self.type, self.name,
+ self.quant, len(self.value))
+
+ def add_ns(self, xml):
+ "Add namespace attributes to top level element"
+ lines = xml.split('\n')
+ self.nest_annotations(lines) # annots not allowed before root elem
+ for i, line in enumerate(lines):
+ ltpos = line.find('<')
+ if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
+ # We've got an element tag, not PI or comment
+ new = line[:line.find('>')]
+ new += ' xmlns="http://relaxng.org/ns/structure/1.0"'
+ if DEFAULT_NAMESPACE is not None:
+ new += '\n ns=%s' % DEFAULT_NAMESPACE
+ if DATATYPE_LIB[0]:
+ new += '\n datatypeLibrary=%s' % DATATYPE_LIB[1]
+ for ns, url in OTHER_NAMESPACE.items():
+ new += '\n xmlns:%s=%s' % (ns, url)
+ new += '>'
+ lines[i] = new
+ break
+ return '\n'.join(lines)
+
+ def nest_annotations(self, lines):
+ "Nest any top annotation within first element"
+ top_annotations = []
+ for i, line in enumerate(lines[:]):
+ if line.find('<a:') >= 0:
+ top_annotations.append(line)
+ del lines[i]
+ else:
+ ltpos = line.find('<')
+ if ltpos >= 0 and line[ltpos + 1] not in ('!', '?'):
+ break
+ for line in top_annotations:
+ lines.insert(i, ' ' + line)
+
+
+def findmatch(beg, nodes, offset):
+ level = 1
+ end = PAIRS[beg][0]
+ for i, t in enumerate(nodes[offset:]):
+ if t.type == beg: level += 1
+ elif t.type == end: level -= 1
+ if level == 0:
+ return i + offset
+ raise EOFError("No closing token encountered for %s @ %d"
+ % (beg, offset))
+
+
+def match_pairs(nodes):
+ newnodes = []
+ i = 0
+ while 1:
+ if i >= len(nodes): break
+ node = nodes[i]
+ if node.type in PAIRS.keys():
+ # Look for enclosing brackets
+ match = findmatch(node.type, nodes, i + 1)
+ matchtype = PAIRS[node.type][1]
+ node = Node(type=matchtype, value=nodes[i + 1:match])
+ node.value = match_pairs(node.value)
+ newnodes.append(node)
+ i = match + 1
+ else:
+ newnodes.append(node)
+ i += 1
+ if i >= len(nodes): break
+ if nodes[i].type in (ANY, SOME, MAYBE):
+ newnodes[-1].quant = nodes[i].type
+ i += 1
+ nodes[:] = newnodes
+ return nodes
+
+
+def type_bodies(nodes):
+ newnodes = []
+ i = 0
+ while 1:
+ if i >= len(nodes): break
+ if nodetypes(nodes[i:i + 3]) == (ELEM, NAME, BODY) or \
+ nodetypes(nodes[i:i + 3]) == (ATTR, NAME, BODY):
+ name, body = nodes[i + 1].value, nodes[i + 2]
+ value, quant = type_bodies(body.value), body.quant
+ node = Node(nodes[i].type, value, name, quant)
+ newnodes.append(node)
+ i += 3
+ elif nodetypes(nodes[i:i + 2]) == (DATATAG, PATTERN):
+ node = Node(DATATAG, nodes[i + 1].value, nodes[i].value)
+ newnodes.append(node)
+ i += 2
+ elif nodes[i] == DEFINE:
+ print nodes[i:]
+ else:
+ if nodes[i].type == GROUP: # Recurse into groups
+ value = type_bodies(nodes[i].value)
+ nodes[i] = Node(GROUP, value, None, nodes[i].quant)
+ newnodes.append(nodes[i])
+ i += 1
+ nodes[:] = newnodes
+ return nodes
+
+
+def nest_defines(nodes):
+ "Attach groups to named patterns"
+ newnodes = []
+ i = 0
+ while 1:
+ if i >= len(nodes): break
+ node = nodes[i]
+ newnodes.append(node)
+ if node.type == DEFINE:
+ group = []
+ while (i + 1) < len(nodes) and nodes[i + 1].type != DEFINE:
+ group.append(nodes[i + 1])
+ i += 1
+ node.name = node.value
+ node.value = Node(GROUP, group)
+ i += 1
+ nodes[:] = newnodes
+ return nodes
+
+
+def intersperse(nodes):
+ "Look for interleaved, choice, or sequential nodes in groups/bodies"
+ for node in nodes:
+ if node.type in (ELEM, ATTR, GROUP, LITERAL):
+ val = node.value
+ ntypes = [n.type for n in val if not isinstance(val, str)]
+ inters = [t for t in ntypes if t in (INTERLEAVE, CHOICE, SEQ)]
+ inters = dict(zip(inters, [0] * len(inters)))
+ if len(inters) > 1:
+ raise ParseError("Ambiguity in sequencing: %s" % node)
+ if len(inters) > 0:
+ intertype = inters.keys()[0]
+ items = []
+ for pat in node.value:
+ if pat.type != intertype:
+ items.append(pat)
+ node.value = Node(intertype, items)
+ if not isinstance(node.value, str): # No recurse to terminal str
+ intersperse(node.value)
+ return nodes
+
+
+def scan_NS(nodes):
+ "Look for any namespace configuration lines"
+ global DEFAULT_NAMESPACE, OTHER_NAMESPACE, CONTEXT_FREE
+ for node in nodes:
+ if node.type == DEFAULT_NS:
+ DEFAULT_NAMESPACE = node.value
+ elif node.type == NS:
+ ns, url = map(str.strip, node.value.split('='))
+ OTHER_NAMESPACE[ns] = url
+ elif node.type == ANNOTATION and 'a' not in OTHER_NAMESPACE:
+ OTHER_NAMESPACE['a'] =\
+ '"http://relaxng.org/ns/compatibility/annotations/1.0"'
+ elif node.type == DATATYPES:
+ DATATYPE_LIB[:] = [1, node.value]
+ elif node.type == START:
+ CONTEXT_FREE = 1
+
+
+def make_nodetree(tokens):
+ nodes = toNodes(tokens)
+ match_pairs(nodes)
+ type_bodies(nodes)
+ nest_defines(nodes)
+ intersperse(nodes)
+ scan_NS(nodes)
+ root = Node(ROOT, nodes)
+ return root
+
+
+if __name__ == '__main__':
+ make_nodetree(token_list(sys.stdin.read())).prettyprint()