summaryrefslogtreecommitdiffstats
path: root/utils_xml.py
blob: 8128a9f772f9744752120e91d650c86139c61a5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# -*- coding: UTF-8 -*-
# Copyright 2014 Red Hat, Inc.
# Part of clufter project
# Licensed under GPLv2+ (a copy included | http://gnu.org/licenses/gpl-2.0.txt)
"""XML helpers"""
__author__ = "Jan Pokorný <jpokorny @at@ Red Hat .dot. com>"

from copy import deepcopy
from lxml import etree

from .error import ClufterPlainError
from .utils import selfaware


# Well-known namespace prefixes mapped to their URIs
NAMESPACES = dict(
    clufter='http://people.redhat.com/jpokorny/ns/clufter',
    rng='http://relaxng.org/ns/structure/1.0',
    xsl='http://www.w3.org/1999/XSL/Transform',
)

# XSLT identity template; `{0}` (to be filled in via `str.format`) prefixes
# both the `@*` and the `node()` alternative of the match pattern
xslt_identity = '\n'.join((
    '    <xsl:template match="{0}@*|{0}node()"',
    '                  xmlns:xsl="' + NAMESPACES['xsl'] + '">',
    '        <xsl:copy>',
    '            <xsl:apply-templates select="@*|node()"/>',
    '       </xsl:copy>',
    '    </xsl:template>',
))


class UtilsXmlError(ClufterPlainError):
    """Error specific to the XML helpers of this module"""


def squote(s):
    """Return `s` enclosed in single quotes (no escaping is performed)"""
    quote = "'"
    return quote + s + quote


def namespaced(ns, ident):
    """Qualify `ident` with `ns` namespace using Clark's notation

    `ns` is either a key of `NAMESPACES` (the respective URI gets
    substituted) or it is used verbatim as the namespace itself.
    """
    uri = NAMESPACES.get(ns, ns)
    return "{{{uri}}}{name}".format(uri=uri, name=ident)


def nselem(ns, tag, **kwargs):
    """Create a new element `tag` within `ns` namespace

    Keyword arguments are passed on to `etree.Element` as they are.
    """
    qualified_tag = namespaced(ns, tag)
    return etree.Element(qualified_tag, **kwargs)

# compiled XPath selecting /grammar/start element(s) of a Relax NG grammar
rng_get_start = etree.ETXPath("/{0}/{1}"
                              .format(namespaced('rng', 'grammar'),
                                      namespaced('rng', 'start')))
# compiled XPath selecting processing instructions directly under the root
xml_get_root_pi = etree.XPath("/*/processing-instruction()")


def xmltag_get_localname(tag):
    """Return the local name part of `tag` (as parsed by `etree.QName`)"""
    return etree.QName(tag).localname


def xmltag_get_namespace(tag):
    """Return the namespace part of `tag` (as parsed by `etree.QName`)"""
    return etree.QName(tag).namespace

# Template (for `str.format`) of an ETXPath expression selecting, anywhere
# under /grammar, the Relax NG `element` items whose `name` attribute equals
# the format argument; braces of Clark's notation are doubled so that they
# survive the formatting
RNG_ELEMENT = "".join((
    "/", namespaced('rng', 'grammar'),
    "//", namespaced('rng', 'element'),
)).replace('{', '{{').replace('}', '}}') + "[@name = '{0}']"


class ElementJuggler(object):
    """Element juggling, possibly utilizing own temporary holder

    This can be handy e.g. to automatically strip unused namespaces
    for `tostring` method, without a need to copy/reparse, followed
    by returning the element back.

    Typical round trip:

        position = juggler.grab(elem)    # elem moved under the holder
        ...                              # work with the relocated elem
        juggler.rebind(elem, position)   # elem moved back

    while `drop` merely removes a previously grabbed element from
    the holder.
    """

    # holder tree shared by all instances not provided with their own one
    _aside_tree = etree.ElementTree(etree.Element('ROOT'))

    def __init__(self, tree=_aside_tree):
        """Use the root of `tree` (shared default unless given) as a holder"""
        self._root = tree.getroot()

    def grab(self, elem):
        """Move `elem` under the holder, returning its original position

        Returns `(parent, index)` pair as expected by `rebind`.

        Raises ValueError if `elem` has no parent (there would be no
        position to return to) or if it is already held by this holder.
        """
        parent = elem.getparent()
        # explicit checks instead of `assert`: they must survive `python -O`
        if parent is None:
            raise ValueError("Cannot grab an element lacking a parent")
        if parent is self._root:
            raise ValueError("Cannot grab an element that is already held")
        parent_index = parent.index(elem)
        self._root.append(elem)
        return parent, parent_index

    @staticmethod
    def rebind(elem, parent_pos):
        """Insert `elem` at `parent_pos` (as returned by `grab`)

        Returns `elem` itself.
        """
        parent, parent_index = parent_pos
        parent.insert(parent_index, elem)
        return elem

    def drop(self, elem):
        """Remove `elem` from the holder

        Raises ValueError if `elem` is not held directly by this holder.
        """
        parent = elem.getparent()
        if parent is not self._root:
            raise ValueError("Cannot drop an element that is not held")
        parent.remove(elem)

# ready-made instance backed by the shared default holder tree
element_juggler = ElementJuggler()


@selfaware
def rng_pivot(me, et, tag):
    """Make element `tag` the start of Relax NG grammar `et` (in situ!)

    Use copy.deepcopy or so to (somewhat) preserve the original.

    The sole `element` item named `tag` is moved under a fresh named
    `define` appended next to /grammar/start, a `ref` to that define
    is left at its original place, and /grammar/start is emptied and
    made to hold just another such `ref`.

    `me` is only used for its `__name__` when deriving the name of
    the new define (presumably supplied by `selfaware` -- confirm).

    Returns the live reference to the (moved) target element.

    Raises UtilsXmlError unless both /grammar/start and the `element`
    item named `tag` are found exactly once within `et`.
    """
    starts = rng_get_start(et)
    localname = xmltag_get_localname(tag)
    start_cnt = len(starts)
    if start_cnt != 1:
        raise UtilsXmlError("Cannot change start if grammar's `start' is"
                            " not contained exactly once ({0} times)"
                            .format(start_cnt))

    targets = etree.ETXPath(RNG_ELEMENT.format(tag))(et)
    target_cnt = len(targets)
    if target_cnt != 1:
        raise UtilsXmlError("Cannot change start if the start element `{0}'"
                            " is not contained exactly once ({1} times)"
                            .format(localname, target_cnt))

    start, target = starts[0], targets[0]
    grammar = start.getparent()
    target_parent = target.getparent()
    # original position has to be recorded before the target gets moved
    target_pos = target_parent.index(target)

    label = '_'.join((me.__name__, localname))
    wrapping_define = nselem('rng', 'define', name=label)
    inplace_ref = nselem('rng', 'ref', name=label)
    start_ref = nselem('rng', 'ref', name=label)

    # target moved under the new define, itself placed next to the start...
    wrapping_define.append(target)
    grammar.append(wrapping_define)

    # ... its original occurrence substituted in-situ with the reference...
    target_parent.insert(target_pos, inplace_ref)

    # ... and finally the start made to refer solely to the new define
    start.clear()
    start.append(start_ref)

    return target