diff options
author | Jan Pokorný <jpokorny@redhat.com> | 2016-02-01 22:09:44 +0100 |
---|---|---|
committer | Jan Pokorný <jpokorny@redhat.com> | 2016-02-01 23:55:09 +0100 |
commit | eeb3baa8119383ee28da703721263e655716d4f2 (patch) | |
tree | 17fe68f9ef2e561816e3abd8e3794107d16cf2a6 | |
parent | edb2861208291b9e5c7d8a377d7bd5725031ac34 (diff) | |
download | clufter-eeb3baa8119383ee28da703721263e655716d4f2.tar.gz clufter-eeb3baa8119383ee28da703721263e655716d4f2.tar.xz clufter-eeb3baa8119383ee28da703721263e655716d4f2.zip |
filter+format: harden (+prepare grounds for) XML/XSLT usage
Forbid any direct network/disk side-effects for XSLT and make sure we
are using an XML parser without network access.
In the future, there is also a possibility to forbid entity
replacement, should that be considered risky and forbidding it unintrusive.
The new lxml.etree wrappers are placed into the new utils_lxml module.
Signed-off-by: Jan Pokorný <jpokorny@redhat.com>
-rw-r--r-- | filter.py | 13 | ||||
-rw-r--r-- | format.py | 8 | ||||
-rw-r--r-- | utils_lxml.py | 20 |
3 files changed, 32 insertions, 9 deletions
@@ -1,5 +1,5 @@ # -*- coding: UTF-8 -*- -# Copyright 2015 Red Hat, Inc. +# Copyright 2016 Red Hat, Inc. # Part of clufter project # Licensed under GPLv2+ (a copy included | http://gnu.org/licenses/gpl-2.0.txt) """Base filter stuff (metaclass, decorator, etc.)""" @@ -32,6 +32,7 @@ from .utils import args2tuple, arg2wrapped, \ filterdict_keep, filterdict_invkeep, filterdict_pop, \ head_tail, hybridproperty, \ identity, lazystring, tuplist +from .utils_lxml import etree_XSLT_safe, etree_parser_safe from .utils_func import apply_preserving_depth, \ apply_aggregation_preserving_depth, \ apply_intercalate, \ @@ -422,7 +423,7 @@ class XMLFilter(Filter, MetaPlugin): if not reply: return False, force # terminating elems = [] - reply = etree.fromstring(reply) + reply = etree.fromstring(reply, parser=etree_parser_safe) if reply.attrib.get('force-block', '').lower() == 'true': force = 'block' for root_pi in xml_get_root_pi(reply): @@ -521,7 +522,7 @@ class XMLFilter(Filter, MetaPlugin): cl = ret.xpath("//processing-instruction('{0}')".format(pi_comment)) for e in cl: # XXX could be done better? (e.text.strip().join((' ', ) * 2)) - reverted = etree.fromstring(e.text) + reverted = etree.fromstring(e.text, parser=etree_parser_safe) element_juggler.rebind(nselem(CLUFTER_NS, 'comment', *tuple( reverted if len(reverted) else args2tuple(reverted.text))), @@ -849,8 +850,7 @@ class XMLFilter(Filter, MetaPlugin): elem = etree.ElementTree(elem) # XXX not getroottree? 
log.debug("Applying {0}, {1}".format(type(elem), etree.tostring(elem))) log.debug("Applying on {0}".format(etree.tostring(xslt_root))) - #ret = elem.xslt(xslt_root) - xslt = etree.XSLT(xslt_root) + xslt = etree_XSLT_safe(xslt_root) try: ret = xslt(elem, profile_run=profile) except etree.XSLTApplyError as e: @@ -886,7 +886,8 @@ class XMLFilter(Filter, MetaPlugin): # XXX: ugly solution to get rid of the unneeded namespace # (cleanup_namespaces did not work here) - ret = etree.fromstring(etree.tostring(ret)) + ret = etree.fromstring(etree.tostring(ret), + parser=etree_parser_safe) etree.cleanup_namespaces(ret) return ret @@ -1,5 +1,5 @@ # -*- coding: UTF-8 -*- -# Copyright 2015 Red Hat, Inc. +# Copyright 2016 Red Hat, Inc. # Part of clufter project # Licensed under GPLv2+ (a copy included | http://gnu.org/licenses/gpl-2.0.txt) """Base format stuff (metaclass, classes, etc.)""" @@ -35,6 +35,7 @@ from .utils import arg2wrapped, args2sgpl, args2tuple, args2unwrapped, \ isinstanceupto, \ popattr, \ tuplist +from .utils_lxml import etree_parser_safe from .utils_prog import ProtectedDict, getenv_namespaced from .utils_xml import rng_get_start, rng_pivot @@ -722,7 +723,7 @@ class XML(SimpleFormat): schema = None if schema is None: try: - schema = etree.parse(s) + schema = etree.parse(s, parser=etree_parser_safe) rng = etree.RelaxNG(schema) cls._validation_cache[s] = schema, rng except (etree.RelaxNGError, etree.XMLSyntaxError): @@ -761,4 +762,5 @@ class XML(SimpleFormat): # pre 2.7 compat: http://bugs.python.org/issue5982 validator=etree_validator.__get__(1).im_func) def get_etree(self, *protodecl): - return etree.fromstring(self.BYTESTRING()).getroottree() + return etree.fromstring(self.BYTESTRING(), + parser=etree_parser_safe).getroottree() diff --git a/utils_lxml.py b/utils_lxml.py new file mode 100644 index 0000000..2221fab --- /dev/null +++ b/utils_lxml.py @@ -0,0 +1,20 @@ +# -*- coding: UTF-8 -*- +# Copyright 2016 Red Hat, Inc. 
+# Part of clufter project +# Licensed under GPLv2+ (a copy included | http://gnu.org/licenses/gpl-2.0.txt) +"""Wrapper around standard lxml.etree static methods""" +__author__ = "Jan Pokorný <jpokorny @at@ Red Hat .dot. com>" + +from lxml import etree + +etree_XSLT_safe = lambda _input, **kwargs: \ + etree.XSLT(_input, + **dict(access_control=etree.XSLTAccessControl.DENY_ALL, + **kwargs)) + +etree_parser_safe_kwargs = dict( + no_network=True, + #resolve_entities=False, +) + +etree_parser_safe = etree.XMLParser(**etree_parser_safe_kwargs) |