summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Pokorný <jpokorny@redhat.com>2016-02-01 22:09:44 +0100
committerJan Pokorný <jpokorny@redhat.com>2016-02-01 23:55:09 +0100
commiteeb3baa8119383ee28da703721263e655716d4f2 (patch)
tree17fe68f9ef2e561816e3abd8e3794107d16cf2a6
parentedb2861208291b9e5c7d8a377d7bd5725031ac34 (diff)
downloadclufter-eeb3baa8119383ee28da703721263e655716d4f2.tar.gz
clufter-eeb3baa8119383ee28da703721263e655716d4f2.tar.xz
clufter-eeb3baa8119383ee28da703721263e655716d4f2.zip
filter+format: harden (+prepare grounds for) XML/XSLT usage
Forbid any direct network/disk side-effects for XSLT and make sure we are using an XML parser without network access. In the future, there is a possibility to also forbid entity replacement, should it be considered risky and the restriction unintrusive. The new lxml.etree wrappers are placed into the new utils_lxml module. Signed-off-by: Jan Pokorný <jpokorny@redhat.com>
-rw-r--r--filter.py13
-rw-r--r--format.py8
-rw-r--r--utils_lxml.py20
3 files changed, 32 insertions, 9 deletions
diff --git a/filter.py b/filter.py
index 5f63836..ca9b214 100644
--- a/filter.py
+++ b/filter.py
@@ -1,5 +1,5 @@
# -*- coding: UTF-8 -*-
-# Copyright 2015 Red Hat, Inc.
+# Copyright 2016 Red Hat, Inc.
# Part of clufter project
# Licensed under GPLv2+ (a copy included | http://gnu.org/licenses/gpl-2.0.txt)
"""Base filter stuff (metaclass, decorator, etc.)"""
@@ -32,6 +32,7 @@ from .utils import args2tuple, arg2wrapped, \
filterdict_keep, filterdict_invkeep, filterdict_pop, \
head_tail, hybridproperty, \
identity, lazystring, tuplist
+from .utils_lxml import etree_XSLT_safe, etree_parser_safe
from .utils_func import apply_preserving_depth, \
apply_aggregation_preserving_depth, \
apply_intercalate, \
@@ -422,7 +423,7 @@ class XMLFilter(Filter, MetaPlugin):
if not reply:
return False, force # terminating
elems = []
- reply = etree.fromstring(reply)
+ reply = etree.fromstring(reply, parser=etree_parser_safe)
if reply.attrib.get('force-block', '').lower() == 'true':
force = 'block'
for root_pi in xml_get_root_pi(reply):
@@ -521,7 +522,7 @@ class XMLFilter(Filter, MetaPlugin):
cl = ret.xpath("//processing-instruction('{0}')".format(pi_comment))
for e in cl:
# XXX could be done better? (e.text.strip().join((' ', ) * 2))
- reverted = etree.fromstring(e.text)
+ reverted = etree.fromstring(e.text, parser=etree_parser_safe)
element_juggler.rebind(nselem(CLUFTER_NS, 'comment', *tuple(
reverted if len(reverted) else
args2tuple(reverted.text))),
@@ -849,8 +850,7 @@ class XMLFilter(Filter, MetaPlugin):
elem = etree.ElementTree(elem) # XXX not getroottree?
log.debug("Applying {0}, {1}".format(type(elem), etree.tostring(elem)))
log.debug("Applying on {0}".format(etree.tostring(xslt_root)))
- #ret = elem.xslt(xslt_root)
- xslt = etree.XSLT(xslt_root)
+ xslt = etree_XSLT_safe(xslt_root)
try:
ret = xslt(elem, profile_run=profile)
except etree.XSLTApplyError as e:
@@ -886,7 +886,8 @@ class XMLFilter(Filter, MetaPlugin):
# XXX: ugly solution to get rid of the unneeded namespace
# (cleanup_namespaces did not work here)
- ret = etree.fromstring(etree.tostring(ret))
+ ret = etree.fromstring(etree.tostring(ret),
+ parser=etree_parser_safe)
etree.cleanup_namespaces(ret)
return ret
diff --git a/format.py b/format.py
index 7976dfe..ede8bd4 100644
--- a/format.py
+++ b/format.py
@@ -1,5 +1,5 @@
# -*- coding: UTF-8 -*-
-# Copyright 2015 Red Hat, Inc.
+# Copyright 2016 Red Hat, Inc.
# Part of clufter project
# Licensed under GPLv2+ (a copy included | http://gnu.org/licenses/gpl-2.0.txt)
"""Base format stuff (metaclass, classes, etc.)"""
@@ -35,6 +35,7 @@ from .utils import arg2wrapped, args2sgpl, args2tuple, args2unwrapped, \
isinstanceupto, \
popattr, \
tuplist
+from .utils_lxml import etree_parser_safe
from .utils_prog import ProtectedDict, getenv_namespaced
from .utils_xml import rng_get_start, rng_pivot
@@ -722,7 +723,7 @@ class XML(SimpleFormat):
schema = None
if schema is None:
try:
- schema = etree.parse(s)
+ schema = etree.parse(s, parser=etree_parser_safe)
rng = etree.RelaxNG(schema)
cls._validation_cache[s] = schema, rng
except (etree.RelaxNGError, etree.XMLSyntaxError):
@@ -761,4 +762,5 @@ class XML(SimpleFormat):
# pre 2.7 compat: http://bugs.python.org/issue5982
validator=etree_validator.__get__(1).im_func)
def get_etree(self, *protodecl):
- return etree.fromstring(self.BYTESTRING()).getroottree()
+ return etree.fromstring(self.BYTESTRING(),
+ parser=etree_parser_safe).getroottree()
diff --git a/utils_lxml.py b/utils_lxml.py
new file mode 100644
index 0000000..2221fab
--- /dev/null
+++ b/utils_lxml.py
@@ -0,0 +1,20 @@
+# -*- coding: UTF-8 -*-
+# Copyright 2016 Red Hat, Inc.
+# Part of clufter project
+# Licensed under GPLv2+ (a copy included | http://gnu.org/licenses/gpl-2.0.txt)
+"""Wrapper around standard lxml.etree static methods"""
+__author__ = "Jan Pokorný <jpokorny @at@ Red Hat .dot. com>"
+
+from lxml import etree
+
+etree_XSLT_safe = lambda _input, **kwargs: \
+ etree.XSLT(_input,
+ **dict(access_control=etree.XSLTAccessControl.DENY_ALL,
+ **kwargs))
+
+etree_parser_safe_kwargs = dict(
+ no_network=True,
+ #resolve_entities=False,
+)
+
+etree_parser_safe = etree.XMLParser(**etree_parser_safe_kwargs)