From 90ca9dac132dc396e0f064ff01556740ca09e2b0 Mon Sep 17 00:00:00 2001
From: Daniel Elstner
Date: Tue, 27 Jan 2009 16:21:13 +0000
Subject: Do not escape the ampersand "&" in entity references (bug #568485).

* codegen/docextract_to_xml.py (escape_text): Do not escape the
ampersand "&" in entity references (bug #568485). Replace some
unusual entity references in the output with their literal values.

svn path=/trunk/; revision=998
---
 codegen/docextract_to_xml.py | 19 ++++++++++++-------
 1 file changed, 12 insertions, 7 deletions

(limited to 'codegen/docextract_to_xml.py')

diff --git a/codegen/docextract_to_xml.py b/codegen/docextract_to_xml.py
index 76ac85d..f8d3bae 100755
--- a/codegen/docextract_to_xml.py
+++ b/codegen/docextract_to_xml.py
@@ -7,21 +7,26 @@
 # # ./docextract_to_xml.py -s /gnome/head/cvs/gtk+/gtk/ -s /gnome/head/cvs/gtk+/docs/reference/gtk/tmpl/ > gtk_docs.xml
 
 import getopt
+import re
 import string
 import sys
 
 import docextract
 
 def escape_text(unescaped_text):
-    escaped_text = unescaped_text
+    # Escape every "&" not part of an entity reference
+    escaped_text = re.sub(r'&(?![A-Za-z]+;)', '&amp;', unescaped_text)
+
+    # These weird entities turn up in the output...
+    escaped_text = string.replace(escaped_text, '&mdash;', '&#8212;')
+    escaped_text = string.replace(escaped_text, '&ast;', '*')
+    escaped_text = string.replace(escaped_text, '&percnt;', '%')
+    escaped_text = string.replace(escaped_text, '&commat;', '@')
+
+    # Escape for both tag contents and attribute values
     escaped_text = string.replace(escaped_text, '<', '&lt;')
     escaped_text = string.replace(escaped_text, '>', '&gt;')
-    escaped_text = string.replace(escaped_text, '&', '&amp;')
-    escaped_text = string.replace(escaped_text, '\'', '&apos;')
-    escaped_text = string.replace(escaped_text, '\"', '&quot;')
-
-    #Apparently this is an undefined symbol:
-    escaped_text = string.replace(escaped_text, '&mdash;', ' mdash ')
+    escaped_text = string.replace(escaped_text, '"', '&quot;')
 
     return escaped_text
 
-- 
cgit