From 684570b0277ce771fc66bcfdbe352eeaa541786d Mon Sep 17 00:00:00 2001 From: ddomingo Date: Mon, 2 Mar 2009 11:54:01 +1000 Subject: added publicanizer for langref --- .../publicanize-langref.sh | 157 +++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100755 doc/Language_Reference_Guide/publicanize-langref.sh (limited to 'doc/Language_Reference_Guide/publicanize-langref.sh') diff --git a/doc/Language_Reference_Guide/publicanize-langref.sh b/doc/Language_Reference_Guide/publicanize-langref.sh new file mode 100755 index 00000000..fd4edf6a --- /dev/null +++ b/doc/Language_Reference_Guide/publicanize-langref.sh @@ -0,0 +1,157 @@ +#!/bin/bash +#this script converts the langref.tex source for the Language Reference Guide into +#DocBook XML. the conversion is done thru latexml, a utility that comes with dblatex-0.2.7. +#the output xml file of latexml is pretty dirty, so this script is needed to further clean it up. + +#copy latex file to here +cp ../langref.tex . + +#convert it to raw xml +latexml langref.tex --dest=Language_Reference_Guide.xml + +#remove excess whitespace +sed -i -e 's/^\s*//g' Language_Reference_Guide.xml + +sed -i -e 's///g' Language_Reference_Guide.xml + +cat Language_Reference_Guide.xml | +perl -p -e 'undef $/;s|\n<\?latexml options="twoside,english" class="article"\?>\n<\?latexml package="geometry"\?>\n<\?latexml RelaxNGSchema="LaTeXML"\?>\n<\?latexml RelaxNGSchema="LaTeXML"\?>\n\nSystemTap Language Reference|<\!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +]>\n\n|msg' | +perl -p -e 'undef $/;s|\n

This document was derived from other documents contributed to the SystemTap project by employees of Red Hat, IBM and Intel.

\n
\n\n

Copyright © 2007 Red Hat Inc.\nCopyright © 2007 IBM Corp.\nCopyright © 2007 Intel Corporation.

\n
\n\n

Permission is granted to copy, distribute and/or modify this document\nunder the terms of the GNU Free Documentation License, Version 1.2\nor any later version published by the Free Software Foundation;\nwith no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.

\n
\n\n

The GNU Free Documentation License is available from\nhttp://www.gnu.org/licenses/fdl.html or by writing to\nthe Free Software Foundation, Inc., 51 Franklin Street,\nFifth Floor, Boston, MA 02110-1301, USA.

\n
||msg' | +#fix up screens +perl -p -e 'undef $/;s|\n\n||msg' | +perl -p -e 'undef $/;s|\n\n\n||msg' | +perl -p -e 'undef $/;s|\n\n||msg' | +perl -p -e 'undef $/;s|\n\n\n||msg' | +#fix up index tags +perl -p -e 'undef $/;s|\nIndex\n||msg' | +#needed later, for TABLES! +perl -p -e 'undef $/;s|\n||msg' > clean.xml + +#further fix up headers! +perl -p -i -e 's|<\?latexml searchpaths="[^>]*>\n||g' clean.xml + + +#change main tags +sed -i -e 's/<\/document>/<\/book>/g' clean.xml + +#more fixup for screen tags +perl -p -i -e 's|||g' clean.xml +perl -p -i -e 's|||g' clean.xml + +#clean section tags +sed -i -e 's/
/<\/chapter>/g' clean.xml + +#change subsection and subsubsection tags to section +sed -i -e 's//<\/section>/g' clean.xml +sed -i -e 's//<\/section>/g' clean.xml + +#remove with +sed -i -e 's///g' clean.xml +sed -i -e 's/<\/para>//g' clean.xml +sed -i -e 's/

//g' clean.xml +sed -i -e 's/<\/p>/<\/para>/g' clean.xml + +#properly convert xrefs +sed -i -e 's//indexterm>/g' clean.xml +perl -p -i -e 's///g' clean.xml +sed -i -e 's///g' clean.xml +sed -i -e 's/<\/indexphrase>/<\/primary>/g' clean.xml + +#convert s +sed -i -e 's/emph>/emphasis>/g' clean.xml + +#convert itemizedlists and listitems, dependent on successful exec of "fix up screens" perl routines +sed -i -e 's///g' clean.xml +sed -i -e 's///g' clean.xml +sed -i -e 's/<\/itemize>/<\/itemizedlist>/g' clean.xml +sed -i -e 's/<\/item>/<\/listitem>/g' clean.xml + +#convert orderedlists and their respective listitems +perl -p -i -e 's|||g' clean.xml +perl -p -i -e 's|||g' clean.xml +perl -p -i -e 's|||g' clean.xml + +#TRICKY: this perl expression takes all occurences of +# http://sourceware.org/systemtap/wiki/HomePage +# and replaces the string with "/>". from jfearn +# note: [^"]* means "any number of occurences of characters that are NOT quotes +# note: () groups strings/an expression together, which can be called later as $1 when replacing +perl -p -i -e 's|([^<]*|$1/>|g' clean.xml + +#now, convert s +sed -i -e 's/ accordingly; bold is , typewriter is +perl -p -i -e 's|([^<]*)|$1|g' clean.xml +sed -i -e 's///g' clean.xml +perl -p -i -e 's|([^<]*)|$1|g' clean.xml +sed -i -e 's///g' clean.xml + +#weird remainders, defaulting them to command +perl -p -i -e 's|([^<]*)|$1|g' clean.xml +sed -i -e 's///g' clean.xml +perl -p -i -e 's|([^<]*)|$1|g' clean.xml +sed -i -e 's///g' clean.xml + +#TABLES! +#the first expression is quite dirty, since it assumes that all tables have 3 columns. dunno yet how to +#automagicize this, since the orig XML doesn't have any attribute that specifies columns per table +sed -i -e 's///g' clean.xml +sed -i -e 's/tabular>/tgroup>/g' clean.xml +perl -p -i -e 's|)|
]*>||g' clean.xml +perl -p -i -e 's|]*>||g' clean.xml +perl -p -i -e 's|||g' clean.xml +perl -p -i -e 's|||g' clean.xml +perl -p -i -e 's|||g' clean.xml +perl -p -i -e 's|||g' clean.xml + +#remove "About this guide" section +#perl -p -i -e 'undef $/;s|
\nAbout this guide||msg' clean.xml + +#finalize: copy clean.xml to en-US, then deletes it +cp clean.xml en-US/Language_Reference_Guide.xml + +#delete excess files +rm langref.tex +rm clean.xml +rm Language_Reference_Guide.xml -- cgit