#!/bin/bash #this script converts the langref.tex source for the Language Reference Guide into #DocBook XML. the conversion is done thru latexml, a utility that comes with dblatex-0.2.7. #the output xml file of latexml is pretty dirty, so this script is needed to further clean it up. #copy latex file to here cp ../langref.tex . #convert it to raw xml latexml langref.tex --dest=Language_Reference_Guide.xml #remove excess whitespace sed -i -e 's/^\s*//g' Language_Reference_Guide.xml sed -i -e 's///g' Language_Reference_Guide.xml cat Language_Reference_Guide.xml | perl -p -e 'undef $/;s|\n<\?latexml options="twoside,english" class="article"\?>\n<\?latexml package="geometry"\?>\n<\?latexml RelaxNGSchema="LaTeXML"\?>\n<\?latexml RelaxNGSchema="LaTeXML"\?>\n\nSystemTap Language Reference|<\!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ ]>\n\n|msg' | perl -p -e 'undef $/;s|\n

This document was derived from other documents contributed to the SystemTap project by employees of Red Hat, IBM and Intel.

\n
\n\n

Copyright © 2007 Red Hat Inc.\nCopyright © 2007 IBM Corp.\nCopyright © 2007 Intel Corporation.

\n
\n\n

Permission is granted to copy, distribute and/or modify this document\nunder the terms of the GNU Free Documentation License, Version 1.2\nor any later version published by the Free Software Foundation;\nwith no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.

\n
\n\n

The GNU Free Documentation License is available from\nhttp://www.gnu.org/licenses/fdl.html or by writing to\nthe Free Software Foundation, Inc., 51 Franklin Street,\nFifth Floor, Boston, MA 02110-1301, USA.

\n
||msg' | #fix up screens perl -p -e 'undef $/;s|\n\n||msg' | perl -p -e 'undef $/;s|\n\n\n||msg' | perl -p -e 'undef $/;s|\n\n||msg' | perl -p -e 'undef $/;s|\n\n\n||msg' | #fix up index tags perl -p -e 'undef $/;s|\nIndex\n||msg' | #needed later, for TABLES! perl -p -e 'undef $/;s|\n||msg' > clean.xml #further fix up headers! perl -p -i -e 's|<\?latexml searchpaths="[^>]*>\n||g' clean.xml #change main tags sed -i -e 's/<\/document>/<\/book>/g' clean.xml #more fixup for screen tags perl -p -i -e 's|||g' clean.xml perl -p -i -e 's|||g' clean.xml #clean section tags sed -i -e 's/
/<\/chapter>/g' clean.xml #change subsection and subsubsection tags to section sed -i -e 's//<\/section>/g' clean.xml sed -i -e 's//<\/section>/g' clean.xml #remove with sed -i -e 's///g' clean.xml sed -i -e 's/<\/para>//g' clean.xml sed -i -e 's/

//g' clean.xml sed -i -e 's/<\/p>/<\/para>/g' clean.xml #properly convert xrefs sed -i -e 's//indexterm>/g' clean.xml perl -p -i -e 's///g' clean.xml sed -i -e 's///g' clean.xml sed -i -e 's/<\/indexphrase>/<\/primary>/g' clean.xml #convert s sed -i -e 's/emph>/emphasis>/g' clean.xml #convert itemizedlists and listitems, dependent on successful exec of "fix up screens" perl routines sed -i -e 's///g' clean.xml sed -i -e 's///g' clean.xml sed -i -e 's/<\/itemize>/<\/itemizedlist>/g' clean.xml sed -i -e 's/<\/item>/<\/listitem>/g' clean.xml #convert orderedlists and their respective listitems perl -p -i -e 's|||g' clean.xml perl -p -i -e 's|||g' clean.xml perl -p -i -e 's|||g' clean.xml #TRICKY: this perl expression takes all occurences of # http://sourceware.org/systemtap/wiki/HomePage # and replaces the string with "/>". from jfearn # note: [^"]* means "any number of occurences of characters that are NOT quotes # note: () groups strings/an expression together, which can be called later as $1 when replacing perl -p -i -e 's|([^<]*|$1/>|g' clean.xml #now, convert s sed -i -e 's/ accordingly; bold is , typewriter is perl -p -i -e 's|([^<]*)|$1|g' clean.xml sed -i -e 's///g' clean.xml perl -p -i -e 's|([^<]*)|$1|g' clean.xml sed -i -e 's///g' clean.xml #weird remainders, defaulting them to command perl -p -i -e 's|([^<]*)|$1|g' clean.xml sed -i -e 's///g' clean.xml perl -p -i -e 's|([^<]*)|$1|g' clean.xml sed -i -e 's///g' clean.xml #TABLES! #the first expression is quite dirty, since it assumes that all tables have 3 columns. dunno yet how to #automagicize this, since the orig XML doesn't have any attribute that specifies columns per table sed -i -e 's///g' clean.xml sed -i -e 's/tabular>/tgroup>/g' clean.xml perl -p -i -e 's|)|
]*>||g' clean.xml perl -p -i -e 's|]*>||g' clean.xml perl -p -i -e 's|||g' clean.xml perl -p -i -e 's|||g' clean.xml perl -p -i -e 's|||g' clean.xml perl -p -i -e 's|||g' clean.xml #remove "About this guide" section #perl -p -i -e 'undef $/;s|
\nAbout this guide||msg' clean.xml #finalize: copy clean.xml to en-US, then deletes it cp clean.xml en-US/Language_Reference_Guide.xml #delete excess files rm langref.tex rm clean.xml rm Language_Reference_Guide.xml