summaryrefslogtreecommitdiffstats
path: root/bin
diff options
context:
space:
mode:
authorTommy Reynolds <Tommy.Reynolds@MegaCoder.com>2005-12-15 03:19:56 +0000
committerTommy Reynolds <Tommy.Reynolds@MegaCoder.com>2005-12-15 03:19:56 +0000
commit4842dac2982e1f91ed13aaac0241ddb43bda5ed1 (patch)
tree14b1acab6a2c1af76580180f83ea238221430057 /bin
parenta2b64d8da9f057e5db398238f06dbbdeb279ef41 (diff)
downloadfedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.gz
fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.xz
fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.zip
Here is a very simple XMLDIFF tool that I've hacked to work reasonably
with the FDP files. Look in the "xmldiff.pl" file for information about its origin.
Diffstat (limited to 'bin')
-rwxr-xr-xbin/xmldiff3
-rw-r--r--bin/xmldiff.pl126
-rwxr-xr-xbin/xmlpp403
3 files changed, 532 insertions, 0 deletions
diff --git a/bin/xmldiff b/bin/xmldiff
new file mode 100755
index 0000000..9e12206
--- /dev/null
+++ b/bin/xmldiff
@@ -0,0 +1,3 @@
+#!/bin/sh
+export PATH="$(dirname "$0"):${PATH}"   # xmldiff.pl runs "xmlpp" by bare name, so this directory must be on PATH
+exec perl "$(dirname "$0")/$(basename "$0").pl" "$@"   # perl does not search PATH for a script name; "$@" keeps argument boundaries
diff --git a/bin/xmldiff.pl b/bin/xmldiff.pl
new file mode 100644
index 0000000..0e59561
--- /dev/null
+++ b/bin/xmldiff.pl
@@ -0,0 +1,126 @@
+#!/usr/bin/perl
+#
+# Copyright (c) 2002, DecisionSoft Limited All rights reserved.
+# Please see:
+# http://software.decisionsoft.com/licence.html
+# for more information.
+#
+# Modified for the Fedora Docs Project by Tommy.Reynolds@MegaCoder.com
+#
+
+#
+# xmldiff: xmldiff program - uses xmlpp, which must be on the ${PATH}
+#
+
+#Change this if xmlpp is not in your current path
+#for example: $XMLPP = "./xmlpp";
+$XMLPP = "xmlpp";
+
+use Getopt::Std;
+
+# getopts() sets $opt_X for each switch given and returns false on an
+# unknown switch; show the usage text instead of carrying on regardless.
+getopts('tscupChHSi') or usage();
+
+# Exactly two operands are required: the old file and the new file.
+usage() if $opt_h || @ARGV != 2;
+
+my $diffOpts;
+
+# Count the selected output modes; switches not given are undef and add 0.
+my $outputFmt = $opt_u + $opt_c + $opt_s + $opt_p + $opt_C;
+$outputFmt = $opt_p = 1 if $outputFmt == 0;    # no mode given: default to -p
+if( $outputFmt > 1 ) {
+    print STDERR "Error: Only one mode may be specified\n";
+    usage();
+}
+
+if( $opt_s ) {
+ # Standard diff: no extra format options are passed to diff
+}
+if ( $opt_c ) {
+ # Plain context diff
+ $diffOpts .= "-c ";
+}
+if( $opt_u ) {
+ # Plain unified diff
+ $diffOpts .= "-u ";
+}
+if( $opt_p ) {
+ # Default mode (-p): lines are tagged "+ " (new file), "- " (old file) or a leading blank (unchanged)
+ # $diffOpts .= "-u ";
+ $diffOpts .= "--new-line-format='+ %l\n' ";    # the "\n" is a real newline inside the shell-quoted argument
+ $diffOpts .= "--old-line-format='- %l\n' ";
+ $diffOpts .= "--unchanged-line-format=' %l\n' ";
+}
+if( $opt_C ) {    # -C: same line tags as -p, plus marker lines around every changed group
+ $diffOpts .= "--changed-group-format='\n<<<<<<<<<<<<<<\n%<==============\n%>>>>>>>>>>>>>>>\n\n' ";
+ $diffOpts .= "--new-line-format='+ %l\n' ";
+ $diffOpts .= "--old-line-format='- %l\n' ";
+ $diffOpts .= "--unchanged-line-format=' %l\n' ";
+}
+if( $opt_H ) {    # -H is not one of the counted modes, so these options are added on top of the selected mode
+ $diffOpts .= "--changed-group-format='%<%>' ";
+ $diffOpts .= " --new-group-format='%>' ";
+ $diffOpts .= "--old-group-format='%<' ";
+ $diffOpts .= "--new-line-format='<font color=\"green\">+ %l</font>\n' ";    # NOTE(review): repeats the line formats set by -p/-C; presumably diff honours the last one given - confirm
+ $diffOpts .= "--old-line-format='<font color=\"red\">- %l</font>\n' ";
+ $diffOpts .= "--unchanged-line-format='<font color=\"gray\"> %l</font>\n' ";
+}
+
+# Set up xmlpp options: -t/-S/-H are passed through when given; -s (sort
+# attributes) and -e (expand self-closing tags) are always used so that
+# both files are normalised the same way before being compared.
+my $prettyOpts = $opt_t ? "-t " : "";
+$prettyOpts .= $opt_S ? "-S " : "";
+$prettyOpts .= $opt_H ? "-H " : "";
+$prettyOpts .= "-s -e ";
+
+$file1 = "xmlppTEMP1.$$";    # scratch files in the current directory,
+$file2 = "xmlppTEMP2.$$";    # named after this process id
+
+my $results = 0;             # sum of the raw wait statuses of all children
+$results += system("$XMLPP $prettyOpts '$ARGV[0]' > $file1");
+$results += system("$XMLPP $prettyOpts '$ARGV[1]' > $file2");
+
+if($opt_H) {
+    print "<HTML>\n";
+    print " <HEAD>\n";
+    print " <TITLE>XML Diff</TITLE>\n";
+    print " </HEAD>\n";
+    print " <BODY bgcolor=\"#FEFEFF\">\n";
+    print " <PRE>";
+    $results += system("/usr/bin/diff -bB $diffOpts $file1 $file2");
+    # Do not add extra whitespace before the </PRE>
+    print "</PRE>\n";
+    print " </BODY>\n";
+    print "</HTML>\n";
+} else {
+    $results += system("/usr/bin/diff -bB $diffOpts $file1 $file2");
+}
+unlink($file1,$file2);
+# system() returns a wait status (exit code << 8) and exit() keeps only the
+# low 8 bits, so the raw sum would report 0 whenever diff found differences.
+# Exit with the summed exit codes instead; any other failure becomes non-zero.
+exit( $results == 0 ? 0 : ((($results >> 8) & 0xFF) || 1) );
+
+sub usage {    # Print the command-line help for xmldiff on STDERR, then exit with status 1.
+    print {*STDERR} <<EOF;
+usage: $0 [ mode ] [ options ] oldfile.xml newfile.xml
+
+mode must be one of:
+ -p coloured unified diff [default]
+ -c context diff
+ -u unified diff
+ -s standard diff output
+ -C vaguely CVS like unified diff
+
+options:
+ -H HTML output
+ -t split attributes - good for spotting changes in attributes
+ -S schema hack mode - good for diffing schemas
+ -i ignore element and attribute contents
+
+EOF
+    exit(1);
+}
diff --git a/bin/xmlpp b/bin/xmlpp
new file mode 100755
index 0000000..3de854a
--- /dev/null
+++ b/bin/xmlpp
@@ -0,0 +1,403 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright (c) 2002, DecisionSoft Limited All rights reserved.
+# Please see:
+# http://software.decisionsoft.com/licence.html
+# for more information.
+#
+
+# $Revision: 1.1 $
+#
+# xmlpp: XML pretty printing
+#
+
+# For custom attribute sorting, create an attributeOrdering.txt file in the
+# same directory as xmlpp, listing one attribute name per line in the order
+# you would like them to be sorted. Then use the -s option.
+
+use FileHandle;
+use Fcntl;
+use Getopt::Std;
+
+use vars qw($opt_h $opt_H $opt_s $opt_z $opt_t $opt_e $opt_S $opt_c $opt_n);
+
+my $indent=0;    # current nesting depth; repeat count for the indentation string
+my $textContent='';    # character data collected since the last tag or comment
+my $lastTag=undef;    # 1 after an opening tag was written, 0 after a closing or self-closing tag
+my $output;    # the pretty-printed document, built up as a single string
+my $inAnnotation = 0;    # true while an annotation element is being dropped (schema hack mode)
+
+
+if (!getopts('nzhHsteSc') or $opt_h) {    # unknown switch, or -h: show the help text and exit
+ usage();
+}
+
+if ($opt_s){    # -s: load the optional custom attribute order
+
+# expect to find attributeOrdering.txt file in same directory
+# as xmlpp is being run from
+
+ my $scriptDir = $0;
+ if ($scriptDir =~ m#/#){
+ $scriptDir =~ s#/[^/]+$##;    # strip the script's file name, keeping its directory
+ }
+ else{
+ $scriptDir =".";    # invoked without a path component: look in the current directory
+ }
+
+ # get attribute ordering from external file
+ if (open(SORTLIST, "<$scriptDir/attributeOrdering.txt")) {
+ @sortlist = <SORTLIST>;    # $/ is still the default here, so this reads one entry per line
+ chomp @sortlist;
+ close (SORTLIST);
+ @specialSort = grep(/^\w+/, @sortlist);    # keep only lines that begin with a word character
+ }
+ else {
+# print STDERR "Could not open $scriptDir/attributeOrdering.txt: $!\nWARNING attribute sorting will only be alphabetic\n\n";
+ }
+}
+
+
+# set line separator to ">" speeding up parsing of XML files
+# with no line breaks
+
+$/ = ">";    # every <$fh> read from here on returns text up to and including the next ">"
+
+
+my $sortAttributes = $opt_s;    # descriptive aliases for the option switches, read by the parse* subs
+my $newLineComments = $opt_c;
+my $splitAttributes = $opt_t;
+my $schemaHackMode = $opt_S;
+my $normaliseWhiteSpace = $opt_n;
+
+my $filename = $ARGV[0];    # first operand; undef means read STDIN
+if ($opt_z && (!$filename or $filename eq '-')) {    # -z rewrites the named file, so a name is required
+ print STDERR "Error: I can't edit STDIN in place.\n";
+ usage();
+}
+
+if (!$opt_z && scalar(@ARGV) > 1) {    # without -z the main loop handles a single input only
+ print STDERR "Warning: Multiple files specified without -z option\n";
+}
+
+my $fh;    # handle of the input currently being parsed
+
+my $stdin;    # 1 when reading STDIN, 0 when reading named files
+
+if (!$filename or $filename eq '-') {
+ $fh=*STDIN;
+ $stdin=1;
+} else {
+ $fh = open_next_file() or exit(1);    # called without a reference: $filename keeps the $ARGV[0] value assigned above
+ $stdin=0;
+}
+
+do {    # one pass per input; repeats only for further files in -z mode (see the loop condition)
+ $indent=0;    # reset the per-document state used by the parse* subs
+ $textContent='';
+ $lastTag=undef;
+ $output = '';
+ my $re_name = "(?:[A-Za-z0-9_:][A-Za-z0-9_:.-]*)";    # pattern used for element and attribute names
+ my $re_attr = "(?:'[^']*'|\"[^\"]*\")";    # a single- or double-quoted attribute value
+ my $input;    # unparsed text carried over between reads
+
+ while ($input .= <$fh>) {    # append the next chunk (up to the next ">") to the buffer
+ while ($input) {    # consume every complete construct at the front of the buffer
+ if ($input =~ s/^<($re_name)((?:\s+$re_name\s*=\s*$re_attr)*\s*)(\/?)>(.*)$/$4/s ) {    # start tag or self-closing tag
+ my %attr;
+ my ($name,$attr,$selfclose) = ($1,$2,$3);
+ while ($attr =~ m/($re_name)\s*=\s*($re_attr)/gs) {    # split the attribute text into name/value pairs
+ my ($name,$value) = ($1,$2);
+ $value =~ s/^["'](.*)["']$/$1/s;    # strip the surrounding quotes
+ $attr{$name} = $value;
+ }
+ if ($opt_e) {    # -e: report a self-closing tag as a start tag followed by an end tag
+ parseStart($name, 0, %attr);
+ if ($selfclose) { parseEnd($name) }
+ } else {
+ parseStart($name, $selfclose, %attr);
+ }
+ } elsif ($input =~ s/^<\/($re_name)\s*>(.*)$/$2/s) {    # end tag
+ parseEnd($1);
+ } elsif ($input =~ s/^<!--(.*?)-->(.*)$/$2/s) {    # complete comment
+ parseComment($1);
+ } elsif ($input =~ s/^([^<]+)(.*)$/$2/s) {    # character data up to the next "<"
+ parseDefault($1);
+ } elsif ($input =~ s/^(<\?[^>]*\?>)(.*)$/$2/s) {    # processing instruction or XML declaration
+ parsePI("$1\n");
+ } elsif ($input =~ s/^(<\!DOCTYPE[^\[>]*(\[[^\]]*\])?[^>]*>)(.*)$/$3/s) {    # DOCTYPE, with optional [...] internal subset
+ parseDoctype("$1");
+ } else {
+ last;    # nothing matched yet: leave the buffer as it is and read more input
+ }
+ }
+ if (eof($fh)) {    # stop once the input is exhausted, even if the buffer is not empty
+ last;
+ }
+ }
+
+
+ if ($input) {    # text left over that none of the patterns could consume
+ $input =~ m/([^\n]+)/gs;    # capture the first non-empty line of the leftover for the warning
+ print STDERR "WARNING: junk remaining on input: $1\n";
+ }
+ $fh->close();
+
+ if (!$opt_z) {    # normal mode: write the result to STDOUT
+ if(!$opt_H){
+ print "$output\n"
+ } else {
+ print html_escape($output)."\n"    # -H: escape &, < and > in the output
+ }
+ } else {    # -z: write the result back over the input file
+ if ($input) {    # the parse was incomplete, so the original file is kept
+ print STDERR "Not overwriting file\n";
+ } else {
+ open FOUT,"> $filename" or die "Cannot overwrite file: $!";
+ if(!$opt_H){
+ print FOUT "$output\n"
+ } else {
+ print FOUT html_escape($output)."\n"
+ }
+ close FOUT
+ }
+ }
+} while (
+ !$stdin && $opt_z && ($fh = open_next_file(\$filename))    # next file only when editing named files in place; stores its name in $filename
+ );
+
+
+
+sub parseStart {
+    # Write a start tag, or a self-closing tag, for one element to $output.
+    #
+    #   $s          element name
+    #   $selfclose  true to write "<name ... />" instead of "<name ...>"
+    #   %attr       attribute name => value, surrounding quotes removed
+    #
+    # Pending text content is flushed first.  In schema hack mode an
+    # annotation element is not written at all: $inAnnotation is set so that
+    # everything up to its end tag is dropped.
+    #
+    # Updates the file-level state $output, $indent, $lastTag, $textContent
+    # and $inAnnotation.
+    my $s = shift;
+    my $selfclose = shift;
+    my %attr = @_;
+
+    # Flush the text that preceded this tag, minus its trailing whitespace.
+    $textContent =~ s/\s+$//;
+    printContent($textContent);
+
+    # Inside a dropped annotation nested elements are ignored as well.
+    if($inAnnotation) {
+        return;
+    }
+
+    # Start of an annotation in schema hack mode: drop it and its content.
+    if($schemaHackMode and $s =~ m/(^|:)annotation$/) {
+        $inAnnotation = 1;
+        $textContent = '';
+        $lastTag = 1;
+        return;
+    }
+    # Every tag starts on a new line, except at the very start of the output.
+    if (length($output)) {
+        $output .= "\n";
+    }
+
+    # Indent according to the current nesting depth.
+    $output .= " " x $indent;
+    $output .= "<$s";
+
+    # Without -s the attributes are written in whatever order keys() yields.
+    my @k = keys %attr;
+
+    if ($sortAttributes && (scalar(@k) > 1) ){
+        # Attributes listed in attributeOrdering.txt (@specialSort) come
+        # first, in the order that file gives them; all remaining attributes
+        # follow in alphabetical order.
+        #
+        # Hash lookups are used so that every attribute is written exactly
+        # once, even if the ordering file names the same attribute on more
+        # than one line.  Matching the two lists with nested loops instead
+        # would add such an attribute once per pairing of repeated entries.
+        my %present = map { $_ => 1 } @k;
+        my %placed;
+        my @final = grep { $present{$_} && !$placed{$_}++ } @specialSort;
+        push @final, grep { !$placed{$_} } sort @k;
+        @k = @final;
+    }
+
+    foreach my $attr (@k) {
+        #
+        # Remove (min|max)Occurs = 1 if schemaHackMode
+        #
+        if ($schemaHackMode and $attr =~ m/^(minOccurs|maxOccurs)$/ and $attr{$attr} eq "1") {
+            next;
+        }
+
+        # -t: put each attribute on a line of its own.
+        if ($splitAttributes) {
+            $output .= "\n"." " x $indent." ";
+        }
+
+        # Quote with double quotes when the value contains a single quote.
+        if ($attr{$attr} =~ /'/) {
+            $output .= " $attr=\"$attr{$attr}\"";
+        } else {
+            $output .= " $attr='$attr{$attr}'";
+        }
+    }
+    # With split attributes the closing bracket goes on its own line too.
+    if ($splitAttributes and @k) {
+        $output .= "\n"." " x $indent;
+    }
+
+    if ($selfclose) {
+        $output .= " />";
+        $lastTag = 0;
+    } else {
+        # An open element: its children are indented one level deeper.
+        $output .= ">";
+        $indent++;
+        $lastTag = 1;
+    }
+    $textContent = '';
+}
+
+sub parseEnd {
+    # Emit the end tag "</$s>" for element $s, after flushing pending text.
+    # Inside a dropped annotation (schema hack mode) nothing is written; the
+    # annotation's own end tag merely switches output back on.
+    my $s = shift;
+    if($inAnnotation) {
+        $inAnnotation = 0 if $s =~ m/(^|:)annotation$/;
+        return;
+    }
+
+    # -n: trim the text content.  /s lets "." match newlines; without it the
+    # pattern cannot match text spanning several lines, leaving it untrimmed.
+    $textContent =~ s/^\s*(.*?)\s*$/$1/s if $normaliseWhiteSpace;
+    $indent--;
+    printContent($textContent);
+    if ($lastTag == 0) {    # previous item was a closing tag: close on a new, indented line
+        $output .= "\n";
+        $output .= " " x $indent;
+    }
+    $output .= "</$s>";
+    $textContent = '';
+    $lastTag = 0;
+}
+
+sub parseDefault {    # Collect character data in $textContent; ignored inside a dropped annotation.
+    my ($text) = @_;
+    return if $inAnnotation;
+    $textContent .= $text;
+}
+
+sub parsePI {    # Append a processing instruction, exactly as passed in, to $output.
+    my ($instruction) = @_;
+    $output .= $instruction;
+}
+
+sub parseDoctype {
+    # Append a DOCTYPE declaration to $output.  When it has an internal subset
+    # ([...]), every "<" inside the subset is moved to the start of a new line.
+    my $s = shift;
+    if (my ($start, $DTD, $finish) = $s =~ /^([^\[]*\[)([^\]]*)(\].*)$/ms) {
+        # The three pieces are lexicals so no package globals are left behind.
+        $DTD =~ s/\</\n \</msg;
+        $output .= "$start$DTD\n$finish\n";
+    } else {
+        $output .= "$s";
+    }
+}
+
+sub parseComment {    # Append the comment whose body is $s to $output, tidying any markup it contains.
+ my $s = shift;
+ if($inAnnotation) { return }    # comments inside a dropped annotation are dropped too
+ printContent($textContent,1);    # flush pending text first, letting it keep a trailing newline
+ if ($s =~ /([^\<]*)(<.*>)(.*)/ms) {    # the comment contains markup: split into text before, markup, text after
+ $start = $1;
+ $xml = $2;
+ $finish = $3;
+ $xml =~ s/\</\n\</msg;    # start every "<" on a new line
+ $xml =~ s/(\n\s*\n?)+/\n/msg;    # collapse runs of newlines and whitespace into one newline
+ $xml =~ s/^\s*//msg;    # remove leading whitespace from each line
+ $xml =~ s/\s*$//msg;    # remove trailing whitespace from each line
+ $s = "$start\n$xml\n$finish";
+ }
+ $s =~ s/\n\s*$/\n /msg;    # replace a newline plus trailing whitespace with a newline and one blank
+ if ($newLineComments) {    # -c: put the comment on lines of its own
+ $output .= "\n<!--$s-->\n";
+ } else {
+ $output .= "<!--$s-->";
+ }
+ $textContent='';
+}
+
+sub printContent {
+    # Append pending character data to $output.
+    #
+    #   $s        the text; it is written only if it contains something
+    #             other than whitespace
+    #   $printLF  if true, also append a newline whenever $s ends in a
+    #             newline plus optional whitespace, even when $s itself was
+    #             whitespace-only and so was not written
+    #
+    # Callers use this only for its effect on $output; no meaningful value
+    # is returned.
+    my ($s, $printLF) = @_;
+
+    $output .= "$s" unless $s =~ m/^\s*$/;
+    if ($printLF and $s =~ m/\n\s*$/) {
+        $output .= "\n";
+    }
+}
+
+
+sub html_escape {    # Return a copy of the argument with &, < and > replaced by their entities.
+    my ($text) = @_;
+    my %entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
+    # A single pass is enough: inserted entity text is never scanned again.
+    $text =~ s/([&<>])/$entity{$1}/g;
+    return $text;
+}
+
+sub open_next_file {
+    # Open the next plain file named on @ARGV (skipping others with a warning)
+    # and return its FileHandle, or nothing when no usable name remains.
+    my $nameRef = shift || \(my $unused);    # optional scalar ref that receives the chosen file name
+    $$nameRef = shift @ARGV;
+    while ($$nameRef and ! -f $$nameRef) {
+        print STDERR "WARNING: Could not find file: $$nameRef\n";
+        $$nameRef = shift @ARGV;
+    }
+    return unless $$nameRef;
+    my $fh = FileHandle->new;
+    $fh->open($$nameRef, "<") or die "Can't open $$nameRef: $!";    # mode passed separately, so the name is never parsed for mode characters
+    return $fh;
+}
+
+sub usage {    # Print the command-line help for xmlpp on STDERR, then exit with status 1.
+    print {*STDERR} <<EOF;
+usage: $0 [ options ] [ file.xml ... ]
+
+options:
+ -h display this help message
+ -H escape characters (useful for further processing)
+ -t split attributes, one per line (useful for diff)
+ -s sort attributes (useful for diff)
+ -z in place edit (zap)
+ -e expand self closing tags (useful for diff)
+ -S schema hack mode (used by xmldiff)
+ -c place comments on new line.
+ -n normalise whitespace (remove leading and trailing whitespace from nodes
+ with text content.
+
+EOF
+    exit(1);
+}
+