Here is a very simple XMLDIFF tool that I've hacked to work reasonably with
the FDP files. Look in the "xmldiff.pl" file for information about its origin.
author: Tommy Reynolds <Tommy.Reynolds@MegaCoder.com> 2005-12-15 03:19:56 +0000
committer: Tommy Reynolds <Tommy.Reynolds@MegaCoder.com> 2005-12-15 03:19:56 +0000
commit: 4842dac2982e1f91ed13aaac0241ddb43bda5ed1 (patch)
tree: 14b1acab6a2c1af76580180f83ea238221430057 /bin
parent: a2b64d8da9f057e5db398238f06dbbdeb279ef41 (diff)
download: fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.gz
fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.xz
fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.zip
3 files changed, 532 insertions, 0 deletions
diff --git a/bin/xmldiff b/bin/xmldiff
new file mode 100755
index 0000000..9e12206
--- /dev/null
+++ b/bin/xmldiff
@@ -0,0 +1,3 @@
+#!/bin/sh
+export PATH="$(dirname "$0"):${PATH}"	# lets xmldiff.pl find the xmlpp that sits next to this wrapper
+exec perl "$(dirname "$0")/$(basename "$0").pl" "$@"	# perl looks a script name up in the cwd, not on PATH, so pass its full path; "$@" keeps arguments with spaces intact
diff --git a/bin/xmldiff.pl b/bin/xmldiff.pl
new file mode 100644
index 0000000..0e59561
--- /dev/null
+++ b/bin/xmldiff.pl
@@ -0,0 +1,126 @@
+#!/usr/bin/perl
+#
+#  Copyright (c) 2002, DecisionSoft Limited All rights reserved.
+#  Please see: 
+#  http://software.decisionsoft.com/licence.html 
+#  for more information.
+#
+# Modified for the Fedora Docs Project by Tommy.Reynolds@MegaCoder.com
+# 
+
+#
+# xmldiff: xmldiff program - uses xmlpp, which must be on the ${PATH}
+#
+
+#Change this if xmlpp is not in your current path
+#for example: $XMLPP = "./xmlpp";
+$XMLPP = "xmlpp";
+
+use Getopt::Std;
+
+# An unknown switch used to be reported by getopts() and then ignored; treat
+# it like any other bad invocation so a typo cannot silently pick the default.
+if (!getopts('tscupChHSi') || $opt_h || @ARGV != 2) {
+	usage();
+}
+
+my $diffOpts = "";	# extra diff switches; stays empty for -s (plain diff)
+# Exactly one output mode may be active; -p (coloured) is the default.
+my $outputFmt = grep { $_ } ($opt_u, $opt_c, $opt_s, $opt_p, $opt_C);
+if( $outputFmt == 0 )	{
+	$outputFmt = $opt_p = 1;
+}
+if( $outputFmt > 1 )	{
+  print STDERR "Error: Only one mode may be specified\n";
+  usage();
+}
+
+if( $opt_s )	{
+	# Standard diff: diff's default output, no extra switches
+} 
+if ( $opt_c )	{
+	# Plain context diff
+	$diffOpts .= "-c ";
+} 
+if( $opt_u )	{
+	# Plain unified diff
+	$diffOpts .= "-u ";
+} 
+if( $opt_p && !$opt_H )	{
+	# Colorized diff; \e is the ESC byte that opens each ANSI colour sequence.
+	# Not used with -H, which defines its own line formats further down.
+	$diffOpts .= "--new-line-format='\e[1m\e[33m+ %l\n\e[m' ";
+	$diffOpts .= "--old-line-format='\e[1m\e[31m- %l\n\e[m' ";
+	$diffOpts .= "--unchanged-line-format='\e[1m\e[30m  %l\e[m\n' ";
+} 
+if( $opt_C && !$opt_H )	{	# CVS-like text layout; -H replaces it with the HTML formats
+  $diffOpts .= "--changed-group-format='\n<<<<<<<<<<<<<<\n%<==============\n%>>>>>>>>>>>>>>>\n\n' ";
+  $diffOpts .= "--new-line-format='+ %l\n' ";
+  $diffOpts .= "--old-line-format='- %l\n' ";
+  $diffOpts .= "--unchanged-line-format='  %l\n' ";
+} 
+if( $opt_H )	{
+  $diffOpts .= "--changed-group-format='%<%>' ";
+  $diffOpts .= " --new-group-format='%>' ";
+  $diffOpts .= "--old-group-format='%<' ";
+  $diffOpts .= "--new-line-format='<font color=\"green\">+ %l</font>\n' ";
+  $diffOpts .= "--old-line-format='<font color=\"red\">- %l</font>\n' ";
+  $diffOpts .= "--unchanged-line-format='<font color=\"gray\">  %l</font>\n' ";
+}
+
+# Set up xmlpp options
+
+my $prettyOpts = $opt_t ? "-t " : "";
+$prettyOpts   .= $opt_S ? "-S " : "";
+$prettyOpts   .= $opt_H ? "-H " : "";
+$prettyOpts   .= "-s -e ";
+
+$file1 = "xmlppTEMP1.$$";
+$file2 = "xmlppTEMP2.$$";
+
+# system() returns a wait status (exit code << 8).  Adding those up and passing
+# the sum to exit() truncated e.g. 256 to 0, so "files differ" looked like
+# success.  $results is now a real exit code: 0 same, 1 different, 2 trouble.
+my $results = 0;
+foreach my $job ( [ $ARGV[0], $file1 ], [ $ARGV[1], $file2 ] ) {
+	my ($xml, $pretty) = @$job;
+	$xml =~ s/'/'\\''/g;	# a ' in the file name must not end the shell quoting
+	$results = 2 if system("$XMLPP $prettyOpts '$xml' > $pretty") != 0;
+}
+
+if($opt_H) {
+	print "<HTML>\n  <HEAD>\n    <TITLE>XML Diff</TITLE>\n  </HEAD>\n";
+	print "  <BODY bgcolor=\"#FEFEFF\">\n    <PRE>";
+}
+my $status = system("/usr/bin/diff -bB $diffOpts $file1 $file2");
+my $diffCode = ($status == -1 || ($status & 127)) ? 2 : ($status >> 8);
+$results = $diffCode if $diffCode > $results;
+if($opt_H) {
+	# Do not add extra whitespace before the </PRE>
+	print "</PRE>\n  </BODY>\n</HTML>\n";
+}
+
+unlink($file1,$file2);
+
+exit( $results );
+
+sub usage {	# print the command-line help on STDERR, then exit with status 1
+  print STDERR <<EOF;
+usage: $0 [ mode ] [ options ] oldfile.xml newfile.xml
+
+mode must be one of:
+  -p  coloured unified diff [default]
+  -c  context diff
+  -u  unified diff
+  -s  standard diff output
+  -C  vaguely CVS like unified diff
+
+options:
+  -H  HTML output
+  -t  split attributes - good for spotting changes in attributes
+  -S  schema hack mode - good for diffing schemas
+  -i  ignore element and attribute contents (accepted, but not implemented)
+
+EOF
+  exit 1;
+}
diff --git a/bin/xmlpp b/bin/xmlpp
new file mode 100755
index 0000000..3de854a
--- /dev/null
+++ b/bin/xmlpp
@@ -0,0 +1,403 @@
+#!/usr/bin/perl  -w
+
+#
+#  Copyright (c) 2002, DecisionSoft Limited All rights reserved.
+#  Please see: 
+#  http://software.decisionsoft.com/licence.html
+#  for more information.
+# 
+
+# $Revision: 1.1 $
+#
+# xmlpp: XML pretty printing
+#
+
+# For custom attribute sorting create an attributeOrdering.txt file that
+# lists the attribute names, one per line, in the order you would like
+# them to be sorted. Then use the -s option.
+
+use FileHandle;
+use Fcntl;
+use Getopt::Std;
+
+use vars qw($opt_h $opt_H $opt_s $opt_z $opt_t $opt_e $opt_S $opt_c $opt_n);
+
+my $indent=0;		# current nesting depth; each level indents by two spaces
+my $textContent='';	# text collected since the last tag
+my $lastTag=undef;	# 1 after a start tag, 0 after an end or self-closed tag
+my $output;		# the pretty-printed document being built
+my $inAnnotation = 0;	# true while a schema annotation is being skipped (-S)
+
+
+if (!getopts('nzhHsteSc') or $opt_h) {
+    usage();
+}
+
+if ($opt_s){
+
+# expect to find attributeOrdering.txt file in same directory
+# as xmlpp is being run from
+    
+  my $scriptDir = $0;
+  if ($scriptDir =~ m#/#){
+    $scriptDir =~ s#/[^/]+$##;
+  }
+  else{
+    $scriptDir =".";
+  }
+    
+  # get attribute ordering from external file.  The file is optional: when it
+  # cannot be opened @specialSort stays empty and sorting is purely alphabetic.
+  # Three-argument open with a lexical handle keeps an odd directory name from
+  # being read as part of the open mode.
+  if (open(my $sortList, '<', "$scriptDir/attributeOrdering.txt")) {
+    my @sortlist = <$sortList>;
+    chomp @sortlist;
+    close ($sortList);
+    @specialSort = grep(/^\w+/, @sortlist);
+  } 
+}
+
+
+# set line separator to ">" speeding up parsing of XML files
+# with no line breaks 
+
+$/ = ">";
+
+# Descriptive aliases for the command-line switches.
+my ($sortAttributes, $newLineComments, $splitAttributes)
+    = ($opt_s, $opt_c, $opt_t);
+my ($schemaHackMode, $normaliseWhiteSpace) = ($opt_S, $opt_n);
+
+my $filename = $ARGV[0];
+
+# No file name at all, or "-", means the document is read from STDIN.
+my $stdin = (!$filename or $filename eq '-') ? 1 : 0;
+
+if ($opt_z and $stdin) {
+    print STDERR "Error: I can't edit STDIN in place.\n";
+    usage();
+}
+
+# Without -z only the first file is processed; say so if more were given.
+unless ($opt_z or scalar(@ARGV) <= 1) {
+    print STDERR "Warning: Multiple files specified without -z option\n"; 
+}
+
+my $fh;
+
+if ($stdin) {
+    $fh=*STDIN;
+} else {
+    # open_next_file() takes the name off @ARGV; exit if no file is left.
+    $fh = open_next_file() or exit(1);
+}
+
+do {	# one pass per input document; repeats only in -z mode (see the loop condition)
+    $indent=0;	# reset the per-document state
+    $textContent='';
+    $lastTag=undef;
+    $output = '';
+    my $re_name = "(?:[A-Za-z0-9_:][A-Za-z0-9_:.-]*)";	# element or attribute name
+    my $re_attr = "(?:'[^']*'|\"[^\"]*\")";	# attribute value in either quote style
+    my $input;	# text read so far but not yet consumed by a pattern below
+
+    while ($input .= <$fh>) {	# $/ is ">", so each read ends at the next ">"
+        while ($input) {	# strip one recognised construct off the front per round
+            if ($input =~ s/^<($re_name)((?:\s+$re_name\s*=\s*$re_attr)*\s*)(\/?)>(.*)$/$4/s ) {	# start tag; $3 is "/" when self-closing
+                my %attr;
+                my ($name,$attr,$selfclose) = ($1,$2,$3);
+                while ($attr =~ m/($re_name)\s*=\s*($re_attr)/gs) {	# split the attribute string into name/value pairs
+                    my ($name,$value) = ($1,$2);
+                    $value =~ s/^["'](.*)["']$/$1/s;	# drop the surrounding quotes
+                    $attr{$name} = $value;
+                }
+                if ($opt_e) {	# -e: report a self-closing tag as start tag plus end tag
+                    parseStart($name, 0, %attr);
+                    if ($selfclose) { parseEnd($name) }
+                } else {
+                    parseStart($name, $selfclose, %attr);
+                }
+            } elsif ($input =~ s/^<\/($re_name)\s*>(.*)$/$2/s) {	# end tag
+                parseEnd($1);
+            } elsif ($input =~ s/^<!--(.*?)-->(.*)$/$2/s) { # comment
+                parseComment($1);
+            } elsif ($input =~ s/^([^<]+)(.*)$/$2/s) {	# character data up to the next "<"
+                parseDefault($1);
+            } elsif ($input =~ s/^(<\?[^>]*\?>)(.*)$/$2/s) {	# processing instruction
+                parsePI("$1\n");
+            } elsif ($input =~ s/^(<\!DOCTYPE[^\[>]*(\[[^\]]*\])?[^>]*>)(.*)$/$3/s) {	# DOCTYPE, optionally with a [...] subset
+                parseDoctype("$1");
+            } else {	# nothing matched yet: leave the buffer alone and read more input
+                last;
+            }
+        }
+        if (eof($fh)) {	# no more input to append; stop reading
+            last;
+        }
+    }
+
+
+    if ($input) {	# text that none of the patterns above could consume
+        $input =~ m/([^\n]+)/gs;	# $1 = first run of non-newline characters, shown in the warning
+        print STDERR "WARNING: junk remaining on input: $1\n";
+    }
+    $fh->close();
+
+    if (!$opt_z) {	# normal mode: write the result to STDOUT
+        if(!$opt_H){ # -H: HTML-escape the whole result first
+            print "$output\n"
+        } else {
+            print html_escape($output)."\n"
+        }
+    } else {	# -z: write the result back over the input file
+        if ($input) { # unparsed input remained, so the original file is kept
+            print STDERR "Not overwriting file\n";
+        } else {
+            open FOUT,"> $filename" or die "Cannot overwrite file: $!";
+            if(!$opt_H){
+                print FOUT "$output\n"
+            } else {
+                print FOUT html_escape($output)."\n"
+            }
+            close FOUT
+        }
+    }
+} while (
+    !$stdin && $opt_z && ($fh = open_next_file(\$filename))	# next file, -z mode only; also updates $filename
+  );
+  
+
+
+# parseStart(NAME, SELFCLOSE, ATTR => VALUE, ...)
+#
+# Appends the start tag of element NAME to $output, after flushing any text
+# collected since the previous tag.  A true SELFCLOSE writes "<NAME ... />"
+# and leaves the indent level alone; otherwise the tag stays open and the
+# indent level goes up by one.  Nothing is written while inside a schema
+# annotation (schema hack mode).
+#
+# With -s the attributes are ordered by orderAttributes(); in schema hack
+# mode minOccurs/maxOccurs attributes whose value is 1 are dropped.
+sub parseStart {
+    my $s = shift;
+    my $selfclose = shift;
+    my %attr = @_;
+
+    $textContent =~ s/\s+$//; 
+    printContent($textContent);
+
+    if($inAnnotation) {
+        return;
+    }
+
+    if($schemaHackMode and $s =~ m/(^|:)annotation$/) {
+        # Only parseEnd() leaves annotation mode, and a self-closed
+        # annotation never reaches it: entering the mode for one would
+        # swallow the rest of the document.
+        $inAnnotation = 1 unless $selfclose;
+        $textContent = '';
+        $lastTag = 1;
+        return;
+    }
+    # Every tag starts on a fresh line, indented two spaces per level.
+    if (length($output)) {
+        $output .= "\n";
+    }
+
+    $output .= "  " x $indent;
+    $output .= "<$s";
+    my @k = keys %attr;
+
+    if ($sortAttributes && (scalar(@k) > 1) ){
+        @k = orderAttributes(@k);
+    }
+
+    foreach my $attr (@k) {
+        # 
+        # Remove (min|max)Occurs = 1 if schemaHackMode
+        #
+        if ($schemaHackMode and $attr =~ m/^(minOccurs|maxOccurs)$/ and $attr{$attr} eq "1") {
+            next;
+        }
+
+        if ($splitAttributes) {
+            $output .= "\n"."  " x $indent." ";
+        }
+        # Use double quotes only when the value contains a single quote.
+        if ($attr{$attr} =~ /'/) {
+            $output .= " $attr=\"$attr{$attr}\"";
+        } else {
+            $output .= " $attr='$attr{$attr}'";
+        }
+    }
+    # In split mode the closing bracket goes on a line of its own too.
+    if ($splitAttributes and @k) {
+        $output .= "\n"."  " x $indent;
+    }
+    # $lastTag: 1 after a start tag, 0 after an end or self-closed tag.
+    if ($selfclose) {
+        $output .= " />";
+        $lastTag = 0;
+    } else {
+        $output .= ">";
+        $indent++;
+        $lastTag = 1;
+    }
+    $textContent = '';
+}
+
+# orderAttributes(NAME, ...): private helper for parseStart.  Returns the
+# names with every one listed in @specialSort first, in the order of that
+# list, followed by the remaining names in alphabetical order.  Each name
+# is returned once, even if @specialSort lists it more than once.
+sub orderAttributes {
+    my %present = map { $_ => 1 } @_;
+    my %placed;
+    my @ordered;
+
+    foreach my $name (@specialSort) {
+        next unless $present{$name};
+        next if $placed{$name}++;
+        push @ordered, $name;
+    }
+    foreach my $name (sort @_) {
+        push @ordered, $name unless $placed{$name};
+    }
+    return @ordered;
+}
+
+sub parseEnd {	# write the end tag </$s>, flushing pending text first
+    my $s = shift;
+
+    if($inAnnotation) {
+        # the annotation's own end tag switches output back on
+        $inAnnotation = 0 if $s =~ m/(^|:)annotation$/;
+        return;
+    }
+
+    if($normaliseWhiteSpace) {
+        # Trim each end separately.  A single s/^\s*(.*?)\s*$/$1/ has no /s,
+        # so "." stops at a newline and multi-line text was never trimmed.
+        $textContent =~ s/^\s+//;
+        $textContent =~ s/\s+$//;
+    }
+    $indent--;
+    printContent($textContent);
+    if (!$lastTag) {	# previous tag was not a start tag: go to a new, indented line
+        $output .= "\n" . "  " x $indent;
+    } 
+    $output .= "</$s>";
+    ($textContent, $lastTag) = ('', 0);
+}
+
+sub parseDefault {	# add character data to the pending text; ignored inside an annotation
+    my ($text) = @_;
+    return if $inAnnotation;
+    $textContent .= "$text";
+}
+
+sub parsePI {	# a processing instruction is appended to the output as given
+    my ($instruction) = @_;
+    $output .= $instruction;
+}
+
+sub parseDoctype {	# copy a <!DOCTYPE ...> declaration to the output
+    my $s = shift;
+    # $start, $DTD and $finish are lexical here; as undeclared package
+    # globals they were shared with (and overwritten by) parseComment.
+    if (my ($start, $DTD, $finish) = $s =~ /^([^\[]*\[)([^\]]*)(\].*)$/ms) {
+      # internal subset: start every declaration on its own indented line
+      $DTD =~ s/\</\n  \</msg;
+      $output .= "$start$DTD\n$finish\n";
+    } else {
+      $output .= "$s";
+    }
+}
+
+sub parseComment {	# write a comment, tidying any markup that is commented out
+    my $s = shift; 
+    if($inAnnotation) { return }
+    printContent($textContent,1);
+    # lexical captures: as package globals these clashed with parseDoctype's
+    if (my ($start, $xml, $finish) = $s =~ /([^\<]*)(<.*>)(.*)/ms) {
+      # put each commented-out tag on its own line, without blank lines or
+      # leading/trailing whitespace on a line
+      $xml =~ s/\</\n\</msg;
+      $xml =~ s/(\n\s*\n?)+/\n/msg;
+      $xml =~ s/^\s*//msg;
+      $xml =~ s/\s*$//msg;
+      $s = "$start\n$xml\n$finish";
+    }
+    $s =~ s/\n\s*$/\n  /msg;
+    if ($newLineComments) {
+        $output .= "\n<!--$s-->\n";
+    } else {
+        $output .= "<!--$s-->";
+    }
+    $textContent='';
+}
+
+# printContent(TEXT, PRINTLF): append TEXT to $output unless it is empty or
+# consists only of whitespace.  When PRINTLF is true, a newline is appended
+# as well if TEXT ended in a newline (optionally followed by whitespace) -
+# even when TEXT itself was all whitespace and so was not written.
+sub printContent {
+    my ($s, $printLF) = @_;
+
+    # No $ret flag any more: it was assigned here but never returned or read.
+    if ($s !~ m/^[\s\n]*$/) {
+        $output .= "$s";
+    }
+
+    # Keep the line break that followed the text.
+    if ($printLF and $s =~ m/\n\s*$/) {
+        $output .= "\n";
+    }
+    return;
+}
+
+
+sub html_escape {	# returns a copy of the argument with &, < and > as HTML entities
+    my ($text) = @_;
+    my %entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
+    # one pass is enough: text inserted by a replacement is never rescanned
+    $text =~ s/([&<>])/$entity{$1}/g;
+    return $text;
+}
+
+sub open_next_file {	# optional argument: ref to a scalar that receives the chosen file name
+    my $filename = shift;	# called without a ref, the assignment below autovivifies one
+    $$filename = shift @ARGV;
+    while ($$filename and ! -f $$filename) {	# skip names that are not plain files
+        print STDERR "WARNING: Could not find file: $$filename\n";
+        $$filename = shift @ARGV;
+    }
+    if(!$$filename) {
+        return;	# @ARGV is exhausted: false tells the caller to stop
+    }
+    my $fh = FileHandle->new;	# direct method call instead of indirect-object "new FileHandle"
+    $fh->open($$filename, '<') or die "Can't open $$filename: $!";	# mode passed separately, never parsed out of the name
+    return $fh;
+}
+
+sub usage {	# print the option summary on STDERR and exit with status 1
+    print STDERR <<EOF;
+usage: $0 [ options ] [ file.xml ... ]
+
+options:
+  -h  display this help message
+  -H  escape characters (useful for further processing)
+  -t  split attributes, one per line (useful for diff)
+  -s  sort attributes (useful for diff)
+  -z  in place edit (zap)
+  -e  expand self closing tags (useful for diff)
+  -S  schema hack mode (used by xmldiff)
+  -c  place comments on new line.
+  -n  normalise whitespace (remove leading and trailing whitespace from nodes
+      with text content).
+
+EOF
+    exit 1;
+}
+
author	Tommy Reynolds <Tommy.Reynolds@MegaCoder.com>	2005-12-15 03:19:56 +0000
committer	Tommy Reynolds <Tommy.Reynolds@MegaCoder.com>	2005-12-15 03:19:56 +0000
commit	4842dac2982e1f91ed13aaac0241ddb43bda5ed1 (patch)
tree	14b1acab6a2c1af76580180f83ea238221430057 /bin
parent	a2b64d8da9f057e5db398238f06dbbdeb279ef41 (diff)
download	fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.gz fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.xz fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.zip