XML Diff

From b03caf04dee61dbf7e0f8c6a931a54fe268df24c Mon Sep 17 00:00:00 2001 From: Tommy Reynolds Date: Thu, 15 Dec 2005 06:15:49 +0000 Subject: Switched to our own XMLFORMAT tool to prepare the files for comparison. As long as the original files have been normalized by XMLFORMAT, then the line numbers output by XMLDIFF will be correct. Why would you *not* work with normalized files? --- bin/xmldiff.pl | 45 ++----- bin/xmlpp | 405 --------------------------------------------------------- 2 files changed, 8 insertions(+), 442 deletions(-) delete mode 100755 bin/xmlpp diff --git a/bin/xmldiff.pl b/bin/xmldiff.pl index 6b358e5..faeab63 100644 --- a/bin/xmldiff.pl +++ b/bin/xmldiff.pl @@ -13,12 +13,12 @@ # #Change this if xmlpp is not in your current path -#for example: $XMLPP = "./xmlpp"; -$XMLPP = "xmlpp"; +#for example: $XMLFORMAT = "./xmlpp"; +# $XMLFORMAT = "xmlpp"; use Getopt::Std; -getopts('tscupChHSi'); +getopts('scupChi'); if ($opt_h || @ARGV != 2) { usage(); @@ -54,45 +54,18 @@ if( $opt_s ) { $diffOpts .= "--new-line-format='+ %l\n' "; $diffOpts .= "--old-line-format='- %l\n' "; $diffOpts .= "--unchanged-line-format=' %l\n' "; -} elsif( $opt_H ) { - $diffOpts .= "--changed-group-format='%<%>' "; - $diffOpts .= " --new-group-format='%>' "; - $diffOpts .= "--old-group-format='%<' "; - $diffOpts .= "--new-line-format='+ %l\n' "; - $diffOpts .= "--old-line-format='- %l\n' "; - $diffOpts .= "--unchanged-line-format=' %l\n' "; } -# Set up xmlpp options - -my $prettyOpts = $opt_t ? "-t " : ""; -$prettyOpts .= $opt_S ? "-S " : ""; -$prettyOpts .= $opt_H ? "-H " : ""; -$prettyOpts .= "-s -e "; +$XMLFORMAT = "xmlformat"; $file1 = "xmlppTEMP1.$$"; $file2 = "xmlppTEMP2.$$"; my $results = 0; -$results += system("$XMLPP $prettyOpts '$ARGV[0]' > $file1"); -$results += system("$XMLPP $prettyOpts '$ARGV[1]' > $file2"); - -if($opt_H) { - print "\n"; - print " \n"; - print " XML Diff\n"; - print " \n"; - print " \n"; - print "

";
-	$results += system("/usr/bin/diff -bB $diffOpts $file1 $file2");
-	# Do not add extra whitespace before the

- print "\n"; - print " \n"; - print "\n"; - -} else { - $results += system("/usr/bin/diff -bB $diffOpts $file1 $file2"); -} +$results += system("$XMLFORMAT '$ARGV[0]' > $file1"); +$results += system("$XMLFORMAT '$ARGV[1]' > $file2"); + +$results += system("/usr/bin/diff -bB $diffOpts $file1 $file2"); unlink($file1,$file2); @@ -111,9 +84,7 @@ mode must be one of: -C vaguely CVS like unified diff options: - -H HTML output -t split attributes - good for spotting changes in attributes - -S schema hack mode - good for diffing schemas -i ignore element and attribute contents EOF diff --git a/bin/xmlpp b/bin/xmlpp deleted file mode 100755 index 380a2a6..0000000 --- a/bin/xmlpp +++ /dev/null @@ -1,405 +0,0 @@ -#!/usr/bin/perl -w - -# -# Copyright (c) 2002, DecisionSoft Limited All rights reserved. -# Please see: -# http://software.decisionsoft.com/licence.html -# for more information. -# - -# $Revision: 1.3 $ -# -# xmlpp: XML pretty printing -# - -# For custom attribute sorting create an attributeOrdering.txt file that -# lists each attributes separated by a newline in the order you would like -# them to be sorted separated by a newline. Then use the -s option. - -use FileHandle; -use Fcntl; -use Getopt::Std; - -use vars qw($opt_h $opt_H $opt_s $opt_z $opt_t $opt_e $opt_S $opt_c $opt_n); - -my $indent=0; -my $textContent=''; -my $lastTag=undef; -my $output; -my $inAnnotation = 0; - - -if (!getopts('nzhHsteSc') or $opt_h) { - usage(); -} - -if ($opt_s){ - -# expect to find attributeOrdering.txt file in same directory -# as xmlpp is being run from - - my $scriptDir = $0; - if ($scriptDir =~ m#/#){ - $scriptDir =~ s#/[^/]+$##; - } - else{ - $scriptDir ="."; - } - - # get attribute ordering from external file - if (open(SORTLIST, "<$scriptDir/attributeOrdering.txt")) { - @sortlist = ; - chomp @sortlist; - close (SORTLIST); - @specialSort = grep(/^\w+/, @sortlist); - } - else { -# print STDERR "Could not open $scriptDir/attributeOrdering.txt: $!\nWARNING attribute sorting will only be alphabetic\n\n"; - } -} - - -# set line separator to ">" speeding up parsing of XML files -# with no line breaks - -$/ = ">"; - - -my $sortAttributes = $opt_s; -my $newLineComments = $opt_c; -my $splitAttributes = $opt_t; -my $schemaHackMode = $opt_S; -my $normaliseWhiteSpace = $opt_n; - -my $filename = $ARGV[0]; -if ($opt_z && (!$filename or $filename eq '-')) { - print STDERR "Error: I can't edit STDIN in place.\n"; - usage(); -} - -if (!$opt_z && scalar(@ARGV) > 1) { - print STDERR "Warning: Multiple files specified without -z option\n"; -} - -my $fh; - -my $stdin; - -if (!$filename or $filename eq '-') { - $fh=*STDIN; - $stdin=1; -} else { - $fh = open_next_file() or exit(1); - $stdin=0; -} - -do { - $indent=0; - $textContent=''; - $lastTag=undef; - $output = ''; - my $re_name = "(?:[A-Za-z0-9_:][A-Za-z0-9_:.-]*)"; - my $re_attr = "(?:'[^']*'|\"[^\"]*\")"; - my $input; - - while ($input .= <$fh>) { - while ($input) { - if ($input =~ s/^<($re_name)((?:\s+$re_name\s*=\s*$re_attr)*\s*)(\/?)>(.*)$/$4/s ) { - my %attr; - my ($name,$attr,$selfclose) = ($1,$2,$3); - while ($attr =~ m/($re_name)\s*=\s*($re_attr)/gs) { - my ($name,$value) = ($1,$2); - $value =~ s/^["'](.*)["']$/$1/s; - $attr{$name} = $value; - } - if ($opt_e) { - parseStart($name, 0, %attr); - if ($selfclose) { parseEnd($name) } - } else { - parseStart($name, $selfclose, %attr); - } - } elsif ($input =~ s/^<\/($re_name)\s*>(.*)$/$2/s) { - parseEnd($1); - } elsif ($input =~ s/^(.*)$/$2/s) { - parseComment($1); - } elsif ($input =~ s/^([^<]+)(.*)$/$2/s) { - parseDefault($1); - } elsif ($input =~ s/^(<\?[^>]*\?>)(.*)$/$2/s) { - parsePI("$1\n"); - } elsif ($input =~ s/^(<\!DOCTYPE[^\[>]*(\[[^\]]*\])?[^>]*>)(.*)$/$3/s) { - parseDoctype("$1"); - } elsif ($input =~ s/^(<\!ENTITY[^>]*>)(.*)$/$2/s) { - $output .= "$1"; - } else { - last; - } - } - if (eof($fh)) { - last; - } - } - - - if ($input) { - $input =~ m/([^\n]+)/gs; - print STDERR "WARNING: junk remaining on input: $1\n"; - } - $fh->close(); - - if (!$opt_z) { - if(!$opt_H){ - print "$output\n" - } else { - print html_escape($output)."\n" - } - } else { - if ($input) { - print STDERR "Not overwriting file\n"; - } else { - open FOUT,"> $filename" or die "Cannot overwrite file: $!"; - if(!$opt_H){ - print FOUT "$output\n" - } else { - print FOUT html_escape($output)."\n" - } - close FOUT - } - } -} while ( - !$stdin && $opt_z && ($fh = open_next_file(\$filename)) - ); - - - -sub parseStart { - my $s = shift; - my $selfclose = shift; - my %attr = @_; - - $textContent =~ s/\s+$//; - printContent($textContent); - - if($inAnnotation) { - return; - } - - if($schemaHackMode and $s =~ m/(^|:)annotation$/) { - $inAnnotation = 1; - $textContent = ''; - $lastTag = 1; - return; - } - if (length($output)) { - $output .= "\n"; - } - - $output .= " " x $indent; - $output .= "<$s"; - my @k = keys %attr; - - if ($sortAttributes && (scalar(@k) > 1) ){ - - my @alphaSorted; - my @needSpecialSort; - my @final; - my $isSpecial; - - # sort attributes alphabetically (default ordering) - @alphaSorted = sort @k; - - # read through sorted list, if attribute doesn't have specified - # sort order, push it onto the end of the final array (this maintains - # alphabetic order). Else create a list that has attributes needing - # special ordering. - foreach $attribute (@alphaSorted){ - $isSpecial = 0; - foreach $sortAttrib (@specialSort){ - if ($attribute eq $sortAttrib){ - push @needSpecialSort, $attribute; - $isSpecial = 1; - } - } - if (!$isSpecial){ - push @final, $attribute; - } - } - - # now read through the specialSort list backwards looking for - # any match in the needSpecialSort list. Unshift this onto the - # front of the final array to maintain proper order. - foreach my $attribute (reverse @specialSort){ - foreach (@needSpecialSort){ - if ($attribute eq $_){ - unshift @final, $attribute; - } - } - } - - @k = @final; - } - - foreach my $attr (@k) { - # - # Remove (min|max)Occurs = 1 if schemaHackMode - # - if ($schemaHackMode and $attr =~ m/^(minOccurs|maxOccurs)$/ and $attr{$attr} eq "1") { - next; - } - - if ($splitAttributes) { - $output .= "\n"." " x $indent." "; - } - if ($attr{$attr} =~ /'/) { - $output .= " $attr=\"$attr{$attr}\""; - } else { - $output .= " $attr='$attr{$attr}'"; - } - } - if ($splitAttributes and @k) { - $output .= "\n"." " x $indent; - } - if ($selfclose) { - $output .= " />"; - $lastTag = 0; - } else { - $output .= ">"; - $indent++; - $lastTag = 1; - } - $textContent = ''; -} - -sub parseEnd { - my $s = shift; - - if($inAnnotation) { - if($s =~ m/(^|:)annotation$/) { - $inAnnotation = 0; - } - return; - } - - if($normaliseWhiteSpace) { - $textContent =~ s/^\s*(.*?)\s*$/$1/; - } - $indent--; - printContent($textContent); - if ($lastTag == 0) { - $output .= "\n"; - $output .= " " x $indent; - } - $output .= ""; - $textContent = ''; - $lastTag = 0; -} - -sub parseDefault { - my $s = shift; - if($inAnnotation) { return } - $textContent .= "$s"; -} - -sub parsePI { - my $s = shift; - $output .= "$s"; -} - -sub parseDoctype { - my $s = shift; - if ($s =~ /^([^\[]*\[)([^\]]*)(\].*)$/ms) { - $start = $1; - $DTD = $2; - $finish = $3; - $DTD =~ s/\)(.*)/ms) { - $start = $1; - $xml = $2; - $finish = $3; - $xml =~ s/\\n"; - } else { - $output .= ""; - } - $textContent=''; -} - -sub printContent { - my $s = shift; - my $printLF = shift; - my ($LF,$ret) = ("",""); - - if ($s =~ m/\n\s*$/) { - $LF = "\n"; - } - if ($s =~ m/^[\s\n]*$/) { - $ret = undef; - } else { - $output .= "$s"; - $ret = 1; - } - if ($printLF) { - $output .= $LF; - } -} - - -sub html_escape { - my $s = shift; - $s =~ s/&/&/gsm; - $s =~ s//>/gsm; - return $s; -} - -sub open_next_file { - my $filename = shift; - $$filename = shift @ARGV; - while ($$filename and ! -f $$filename) { - print STDERR "WARNING: Could not find file: $$filename\n"; - $$filename = shift @ARGV; - } - if(!$$filename) { - return undef; - } - my $fh = new FileHandle; - $fh->open("< $$filename") or die "Can't open $$filename: $!"; - return $fh; -} - -sub usage { - print STDERR <