diff options
author | Tommy Reynolds <Tommy.Reynolds@MegaCoder.com> | 2005-12-15 03:19:56 +0000 |
---|---|---|
committer | Tommy Reynolds <Tommy.Reynolds@MegaCoder.com> | 2005-12-15 03:19:56 +0000 |
commit | 4842dac2982e1f91ed13aaac0241ddb43bda5ed1 (patch) | |
tree | 14b1acab6a2c1af76580180f83ea238221430057 /bin | |
parent | a2b64d8da9f057e5db398238f06dbbdeb279ef41 (diff) | |
download | fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.gz fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.tar.xz fedora-doc-utils-4842dac2982e1f91ed13aaac0241ddb43bda5ed1.zip |
Here is a very simple XMLDIFF tool that I've hacked to work reasonably
with the FDP files. Look in the "xmldiff.pl" file for information about
its origin.
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/xmldiff | 3 | ||||
-rw-r--r-- | bin/xmldiff.pl | 126 | ||||
-rwxr-xr-x | bin/xmlpp | 403 |
3 files changed, 532 insertions, 0 deletions
diff --git a/bin/xmldiff b/bin/xmldiff
new file mode 100755
index 0000000..9e12206
--- /dev/null
+++ b/bin/xmldiff
@@ -0,0 +1,3 @@
+#!/bin/sh
+export PATH=`dirname $0`:${PATH}
+exec perl "$(dirname "$0")/$(basename "$0").pl" "$@"
diff --git a/bin/xmldiff.pl b/bin/xmldiff.pl
new file mode 100644
index 0000000..0e59561
--- /dev/null
+++ b/bin/xmldiff.pl
@@ -0,0 +1,126 @@
+#!/usr/bin/perl
+#
+# Copyright (c) 2002, DecisionSoft Limited All rights reserved.
+# Please see:
+# http://software.decisionsoft.com/licence.html
+# for more information.
+#
+# Modified for the Fedora Docs Project by Tommy.Reynolds@MegaCoder.com
+#
+
+#
+# xmldiff: xmldiff program - uses xmlpp, which must be on the ${PATH}
+#
+
+#Change this if xmlpp is not in your current path
+#for example: $XMLPP = "./xmlpp";
+$XMLPP = "xmlpp";
+
+use Getopt::Std;
+
+getopts('tscupChHSi');
+
+if ($opt_h || @ARGV != 2) {
+  usage();
+}
+
+my $diffOpts;
+
+my $outputFmt = $opt_u + $opt_c + $opt_s + $opt_p + $opt_C;
+if( $outputFmt == 0 ) {
+  $outputFmt = $opt_p = 1;
+}
+if( $outputFmt > 1 ) {
+  print STDERR "Error: Only one mode may be specified\n";
+  usage();
+}
+
+if( $opt_s ) {
+  # Standard diff, no playing around
+}
+if ( $opt_c ) {
+  # Plain context diff
+  $diffOpts .= "-c ";
+}
+if( $opt_u ) {
+  # Plain unified diff
+  $diffOpts .= "-u ";
+}
+if( $opt_p ) {
+  # Colorized unified diff (\e is the ESC byte that introduces each ANSI colour code)
+  # $diffOpts .= "-u ";
+  $diffOpts .= "--new-line-format='\e[1m\e[33m+ %l\n\e[m' ";
+  $diffOpts .= "--old-line-format='\e[1m\e[31m- %l\n\e[m' ";
+  $diffOpts .= "--unchanged-line-format='\e[1m\e[30m %l\e[m\n' ";
+}
+if( $opt_C ) {
+  $diffOpts .= "--changed-group-format='\n<<<<<<<<<<<<<<\n%<==============\n%>>>>>>>>>>>>>>>\n\n' ";
+  $diffOpts .= "--new-line-format='+ %l\n' ";
+  $diffOpts .= "--old-line-format='- %l\n' ";
+  $diffOpts .= "--unchanged-line-format=' %l\n' ";
+}
+if( $opt_H ) {
+  $diffOpts .= "--changed-group-format='%<%>' ";
+  $diffOpts .= " --new-group-format='%>' ";
+  $diffOpts .= "--old-group-format='%<' ";
+  $diffOpts .= "--new-line-format='<font color=\"green\">+ %l</font>\n' ";
+  $diffOpts .= "--old-line-format='<font color=\"red\">- %l</font>\n' ";
+  $diffOpts .= "--unchanged-line-format='<font color=\"gray\"> %l</font>\n' ";
+}
+
+# Set up xmlpp options
+
+my $prettyOpts = $opt_t ? "-t " : "";
+$prettyOpts .= $opt_S ? "-S " : "";
+$prettyOpts .= $opt_H ? "-H " : "";
+$prettyOpts .= "-s -e ";
+
+$file1 = "xmlppTEMP1.$$";
+$file2 = "xmlppTEMP2.$$";
+
+my $results = 0;
+$results += system("$XMLPP $prettyOpts '$ARGV[0]' > $file1");
+$results += system("$XMLPP $prettyOpts '$ARGV[1]' > $file2");
+
+if($opt_H) {
+  print "<HTML>\n";
+  print " <HEAD>\n";
+  print " <TITLE>XML Diff</TITLE>\n";
+  print " </HEAD>\n";
+  print " <BODY bgcolor=\"#FEFEFF\">\n";
+  print " <PRE>";
+  $results += system("/usr/bin/diff -bB $diffOpts $file1 $file2");
+  # Do not add extra whitespace before the </PRE>
+  print "</PRE>\n";
+  print " </BODY>\n";
+  print "</HTML>\n";
+
+} else {
+  $results += system("/usr/bin/diff -bB $diffOpts $file1 $file2");
+}
+
+
+unlink($file1,$file2);
+
+exit( $results );
+
+sub usage {
+  print STDERR <<EOF;
+usage: $0 [ mode ] [ options ] oldfile.xml newfile.xml
+
+mode must be one of:
+ -p coloured unified diff [default]
+ -c context diff
+ -u unified diff
+ -s standard diff output
+ -C vaguely CVS like unified diff
+
+options:
+ -H HTML output
+ -t split attributes - good for spotting changes in attributes
+ -S schema hack mode - good for diffing schemas
+ -i ignore element and attribute contents
+
+EOF
+  exit 1;
+}
diff --git a/bin/xmlpp b/bin/xmlpp
new file mode 100755
index 0000000..3de854a
--- /dev/null
+++ b/bin/xmlpp
@@ -0,0 +1,403 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright (c) 2002, DecisionSoft Limited All rights reserved.
+# Please see:
+# http://software.decisionsoft.com/licence.html
+# for more information.
+#
+
+# $Revision: 1.1 $
+#
+# xmlpp: XML pretty printing
+#
+
+# For custom attribute sorting create an attributeOrdering.txt file that
+# lists each attributes separated by a newline in the order you would like
+# them to be sorted separated by a newline. Then use the -s option.
+
+use FileHandle;
+use Fcntl;
+use Getopt::Std;
+
+use vars qw($opt_h $opt_H $opt_s $opt_z $opt_t $opt_e $opt_S $opt_c $opt_n);
+
+my $indent=0;
+my $textContent='';
+my $lastTag=undef;
+my $output;
+my $inAnnotation = 0;
+
+
+if (!getopts('nzhHsteSc') or $opt_h) {
+  usage();
+}
+
+if ($opt_s){
+
+# expect to find attributeOrdering.txt file in same directory
+# as xmlpp is being run from
+
+  my $scriptDir = $0;
+  if ($scriptDir =~ m#/#){
+    $scriptDir =~ s#/[^/]+$##;
+  }
+  else{
+    $scriptDir =".";
+  }
+
+  # get attribute ordering from external file
+  if (open(SORTLIST, "<$scriptDir/attributeOrdering.txt")) {
+    @sortlist = <SORTLIST>;
+    chomp @sortlist;
+    close (SORTLIST);
+    @specialSort = grep(/^\w+/, @sortlist);
+  }
+  else {
+# print STDERR "Could not open $scriptDir/attributeOrdering.txt: $!\nWARNING attribute sorting will only be alphabetic\n\n";
+  }
+}
+
+
+# set line separator to ">" speeding up parsing of XML files
+# with no line breaks
+
+$/ = ">";
+
+
+my $sortAttributes = $opt_s;
+my $newLineComments = $opt_c;
+my $splitAttributes = $opt_t;
+my $schemaHackMode = $opt_S;
+my $normaliseWhiteSpace = $opt_n;
+
+my $filename = $ARGV[0];
+if ($opt_z && (!$filename or $filename eq '-')) {
+  print STDERR "Error: I can't edit STDIN in place.\n";
+  usage();
+}
+
+if (!$opt_z && scalar(@ARGV) > 1) {
+  print STDERR "Warning: Multiple files specified without -z option\n";
+}
+
+my $fh;
+
+my $stdin;
+
+if (!$filename or $filename eq '-') {
+  $fh=*STDIN;
+  $stdin=1;
+} else {
+  $fh = open_next_file() or exit(1);
+  $stdin=0;
+}
+
+do {
+  $indent=0;
+  $textContent='';
+  $lastTag=undef;
+  $output = '';
+  my $re_name = "(?:[A-Za-z0-9_:][A-Za-z0-9_:.-]*)";
+  my $re_attr = "(?:'[^']*'|\"[^\"]*\")";
+  my $input;
+
+  while ($input .= <$fh>) {
+    while ($input) {
+      if ($input =~ s/^<($re_name)((?:\s+$re_name\s*=\s*$re_attr)*\s*)(\/?)>(.*)$/$4/s ) {
+        my %attr;
+        my ($name,$attr,$selfclose) = ($1,$2,$3);
+        while ($attr =~ m/($re_name)\s*=\s*($re_attr)/gs) {
+          my ($name,$value) = ($1,$2);
+          $value =~ s/^["'](.*)["']$/$1/s;
+          $attr{$name} = $value;
+        }
+        if ($opt_e) {
+          parseStart($name, 0, %attr);
+          if ($selfclose) { parseEnd($name) }
+        } else {
+          parseStart($name, $selfclose, %attr);
+        }
+      } elsif ($input =~ s/^<\/($re_name)\s*>(.*)$/$2/s) {
+        parseEnd($1);
+      } elsif ($input =~ s/^<!--(.*?)-->(.*)$/$2/s) {
+        parseComment($1);
+      } elsif ($input =~ s/^([^<]+)(.*)$/$2/s) {
+        parseDefault($1);
+      } elsif ($input =~ s/^(<\?[^>]*\?>)(.*)$/$2/s) {
+        parsePI("$1\n");
+      } elsif ($input =~ s/^(<\!DOCTYPE[^\[>]*(\[[^\]]*\])?[^>]*>)(.*)$/$3/s) {
+        parseDoctype("$1");
+      } else {
+        last;
+      }
+    }
+    if (eof($fh)) {
+      last;
+    }
+  }
+
+
+  if ($input) {
+    $input =~ m/([^\n]+)/gs;
+    print STDERR "WARNING: junk remaining on input: $1\n";
+  }
+  $fh->close();
+
+  if (!$opt_z) {
+    if(!$opt_H){
+      print "$output\n"
+    } else {
+      print html_escape($output)."\n"
+    }
+  } else {
+    if ($input) {
+      print STDERR "Not overwriting file\n";
+    } else {
+      open FOUT,"> $filename" or die "Cannot overwrite file: $!";
+      if(!$opt_H){
+        print FOUT "$output\n"
+      } else {
+        print FOUT html_escape($output)."\n"
+      }
+      close FOUT
+    }
+  }
+} while (
+  !$stdin && $opt_z && ($fh = open_next_file(\$filename))
+  );
+
+
+
+sub parseStart {
+  my $s = shift;
+  my $selfclose = shift;
+  my %attr = @_;
+
+  $textContent =~ s/\s+$//;
+  printContent($textContent);
+
+  if($inAnnotation) {
+    return;
+  }
+
+  if($schemaHackMode and $s =~ m/(^|:)annotation$/) {
+    $inAnnotation = 1;
+    $textContent = '';
+    $lastTag = 1;
+    return;
+  }
+  if (length($output)) {
+    $output .= "\n";
+  }
+
+  $output .= " " x $indent;
+  $output .= "<$s";
+  my @k = keys %attr;
+
+  if ($sortAttributes && (scalar(@k) > 1) ){
+
+    my @alphaSorted;
+    my @needSpecialSort;
+    my @final;
+    my $isSpecial;
+
+    # sort attributes alphabetically (default ordering)
+    @alphaSorted = sort @k;
+
+    # read through sorted list, if attribute doesn't have specified
+    # sort order, push it onto the end of the final array (this maintains
+    # alphabetic order). Else create a list that has attributes needing
+    # special ordering.
+    foreach $attribute (@alphaSorted){
+      $isSpecial = 0;
+      foreach $sortAttrib (@specialSort){
+        if ($attribute eq $sortAttrib){
+          push @needSpecialSort, $attribute;
+          $isSpecial = 1;
+        }
+      }
+      if (!$isSpecial){
+        push @final, $attribute;
+      }
+    }
+
+    # now read through the specialSort list backwards looking for
+    # any match in the needSpecialSort list. Unshift this onto the
+    # front of the final array to maintain proper order.
+    foreach my $attribute (reverse @specialSort){
+      foreach (@needSpecialSort){
+        if ($attribute eq $_){
+          unshift @final, $attribute;
+        }
+      }
+    }
+
+    @k = @final;
+  }
+
+  foreach my $attr (@k) {
+    #
+    # Remove (min|max)Occurs = 1 if schemaHackMode
+    #
+    if ($schemaHackMode and $attr =~ m/^(minOccurs|maxOccurs)$/ and $attr{$attr} eq "1") {
+      next;
+    }
+
+    if ($splitAttributes) {
+      $output .= "\n"." " x $indent." ";
+    }
+    if ($attr{$attr} =~ /'/) {
+      $output .= " $attr=\"$attr{$attr}\"";
+    } else {
+      $output .= " $attr='$attr{$attr}'";
+    }
+  }
+  if ($splitAttributes and @k) {
+    $output .= "\n"." " x $indent;
+  }
+  if ($selfclose) {
+    $output .= " />";
+    $lastTag = 0;
+  } else {
+    $output .= ">";
+    $indent++;
+    $lastTag = 1;
+  }
+  $textContent = '';
+}
+
+sub parseEnd {
+  my $s = shift;
+
+  if($inAnnotation) {
+    if($s =~ m/(^|:)annotation$/) {
+      $inAnnotation = 0;
+    }
+    return;
+  }
+
+  if($normaliseWhiteSpace) {
+    $textContent =~ s/^\s*(.*?)\s*$/$1/;
+  }
+  $indent--;
+  printContent($textContent);
+  if ($lastTag == 0) {
+    $output .= "\n";
+    $output .= " " x $indent;
+  }
+  $output .= "</$s>";
+  $textContent = '';
+  $lastTag = 0;
+}
+
+sub parseDefault {
+  my $s = shift;
+  if($inAnnotation) { return }
+  $textContent .= "$s";
+}
+
+sub parsePI {
+  my $s = shift;
+  $output .= "$s";
+}
+
+sub parseDoctype {
+  my $s = shift;
+  if ($s =~ /^([^\[]*\[)([^\]]*)(\].*)$/ms) {
+    $start = $1;
+    $DTD = $2;
+    $finish = $3;
+    $DTD =~ s/\</\n \</msg;
+    $output .= "$start$DTD\n$finish\n";
+  } else {
+    $output .= "$s";
+  }
+}
+
+sub parseComment {
+  my $s = shift;
+  if($inAnnotation) { return }
+  printContent($textContent,1);
+  if ($s =~ /([^\<]*)(<.*>)(.*)/ms) {
+    $start = $1;
+    $xml = $2;
+    $finish = $3;
+    $xml =~ s/\</\n\</msg;
+    $xml =~ s/(\n\s*\n?)+/\n/msg;
+    $xml =~ s/^\s*//msg;
+    $xml =~ s/\s*$//msg;
+    $s = "$start\n$xml\n$finish";
+  }
+  $s =~ s/\n\s*$/\n /msg;
+  if ($newLineComments) {
+    $output .= "\n<!--$s-->\n";
+  } else {
+    $output .= "<!--$s-->";
+  }
+  $textContent='';
+}
+
+sub printContent {
+  my $s = shift;
+  my $printLF = shift;
+  my ($LF,$ret) = ("","");
+
+  if ($s =~ m/\n\s*$/) {
+    $LF = "\n";
+  }
+  if ($s =~ m/^[\s\n]*$/) {
+    $ret = undef;
+  } else {
+    $output .= "$s";
+    $ret = 1;
+  }
+  if ($printLF) {
+    $output .= $LF;
+  }
+}
+
+# Escape &, < and > as HTML entities; & goes first so later entities are not re-escaped.
+sub html_escape {
+  my $s = shift;
+  $s =~ s/&/&amp;/gsm;
+  $s =~ s/</&lt;/gsm;
+  $s =~ s/>/&gt;/gsm;
+  return $s;
+}
+
+sub open_next_file {
+  my $filename = shift;
+  $$filename = shift @ARGV;
+  while ($$filename and ! -f $$filename) {
+    print STDERR "WARNING: Could not find file: $$filename\n";
+    $$filename = shift @ARGV;
+  }
+  if(!$$filename) {
+    return undef;
+  }
+  my $fh = new FileHandle;
+  $fh->open("< $$filename") or die "Can't open $$filename: $!";
+  return $fh;
+}
+
+sub usage {
+  print STDERR <<EOF;
+usage: $0 [ options ] [ file.xml ... ]
+
+options:
+ -h display this help message
+ -H escape characters (useful for further processing)
+ -t split attributes, one per line (useful for diff)
+ -s sort attributes (useful for diff)
+ -z in place edit (zap)
+ -e expand self closing tags (useful for diff)
+ -S schema hack mode (used by xmldiff)
+ -c place comments on new line.
+ -n normalise whitespace (remove leading and trailing whitespace from nodes
+ with text content.
+
+EOF
+  exit 1;
+}
+