#!/usr/bin/perl

use strict;
use warnings;

# csvdiff: Show differences between CSV files
#	  (m)'19 [05 Mar 2019] Copyright H.M.Brand 2009-2023

our $VERSION = "1.02 - 20190305";

# Print the usage text and exit with status $err.
# A true $err redirects the text to STDERR (via select) before printing.
sub usage {
    my $err = shift and select STDERR;
    print
        "usage: csvdiff [--no-color] [--html] [-w|-b|-Z] file.csv file.csv\n",
        "       provides colorized diff on sorted CSV files\n",
        "       assuming first line is header and first field is the key\n",
        "       --no-color                    do not use colors\n",
        "    -h --html                        produce HTML output\n",
        "    -w --ignore-all-space            ignore all whitespace in all fields\n",
        "    -b --ignore-space-change         ignore whitespace length changes\n",
        "    -Z --ignore-trailing-space       ignore trailing whitespace per field\n",
        "    -o F --output=F                  send output to file F\n";
    exit $err;
} # usage

use Getopt::Long qw(:config bundling nopermute );
my $opt_c = !$ENV{NO_COLOR};    # color is on unless NO_COLOR is set
GetOptions (
    "help|?"                           => sub { usage (0); },
    "V|version"                        => sub { print "csvdiff [$VERSION]\n"; exit 0 },
    "w|ignore-all-space!"              => \my $opt_w,
    "b|ignore-ws|ignore-space-change!" => \my $opt_b,
    "Z|ignore-trailing-space!"         => \my $opt_Z,
    "c|color|colour!"                  => \   $opt_c,
    "h|html"                           => \my $opt_h,
    "o|output=s"                       => \my $opt_o,
    ) or usage (1);

# Exactly two files to compare are required
@ARGV == 2 or usage (1);

if ($opt_o) {
    open STDOUT, ">", $opt_o or die "$opt_o: $!\n";
}

use HTML::Entities;
use Term::ANSIColor qw(:constants);
use Text::CSV_XS;
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 0 });

if ($opt_h) {
    binmode STDOUT, ":encoding(utf-8)";

    # File names end up inside markup and an attribute value: escape them.
    # Only the characters predefined as XML entities are replaced.
    my $files = encode_entities ("@ARGV", q{<>&"});

    print <<EOH;
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
  <title>CFI School updates</title>
  <meta name="Generator"   content="perl $]" />
  <meta name="Author"      content="@{[scalar getpwuid $<]}" />
  <meta name="Description" content="CSV diff ${files}" />
  <style type="text/css">
    .rd { background: #ffe0e0; }
    .gr { background: #e0ffe0; }
    .hd { background: #e0e0ff; }
    .b0 { background: #e0e0e0; }
    .b1 { background: #f0f0f0; }
    .r  { color:      red;     }
    .g  { color:      green;   }
    </style>
  </head>
<body>
  <h1>CSV diff ${files}</h1>
  <table>
EOH

    # In HTML mode the color "constants" become in-band markers that show ()
    # strips again and turns into a class attribute on the table cell.
    $::{RED}   = sub { "\cA\rr"; };
    $::{GREEN} = sub { "\cA\rg"; };
    $::{RESET} = sub { ""; };
}
elsif (!$opt_c) {
    # No color requested: make all three expand to the empty string
    $::{$_} = sub { "" } for qw( RED GREEN RESET );
}

# Read both files completely.
#   @f     - $f[0] and $f[1] hold the rows (array refs) of the two files
#   $opt_n - stays true as long as no key (first field) of any row but the
#            first of each file contains a non-digit; selects numeric
#            instead of string comparison of the keys
my @f;
my $opt_n = 1;
foreach my $x (0, 1) {
    open my $fh, "<", $ARGV[$x] or die "$ARGV[$x]: $!\n";
    my $n = 0;
    while (1) {
        my $row = $csv->getline ($fh) or last;
        @$row and push @{$f[$x]}, $row;
        $n++ && $row->[0] =~ m/\D/ and $opt_n = 0;
    }
}

my @n   = map { $#{$f[$_]} } 0, 1;      # index of the last real row per file
my @i   = (1, 1);                       # current row per file; row 0 is the header
my $hdr = "# csvdiff < $ARGV[0] > $ARGV[1]\n";  # printed once, before the first text diff

# Append a sentinel row to each file whose key is meant to compare greater
# than any real key, so the merge loop can drain the longer file.
$f[$_][1+$n[$_]][0] = $opt_n ? 2147483647 : "\xff\xff\xff\xff" for 0, 1;

# HTML row classes per prefix. "<" toggles the b0/b1 banding and ">" reuses
# the current band, so both rows of a changed pair share a background.
my %cls;
%cls = (
    "b" => 0,
    "-" => sub { "rd" },
    "+" => sub { "gr" },
    "H" => sub { "hd" },
    "<" => sub { $cls{b} ^= 1; "b$cls{b}" },
    ">" => sub { "b$cls{b}" },
    );

# Emit the current row of file $x (0 or 1) and advance that file's index.
#   $pfx - "-" (only in first file), "+" (only in second file),
#          "<" / ">" (changed pair) or "H" (header, HTML mode only)
# Returns without output when there is no row at the current index.
sub show {
    my ($pfx, $x) = @_;
    my $row = $f[$x][$i[$x]++] or return;

    if ($opt_h) {
        my $bg    = $cls{$pfx}->();
        my @cells = map {
            my $cell  = $_ // "";
            # A leading marker (see RED/GREEN above) flags a changed cell
            my $class = $cell =~ s/^\cA\r([gr])// ? qq{ class="$1"} : "";
            # Escape the data so it cannot break or inject markup
            "<td$class>" . encode_entities ($cell, q{<>&"}) . "</td>";
            } @$row;
        print qq{    <tr class="$bg">}, @cells, "</tr>\n";
        return;
    }

    print $hdr, $pfx, " ", $pfx eq "-" ? RED : $pfx eq "+" ? GREEN : "";
    $csv->print (*STDOUT, $row);
    print RESET, "\n";
    $hdr = "";
} # show

# The first line of each file is taken to be a header and is never compared:
# @i starts at 1. If both headers are the same, HTML output shows it once.
# The row arrays are deliberately NOT shifted here: @i, @n and the sentinel
# position all refer to the unshifted arrays, and shifting made the first
# data row of both files invisible to the comparison below.
my @h0 = @{$f[0][0]};
my @h1 = @{$f[1][0]};
if ($opt_h and "@h0" eq "@h1") {
    $i[0]--;            # show () prints row 0 and moves the index back to 1
    show ("H", 0);
}

# Merge-style walk over both (sorted) files, driven by the key in field 0
while ($i[0] <= $n[0] || $i[1] <= $n[1]) {
    my @r0 = @{$f[0][$i[0]]};
    my @r1 = @{$f[1][$i[1]]};

    my $order = $opt_n ? $r0[0] <=> $r1[0] : $r0[0] cmp $r1[0];
    if ($order < 0) { show ("-", 0); next; }    # key only in first  file
    if ($order > 0) { show ("+", 1); next; }    # key only in second file

    # Same key: compare the rows after the requested whitespace folding
    my @v0 = @r0;
    my @v1 = @r1;
    $opt_Z and s/[\r\n\s]+\z//  for @v0, @v1;
    $opt_b and s/[\r\n\s]+/ /g  for @v0, @v1;
    $opt_w and s/[\r\n\s]+//g   for @v0, @v1;

    # The comparison must not live in a bare block together with "next":
    # a bare block is a loop that runs once, so "next" would only leave
    # that block and unchanged rows would fall through to the code below.
    if (join ("\xFF", @v0) eq join ("\xFF", @v1)) {
        $i[0]++;
        $i[1]++;
        next;
    }

    # Mark every differing column (by header width) in both stored rows
    foreach my $c (1 .. $#h0) {
        my ($L, $R) = map { $_ // "" } $r0[$c], $r1[$c];
        $L eq $R and next;
        $f[0][$i[0]][$c] = RED   . $L . RESET;
        $f[1][$i[1]][$c] = GREEN . $R . RESET;
    }
    show ("<", 0);
    show (">", 1);
}

$opt_h and print "  </table>\n</body>\n</html>\n";

# Buffered write errors only surface at close
close STDOUT or die "csvdiff: cannot close output: $!\n";
# Generated by dwww version 1.15 on Sun Jun 30 10:52:01 CEST 2024.