dwww Home | Show directory contents | Find package

#!/usr/bin/perl

use strict;
use warnings;

# csvdiff: Show differences between CSV files
#          (m)'19 [05 Mar 2019] Copyright H.M.Brand 2009-2023

our $VERSION = "1.02 - 20190305";

sub usage {
    my $err = shift and select STDERR;
    print "usage: csvdiff [--no-color] [--html] [-w|-b|-Z] file.csv file.csv\n",
        "  provides colorized diff on sorted CSV files\n",
        "  assuming first line is header and first field is the key\n",
        "          --no-color               do not use colors\n",
        "    -h    --html                   produce HTML output\n",
        "    -w    --ignore-all-space       ignore all whistespace is all fields\n",
        "    -b    --ignore-space-change    ignore whitespace length changes\n",
        "    -Z    --ignore-trailing-space  ignore trailing whitespace per field\n",
        "    -o F  --output=F               send output to file F\n";
    exit $err;
    } # usage

use Getopt::Long qw(:config bundling nopermute );
my $opt_c = !$ENV{NO_COLOR};
GetOptions (
    "help|?"            => sub { usage (0); },
    "V|version"         => sub { print "csvdiff [$VERSION]\n"; exit 0 },

    "w|ignore-all-space!"               => \my $opt_w,
    "b|ignore-ws|ignore-space-change!"  => \my $opt_b,
    "Z|ignore-trailing-space!"          => \my $opt_Z,

    "c|color|colour!"                   => \   $opt_c,
    "h|html"                            => \my $opt_h,

    "o|output=s"                        => \my $opt_o,
    ) or usage (1);

@ARGV == 2 or usage (1);

if ($opt_o) {
    open STDOUT, ">", $opt_o or die "$opt_o: $!\n";
    }

use HTML::Entities;
use Term::ANSIColor qw(:constants);
use Text::CSV_XS;
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 0 });

if ($opt_h) {
    binmode STDOUT, ":encoding(utf-8)";
    print <<EOH;
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
  <title>CFI School updates</title>
  <meta name="Generator"     content="perl $]" />
  <meta name="Author"        content="@{[scalar getpwuid $<]}" />
  <meta name="Description"   content="CSV diff @ARGV" />
  <style type="text/css">
    .rd { background:   #ffe0e0;        }
    .gr { background:   #e0ffe0;        }
    .hd { background:   #e0e0ff;        }
    .b0 { background:   #e0e0e0;        }
    .b1 { background:   #f0f0f0;        }
    .r  { color:        red;            }
    .g  { color:        green;          }
    </style>
  </head>
<body>

<h1>CSV diff @ARGV</h1>

<table>
EOH
    $::{RED}    = sub { "\cA\rr";       };
    $::{GREEN}  = sub { "\cA\rg";       };
    $::{RESET}  = sub { "";             };
    }
elsif (!$opt_c) {
    $::{$_} = sub { "" } for qw( RED GREEN RESET );
    }

my @f;
my $opt_n = 1;
foreach my $x (0, 1) {
    open my $fh, "<", $ARGV[$x] or die "$ARGV[$x]: $!\n";
    my $n = 0;
    while (1) {
        my $row = $csv->getline ($fh) or last;
        @$row and push @{$f[$x]}, $row;
        $n++ && $row->[0] =~ m/\D/ and $opt_n = 0;
        }
    }
my @n   = map { $#{$f[$_]} } 0, 1;
my @i   = (1, 1);
my $hdr = "# csvdiff   < $ARGV[0]    > $ARGV[1]\n";

$f[$_][1+$n[$_]][0] = $opt_n ? 2147483647 : "\xff\xff\xff\xff" for 0, 1;

my %cls;
   %cls = (
    "b" => 0,
    "-" => sub { "rd" },
    "+" => sub { "gr" },
    "H" => sub { "hd" },
    "<" => sub { $cls{b} ^= 1; "b$cls{b}" },
    ">" => sub { "b$cls{b}" },
    );

sub show {
    my ($pfx, $x) = @_;
    my $row = $f[$x][$i[$x]++] or return;

    if ($opt_h) {
        my $bg = $cls{$pfx}->();
        print qq{  <tr class="$bg">},
            (map{"<td".(s/^\cA\r([gr])//?qq{ class="$1"}:"").">$_</td>"}@$row),
            "</tr>\n";
        return;
        }

    print $hdr, $pfx, " ", $pfx eq "-" ? RED : $pfx eq "+" ? GREEN : "";
    $csv->print (*STDOUT, $row);
    print RESET, "\n";
    $hdr = "";
    } # show

# Skip first line of both are same: it probably is a header
my @h0 = @{$f[0][0]};
my @h1 = @{$f[1][0]};
if ("@h0" eq "@h1") {
    if ($opt_h) {
        $i[0]--;
        show ("H", 0);
        }
    shift @{$f[0]};
    shift @{$f[1]};
    }

my $x = 0;
while ($i[0] <= $n[0] || $i[1] <= $n[1]) {
    my @r0 = @{$f[0][$i[0]]};
    my @r1 = @{$f[1][$i[1]]};

    if ($opt_n) {
        $r0[0] <  $r1[0] and show ("-", 0), next;
        $r0[0] >  $r1[0] and show ("+", 1), next;
        }
    else {
        $r0[0] lt $r1[0] and show ("-", 0), next;
        $r0[0] gt $r1[0] and show ("+", 1), next;
        }

    my @v0 = @r0;
    my @v1 = @r1;
    $opt_Z and s/[\r\n\s]+\z// for @v0, @v1;
    $opt_b and s/[\r\n\s]+/ /g for @v0, @v1;
    $opt_w and s/[\r\n\s]+//g  for @v0, @v1;

    {   local $" = "\xFF";
        "@v0" eq "@v1" and $i[0]++, $i[1]++, next;
        }

    foreach my $c (1 .. $#h0) {
        my ($L, $R) = map { $_ // "" } $r0[$c], $r1[$c];
        $L eq $R and next;
        $f[0][$i[0]][$c] = RED   . $L . RESET;
        $f[1][$i[1]][$c] = GREEN . $R . RESET;
        }

    show ("<", 0);
    show (">", 1);
    }

$opt_h and print "  </table>\n</body>\n</html>\n";

close STDOUT;

Generated by dwww version 1.15 on Sun Jun 30 10:52:01 CEST 2024.