dwww Home | Show directory contents | Find package

#!/usr/bin/perl

# Print information about forms and their controls present in the HTML.
# See also HTML::Form module

use strict;
use warnings;

use HTML::PullParser ();
use HTML::Entities qw(decode_entities);
use Data::Dumper qw(Dumper);

my @FORM_TAGS = qw(form input textarea button select option);

my $p = HTML::PullParser->new(
    file        => shift || "xxx.html",
    start       => 'tag, attr',
    end         => 'tag',
    text        => '@{text}',
    report_tags => \@FORM_TAGS,
) || die "$!";

# a little helper function
sub get_text {
    my ($p, $stop) = @_;
    my $text;
    while (defined(my $t = $p->get_token)) {
        if (ref $t) {
            $p->unget_token($t) unless $t->[0] eq $stop;
            last;
        }
        else {
            $text .= $t;
        }
    }
    return $text;
}

my @forms;
while (defined(my $t = $p->get_token)) {
    next unless ref $t;    # skip text
    if ($t->[0] eq "form") {
        shift @$t;
        push(@forms, $t);
        while (defined(my $t = $p->get_token)) {
            next unless ref $t;    # skip text
            last if $t->[0] eq "/form";
            if ($t->[0] eq "select") {
                my $sel = $t;
                push(@{$forms[-1]}, $t);
                while (defined(my $t = $p->get_token)) {
                    next unless ref $t;    # skip text
                    last if $t->[0] eq "/select";

                    #print "select ", Dumper($t), "\n";
                    if ($t->[0] eq "option") {
                        my $value = $t->[1]->{value};
                        my $text  = get_text($p, "/option");
                        unless (defined $value) {
                            $value = decode_entities($text);
                        }
                        push(@$sel, $value);
                    }
                    else {
                        warn "$t->[0] inside select";
                    }
                }
            }
            elsif ($t->[0] =~ /^\/?option$/) {
                warn "option tag outside select";
            }
            elsif ($t->[0] eq "textarea") {
                push(@{$forms[-1]}, $t);
                $t->[1]{value} = get_text($p, "/textarea");
            }
            elsif ($t->[0] =~ m,^/,) {
                warn "stray $t->[0] tag";
            }
            else {
                push(@{$forms[-1]}, $t);
            }
        }
    }
    else {
        warn "form tag $t->[0] outside form";
    }
}

print Dumper(\@forms), "\n";

Generated by dwww version 1.15 on Sun Jun 30 10:07:45 CEST 2024.