=head1 DESCRIPTION

Print the Nth table found in HTML text.

=head1 EXAMPLES

Print the 4th table found in the file x.html:

    perl -w nth-table.pl table=4 infile=x.html

111
END
111
=cut

use strict;
use warnings;

# Returns the entire contents of the named file as a single string.
#
# Dies with "can't open '<name>': <reason>\n" when the file cannot be opened.
#
sub getfile {
    my $name = shift;

    # Three-argument open: the file name is never parsed for a mode or a pipe.
    open(my $fh, '<', $name)
        or die "can't open '$name': $!\n";

    # Slurp mode for this scope only; "local" restores $/ on every exit path.
    local $/ = undef;
    my $content = <$fh>;
    close($fh);

    return $content;
}

# Returns the entire contents of the standard input stream as a single
# string (the empty string when nothing could be read).
#
sub slurp {
    local $/ = undef;
    my $content = <STDIN>;

    return defined $content ? $content : '';
}

# Given a table number (1-based) and HTML text,
# return a three-element list: (before, table, after)
#
#   before - the HTML preceding the indicated table
#   table  - the indicated table, from its opening tag through the matching
#            closing tag, including any tables nested inside it
#   after  - the HTML following the indicated table
#
# Tables are counted by opening tag in document order, so a nested table
# counts as a table in its own right.  Tag matching is case-insensitive.
#
# Dies when the table number is not a positive integer, when fewer than
# $num opening tags exist, or when the table is never closed.
#
sub get_table {
    my ($num, $html) = @_;

    die "bad table number"
        unless defined $num && $num =~ /\A\d+\z/ && $num >= 1;

    my $tail  = defined $html ? $html : '';   # text still to be scanned
    my $pre   = '';
    my $post  = '';
    my $table = '';

    # find the start of the table
    #
    for (my $seen = 1; $seen <= $num; $seen++) {
        if ($tail !~ /<table[>\s]/i) {
            die "can't find table $num";
        }

        # @- and @+ hold the offsets of the last successful match.
        my ($from, $to) = ($-[0], $+[0]);
        my $before = substr($tail, 0, $from);
        my $match  = substr($tail, $from, $to - $from);
        $tail = substr($tail, $to);

        if ($seen < $num) {
            # An earlier table: its opening tag belongs to the "before" text.
            $pre .= $before . $match;
        }
        else {
            $pre  .= $before;
            $table = $match;
        }
    }

    # find the end of the table; scanning past embedded tables
    #
    my $ends_wanted = 1;    # closing tags still needed to balance the nesting
    while ($ends_wanted) {
        if ($tail !~ m{(<table[>\s])|(</table>)}i) {
            die "can't find end of table $num";
        }

        # Read the capture and the offsets before anything else can
        # overwrite the match variables.
        my $is_open = defined $1;
        my ($from, $to) = ($-[0], $+[0]);

        $table .= substr($tail, 0, $to);
        my $after = substr($tail, $to);

        if ($is_open) {
            $ends_wanted++;
            $tail = $after;
        }
        elsif (--$ends_wanted > 0) {
            # Closed a nested table; the indicated table is still open.
            $tail = $after;
        }
        else {
            $post = $after;
            $tail = '';
        }
    }

    return ($pre, $table, $post);
} # end sub

# Program entry point.  Parses name=value command-line arguments, reads the
# HTML from the named file (or from standard input when no file, or "-", is
# given), and prints the selected table.
#
# Recognized arguments:
#   table=N     which table to print (default 1); a non-numeric value dies
#   infile=F    read HTML from file F instead of standard input
#   all=V       when true, also print the HTML before and after the table
#   pre=S       text printed immediately before the table
#   post=S      text printed immediately after the table
#   newline=N   number of newlines printed at the very end
#   debug=V     accepted; the value is not used by the code below
#
# Any other argument is ignored.
#
sub main {
    my @args = @_;

    my $table_num   = 1;
    my $infile      = '';
    my $all_flag    = 0;
    my $pre_marker  = '';
    my $post_marker = '';
    my $newline     = 0;
    my $debug       = 1;

    # command line
    for my $arg (@args) {
        if    ($arg =~ /^debug=(.+)$/)   { $debug = $1; }
        elsif ($arg =~ /^table=(\d+)$/)  { $table_num = $1; }
        elsif ($arg =~ /^table=/)        { die "bad arg '$arg'"; }
        elsif ($arg =~ /^infile=(.+)$/)  { $infile = $1; }
        elsif ($arg =~ /^all=(.+)$/)     { $all_flag = $1; }
        elsif ($arg =~ /^pre=(.+)$/s)    { $pre_marker = $1; }
        elsif ($arg =~ /^post=(.+)$/s)   { $post_marker = $1; }
        elsif ($arg =~ /^newline=(.+)$/) { $newline = $1; }
    }

    my $hh = ($infile ne '' && $infile ne '-')
        ? getfile($infile)
        : slurp();

    my ($pre, $table, $post) = get_table($table_num, $hh);

    print $pre if $all_flag;
    print $pre_marker;
    print $table;
    print $post_marker;
    print $post if $all_flag;

    for (my $i = 0; $i < $newline; $i++) {
        print "\n";
    }

    return;
}

### Begin program ###

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without reading standard input.
main(@ARGV) unless caller;

1;