#!/usr/bin/perl # Format a text document. # # The document is assumed to be in a particular style, # having sections of free-form text separated by # lines matching one of several patterns (see "dividing line" # in the comments below). # # Each section is formatted unless flagged to prevent this. # Formatting, which is done using an external command, # consists of arranging the text into a column of a set width. # $format_command='cx'; # my $var = ''; my $sav = $/; $/ = undef; $var = <>; $/ = $sav; use IO::File; # Returns the contents of a file # sub _get_file { my $fh = new IO::File; my $contents = ''; my $name = shift || die "_get_file: no file name"; open($fh, "<$name" )|| die "can't open '$name': $!"; { local $/ = undef; $contents = <$fh>; } close $fh; return $contents; } $tmp1 = "/tmp/tmp1-$$"; $tmp2 = "/tmp/tmp2-$$"; $result = ""; $rrdateline = qr{^(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat) )?(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Nov|Dec) ([0-9][0-9]?)( [12][09][0-9][0-9])?\n}; $rrblankline = qr{^[ \t]*\n}; $rrdashline = qr{^[-=]+[ \t]*\n}; # Split the file into sections # @lines = split "\n", $var; @sections = (); $curr_section = ''; for ($lineno = 0; $lineno < 0+@lines; $lineno++ ) { # get the next line $line = $lines[$lineno] . "\n"; # see if it's a dividing line; # if so, it goes into its own section. # There are >1 styles of dividing lines # don't combine matches into one. if ($line =~ $rrdateline || $line =~ ':$' || $line =~ $rrblankline || $line =~ $rrdashline) { if (length($curr_section) > 0) { push @sections, $curr_section; $curr_section = ''; } push @sections, $line; } else { $curr_section .= $line; } } if (length($curr_section) > 0) { push @sections, $curr_section; } while (0+@sections > 0) { $ch = shift @sections; #print STDERR "ch = ||$ch||"; if ($ch =~ /^%NO_FORMAT%[ \t]*\n/s) { $ch = $'; print $ch; } else { open(my $fh1, '>', $tmp1) or die $!; print $fh1 $ch, "\n"; ## add newline close $fh1; system("$format_command <$tmp1 >$tmp2"); $result = &_get_file($tmp2); chomp $result; ######### remove newline print $result; #print "\n" if $result !~ /\n$/s; } } print "\n"; unlink $tmp1, $tmp2;