# perl script =head1 DESCRIPTION This script was used after entering data for the 2007 Landbird Monitoring Program (LBMP) on the Helena National Forest. The script reads tab-separated data, and copies some columns to later rows, when the later rows have nothing in those columns. Takes a text file with columns separated by tabs. For each row (line), if all of certain columns are blank, the data for the columns is copied from a previous row. =head1 STATUS Used successfully in 2007 =cut ## duplicate these columns (first column has index 0) @dup = ([0,12],[24,31]); sub is_empty { my $thing = shift; return (defined($thing) && $thing =~ /\S/) ? 0 : 1; } ## special processing, given the array of columns for one line ## sub forget_prev { @prev = (); } # Function to run before copying data, for 2007 # $special_1 = sub { my @line = @_; my ($site_idx, $pid_idx, $date_idx) = (1,6,5); my ($abund_idx, $sex_idx, $loc_idx) = (19,20,21); my ($ival_idx) = (13); # The point is identified by a site number and point number my $pid = $line[$pid_idx]; my $site = $line[$site_idx]; if (0 && $site eq '712077') { print STDERR "got : ", join("\t", @line), "\n"; } # For lines with explicit point-id ... # if (! &is_empty($pid) && !&is_empty($site)) { my $prev_pid = $prev[$pid_idx]; my $prev_site = $prev[$site_idx]; # clear the saved data if different point or date # if (! &is_empty($prev_pid) && ! &is_empty($prev_site)) { if ($prev_site ne $site || $prev_pid ne $pid) { &forget_prev; } else { # same point, see if same date # my ($date, $prev_date) = ($line[$date_idx], $prev[$date_idx]); die "no date in line $. :", join('|',@prev) if &is_empty($prev_date); if ($date ne $prev_date) { &forget_prev; } } } # Set the GPS waypoint name if not already # if (0) { # not in 2007 if ( &is_empty( $line[$waypoint_idx] ) ) { $pid =~ /(\d+)([A-Z]?)/ or die "bad point id '$pid' in line $. :", join('|',@line); my ($n,$l) = ($1,(defined $2 ? $2 : '')); my $waypoint = sprintf("%04d",$n) . $l; $line[$waypoint_idx] = $waypoint; } } } # # set standard values for some fields, if empty # $line[$abund_idx] = 1 if (&is_empty($line[$abund_idx])); $line[$sex_idx] = 'U' if (&is_empty($line[$sex_idx])); $line[$loc_idx] = 0 if (&is_empty($line[$loc_idx]) && $line[$ival_idx] != 5); # interval 5 means before or after the count period return @line; }; @prev = (); LOOP: while (<>) { #print STDERR $., ':', $_, ':'; if ($. == 1) { print $_; next LOOP; } chomp; @f = split "\t"; @f = &$special_1(@f); for ($j=0; $j<@dup; $j++) { $section = $dup[$j]; ($s,$e) = @$section[0,1]; #print $s," - ", $e, "\n"; die "bad interval $s,$e" if $s>=$e; $copy_it = 1; CHECK: for ($k=$s; $k<=$e; $k++) { #if (defined($f[$k]) && $f[$k] =~ /\w/) { if ( ! &is_empty($f[$k]) ) { $copy_it = 0; last CHECK; } } #print STDERR "line $.: cols=",@f+0," :", @f, "\n"; #print "slice [$s,$e]: ",join(',',@f[$s,$e]), "\n"; if ($copy_it) { @f[$s .. $e] = @prev[$s .. $e]; } } @f = map { defined($_) ? $_ : '' } @f; @prev = @f; print join "\t",@f, "\n"; }