# perl script =head1 DESCRIPTION This script was used after entering data for the 2008 Landbird Monitoring Program (LBMP) on the Helena National Forest. The script reads tab-separated data, and copies some columns to later rows, when the later rows have nothing in those columns. Takes a text file with columns separated by tabs. For each row (line), if all of certain columns are blank, the data for the columns is copied from a previous row. =head1 STATUS Used successfully in 2007... =cut ## duplicate these columns (first column has index 0) @dup = ([0,12],[23,33]); sub is_empty { my $thing = shift; return (defined($thing) && $thing =~ /\S/) ? 0 : 1; } ## special processing, given the array of columns for one line ## sub forget_prev { @prev = (); } # Function to run before copying data, for 2008 # $special_1 = sub { my @line = @_; my ($site_idx, $pid_idx, $date_idx) = (4,6,3); my ($abund_idx, $sex_idx, $loc_idx) = (14,15,20); my ($ival_idx) = (21); my ($waypoint_idx, $sitename_idx) = (23, 5); # The point is identified by a site identifier and point number my $pid = $line[$pid_idx]; my $site = $line[$site_idx]; my $sitename = $line[$sitename_idx]; # Set the point-id to "N/A" for certain projects # if (!&is_empty($sitename) && $sitename =~ /^HAY PEGGY|^CLANCY-UNIONVILLE/) { if (&is_empty($pid)) { $pid = $line[$pid_idx] = 'N/A'; } elsif ($pid !~ qr,N/A,) { die "bad point/stop identifier in line $."; } } # For lines with explicit point-id ... # if (! &is_empty($pid) && !&is_empty($site)) { my $prev_pid = $prev[$pid_idx]; my $prev_site = $prev[$site_idx]; # clear the saved data if different point or date # if (! &is_empty($prev_pid) && ! &is_empty($prev_site)) { if ($prev_site ne $site || $prev_pid ne $pid) { &forget_prev; } else { # same point, see if same date # my ($date, $prev_date) = ($line[$date_idx], $prev[$date_idx]); die "no date in line $. :", join('|',@prev) if &is_empty($prev_date); if ($date ne $prev_date) { &forget_prev; } } } # Set/modify the GPS waypoint name if not already # if (1) { # true in 2008 $waypoint = $line[$waypoint_idx]; if ( &is_empty( $waypoint ) && !&is_empty( $site )) { if ($pid eq 'N/A') { die "bad site in line $." if ($site !~ /^[A-Z]/); $waypoint = $site; } else { print STDERR "waypoint_idx='$waypoint_idx', waypoint name '$waypoint',site='$site',pid='$pid'\n"; die "no waypoint name in line $.:" . join(',',@line); } } # Modify the waypoint name if numeric if (!&is_empty( $waypoint ) && $waypoint =~ /^[\d][\d]*/) { $waypoint = '#' . $waypoint; } $line[$waypoint_idx] = $waypoint; } } # # set standard values for some fields, if empty # $line[$abund_idx] = 1 if (&is_empty($line[$abund_idx])); $line[$sex_idx] = 'U' if (&is_empty($line[$sex_idx])); $line[$loc_idx] = 0 if (&is_empty($line[$loc_idx]) && $line[$ival_idx] != 5); # interval 5 means before or after the count period return @line; }; @prev = (); LOOP: while (<>) { #print STDERR $., ':', $_, ':'; if ($. == 1) { print $_; next LOOP; } chomp; @f = split "\t"; @f = &$special_1(@f); for ($j=0; $j<@dup; $j++) { $section = $dup[$j]; ($s,$e) = @$section[0,1]; #print $s," - ", $e, "\n"; die "bad interval $s,$e" if $s>=$e; $copy_it = 1; CHECK: for ($k=$s; $k<=$e; $k++) { #if (defined($f[$k]) && $f[$k] =~ /\w/) { if ( ! &is_empty($f[$k]) ) { $copy_it = 0; last CHECK; } } #print STDERR "line $.: cols=",@f+0," :", @f, "\n"; #print "slice [$s,$e]: ",join(',',@f[$s,$e]), "\n"; if ($copy_it) { @f[$s .. $e] = @prev[$s .. $e]; } } @f = map { defined($_) ? $_ : '' } @f; @prev = @f; print join "\t",@f, "\n"; }