################################################################################
#
# Example "peg_ini.pl".
#
################################################################################

use strict;
use warnings;

my $Is_Win32 = $^O eq 'MSWin32';

# Declare global vars set/used by peg.
our ($Code_on_match2, %Env, @Exclude_dirs, @Exclude_exts, $HOME_dir, @Ini_files,
    $Newline, %Peg_longopt, %Peg_p, %Peg_Q, %Peg_z, @Perlexpr_mung,
    $Verbose);

sub Warn
{
    my $msg = join '', @_;
    $msg =~ s/\015?\012\z//; # chomp_
    print STDERR "peg_ini: $msg\n";

} # Warn


sub Die
{
    Warn @_;
    exit(2);

} # Die

################################################################################

# Given a FILENO return the corresponding list of matching files.
# Handles: (a) 22 (b) -1 (c) 1,2,3 (d) 1-3 (e) 1 2 3 (f) 1..3 (g) 1-2,3 etc.
{
    my (@matches, $matches_are_fullpaths);

    sub n2file
    {
	my $get_fullpaths;  # Default to relative paths, but:
	if (ref $_[0]) {    # n2file(\0, ...) := return full paths
	    $get_fullpaths = 1;
	    shift;
	}
	if ($get_fullpaths xor $matches_are_fullpaths) {
	    $matches_are_fullpaths = $get_fullpaths;
	    @matches = ();
	}
	unless (@matches) {
	    @matches = last_matches($get_fullpaths) or die "no matches found";
	}
	my @n;
	foreach my $fileno (@_) {
	    foreach my $r (split /[,\s]+/, $fileno) {
		# Assume "22-" or "22.." indicates 'to the end'.
		$r .= "0" if $r =~ /^\d+(?:-|\.\.)$/;

		if ($r =~ /^(\d+)(?:-|\.\.)(\d+)$/) {
		    my ($from, $to) = ($1, $2);
		    # Assume "44-7" means "44-47".
		    if ($from >= 10 and $to <= 9 and $from =~ /(\d)$/ and $to > $1) {
			$to += $from - ($from % 10);
		    }
		    # Assume "22..0" means from 22 to the end.
		    if ($to == 0) {
			$to = @matches;
		    }
		    die "bad range: $r" if $from > @matches or $from > $to;
		    $to = @matches if $to > @matches;
		    push @n, $from..$to;
		} elsif ($r =~ /^-?\d+$/) {
		    push @n, $r;
		} else {
		    die "bad fileno: $fileno";
		}
	    }
	}
	die "no FILENO found" unless @n;
	my @files;
	foreach my $n (@n) {
	    my $idx = $n == 0 ? 0 : $n > 0 ? $n - 1 : $n;
	    die "fileno $n out of range" if $idx >= @matches or $idx < -@matches;
	    push @files, $matches[$idx];
	}
	return wantarray ? @files : $files[0];
    }
}

################################################################################

# Convert a 'PERLEXPR' to a Perl expression.
sub make_expr {
    my $pe = shift;
# Old style:
#    if ($pe =~ /^[\w\s\-\.\,\'\:\;\#]*$/) { # simple
#	$pe = "/" . quotemeta($pe) . "/";
#    }
    unless ($pe =~ m{^[\+\$]|/}) {
	$pe = "/" . $pe . "/";
    }
    eval "if (0 and ($pe)) {}";
    $@ and die "bad PERLEXPR: $pe\n$@";
    return $pe;
}

################################################################################

=head2 B<--opt [LONGOPT]>

Show help for peg longopts.

If a B<LONGOPT> is specified then just the documentation for that longopt is
shown; otherwise all the longopts are displayed along with their first line
of POD.

It assumes that longopts are defined in the following way:

    =head2 B<--opt-name>

    A brief one line description.

    More detailed description here eg. B<--opt-name> does I<x y z>.
    etc. etc.

    =cut

    # Immediately followed by its definition.
    $Peg_longopt{'opt-name'} = sub {
        my ($argv_ref, $files_ref) = @_;
        # ...
    };

If the B<-V> verbose option is also used, then the Perl code for the longopt
is also shown.

=cut

$Peg_longopt{opt} = sub {
    my $argv_ref = shift;
    my $opt = shift @$argv_ref;
    if (@$argv_ref and $argv_ref->[0] eq '-V') { # Handle trailing -V here.
	++$Verbose;
	shift @$argv_ref;
    }
    # Build up hashes containing the POD and code for all the longopts defined
    # in the ini files. This assumes a consistent coding style!
    my (%pod, %code);
    foreach my $f (@Ini_files) {
	open my $fin, "<", $f or die "can't open $f: $!";
	while (<$fin>) {
	    if (/^=head2 B<--?([\w-]+)/) {
		my $o = $1;
		{ do {
		    push @{ $pod{$o} }, $_;
		    last if /^=cut/;
		} while (<$fin>) }
	    }
	    if (/^\$Peg_longopt\{['"]?([\w-]+)/) {
		my $o = $1;
		{ do {
		    push @{ $code{$o} }, $_;
		    last if /^(\$Peg_longopt.*)?\};$/;
		} while (<$fin>) }
	    }
	}
    }
    if ($opt) {
	$opt =~ s/^--?//;
	die "no documentation found for '$opt'" unless exists $pod{$opt};
	print "\n", pod2txt(join '', @{$pod{$opt}});
	print "\n# Perl code =>\n\n", @{$code{$opt}} if $Verbose;
    } else {
	print "\n# Peg longopts =>\n\n";
	foreach my $opt (sort keys %Peg_longopt) {
	    next if $opt =~ /^help$/; # skip peg's builtin longopts.
	    my $dots = '.' x (12 - length($opt));
	    my $descr = exists $pod{$opt} ? ${$pod{$opt}}[2] : '';
	    $descr =~ s/\015?\012\z//; # chomp_
	    $descr =~ s/\b[A-Z]<([^>]+)>/$1/g; # remove POD escapes.
	    print "   $opt $dots $descr\n";
	}
    }
    exit;
};

# Format POD into raw text.
sub pod2txt {
    my $txt = shift;
    require Pod::PlainText;
    my $parser = Pod::PlainText->new(indent => 4, sentence => 0, width => 72);
    open(my $txt_fh, "<", \$txt) and
    open(my $out_fh, ">", \my $out_txt) or die "can't open: $!";
    $parser->parse_from_filehandle($txt_fh, $out_fh);
    $out_txt =~ s/\015?\012\z//; # chomp_
    return $out_txt;
}

################################################################################

=head2 B<--find FINDARG>

Find files matching the given argument.

If the FINDARG is a simple string then files whose tail matches it
are printed. Otherwise the FINDARG is taken as a PERLEXPR passed to B<-p>.
For example, C<peg --find peg>, C<peg --find .pm> or C<peg --find /foo/>.

=cut

$Peg_longopt{find} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected TAILMATCH or /PATTERN/ argument";
    my $p_arg = shift @$argv_ref;
    if ($p_arg =~ /^[\w\.\-]{2,}/) {
	$p_arg = "m," . quotemeta($p_arg) . "\[^\\\\/]*\$,i";
    }
    Warn "-l +1 -p $p_arg";
    unshift @$argv_ref, '-Y,p', '+1', '-ddlnp', $p_arg;
};

################################################################################

=head2 B<--pager>

Pipe output thro a pager.

This can be disabled with either B<--nopager> or B<--pagerx>.

=cut

$Peg_longopt{pager} = sub {
    my $argv_ref = shift;
    return if ($::Already_paging
	    or grep /^--?(nopager|pagerx)$/, @$argv_ref
	    or ! -t STDOUT);
    $::Already_paging = 1;
    unshift @$argv_ref, '-##';
    my $less;
    foreach my $f ("C:/cygwin/bin/less.exe", "/usr/bin/less") {
	if (-x $f) {
	    $less = $f;
	    last;
	}
    }
    defined $less or die "failed to find a 'less' pager";
    # less options:
    #   -m = long-prompt. Shows "byte 1234" instead of ":".
    #   -F = Quit if entire file fits on first screen.
    #   -R = Output "raw" control characters.
    #   -X = Don't use termcap init/deinit strings.
    open(PAGER_OUT, '|-', "$less -mFRX")
	or die "unable to pipe STDOUT via less: $!\n";
    *STDOUT = \*PAGER_OUT;
    *STDERR = \*PAGER_OUT;
};

################################################################################

=head2 B<--pagerx>

Option to comment out --pager on the cmdline.

=cut

$Peg_longopt{pagerx} = sub {};

################################################################################

=head2 B<--loop PERLCODE>

Run some I<perl> code for each previously matched file.

The following Perl variables are defined:

    $_  filename
    $f  filename
    $b  backslashed version of filename
    $d  directory
    $e  escaped version of filename
	    eg. "a/b c/Copy of d.pl" -> "a_b_c_Copy_of_d.pl"
    $E  escaped version of filename in same directory
	    eg. "a/b c/Copy of d.pl" -> "a/b c/Copy_of_d.pl"
    $t  tail of filename eg. "Copy of d.pl"

=cut

$Peg_longopt{loop} = sub {
    my $argv_ref = shift;
    my $code = shift @$argv_ref;
    ($code and !@$argv_ref) or unshift(@$argv_ref, '--opt', 'loop'), return;
    $code =~ /\bunlink[^\(]/ and die "unlink? Use unlink(...) to override";
    foreach my $f (last_matches()) {
	(my $b = $f) =~ tr|/|\\|;
	(my $d = $f) =~ s|(/)?[^/]+$| $1 ? '' : '.' |e;
	(my $e = $f) =~ s|[^\w\.\-]|_|g;
	(my $t = $f) =~ s|^(.*\/)||;
	my $Ed = $1 || '';
	(my $Et = $t) =~ s|[^\w\.\-]|_|g;
	my $E = "$Ed$Et";
	$_ = $f;
	print "\n=> $f\n";
	no strict; # ???
	eval $code;
	$@ and die "error with code: $code\n", $@;
    }
    exit;
};

################################################################################

=head2 B<--vim FILENO>

Open one of the last matched files in vim.

=cut

$Peg_longopt{vim} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected FILENO";
    my @files;
    foreach my $file (n2file(@$argv_ref)) {
	print "# $file\n";
	push @files, ($file =~ /\s/) ? "\"$file\"" : $file;
    }
    system "vim " . join " ", @files;
    exit;
};

################################################################################

=head2 B<--ifdef>

Get full C/C++ #if context.

=cut

$Peg_longopt{ifdef} = sub {
    my $argv_ref = shift;
    # Turn on both context matchers, but don't match.
    # We then set the #ifdef context into $Context_line2 using -P code.
    unshift @$argv_ref, "-z", "+0", "-zz", "+0";
    $Env{PEG_CONTEXT_FORMAT2} = '$_';
    $Env{PEG_Z_INDEPENDENT} = 1;
    unshift @$argv_ref, "-PPPPP", <<'EOT';
	@::Cxt = ();
EOT
    unshift @$argv_ref, "-P", <<'EOT';
	# PEG_NEWLINE_NEUTRAL
	# Notes.
	# * some compilers allow whitespace preceding the '#' in preprocessor lines.
	# * does not handle backslash extended lines.
	if (/^\s*\#/) {
	    my $new_cxt = 1;
	    if (/^\s*\#\s*if(n?def)?\b/) {
		push @::Cxt, [$_, $.];
	    }
	    elsif (/^\s*\#\s*elif\b/) {
		$::Cxt[$#::Cxt] = [$_, $.];
	    }
	    elsif (/^(\s*\#\s*else)\b/) {
		my $else_line = $1;
		if (@::Cxt) {
		    my $if_line = $::Cxt[$#::Cxt]->[0];
		    if ($if_line !~ /^\s*\#\s*elif/) {
			$if_line =~ s/[\n\r\t ]+\z//;
			$else_line = "$else_line  /* $if_line */$Newline";
		    } else {
			$else_line = $_;
		    }
		    $::Cxt[$#::Cxt] = [$else_line, $.];
		} else {
		    # Found a #else before seeing a #if !
		    $new_cxt = 0;
		}
	    }
	    elsif (/^\s*\#\s*endif\b/) {
		pop @::Cxt;
	    }
	    else {
		$new_cxt = 0;
	    }
	    # Context_lineno2 is set to ensure correct ordering (handled by peg).
	    if ($new_cxt) {
		if (@::Cxt) {
		    $Context_line2 = '';
		    for (@::Cxt) { # trim trailing whitespace, and use native newline
			$_->[0] =~ s/[ \t\r\n]+\z//;
			$_->[0] .= $Newline;
		    }
		    # Minimize padding to ensure #'s aligned.
		    my $max_lineno_len = 1;
		    foreach my $cxt_elem (@::Cxt) {
			my (undef, $lineno) = @$cxt_elem;
			my $len = length $lineno;
			$max_lineno_len = $len if $len > $max_lineno_len;
		    }
		    foreach my $cxt_elem (@::Cxt) {
			my ($line, $lineno) = @$cxt_elem;
			my $pad = ' ' x (1 + $max_lineno_len - length($lineno));
			$line =~ s/^\s+//;
			$Context_line2 .= "#### ($lineno)$pad$line";
		    }
		    $Context_lineno2 = $.;
		} elsif ($Printed_Context_line2) {
		    $Context_line2 = "#### *none*$Newline";
		    $Context_lineno2 = $.;
		} else {
		    $Context_line2 = undef;
		}
		if (defined $Printed_Context_line2 and defined $Context_line2
			and $Context_line2 eq $Printed_Context_line2) {
		    # Ensure we don't reprint the same context eg.
		    # #if CXT
		    # ...match1
		    # #if SOMETHINGELSE
		    # #endif
		    # ...match2          // do not repeat CXT
		    #
		    $Context_line2 = undef;
		}
	    }
	}

EOT
};

################################################################################

=head2 B<--pod>

Only search B<POD>.

=cut

$Peg_longopt{pod} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	next unless /^=[a-z]/ .. /^=cut/; # POD can start with head1/item/pod etc.
EOT
};

################################################################################

=head2 B<--ipc>

Ignore Perl comments & POD.

=cut

$Peg_longopt{ipc} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	next if /^\#/;
	next if /^=[a-z]/ .. /^=cut/; # POD can start with head1/item/pod etc.
	last if /^__(?:END|DATA)__/;
	s/(?<!\\)\#.*$//; # strip Perl comments from search string
EOT
};

################################################################################

=head2 B<--icc>

Ignore C comments.

XXX not 100% accurate... but works in the typical cases. Needs a lexer
style solution to handle cases such as C<"a /* comment in a string ">.

=cut

$Peg_longopt{icc} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-PPPPP' => <<'EOT';
	$In_comment = 0;
EOT
    unshift @$argv_ref, '-P' => <<'EOT';
	if ($In_comment) {
	    if (s|^.*?\*/||) {
		$In_comment = 0;
	    } else {
		next;
	    }
	}
	s|/\*.*?\*/||g; # /* ... */
	s|//.*$||;      # // ...
	if (s|/\*.*||) {
	    $In_comment = 1;
	    # NB. still search non comment part of line.
	}
EOT
};

################################################################################

=head2 B<--tag>

Print a I<tag> for each match that can be used by B<--tagv>.

Each matched line is prefixed with a tag consisting of alphabetic characters.
This tag can then be passed to B<--tagv> to view the matched line in F<vim>.

Use B<--notag> to override this.

=cut

my $tagfile = $HOME_dir . ".peg_tags";

$Peg_longopt{tag} = sub {
    my $argv_ref = shift;
    return if grep /^--?(notag|tagv)$/, @$argv_ref; # cf. peg -tag foo -tagv a
    return if $::Tag; # guard against "peg --tag --tag ..."
    unshift @$argv_ref, "-PP" => "\n\t# PEG_NO_FORK\n"; # since $::Tag needs to be global!
    open TAGFILE, ">", $tagfile or die "can't write to $tagfile: $!";
    eval "END { close TAGFILE }";
    if (grep m|\bpager\b|, @ARGV) {
	select((select(\*TAGFILE), $| = 1)[0]); # autoflush
    }
    print TAGFILE cwd(), "\n"; # first line is the cwd.
    $::Tag = 'a';
    # NB. tags may have gaps if -oo is used.
    $Code_on_match2 = <<'EOT';
	BEGIN { local $_ = 'x'; colorall('X', 'dm'); $::Tagcol = $Col{'dm'} }; # hack
	my $tag = $::Tag++;
	print TAGFILE "$tag:$.:$File\n";
	print $::Tagcol, $tag, ':', $Col_Reset;
EOT
};

################################################################################

=head2 B<--tagv TAG>

View a tagged line in F<vim>.

See B<--tag>.

=cut

$Peg_longopt{tagv} = sub {
    my $argv_ref = shift;
    my $tag = shift @$argv_ref or die "expected TAG argument";
    $tag =~ s/:$//;
    $tag =~ /^[a-z]+$/ or die "wonky tag argument: $tag";
    open my $fin, "<", $tagfile or die "can't open $tagfile: $!";
    my $cwd = <$fin>;
    chomp $cwd;
    my ($file, $lineno);
    while (<$fin>) {
	if (/^$tag:/og) {
	    /(\d+):(.+)/g or die "unexpected tag file format: $_";
	    ($lineno, $file) = ($1, $2);
	    last;
	}
    }
    die "match not found for $tag" unless $file;
    unless ($file =~ m|^(\w:)?[\\\/]|) {
	$file = $cwd . $file; # NB. cwd ends in a slash
    }
    close $fin;
    print "# ($lineno) $file\n";
    system "vim +$lineno \"$file\"";
    exit;
};

################################################################################

=head2 B<--and PERLEXPR>

Only test lines matching PERLEXPR.

=cut

$Peg_longopt{'and'} = sub { _andnot(1, @_) };

################################################################################

=head2 B<--not PERLEXPR>

Do not test lines matching PERLEXPR.

It is exactly equivalent to C<--and !(PERLEXPR)>.

=cut

$Peg_longopt{'not'} = sub { _andnot(0, @_) };

################################################################################

sub _andnot {
    my $and = shift;
    my $argv_ref = shift;
    @$argv_ref or die "expected PERLEXPR";
    my $pe = shift @$argv_ref;
    $pe = make_expr($pe);
    push @Perlexpr_mung, sub {
	my $perlexpr_ref = shift;
	# NB. the order of expressions below ensures it is
	# the original PERLEXPR that gets colored.
	$$perlexpr_ref = $and
	    ?  "($pe) and $$perlexpr_ref"
	    : "!($pe) and $$perlexpr_ref";
    };
}

################################################################################

=head2 B<--idir DIR> or B<--idir DIR1:DIR2:...>

Exclude the given directory names from being searched.

Adds the given directory names to C<@Exclude_dirs>.

=cut

$Peg_longopt{'idir'} = sub {
    my ($argv_ref, $files_ref) = @_;
    @$argv_ref or die "expected DIR list";
    my @dir_names = split /:+/, shift @$argv_ref;
    if (grep /[\\\/]/, @dir_names) {
	die "directory paths not supported; use -p instead";
    }
    push @Exclude_dirs, @dir_names;
};

################################################################################

=head2 B<--perl>

Only process Perl files.

Files are adjudged to be Perl if they have a B<pl> or B<pm> extension, or
if they do not have a file extension and have a first line starting with C<#!>
and also containing the string C<perl>.

=cut

$Peg_longopt{'perl'} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref,
	'-p' => "(!/\\./ or /\\.p[lm]\$/)",
	'-PPPPP' => <<'EOT',
	    # PEG_NO_RESET
	    unless ($File =~ /\./) {
		my $line = <F>;
		warn_ "V: --perl: #!? $line" if $::Verbose;
		if ($line =~ /^\#!.*perl/) {
		    if (seek F, 0, 0) {
			$. = 0;
		    } else {
			warn_ "--perl: seek failed $File: $!"; # -Q ?
		    }
		} else {
		    close F;
		    return;
		}
	    }
EOT
};

################################################################################

$Env{PEG_COLOR} = 'f=lg,c=ly,l=lc,b=lm,n=lw,m=lr,z=wob,y=lyor,k=lc';

$Env{PEG_JJ_MODE} = 'csh'; # used if -JJJ

$Env{PEG_OPTIONS} = "-IIIJJJssT#+_";

$Env{PEG_QFIND_ARGS} .= " -n";

################################################################################

push @Exclude_dirs, qw(
    .git
);

push @Exclude_exts, qw(
    bak dll exe exists obj
);

################################################################################

$Peg_p{c} = 'c:cpp:h:hpp:xs:y';
$Peg_p{h} = 'h:hpp';
$Peg_p{p} = 'pl:pm:pod:t';

$Peg_p{htm} = 'htm:html';

################################################################################

$Peg_z{c} = <<'EOT';
# PEG_FAST_Z_CONTEXT
# PEG_Z_PRIMARY_COLOR
	(
	    # A multi line #define. Only valid while lines are \'d.
	    (/^\#\s*define\s+\w+.*\\$/ and $::Multi_line_define = 1) # context
		or
	    (($::Multi_line_define and (/\\$/
		? undef # still in mld
		: ($::Multi_line_define == 2
		    ? ($::Multi_line_define = $Context_line = undef) # beyond mld
		    : ($::Multi_line_define = 2))) # last line of mld
	    ) and 0) # not context
	)
    or
	(
	    # Functions.
	    /^\w[\w\s\*\&:~]*\(/ # (1) looks like a function
		and
	    not /^(?:if|for|switch|while)\b/ # (2) and isn't a statement
		and
	    (
		$::L = $_,
		$::L =~ s/\/\*.*?\*\/|\/[\*\/].*//g, # remove comments
		$::L !~ /[!^%;\"]/ # (3) and isn't a expression/statement
	    )
	)
    or
	# An unnamed "typedef struct".
	(/^typedef\s+struct\s*(?:\{[^\}]*)?$/ and do {{
	    # Read forward to find the struct name!
	    # Do the entire file in one pass.
	    unless ($::Last_file eq $File) {
		$::Last_file = $File;
		%::Typedef_struct = ();
		my $start_pos = tell(F);
		my $start_line = $.;
		my $typedef_struct_line = $.;
		my $inside = 1;
		while (<F>) {
		    if ($inside) {
			if (/^\}\s+(\w+)/) {
			    $::Typedef_struct{$typedef_struct_line} = $1;
			    $inside = undef;
			}
		    } elsif (/^typedef\s+struct\s*(?:\{[^\}]*)?$/) {
			$typedef_struct_line = $.;
			$inside = 1;
		    }
		}
		# Restore IO position.
		$. = $start_line;
		seek F, $start_pos, 0
		    or die "PEG_Z_C: cannot seek back in $File: $!\n";
	    }
	    my $found;
	    if (exists $::Typedef_struct{$.}) {
		$_ = "typedef struct " . $::Typedef_struct{$.} . " {" . $Newline;
		$found = 1;
	    }
	    $found;
	}})
    or
	(/^(?:typedef\s+struct|struct|template)\s+\w+/ and not /[,;\)]/)
    or
	(/^class\s+\w+\s*$/)
    or
	(
	    # Clear the context if outside function/typedef scope.
	    ($prev_line and $prev_line =~ /^\}/ and $Context_line = undef),
	    ($prev_line = $_),
	    undef
	)
EOT

# Perl subroutines & POD.
$Peg_z{p} = '/^(?:\s*sub\s+\w|=head|__(?:END|DATA)__)/';

################################################################################

sub process_tar_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -tf \"$file\"";
    Warn "running $cmd" if $Verbose;
    my @filelist = `$cmd`;
    if ($?
	    # Heuristic - seen "tar -tf" give correct results AND error code!
	    and @filelist < 3
    ) {
	Warn "failed to get file list from $fullpath: $?", @filelist;
	return 0; # signal to process the file as usual
    }
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next if $f =~ m|/$|; # skip directory names
	next unless pp($f);
	$cmd = qq(tar -xOf "$file" "$f");
	Warn "running $cmd" if $Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	Q($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_tar_slow


sub process_tar_fast {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -xOf \"$file\"";
    my $fh;
    Warn "running $cmd" if $Verbose;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_tar_fast


# Process the contents of a .tar.gz file by file.
sub process_targz_slow {
    my ($file, $fullpath) = @_;
    require File::Temp;
    my ($fh, $tempfile) = File::Temp::tempfile
	("peg-targz-XXXXX", SUFFIX => '.tar', UNLINK => 1);
    close $fh;
    my $cmd = qq(gzip -dc "$file" > "$tempfile");
    Warn "running $cmd" if $Verbose;
    system $cmd and Die "error: $cmd: $?";
    process_tar_slow($tempfile, $fullpath);
    unlink $tempfile;
    return 1;

} # process_targz_slow


# Process the contents of a .tar.gz as one entity.
sub process_targz_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file" | tar -xOf -);
    Warn "running $cmd" if $Verbose;
    my $fh;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_targz_fast


# Process each individual file within a ".zip" file.
sub process_zip_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "unzip -Z1 \"$file\" 2>&1";
    Warn "running $cmd" if $Verbose;
    my @filelist = `$cmd`;
    if ($?) {
	Warn "unzip failed with $fullpath: $?", @filelist;
	return 0; # signal to process the file as usual
    }
    Warn "zip contains @{[ scalar @filelist ]} files" if $Verbose;
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next unless pp($f);
	my $cmd = qq(unzip -p "$file" "$f");
	Warn "running $cmd" if $Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	Q($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_zip_slow


# Process the entire contents inside a ".zip" file as one.
sub process_zip_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(unzip -p "$file");
    Warn "running $cmd" if $Verbose;
    open(my $fh, "$cmd|")
	or Die "can't unzip $fullpath: $!";
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_zip_fast


sub process_gz {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file");
    Warn "running $cmd" if $Verbose;
    open(my $fh, "$cmd|")
	or Die "error: $cmd: $!";
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_gz


sub process_pdf {
    my ($file, $fullpath) = @_;
    require File::Temp;
    my ($fh, $tempfile) = File::Temp::tempfile
	("peg-pdf-XXXXX", SUFFIX => '.pdf', UNLINK => 1);
    close $fh;
    my $cmd = "pdftotext \"$file\" $tempfile";
    Warn "running $cmd" if $Verbose;
    system $cmd;
    if ($?) {
	Warn "pdftotext failed: $?";
	unlink $tempfile;
	return 0;
    }
    unless (open($fh, "<", $tempfile)) {
	Warn "could not open $tempfile: $!";
	unlink $tempfile;
	return 0;
    }
    Q($fh, $fullpath);
    close $fh;
    unlink $tempfile;
    return 1;

} # process_pdf


sub process_tar {
    return process_tar_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar file"
	unless $::Done_use_pp_warning++;
    return process_tar_fast(@_);

} # process_tar


sub process_targz {
    return process_targz_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar.gz file"
	unless $::Done_use_pp_warning++;
    return process_targz_fast(@_);

} # process_targz


sub process_zip {
    return process_zip_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the zip file"
	unless $::Done_use_pp_warning++;
    return process_zip_fast(@_);

} # process_zip


%Peg_Q = (
    'pdf'     => \&process_pdf,
    '*gz'     => \&process_gz,
    '*tar'    => \&process_tar,
    '*tar.gz' => \&process_targz,
    '*zip'    => \&process_zip,
);

################################################################################

sub mv {
    @_ == 2 or die "Usage: mv(SRC, DEST)\n";
    my ($src, $dest) = @_;
    defined $src  or die "mv: undefined SRC\n";
    defined $dest or die "mv: undefined DEST\n";
    -f $src       or die "mv: SRC does not exist: $src\n";
    -f $dest     and die "mv: DEST exists: $dest\n"; # NB. DEST may be a DIR
    require File::Copy;
    File::Copy::move($src, $dest) or die "mv: failed: $!\n";

} # mv

sub cp {
    @_ == 2 or die "Usage: cp(SRC, DEST)\n";
    my ($src, $dest) = @_;
    defined $src  or die "cp: undefined SRC\n";
    defined $dest or die "cp: undefined DEST\n";
    -f $src       or die "cp: SRC does not exist: $src\n";
    -f $dest     and die "cp: DEST exists: $dest\n"; # NB. DEST may be a DIR
    require File::Copy;
    File::Copy::copy($src, $dest) or die "cp: failed: $!\n";

} # cp

################################################################################
#
# A Win32 optimized version of File::Find::find.
#

if ($Is_Win32 and grep /^-.*[dt]/, @ARGV) { eval <<'EOT';

$INC{'File/Find.pm'} = __FILE__; # makes "require File::Find" a NOP.

$File::Find::Mtime = 0;  # ensure defined

sub File::Find::find {
    my ($wanted, @dirs) = @_;
    my $callback = $wanted->{wanted};
    my $silent   = $wanted->{silent};
    my $pp       = $wanted->{preprocess};

    for (@dirs) {
	# Ensure there is a trailing "/" on all directory names.
	$_ .= '/' unless m|[\\/]$|;
    }

    @dirs = reverse @dirs;
    my (@d, @f, %M);
    while (defined (my $dir = pop @dirs)) {
	opendir my $dirh, $dir
	    or ($silent || print STDERR "peg: can't opendir $dir: $!\n"), next;
	@d = @f = %M = ();
	$dir =~ s|^\.[/\\]||;
	while (defined (my $f = readdir $dirh)) {
	    next if ($f eq '.' or $f eq '..');
	    if (-d "$dir$f") {
		push @d, $f;
	    } else {
		push @f, $f;
		$M{$f} = _M(); # NB. respect $::Consider_ctime.
	    }
	}
	closedir $dirh;
	if (@f) {
	    @f = $pp->(@f) if $pp;
	    foreach my $f (@f) {
		$File::Find::name = $_ = "$dir$f";
		$File::Find::Mtime = exists $M{$f} ? $M{$f} : 0;
		$callback->(); # allow errors to propagate to caller.
	    }
	}
	if (@d) {
	    @d = $pp->(@d) if $pp;
	    push @dirs, reverse map "$dir$_/", @d;
	}
    }
    $File::Find::Mtime = 0;
}

EOT

die $@ if $@; }

################################################################################


# Avoid "used only once" warnings.
1 or ($File::Find::name, $File::Find::name);