Perl grep clone

lobach Tue, 25 Feb 2003 14:54:54 -0800

OK, I am not having any luck figuring out the regex stuff.. Can somebody
share with me a perl version of grep that works or tell me the "Perl
Expression" for this..The reason I need something like perl is because the
2048 character limitation on grep... We have situations where the line can
be much longer than that.. Thanks. Steve.
We need to be able to use the -c (count) and -h (suppress filename) thanks,
Steve.
counting lines:
node13:/prod/app$ grep -c ".*"  ./client/tmp/BATCH/failed/in/PPR_RESP.log
1858571
node13:/prod/app$ peg -c  ".*"  ./client/tmp/BATCH/failed/in/PPR_RESP.log
peg: error in Perl expression: (.*)
syntax error at (eval 1) line 1, near "(."
node13:/prod/app$
AND
search for lines with . (period) in the search string
node13:/prod/app/data/journal$ grep "RES\.1\.25" sjl
RES.1.2504178found
RES.1.250417RES.1.250417RES.1.250417RES.1.250417
RES.1.250417RES.1.250417RES.1.250417RES.1.250417
node13:/prod/app/data/journal$ peg "RES\.1\.25" sjl
peg: error in Perl expression: (RES\.1\.25)
Backslash found where operator expected at (eval 1) line 1, near "RES\"
Backslash found where operator expected at (eval 1) line 1, near ".1\"
        (Missing operator before \?)
syntax error at (eval 1) line 1, near "RES\"
node13:/prod/app/data/journal$




#!/usr/local/bin/perl -w

my $VERSION = '1.00';
my $Usage = "Usage: peg [<options>|-help] <perlexpr> [<files>]\n";
my ($Dirs_specified, $Eval, $Implicit_C, $No_slurp);
my (@Files, %Options, @Perlexpr, @Warnings) = ();
my ($After, $Before) = (2, 2);
my $Perlexpr = '';

$SIG{'__WARN__'} = sub { push @Warnings, @_; };

process_ARGV();

check_Perlexpr();

$g ||= $s;

find_files() if ($d || $r);

build_Eval();

show_debug() if $D;

reset 'a-z';

eval $Eval;

die "\npeg: run time eval error:\n", @Warnings, $@,
    "\n...when eval'ing:\n$Eval\n...with input:\n$_\n";


sub process_ARGV
{
  my $options = 1;
  my $context = 'C';

  if ($_ = $ENV{'PEG_OPTIONS'}) {
    unshift @ARGV, (/^-/ ? $_ : "-$_");
  }

  while (defined ($_ = shift @ARGV)) {
    if ($f) {
      open(F, "<$_") || die "peg: can't open $_: $!\n";
      while (<F>) {
        chomp;
        push @Perlexpr, $_ unless /^$/;
      }
      close F;
      $f = 0;
    }
    elsif ($options && s/^-(?=.)//) {
      /^help$/ && help();
      while (s/^(.)//) {
        my $opt = $1;
        if ($opt =~ /^[abcdfghilnoqrstvwxyABCDEFGHLNOPQSTXYZ]$/) {
          ${$opt} = $Options{$opt} = 1;
          $context = $opt if ($opt =~ /^[ABC]$/);
        }
        elsif ($opt =~ /^\d$/) {
          while (s/^(\d)//) { $opt = (10 * $opt) + $1; }
          $After  = $opt if ($context ne 'B');
          $Before = $opt if ($context ne 'A');
          $Implicit_C = 1;
        }
        elsif ($opt eq '-') { $options = 0; }
        elsif ($opt eq 'V') { die "peg v$VERSION (Dec 1999)\n"; }
        else { die "peg: illegal option -- $opt\n$Usage"; }
      }
    }
    elsif ([EMAIL PROTECTED] || (($o || $O) && $options)) {
      push @Perlexpr, $_;
    }
    else {
      push @Files, $_;
    }
  }

  die $Usage unless @Perlexpr;

} # process_ARGV


sub check_Perlexpr
{
  my $regexp = $G || $Q || $i || $w || $x;

  foreach (@Perlexpr) {
    ($Q && !$E) || ($No_slurp ||= /[\^\$]/);
    next if ($E || !($regexp || /^\w+$/));
    $Q ? ($_ = quotemeta($_)) : (s/\//\\\//g);
    $_ = '\b' . $_ . '\b' if ($w && !$x);
    $_ = '^' . $_ . '$' if $x;
    $_ = '/' . $_ . '/';
    $_ .= 'i' if $i;
  }
  if ($O) {
    $Perlexpr .= join(",\n\t", map({"(\$Match$_ ||= (" . $Perlexpr[$_] .
"))"} (0..$#Perlexpr)),
      ('(' . join(' && ', map {"\$Match$_"} (0 .. $#Perlexpr)) . ')'));
  }
  else {
    $Perlexpr = join("\n\t|| ", map {"($_)"} @Perlexpr);
  }
  $Perlexpr = 'not (' . $Perlexpr . ')' if $v;
  local ($a, $b, $c, $d, $f, $g, $h, $i, $l, $n, $o, $q, $r, $s, $t, $v, $w,
$x, $y,
         $A, $B, $C, $D, $E, $F, $G, $H, $L, $N, $O, $P, $Q, $S, $T, $X, $Y,
$Z);
  eval "\$_ = ''; if ($Perlexpr) {}";

  die "peg: error in Perl expression: $Perlexpr\n", @Warnings, $@ if $@;

} # check_Perlexpr


sub find_files
{
  if ($d && @Files) {
    my ($start_dir, $dir, @dirs, @files);
    foreach (@Files) {
      (-d $_) ? push @dirs, $_ : push @files, $_;
    }
    if ($Dirs_specified = @dirs) {
      @Files = @files;
      require Cwd;
      $start_dir = Cwd::cwd() || die "peg: cannot determine current
directory\n";
      foreach $dir (@dirs) {
        chdir($dir)
          || (($s || print STDERR "peg: can't chdir to $dir: $!\n"), next);
        find($dir);
        chdir($start_dir)
          || die "peg: can't chdir back to starting directory $start_dir:
$!\n";
      }
    }
  }
  find('.') if $r;

  if ([EMAIL PROTECTED] && ($r || ($d && $Dirs_specified)) && !$X) {
    print STDERR "peg: no files found\n" if !$s;
    exit(1);
  }

} # find_files


sub find
{
  my $cwd = shift;
  my (@f, $f, $ff);

  opendir(DIR, '.')
    || (($g || print STDERR "peg: can't opendir $cwd: $!\n"), return);
  @f = readdir DIR;
  closedir DIR;

  foreach $f (@f) {
    next if ($f eq '.' || $f eq '..');
    $ff = "$cwd/$f";
    lstat $f;
    if (-d _) {
      chdir($f)
        || (($g || print STDERR "peg: can't chdir to $ff: $!\n"), next);
      find($ff);
      chdir('..')
        || die "peg: can't chdir back to .. from $ff: $!\n";
    }
    else {
      push @Files, $ff;
    }
  }

} # find


sub help
{
  system("perldoc peg") && die "\npeg: perldoc: $?\n";
  exit;

} # help


sub show_debug
{
  print "peg: Warnings =>\n", @Warnings, "\n" if @Warnings;
  print "peg: Options => ", sort(keys %Options),
    (($_ = $ENV{'PEG_OPTIONS'}) ? " (PEG_OPTIONS = $_)" : ''), "\n\n";
  print "peg: Files =>\n", (map {"\t$_\n"} @Files), "\n";
  print "peg: Perl code =>\n$Eval\n";
  exit;

} # show_debug


sub build_Eval
{
  my ($context, $gap, $nonmatch_print, $output, $print, $reset, @my_vars);

  if ($O) {
    $l = 1;
    $A = $B = $C = $Implicit_C = $c = $L = $q = $Z = 0;
  }
  $No_slurp = @Files = ('-') if ([EMAIL PROTECTED] && !$X);
  $No_slurp ||= $x;
  $C = 1 if ($Implicit_C && !($A || $B));
  $A = $B = 1 if $C;
  $context = $A || $B || $C;
  $c = $l = $L = $q = $S = $Z = 0 if $context;
  $h = 1 if (@Files <= 1 && !(($d && $Dirs_specified) || $r || $X));
  $reset = 1 if (((@Files > 1) || $X) && $Perlexpr =~ /[EMAIL PROTECTED]/ &&
!$x);
  $h = 0 if $H;
  if ($c || $l || $L || $O || $q || $Z) {
    $a = 1;
    $b = $N = $S = $T = 0;
  }
  $y = 1 if (($l || $L || $q) && !$No_slurp);
  $N = 0 if $T;
  $F = 0 if ($F && $Perlexpr !~ /\bF\b/);
  $P = 0 if ($P && $Perlexpr !~ /\bP\b/);
  $a = 1 if $S;

  if ($c) {
    $L = $q = $Z = 0;
  }
  elsif ($L) {
    $q = $Z = 0;
  }
  elsif ($l) {
    $q = $Z = 0;
    $output = '"$File\n"';
    $t = 1;
  }
  elsif ($Z) {
    $q = 0;
  }
  elsif ($q) {}
  else {
    $output = '';
    $output = "\$Offset:" if $b;
    $output = "\$.:$output" if $n;
    $output = "\$File:$output" if !$h;
    $output = "\"$output\$_\"" if $output;
  }

  if (defined $output) {
    $print = 'print' . ($output ? " $output" : '') . ';';
    $print .= ' last;' if ($t && !$context);
  }
  if ($context) {
    $output ||= '$_';
    $gap = ($A ? $After : 0) + ($B ? $Before : 0);
    ($nonmatch_print = $print) =~ s/:/-/g;
    $output =~ s/:/-/g;
    $Perlexpr = "\$First_match && ($Perlexpr)" if $t;
  }

  @my_vars = (($context ? '$After' : ()),
    ($B ? '@Before' : ()),
    ($a ? () : '$Binary_file'),
    ($c ? '$Count' : ()),
    ($F ? '@F' : ()),
    ($context ? '$First_match' : ()),
    '$File',
    ($L ? '$Found' : ()),
    ($b ? '$Length' : ()),
    ($O ? (map {"\$Match$_"} (0..$#Perlexpr)): ()),
    ($context ? '$Matched' : ()),
    ($b ? '$Offset' : ()),
    ($P ? ('$P', '@P') : ()),
    ($Z ? '$Z' : ()));

  $Eval = '';
  $Eval .= "while (<STDIN>) { chomp; push [EMAIL PROTECTED], \$_; }\n" .
           "[EMAIL PROTECTED] || (" . ($s ? '' : '(print STDERR "peg: no files
found\n"), ')
           . "exit(1));\n" if $X;
  $Eval .= "\$| = 1;\n" if !$q;
  $Eval .= ($y ? "undef \$/;\n" : ($Y ? "\$/ = '';\n" : ''));
  $Eval .= "my \$Exit_code = 1;\n" if !$q;
  $Eval .= 'my (' . join(", ", @my_vars) . ");\n";
  $Eval .= "foreach \$File ([EMAIL PROTECTED]) {\n";
  $Eval .= "  open(FILE, \"<\$File\")";
  $Eval .= $s ? " || next;\n"
              : "\n    || ((print STDERR \"peg: can't open \$File:
\$!\\n\"), next);\n";
  $Eval .= "  \$After = $After;\n" if $A;
  $Eval .= "  [EMAIL PROTECTED] = ();\n" if $B;
  $Eval .= "  \$Binary_file = -B FILE;\n" if !$a;
  $Eval .= "  \$Count = 0;\n" if $c;
  $Eval .= "  \$Found = 0;\n" if $L;
  $Eval .= "  \$Offset = 0;\n" if $b;
  $Eval .= "  \$First_match = 1;\n" if $context;
  $Eval .= '  ' . join(" = ", map {"\$Match$_"} (0..$#Perlexpr)) . " = 0;\n"
if $O;
  $Eval .= "  [EMAIL PROTECTED] = ();\n" if $P;
  $Eval .= "  \$Z = '';\n" if $Z;
  $Eval .= "  while (<FILE>) {\n";
  $Eval .= "    \$P = \$_;\n" if $P;
  $Eval .= "    \$Length = length;\n" if $b;
  $Eval .= "    [EMAIL PROTECTED] = split;\n" if $F;
  $Eval .= "    shift [EMAIL PROTECTED] if ([EMAIL PROTECTED] > $Before);\n" if $B;
  $Eval .= "    study;\n" if (@Perlexpr > 5);
  $Eval .= "    if ($Perlexpr) {\n" if !$S;
  $Eval .= '      ' . ($q ? 'exit(0)' : '$Exit_code = 0') . ";\n";
  $Eval .= '      $Binary_file && ((print "Binary file $File matches\n"),
last);' . "\n" if !$a;
  $Eval .= "      $Perlexpr;\n" if $S;
  $Eval .= "      chomp; \$_ .= \"\\n\";\n" if $N;
  $Eval .= "      chomp; \$_ .= ' ';\n" if $T;
  $Eval .= "      ++\$Count;\n" if $c;
  $Eval .= "      \$Found = 1;\n      last;\n" if $L;
  $Eval .= "      print \"--\\n\" if (\$Matched++ && (\$First_match ||
(\$After > $gap)));\n" if $context;
  $Eval .= "      print [EMAIL PROTECTED];\n" if $B;
  $Eval .= "      $print\n" if $print;
  $Eval .= "      \$After = 0;\n" if $A;
  $Eval .= "      [EMAIL PROTECTED] = ();\n" if $B;
  $Eval .= "      \$First_match = 0;\n" if $context;
  $Eval .= "    }\n" if !$S;
  $Eval .= "    elsif (++\$After <= $After) {\n
  $nonmatch_print\n    }\n" if $A;
  $Eval .= "    else {\n" if ($B || ($context && $t));
  $Eval .= "      ++\$After;\n" if (!$A && $B);
  $Eval .= "      push [EMAIL PROTECTED], $output;\n" if $B;
  $Eval .= "      last if !\$First_match;\n" if ($context && $t);
  $Eval .= "    }\n" if ($B || ($context && $t));
  $Eval .= "    \$Offset += \$Length;\n" if $b;
  $Eval .= "    push [EMAIL PROTECTED], \$P;\n" if $P;
  $Eval .= "  }\n";
  $Eval .= '  print "\n";' . "\n" if $T;
  $Eval .= '  print "' . ($h ? '' : '$File:') . '$Count\n";' . "\n" if $c;
  $Eval .= "  chomp \$Z;\n  print \"" . ($h ? '' : '$File:') . "\$Z\\n\";\n"
if $Z;
  $Eval .= '  if (!$Found) { print "$File\n"; }' . "\n" if $L;
  $Eval .= "  reset 'a-z';\n" if $reset;
  $Eval .= "  close FILE;\n}\n";
  $Eval .= 'exit(' . ($q ? '1' : '$Exit_code') . ");\n";

} # build_Eval

__END__

=head1 NAME

peg - Perl expression grep

=head1 SYNOPSIS

peg [<options>|-help] <perlexpr> [<files>]

=head1 DESCRIPTION

Peg is a grep(1) clone. It uses a Perl expressions to match lines
from a list of input files, or standard input if none specified.

Internally, peg eval's code that resembles the following pseudo-Perl:

    foreach $File ( <files> ) {
        open(FILE, "<$File");
        while (<FILE>) {
            if ( <perlexpr> ) {
                print;
            }
        }
    }

Thus, each input line is available as the Perl variable C<$_>, and this
will be printed if <perlexpr> is true. In particular, to match lines
according to a Perl regular expression pattern, it is necessary to place
it within the pattern matching operator, which defaults to searching C<$_>.

  eg% peg '/\bneedle\b/i' haystack

Note that <perlexpr> can be any Perl expression, and is not limited just
to regular expressions.

=head1 OPTIONS

The options include equivalents to most of those of standard grep(1),
including the GNU extensions. They can be grouped anywhere in the
argument list (except after '--'), and can also be set via the
environment variable "PEG_OPTIONS".

If less than two files specified, then B<-h> is assumed.

Selection and interpretation of <perlexpr>:

=over 4

=item B<-E>

Overrides B<-G> & B<-Q>. Assume <perlexpr> is a Perl expression
(this is the default behavior).

=item B<-G>

Assume <perlexpr> is a Perl regular expression pattern to be matched.
This option is implicit if any of B<-i>, B<-w>, B<-x> are used, or if
<perlexpr> matches /^\w+$/ (ie. is entirely alphanumeric). Thus,
"peg foo bar" is equivalent to "peg '/foo/' bar".

=item B<-Q>

Overrides B<-G>. Assume <perlexpr> is a fixed literal string to be matched.
Thus, C<"peg -Q 'fo+' bar"> is equivalent to "peg '/fo\+/' bar".

=item B<-f> <file>

The following argument is a file containing further <perlexpr>'s.
(Note, this is the only option that takes an argument). Lines
will be adjudged to match if they match any of the <perlexpr>'s.

=item B<-o>

Arguments following the B<-o> option up until '--' are interpreted
as further <perlexpr>'s. Lines will be adjudged to match if they
match any of the <perlexpr>'s. For example, C<"peg -o foo bar baz -- file">
is equivalent to C<"peg '/foo/ or /bar/ or /baz/' file">.

=item B<-O>

This option is similar to B<-ol>, except each <perlexpr> must match
independently. As with B<-o>, arguments following the B<-O> option
up until '--' are interpreted as further <perlexpr>'s.

=item B<-i>

Enables B<-G>. Ignore case distinctions.

=item B<-v>

Negates the sense of <perlexpr>.

=item B<-w>

Enables B<-G>. Force <perlexpr> to match only whole words.

=item B<-x>

Enables B<-G>. Force <perlexpr> to match only whole lines.

=back

File selection:

=over 4

=item B<-d>

Any directories listed in the argument list will be searched recursively
for files to work upon.

=item B<-r>

Work upon all files in and beneath the current directory.

=item B<-X>

Interpret STDIN as a stream of filenames to process.
It provides a builtin B<xargs(1)> facility. (See example 6).

=back

Basic output control:

=over 4

=item B<-a>

Do not suppress binary output. The default behavior for when
a match occurs on a binary file is to print "Binary file <filename>
matches".

=item B<-A> B<-B> B<-C> B<-NUM>

These options specify that matching lines should be shown with lines of
surrounding I<context>. B<-A> shows lines of trailing (I<after>) context;
B<-B> shows lines of leading (I<before>) context; B<-C> shows both leading
and trailing context. B<-NUM> sets the number of lines of context for the
most recently specified context option (the default is 2) or assumes B<-C>
if none specified. That is, B<-B1A3> specifies one line of leading context
and three lines of trailing context.

=item B<-b>

Print the byte offset within the input file.

=item B<-c>

Print only a count of the input lines that match <perlexpr>.

=item B<-h>

Suppress filenames being printed when searching multiple files.

=item B<-H>

Print the filename for each match.

=item B<-l>

Print only the names of files which match <perlexpr> at least once.

=item B<-L>

Print only the names of files which don't match <perlexpr> anywhere.

=item B<-n>

Print the input line number.

=item B<-t>

Print only the first match in any one file.

=back

Peg specials:

=over 4

=item B<-D>

Prints out the internal Perl code that would otherwise be eval'ed.

=item B<-F>

Provide an array @F which is the result of a split applied to
the input line.

=item B<-N>

Ensure each printed line ends in a newline. This is only necessary
if <perlexpr> leaves C<$_> without a trailing newline. (See example 2).

=item B<-P>

Provide an array @P of the input up until that point. $P[-1] is the
previous line. This provides a mechanism to allow matches to be made
over consecutive lines. (See example 4).

=item B<-S>

Always print the input line. This enables stream editing with s///.

=item B<-T>

Print each file's output on one single line, with each line separated
by a single whitespace.

=item B<-y>

Treat each file as a single line.

=item B<-Y>

Treat paragraphs (text delimited by blank lines) as single lines.

=item B<-Z>

Print the value of $Z at EOF. (See example 5).

=back

Miscellaneous:

=over 4

=item B<-g>

Suppress the error messages about unreadable directories outputted
when either B<-d> or B<-r> is used.

=item B<-q>

Write nothing to STDOUT. Exit 0 if a match is found, else exit 1.

=item B<-s>

Suppress all error messages about unreadable files and directories.

=item B<-V>

Display peg's version number and exit.

=item B<-->

Explicitly end options. Allows filenames beginning with a -.
Also used by the B<-o> and B<-O> options to determine which arguments are
<perlexpr>'s and which are files.

=back

=head1 EXAMPLES

  1. Search recursively for all VHDL constant declarations:

    % peg -r '/^\s*constant\s.*:=/i'

  2. Find the instance names of CTS buffers in a verilog netlist:

    % peg -N '/^\s*CTS\w*\s+(\w+)\s*\(/ and $_ = $1' foo.v

  3. Extract the entity declaration section from a VHDL file:

    % peg 's/\s*--.*$//, /^\s*entity\b/i .. /^\s*end\b/i' bar.vhd

  4. Search for the sequence A,B,C split over 3 consecutive lines:

    % peg -PB2n '$P[-2]=~/A/ and $P[-1]=~/B/ and /C/' bam

  5. Sum up the entries in the last column of a file:

    % peg -ZF '$Z += $F[-1]' report.txt

  6. Search for "main" in C files below the current directory.

    % find . -name "*.c" | peg -Xw main

=head1 ENVIRONMENT

The environment variable PEG_OPTIONS can be used to set options.

=head1 EXIT STATUS

The following exit values are returned:

  0            one or more matches were found
  1            no matches were found
  >1           peg did not complete normally

=head1 SCRIPT CATEGORIES

Search

=head1 README

This script is yet another Perl grep(1).

=head1 SEE ALSO

L<perl(1)>, L<perlre>, L<grep(1)>.

=head1 AUTHOR

Alex Davies <[EMAIL PROTECTED]>

=head1 COPYRIGHT

Copyright (c) 1999 Alex Davies. All rights reserved. This program is
free software; you can redistribute it and/or modify it under the same
terms as Perl itself.

=cut



-- 
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Perl grep clone

Reply via email to