[perl #19406] [PATCH] creates a script which describes source files

via RT Tue, 24 Dec 2002 08:26:24 -0800

# New Ticket Created by  [EMAIL PROTECTED] 
# Please include the string:  [perl #19406]
# in the subject line of all future correspondence about this issue. 
# <URL: http://rt.perl.org/rt2/Ticket/Display.html?id=19406 >



This patch creates a script
  tools/dev/extract_file_descriptions.pl

It produces a browsable list of file names with brief descriptions, and
is intended to help familiarize new developers with the layout of parrot.

The descriptions are extracted from leading comments and embedded pods.

I found the output to be surprisingly easy and interesting to read.
Your mileage may vary.

When run
  $ pwd
  parrot/
  $ perl tools/dev/extract_file_descriptions.pl
it prints out entries like

   * ./ChangeLog

     2002-12-18 20:38  sfink: changes since 0.0.8

           - Allow suppression of cgoto core to save memory during compile
           * Native function calling interface
           * Major rewrite of stack and list aggregates
           - Scalar PMC added
           * Scratchpads implemented
           - Win32 libraries
           - Internal memory subsystem documentation
           * Preliminary DotGNU support
           - Packfile fingerprinting
           * Buffer/PMC unification (into PObjs)
           * stabs debugging information support
           * Major Jako overhaul, including:
             - imcc integration
      [...]


   * ./build_nativecall.pl

     Build up the native call routines.


   * ./byteorder.c

     Byteordering functions
        These are assigned to a vtable in PackFile at each load.
        If the vtable method is called for conversion from the
        native byteorder, it is a noop and will work, but the
        caller should know if the PackFile byteorder is native
        and skip the conversion and just map it in.

   ( ./warnings.c )

The last being an file without an obvious descriptive comment or pod.

The default output is currently something like 6k lines, and 100KB.
This was a quick hack, written yesterday evening.
But it looks vaguely plausible.

Mitchell Charity



-- attachment  1 ------------------------------------------------------
url: http://rt.perl.org/rt2/attach/46190/36166/854f4b/extract_file_descriptions.patch

diff -urN ./MANIFEST ../parrot-describe/MANIFEST
--- ./MANIFEST  Sun Dec 15 09:20:09 2002
+++ ../parrot-describe/MANIFEST Tue Dec 24 11:05:13 2002
@@ -1741,6 +1741,7 @@
 t/src/sprintf.t
 test_main.c
 tools/dev/check_source_standards.pl
+tools/dev/extract_file_descriptions.pl
 tools/dev/genrpt.pl
 tools/dev/lib_deps.pl
 tools/dev/manicheck.pl
diff -urN ./tools/dev/extract_file_descriptions.pl 
../parrot-describe/tools/dev/extract_file_descriptions.pl
--- ./tools/dev/extract_file_descriptions.pl    Wed Dec 31 19:00:00 1969
+++ ../parrot-describe/tools/dev/extract_file_descriptions.pl   Tue Dec 24 10:57:34 
+2002
@@ -0,0 +1,206 @@
+#!/usr/bin/perl -w
+
+# This script extracts descriptions from the Parrot source files.
+
+#  Doables:
+#   - Given a directory argument, should recursively descend.
+#   - Should create descriptive hashes earlier, before files are filtered.
+#     So one can skip, say a binary file, but still have it listed in the output.
+#     For instance, it is nice to see where the .pbc's land.
+#   - Allow indescribable heads to be mixed in with rest, for when exploring
+#     a location is more important than big-picture browsing.
+#   - Is absense of "[...]"s in the indescribable listing a ui consistency violation?
+#   - Fragment describe_file() - it shouldn't both `cat` and dispatch on file suffix.
+#   - Finish making this usable as a library.
+#   - It would be nice to have a ParrotSourceFile class of course. ;) (a jest - sort 
+of.)
+#   - Misc: rationalize indent; clarify desc emptiness contract;
+#     review readability of intra-comment blank line elimination; pod handling;
+#     
+
+use Regexp::Common qw/comment/;
+use Getopt::Long;
+use strict;
+
+if(1) {
+    my $show_full = 0;
+    GetOptions('plus-misses' => \$show_full)
+       || die ("Usage: $0 [--plus-misses] [files...]\n\n".
+               "FILES defaults to a recursive \" find . \".\n\n".
+               "--plus-misses creates a second section, with the heads of any\n".
+               "files which had familiar types, but from which descriptions\n".
+               "were not obtained.\n\n");
+
+    my @files = @ARGV ? @ARGV : &files_worth_describing;
+
+    print "This file was generated by $0\non ".scalar(localtime).".\n\n";
+    print "Files in ( parenthesis ) did not have extractable descriptions.\n";
+    if($show_full) {
+       print "Their heads are included in a second section below, "
+           ."marked with \"#=#=#=\".\n";
+    } else {
+       print "Run this script with --plus-misses, and a second section will be "
+           ."included,\nwith the heads of these indescribable files.\n";
+    }
+    print "\n";
+    my @no_descriptions;
+    foreach (@files) {
+       my $info = &describe_file($_);
+       if($info->{desc}) {
+           print "\n* $info->{path}\n\n$info->{desc}\n";
+       } else {
+           print "( $info->{path} )\n";
+           push(@no_descriptions,$info);
+       }
+    }
+    print "\n\n";
+    if($show_full) {
+       print "\n".("#="x35)
+           ."\n\nFiles from which descriptions were not obtained:\n\n";
+       foreach (@no_descriptions) {
+           my $top = $_->{top};
+           $top =~ s/^/>  /mg;
+           print "- $_->{path}\n\n$top\n";
+       }
+    }
+    exit(0);
+}
+
+sub describe_file {
+    my($path) = @_;
+    my $info = {};
+    $info->{path} = $path;
+    my $text = `cat $info->{path}`;
+    my($top) = $text =~ /^(([^\n]*\n){1,15})/;
+    $info->{top} = $top;
+    return &describe_c_file($info,$text)
+       if $path =~ /\.([chlyC]|cpp|cola|xs|pmc)$|[_\.][ch]\.in$/;
+    return &describe_perl_file($info,$text)
+       if $path =~ /\.(pl|pm|t|PL|pod|pasm)$|_pm\.in$/
+           or $text =~ /^\#/;
+    return &describe_misc_file($info,$text);
+}
+
+sub describe_misc_file {
+    my($info,$text) = @_;
+    my $top = $text;
+    &clip_excess_lines($top,15);
+    $top =~ s/^ {0,1}(\S)/  $1/mg; # minimum indent
+    $info->{desc} = $top;
+    return $info;
+}
+
+sub describe_perl_file {
+    my($info,$text) = @_;
+    my $desc;
+    if($text =~ /^\#/) { # perl file (or sh)
+       my($comment) = $text =~ /^(\#[^\n]*\n( *\n)?(\#[^\n]*\n)*)/s;
+       die "internal bug" if !$comment;
+       local $_ = $comment;
+       s/^\#\* /\# /mg;         # #*
+       s/^\#(\#|\*)+\s*$/\#/mg; # line of "*"s or "#"s
+       s/^\#\!.+//m;            # #!
+       s/^\# *[a-z0-9]+\.(p[ml]|pasm)\s*\n//mi; # own file name - kludgy
+       s/\#\s*Copyright[^\n]+\n(\# *\S[^\n]*\n)*/\#\n/s;
+       s/^\#\s*(\$Id: .+)\n//m; $info->{Id} = $1;
+       s/^\#\s*Author:.+//m;
+       
+       s/^\s*\n//mg; # truly blank lines, between the # comment lines
+       s/\n(\# *\n){2,}/\n\#\n/sg; # crush down double blank lines
+       s/^\s*(\# *\n)+//s;         # remove leading
+       s/\n(\# *\n)+\s*$/\n/s;     # remove trailing
+       s/^\#//mg; # get rid of #
+       #s/^( *\n)+//s;
+       s/^\s*$//s; # normalize emptiness
+       $comment = $_;
+       $desc = $info->{perl_comment} = $comment;
+    }
+    if(!$desc && $text =~ /^=head1/m) { # try an embedded pod
+       my($doc) = $text =~ /(?:^|\n)=head1(.+)/s;
+       die "internal bug" if !$doc;
+       local $_ = $doc;
+       s/^\s*(NAME|TITLE)\s*//;
+       s/\n=.*//s;
+       # It would be nice to get the beginning of any DESCRIPTION. FIXME
+       $doc = $_;
+       $desc = $info->{pod_doc} = $doc;
+    }
+    do {
+       $desc =~ s/^ {0,1}(\S)/  $1/mg; # minimum indent
+       &clip_excess_lines($desc);
+    } if $desc;
+    $info->{desc} = $desc;
+    return $info;
+}
+
+sub describe_c_file {
+    my($info,$text) = @_;
+    my $comment_is_at_beginning = $text =~ /^\/\*/;
+    my($first_comment) = $text =~ /($RE{comment}{C})/;
+      ($first_comment) = $text =~ /(($RE{comment}{'C++'}\s*)+)/
+         if !$first_comment;
+
+    return $info if !$first_comment;
+
+    local $_ = $first_comment;
+
+    s/^\/\*//; s/\*\/$//; # /*  */
+    s/^ *\/\///mg;   # //
+    s/^ ?\*\*//mg;   # |**
+    s/^ {0,2}\*//mg; # | *
+
+    s/^ *(\$Id: .+)\n//m; $info->{Id} = $1;
+
+    my $desc;
+    if(/Overview:/) { # normal parrot code files
+       my $label = qr/ *[A-Z][a-zA-Z ]+:/;
+       ($desc) = /(?:^|\n) *Overview: *\n(((?!$label) *[^\n]+\n)+)/s;
+       $info->{warning} .= "There was an Overview:, but it wasn't used.";
+    }
+    if($info->{path} =~ /\Wicu\Wsource/) {
+       s/Copyright \(C\) [^\n]+\n *Corporation [^\n]+\n//;
+       s/^ *(file name|encoding|tab size|indentation|created (on|by)):.*//mg;
+       $desc = $_;
+    }
+    if(!$desc) {
+       s/^ *[a-z0-9_]+\.[chly]\s*\n//mi; # own filename - kludgy.
+       $desc = $_;
+       $desc = ""  # it's only emacs variables at the end of the file
+           if(!$comment_is_at_beginning && /c-indentation-style/);
+    }
+
+    $_ = $desc;
+    s/^ *\*+\s*$//mg; # line of "*"s.
+    s/\n( *\n){2,}/\n\n/sg; # excess blank lines
+    s/^\s*//s; s/\s*$/\n/s; # trim (and ends with a newline)
+    s/^ {0,1}(\S)/  $1/mg; # minimum indent
+    s/^\s*$//s;            # normalize emptiness
+    &clip_excess_lines($_);
+    $info->{desc} = $_;
+    return $info;
+}
+
+sub files_worth_describing {
+    use File::Find;
+    my @files;
+    find(sub {
+       my $name = $File::Find::name;
+       return if $name =~ /CVS|cvsignore|core|\.[oa]|\.(so|brk|dsp|tmp)$/;
+       return if $name =~ /locales\W[a-z_]+\.txt$/i;
+       return if $name =~ /icu\Wsource/; # icu cleanup code above needs work
+       return if -d $_;
+       return if -B $_;
+       push(@files,$name);
+    }, ".");
+    return @files;
+}
+
+sub clip_excess_lines {
+    my $cnt = defined $_[1] ? $_[1] : 20;
+    $_[0] =~ s/(([^\n]*\n){0,$cnt}).*/$1   [...]\n/s
+       if $_[0] =~ tr/\n/\n/ > $cnt;
+    return undef;
+}
+
+
+1;
+__END__

[perl #19406] [PATCH] creates a script which describes source files

Reply via email to