Hi Joe,
    This is good!  I had something similar cooking - specifically for SCM 
validation.

My SCM attempt caught a few more issues:
    - check git branch if specified
    - check validity of "T:" entry, otherwise warn of malformed entry.

Example malformed (current next has two instances):

9740 T:      git://git.infradead.org/nvme.git

Should be:
9740 T:      git git://git.infradead.org/nvme.git


Also - I believe you intended to warn on all bad SCM entries, not just newly 
discovered ones?
Your change correctly finds a previously seen bad entry ($isbad); however, the 
print is enclosed in the else branch, which prevents the output.

I was going to inline these, but in my haste to understand and incorporate 
changes I sanitized whitespace
(BTW - I see both tabs and spaces, which is preferred in this file?)

The below git branch special-casing is for these:
567:T:  git git://people.freedesktop.org/~airlied/linux (part of drm maint)
3671:T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)

See bottom for my suggestions.

--Tom



On Mon, Nov 06, 2017 at 09:27:25AM -0800, Joe Perches wrote:
> Check for duplicate section headers and link reachability.
> 
> Miscellanea:
> 
> o Add --self-test=<foo> options (sections, patterns and scm for now)
>   where the default without options is all tests
> o Rename check_maintainers_patterns to self_test
> o Rename self_test_pattern_info to self_test_info
> 
> Signed-off-by: Joe Perches <j...@perches.com>
> cc: Tom Saeger <tom.sae...@oracle.com>
> ---
>  scripts/get_maintainer.pl | 114 
> +++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 97 insertions(+), 17 deletions(-)
> 
> diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
> index c68a5d1ba709..748bff0790a8 100755
> --- a/scripts/get_maintainer.pl
> +++ b/scripts/get_maintainer.pl
> @@ -57,7 +57,7 @@ my $sections = 0;
>  my $file_emails = 0;
>  my $from_filename = 0;
>  my $pattern_depth = 0;
> -my $self_test = 0;
> +my $self_test = undef;
>  my $version = 0;
>  my $help = 0;
>  my $find_maintainer_files = 0;
> @@ -221,7 +221,7 @@ if (-f $ignore_file) {
>  
>  if ($#ARGV > 0) {
>      foreach (@ARGV) {
> -        if ($_ eq "-self-test" || $_ eq "--self-test") {
> +        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
>              die "$P: using --self-test does not allow any other option or 
> argument\n";
>          }
>      }
> @@ -263,7 +263,7 @@ if (!GetOptions(
>               'fe|file-emails!' => \$file_emails,
>               'f|file' => \$from_filename,
>               'find-maintainer-files' => \$find_maintainer_files,
> -             'self-test' => \$self_test,
> +             'self-test:s' => \$self_test,
>               'v|version' => \$version,
>               'h|help|usage' => \$help,
>               )) {
> @@ -280,9 +280,9 @@ if ($version != 0) {
>      exit 0;
>  }
>  
> -if ($self_test) {
> +if (defined $self_test) {
>      read_all_maintainer_files();
> -    check_maintainers_patterns();
> +    self_test();
>      exit 0;
>  }
>  
> @@ -329,7 +329,7 @@ if (!top_of_kernel_tree($lk_path)) {
>  my @typevalue = ();
>  my %keyword_hash;
>  my @mfiles = ();
> -my @self_test_pattern_info = ();
> +my @self_test_info = ();
>  
>  sub read_maintainer_file {
>      my ($file) = @_;
> @@ -339,6 +339,7 @@ sub read_maintainer_file {
>      my $i = 1;
>      while (<$maint>) {
>       my $line = $_;
> +     chomp $line;
>  
>       if ($line =~ m/^([A-Z]):\s*(.*)/) {
>           my $type = $1;
> @@ -353,17 +354,16 @@ sub read_maintainer_file {
>               if ((-d $value)) {
>                   $value =~ s@([^/])$@$1/@;
>               }
> -             if ($self_test) {
> -                     push(@self_test_pattern_info, {file=>$file, 
> line=>$line, linenr=>$i, pat=>$value});
> -             }
>           } elsif ($type eq "K") {
>               $keyword_hash{@typevalue} = $value;
>           }
>           push(@typevalue, "$type:$value");
>       } elsif (!(/^\s*$/ || /^\s*\#/)) {
> -         $line =~ s/\n$//g;
>           push(@typevalue, $line);
>       }
> +     if (defined $self_test) {
> +         push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
> +     }
>       $i++;
>      }
>      close($maint);
> @@ -614,17 +614,97 @@ if ($web) {
>  
>  exit($exit);
>  
> -sub check_maintainers_patterns {
> +sub self_test {
>      my @lsfiles = ();
> +    my @good_links = ();
> +    my @bad_links = ();
> +    my @section_headers = ();
>  
>      @lsfiles = vcs_list_files($lk_path);
>  
> -    for my $x (@self_test_pattern_info) {
> -        if (!grep(m@^$x->{pat}@, @lsfiles)) {
> -            my $line = $x->{line};
> -            chomp($line);
> -            print("$x->{file}:$x->{linenr}: warning: no matches $line\n");
> -        }
> +    for my $x (@self_test_info) {
> +
> +     ## Section header duplication
> +     if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
> +         $x->{line} =~ /^\S[^:]/) {
> +         if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
> +             print("$x->{file}:$x->{linenr}: warning: duplicate section 
> header\t$x->{line}\n");
> +         } else {
> +             push(@section_headers, $x->{line});
> +         }
> +     }
> +     next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
> +
> +     my $type = $1;
> +     my $value = $2;
> +
> +     ## Filename pattern matching
> +     if (($type eq "F" || $type eq "X") &&
> +         ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
> +         $value =~ s@\.@\\\.@g;       ##Convert . to \.
> +         $value =~ s/\*/\.\*/g;       ##Convert * to .*
> +         $value =~ s/\?/\./g;         ##Convert ? to .
> +         ##if pattern is a directory and it lacks a trailing slash, add one
> +         if ((-d $value)) {
> +             $value =~ s@([^/])$@$1/@;
> +         }
> +         if (!grep(m@^$value@, @lsfiles)) {
> +             print("$x->{file}:$x->{linenr}: warning: no file 
> matches\t$x->{line}\n");
> +         }
> +
> +     ## Link reachability
> +     } elsif (($type eq "W" ||
> +               $type eq "B" && $value =~ /^https?:/) &&
> +              ($self_test eq "" || $self_test =~ /\blinks\b/)) {
> +         next if (grep(m@^\Q$value\E$@, @good_links));
> +         my $isbad = 0;
> +         if (grep(m@^\Q$value\E$@, @bad_links)) {
> +             $isbad = 1;
> +         } else {
> +             my $output = `wget --spider -q --no-check-certificate --timeout 
> 10 --tries 1 $value`;
> +             if ($? == 0) {
> +                 push(@good_links, $value);
> +             } else {
> +                 push(@bad_links, $value);
> +                 $isbad = 1;
> +             }
> +         }
> +         if ($isbad) {
> +             print("$x->{file}:$x->{linenr}: warning: possible bad 
> link\t$x->{line}\n");
> +         }
> +
> +     ## SCM reachability
> +     } elsif (($type eq "T" && $value =~ /^(?:git|quilt|hg)\s+\S/) &&
> +              ($self_test eq "" || $self_test =~ /\bscm\b/)) {
> +         next if (grep(m@^\Q$value\E$@, @good_links));
> +         my $isbad = 0;
> +         if (grep(m@^\Q$value\E$@, @bad_links)) {
> +             $isbad = 1;
> +         } else {
> +             if ($value =~ /^git\s+(\S+)/) {
> +                 my $url = $1;
> +                 my $output = `git ls-remote --exit-code -h "$url" > 
> /dev/null 2>&1`;
> +                 if ($? == 0) {
> +                     push(@good_links, $value);
> +                 } else {
> +                     push(@bad_links, $value);
> +                     $isbad = 1;
> +                 }
> +             } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
> +                 my $url = $1;
> +                 my $output = `wget --spider -q --no-check-certificate 
> --timeout 10 --tries 1 $url`;
> +                 if ($? == 0) {
> +                     push(@good_links, $value);
> +                 } else {
> +                     push(@bad_links, $value);
> +                     $isbad = 1;
> +                 }
> +             }
> +             if ($isbad) {
> +                 print("$x->{file}:$x->{linenr}: warning: possible bad 
> link\t$x->{line}\n");
> +             }
> +         }
>      }
>  }
>  
> -- 
> 2.15.0
> 

Changed SCM portion to this, which picks up a few more warnings...
Checks git branch on remote if specified.
Perhaps a $ismalformed category or some other way to deal with malformed 
entries?  Or just
move up to first check of SCM?

         ## SCM reachability
         } elsif (($type eq "T") && ($self_test eq "" || $self_test =~ 
/\bscm\b/)) {
             next if (grep(m@^\Q$value\E$@, @good_links));
             my $isbad = 0;
             if (grep(m@^\Q$value\E$@, @bad_links)) {
                 $isbad = 1;
             } else {
                 if ($value !~ /^(?:git|quilt|hg)\s+\S/) {
                     print("$x->{file}:$x->{linenr}: warning: malformed 
entry\t$x->{line}\n");
                 } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
                     my $url = $1;
                     my $branch = "";
                     $branch = $3 if $3;
                     my $output = `git ls-remote --exit-code -h "$url" $branch> 
/dev/null 2>&1`;
                     if ($? == 0) {
                         push(@good_links, $value);
                     } else {
                         push(@bad_links, $value);
                         $isbad = 1;
                     }
                 } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
                     my $url = $1;
                     my $output = `wget --spider -q --no-check-certificate 
--timeout 10 --tries 1 $url`;
                     if ($? == 0) {
                         push(@good_links, $value);
                     } else {
                         push(@bad_links, $value);
                         $isbad = 1;
                     }
                 }
             }
             if ($isbad) {
                 print("$x->{file}:$x->{linenr}: warning: possible bad 
link\t$x->{line}\n");
             }
         }
     }
  }

Reply via email to