Hi maintainers,

Just a gentle ping for review. :-)

Thanks,
Zhao

On Fri, Dec 15, 2023 at 06:34:48PM +0800, Zhao Liu wrote:
> Date: Fri, 15 Dec 2023 18:34:48 +0800
> From: Zhao Liu <zhao1....@linux.intel.com>
> Subject: [PATCH v2] scripts/checkpatch: Support codespell checking
> X-Mailer: git-send-email 2.34.1
> 
> From: Zhao Liu <zhao1....@intel.com>
> 
> Add two spelling check options (--codespell and --codespellfile) to
> enhance spell checking through a dictionary. The implementation is
> copied from the Linux kernel's checkpatch.pl.
> 
> This check uses the dictionary at "/usr/share/codespell/dictionary.txt"
> by default. If no dictionary exists at this path, it looks for the
> dictionary shipped with python3's codespell (this requires the user to
> have python3 in the $PATH environment variable and to install codespell
> via "pip install codespell").
> 
> Tested-by: Yongwei Ma <yongwei...@intel.com>
> Signed-off-by: Zhao Liu <zhao1....@intel.com>
> ---
> Changes since v1:
> * Drop the default dictionary "spelling.txt" and just support optional
>   spelling check via --codespell and --codespellfile. (Thomas)
> 
> v1: https://lore.kernel.org/qemu-devel/20231204082917.2430223-1-zhao1....@linux.intel.com/
> 
> (CC more maintainers who are suggested by get_maintainer.pl in v2.)
> ---
>  scripts/checkpatch.pl | 125 +++++++++++++++++++++++++++++++++++-------
>  1 file changed, 105 insertions(+), 20 deletions(-)
> 
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index 6e4100d2a41c..45a5c66e3eab 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -35,6 +35,9 @@ my $summary_file = 0;
>  my $root;
>  my %debug;
>  my $help = 0;
> +my $codespell = 0;
> +my $codespellfile = "/usr/share/codespell/dictionary.txt";
> +my $user_codespellfile = "";
>  
>  sub help {
>       my ($exitcode) = @_;
> @@ -66,6 +69,9 @@ Options:
>                               is all off)
>    --test-only=WORD           report only warnings/errors containing WORD
>                               literally
> +  --codespell                Use the codespell dictionary for spelling/typos
> +                             (default:$codespellfile)
> +  --codespellfile            Use this codespell dictionary
>    --color[=WHEN]             Use colors 'always', 'never', or only when output
>                               is a terminal ('auto'). Default is 'auto'.
>    -h, --help, --version      display this help and exit
> @@ -85,28 +91,50 @@ foreach (@ARGV) {
>  }
>  
>  GetOptions(
> -     'q|quiet+'      => \$quiet,
> -     'tree!'         => \$tree,
> -     'signoff!'      => \$chk_signoff,
> -     'patch!'        => \$chk_patch,
> -     'branch!'       => \$chk_branch,
> -     'emacs!'        => \$emacs,
> -     'terse!'        => \$terse,
> -     'f|file!'       => \$file,
> -     'strict!'       => \$no_warnings,
> -     'root=s'        => \$root,
> -     'summary!'      => \$summary,
> -     'mailback!'     => \$mailback,
> -     'summary-file!' => \$summary_file,
> -
> -     'debug=s'       => \%debug,
> -     'test-only=s'   => \$tst_only,
> -     'color=s'       => \$color,
> -     'no-color'      => sub { $color = 'never'; },
> -     'h|help'        => \$help,
> -     'version'       => \$help
> +     'q|quiet+'              => \$quiet,
> +     'tree!'                 => \$tree,
> +     'signoff!'              => \$chk_signoff,
> +     'patch!'                => \$chk_patch,
> +     'branch!'               => \$chk_branch,
> +     'emacs!'                => \$emacs,
> +     'terse!'                => \$terse,
> +     'f|file!'               => \$file,
> +     'strict!'               => \$no_warnings,
> +     'root=s'                => \$root,
> +     'summary!'              => \$summary,
> +     'mailback!'             => \$mailback,
> +     'summary-file!'         => \$summary_file,
> +     'debug=s'               => \%debug,
> +     'test-only=s'           => \$tst_only,
> +     'codespell!'            => \$codespell,
> +     'codespellfile=s'       => \$user_codespellfile,
> +     'color=s'               => \$color,
> +     'no-color'              => sub { $color = 'never'; },
> +     'h|help'                => \$help,
> +     'version'               => \$help
>  ) or help(1);
>  
> +if ($user_codespellfile) {
> +     # Use the user provided codespell file unconditionally
> +     $codespellfile = $user_codespellfile;
> +} elsif (!(-f $codespellfile)) {
> +     # If /usr/share/codespell/dictionary.txt is not present, try to find it
> +     # under codespell's install directory: <codespell_root>/data/dictionary.txt
> +     if (($codespell || $help) && which("python3") ne "") {
> +             my $python_codespell_dict = << "EOF";
> +
> +import os.path as op
> +import codespell_lib
> +codespell_dir = op.dirname(codespell_lib.__file__)
> +codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt')
> +print(codespell_file, end='')
> +EOF
> +
> +             my $codespell_dict = `python3 -c "$python_codespell_dict" 2> /dev/null`;
> +             $codespellfile = $codespell_dict if (-f $codespell_dict);
> +     }
> +}
> +
>  help(0) if ($help);
>  
>  my $exit = 0;
> @@ -337,6 +365,36 @@ our @typeList = (
>       qr{guintptr},
>  );
>  
> +# Load common spelling mistakes and build regular expression list.
> +my $misspellings;
> +my %spelling_fix;
> +
> +if ($codespell) {
> +     if (open(my $spelling, '<', $codespellfile)) {
> +             while (<$spelling>) {
> +                     my $line = $_;
> +
> +                     $line =~ s/\s*\n?$//g;
> +                     $line =~ s/^\s*//g;
> +
> +                     next if ($line =~ m/^\s*#/);
> +                     next if ($line =~ m/^\s*$/);
> +                     next if ($line =~ m/, disabled/i);
> +
> +                     $line =~ s/,.*$//;
> +
> +                     my ($suspect, $fix) = split(/->/, $line);
> +
> +                     $spelling_fix{$suspect} = $fix;
> +             }
> +             close($spelling);
> +     } else {
> +             warn "No codespell typos will be found - file '$codespellfile': $!\n";
> +     }
> +}
> +
> +$misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix;
> +
>  # This can be modified by sub possible.  Since it can be empty, be careful
>  # about regexes that always match, because they can cause infinite loops.
>  our @modifierList = (
> @@ -477,6 +535,18 @@ sub top_of_kernel_tree {
>       return 1;
>  }
>  
> +sub which {
> +     my ($bin) = @_;
> +
> +     foreach my $path (split(/:/, $ENV{PATH})) {
> +             if (-e "$path/$bin") {
> +                     return "$path/$bin";
> +             }
> +     }
> +
> +     return "";
> +}
> +
>  sub expand_tabs {
>       my ($str) = @_;
>  
> @@ -1585,6 +1655,21 @@ sub process {
>                       WARN("8-bit UTF-8 used in possible commit log\n" . $herecurr);
>               }
>  
> +# Check for various typo / spelling mistakes
> +             if (defined($misspellings) &&
> +                 ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
> +                     while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) {
> +                             my $typo = $1;
> +                             my $blank = copy_spacing($rawline);
> +                             my $ptr = substr($blank, 0, $-[1]) . "^" x length($typo);
> +                             my $hereptr = "$hereline$ptr\n";
> +                             my $typo_fix = $spelling_fix{lc($typo)};
> +                             $typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/);
> +                             $typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/);
> +                             WARN("'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $hereptr);
> +                     }
> +             }
> +
>  # ignore non-hunk lines and lines being removed
>               next if (!$hunk_line || $line =~ /^-/);
>  
> -- 
> 2.34.1
> 
> 

Reply via email to