Reject all legacy 8-bit character sets and allow only ASCII or UTF-8 to
be added to files or used in patch descriptions.

Signed-off-by: David Woodhouse <[EMAIL PROTECTED]>

Signed-off-by: asd
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 277c326..7a7f283 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -395,6 +395,22 @@ sub process {
                        $clean = 0;
                }
 
+# UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php
+               if ( ($realfile =~ /^$/ || $line =~ /^\+/) &&
+                    !($line =~ m/^(
+                               [\x09\x0A\x0D\x20-\x7E]              # ASCII
+                               | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
+                               |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
+                               | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
+                               |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
+                               |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
+                               | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
+                               |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
+                               )*$/x ) ) {
+                       print "Invalid UTF-8\n";
+                       print "$herecurr";
+                       $clean = 0;
+               }
 #ignore lines being removed
                if ($line=~/^-/) {next;}
 


-- 
dwmw2

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to