Added two switches to bin/find-german-comments
The first is --line-numbers (-l), which outputs a filename only once,
followed by (mostly) neat formatting of the line numbers with flagged
comments. The second is --threshold (-t), which suppresses any output for
files that have no more than [t] flagged comments. This should help with
false positives, since it seems that files with only 1 or 2 "German"
comments are usually being flagged for hexadecimal code or something
else entirely.

Therefore: ../bin/find-german-comments -l -t1 > german.txt
will create a text file populated only with filenames and line numbers
for files in the current directory (and subs) that have more than one
flagged comment in them.
From 5499422324c6cfea18e699450b5d594a6ce27e9c Mon Sep 17 00:00:00 2001
From: Tom Thorogood <t...@tomthorogood.com>
Date: Tue, 13 Mar 2012 22:50:13 -0400
Subject: [PATCH] Add options to bin/find-german-comments to help weed out false positives

---
 bin/find-german-comments |   46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/bin/find-german-comments b/bin/find-german-comments
index e0ce382..6400fc8 100755
--- a/bin/find-german-comments
+++ b/bin/find-german-comments
@@ -44,6 +44,10 @@ class Parser:
             help="Only print the filenames of files containing German comments")
         op.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
             help="Turn on verbose mode (print progress to stderr)")
+        op.add_option("-l", "--line-numbers", action="store_true", dest="line_numbers", default=False,
+            help="Prints the filenames and line numbers only.")
+        op.add_option("-t", "--threshold", action="store", dest="THRESHOLD", default=0,
+            help="When used with '--line-numbers', only bothers outputting comment info if there are more than X number of flagged comments. Useful for weeding out false positives.")
         self.options, args = op.parse_args()
         try:
             dir = args[0]
@@ -141,7 +145,45 @@ class Parser:
         """
         checks each comment in a file
         """
-        if not self.options.filenames_only:
+        def tab_calc (string):
+            """Return how many tabs (counted as 4 columns each) pad `string` out to column START."""
+            START = 40 #Default of 10 tabs
+            if len(string) >= START:
+                # Already at or past START: one separator tab is enough.
+                # Must be a plain int, because the caller multiplies "\t" by this value.
+                return 1
+            diff = START - len(string)
+            # Round up so a partial 4-column step still gets a tab; '//' keeps the count an int.
+            return (diff + 3) // 4
+
+        if self.options.line_numbers:
+            TABS = "\t"*10
+            path_linenums = []
+            for linenum, s in self.get_comments(path):
+                if self.is_german(s):
+                    path_linenums.append(linenum)
+            valid = len(path_linenums) > int(self.options.THRESHOLD)
+            sys.stderr.write("%s ... %s positives -- %s\n" % (path, str(len(path_linenums)), str(valid)))
+            if valid:
+                if len(path) + (len(path_linenums)*4) > 75:
+                    print "%s:\n" % path
+                    while(path_linenums):
+                        i = 0
+                        numline = []
+                        while i < 10:
+                            try:
+                                numline.append(path_linenums[0])
+                                path_linenums.remove(path_linenums[0])
+                            except IndexError:
+                                i = 10
+                            i+=1
+                        numline = [str(i) for i in numline]
+                        print "%s%s" %(TABS, ",".join(numline))
+                else:
+                    path_linenums = [str(i) for i in path_linenums]
+                    print "%s:%s%s" % (path,"\t"*tab_calc(path),",".join(path_linenums))
+
+        elif not self.options.filenames_only: 
             for linenum, s in self.get_comments(path):
                 if self.is_german(s):
                     print "%s:%s: %s" % (path, linenum, s)
-- 
1.7.4.1

_______________________________________________
LibreOffice mailing list
LibreOffice@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice

Reply via email to