Added two switches to bin/find-german-comments. The first is --line-numbers (-l), which outputs each filename only once, followed by (mostly) neat formatting of the line numbers of the flagged comments. The second is --threshold (-t), which suppresses any output for files that have [t] or fewer flagged comments (it only takes effect together with --line-numbers). This should help with false positives, since it seems that files with only 1 or 2 "German" comments are usually being flagged for hexadecimal code or something else entirely.
Therefore, running: ../bin/find-german-comments -l -t1 > german.txt will create a text file populated only with filenames and line numbers for files in the current directory (and its subdirectories) that have more than one flagged comment in them.
>From 5499422324c6cfea18e699450b5d594a6ce27e9c Mon Sep 17 00:00:00 2001 From: Tom Thorogood <t...@tomthorogood.com> Date: Tue, 13 Mar 2012 22:50:13 -0400 Subject: [PATCH] Add options to bin/find-german-comments to help weed out false positives --- bin/find-german-comments | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 44 insertions(+), 2 deletions(-) diff --git a/bin/find-german-comments b/bin/find-german-comments index e0ce382..6400fc8 100755 --- a/bin/find-german-comments +++ b/bin/find-german-comments @@ -44,6 +44,10 @@ class Parser: help="Only print the filenames of files containing German comments") op.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Turn on verbose mode (print progress to stderr)") + op.add_option("-l", "--line-numbers", action="store_true", dest="line_numbers", default=False, + help="Prints the filenames and line numbers only.") + op.add_option("-t", "--threshold", action="store", dest="THRESHOLD", default=0, + help="When used with '--line-numbers', only bothers outputting comment info if there are more than X number of flagged comments. Useful for weeding out false positives.") self.options, args = op.parse_args() try: dir = args[0] @@ -141,7 +145,45 @@ class Parser: """ checks each comment in a file """ - if not self.options.filenames_only: + def tab_calc (string): + START = 40 #Default of 10 tabs + if len(string) >= START: + return 1, 0 + diff = START - len(string) + if diff % 4 is not 0: + padding = 1 + else: + padding = 0 + return (diff/4)+padding + + if self.options.line_numbers: + TABS = "\t"*10 + path_linenums = [] + for linenum, s in self.get_comments(path): + if self.is_german(s): + path_linenums.append(linenum) + valid = len(path_linenums) > int(self.options.THRESHOLD) + sys.stderr.write("%s ... 
%s positives -- %s\n" % (path, str(len(path_linenums)), str(valid))) + if valid: + if len(path) + (len(path_linenums)*4) > 75: + print "%s:\n" % path + while(path_linenums): + i = 0 + numline = [] + while i < 10: + try: + numline.append(path_linenums[0]) + path_linenums.remove(path_linenums[0]) + except IndexError: + i = 10 + i+=1 + numline = [str(i) for i in numline] + print "%s%s" %(TABS, ",".join(numline)) + else: + path_linenums = [str(i) for i in path_linenums] + print "%s:%s%s" % (path,"\t"*tab_calc(path),",".join(path_linenums)) + + elif not self.options.filenames_only: for linenum, s in self.get_comments(path): if self.is_german(s): print "%s:%s: %s" % (path, linenum, s) -- 1.7.4.1
_______________________________________________ LibreOffice mailing list LibreOffice@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice