Added two switches to bin/find-german-comments
The first is --line-numbers (-l), which outputs a filename only once,
followed by (mostly) neat formatting of the line numbers with flagged
comments. The second is --threshold (-t), which suppresses any output for
files that have [t] or fewer flagged comments. This should help with
false positives, since it seems that files with only 1 or 2 "German"
comments are usually being flagged for hexadecimal code or something
else entirely.
Therefore: ../bin/find-german-comments -l -t1 > german.txt
will create a text file populated only with filenames and line numbers
for files in the current directory (and subdirectories) that have more than
one flagged comment in them.
From 5499422324c6cfea18e699450b5d594a6ce27e9c Mon Sep 17 00:00:00 2001
From: Tom Thorogood <tom@tomthorogood.com>
Date: Tue, 13 Mar 2012 22:50:13 -0400
Subject: [PATCH] Add options to bin/find-german-comments to help weed out false positives
---
bin/find-german-comments | 46 ++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/bin/find-german-comments b/bin/find-german-comments
index e0ce382..6400fc8 100755
--- a/bin/find-german-comments
+++ b/bin/find-german-comments
@@ -44,6 +44,10 @@ class Parser:
help="Only print the filenames of files containing German comments")
op.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
help="Turn on verbose mode (print progress to stderr)")
+ op.add_option("-l", "--line-numbers", action="store_true", dest="line_numbers",
default=False,
+ help="Prints the filenames and line numbers only.")
+ op.add_option("-t", "--threshold", action="store", dest="THRESHOLD", default=0,
+ help="When used with '--line-numbers', only bothers outputting comment info if there
are more than X number of flagged comments. Useful for weeding out false positives.")
self.options, args = op.parse_args()
try:
dir = args[0]
@@ -141,7 +145,45 @@ class Parser:
"""
checks each comment in a file
"""
- if not self.options.filenames_only:
+ def tab_calc (string):
+ START = 40 #Default of 10 tabs
+ if len(string) >= START:
+ return 1, 0
+ diff = START - len(string)
+ if diff % 4 is not 0:
+ padding = 1
+ else:
+ padding = 0
+ return (diff/4)+padding
+
+ if self.options.line_numbers:
+ TABS = "\t"*10
+ path_linenums = []
+ for linenum, s in self.get_comments(path):
+ if self.is_german(s):
+ path_linenums.append(linenum)
+ valid = len(path_linenums) > int(self.options.THRESHOLD)
+ sys.stderr.write("%s ... %s positives -- %s\n" % (path, str(len(path_linenums)),
str(valid)))
+ if valid:
+ if len(path) + (len(path_linenums)*4) > 75:
+ print "%s:\n" % path
+ while(path_linenums):
+ i = 0
+ numline = []
+ while i < 10:
+ try:
+ numline.append(path_linenums[0])
+ path_linenums.remove(path_linenums[0])
+ except IndexError:
+ i = 10
+ i+=1
+ numline = [str(i) for i in numline]
+ print "%s%s" %(TABS, ",".join(numline))
+ else:
+ path_linenums = [str(i) for i in path_linenums]
+ print "%s:%s%s" % (path,"\t"*tab_calc(path),",".join(path_linenums))
+
+ elif not self.options.filenames_only:
for linenum, s in self.get_comments(path):
if self.is_german(s):
print "%s:%s: %s" % (path, linenum, s)
--
1.7.4.1
Context
- [PATCH] Add Switches to find-german-comments to aid in weeding false positives · Tom Thorogood
Privacy Policy |
Impressum (Legal Info) |
Copyright information: Unless otherwise specified, all text and images
on this website are licensed under the
Creative Commons Attribution-Share Alike 3.0 License.
This does not include the source code of LibreOffice, which is
licensed under the Mozilla Public License (MPLv2).
"LibreOffice" and "The Document Foundation" are
registered trademarks of their corresponding registered owners or are
in actual use as trademarks in one or more countries. Their respective
logos and icons are also subject to international copyright laws. Use
thereof is explained in our
trademark policy.