scripts/regression-hotspots.py |   23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

New commits:
commit a0176ee710a845567fff9c61608bd2393160c895
Author:     Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>
AuthorDate: Thu Apr 11 22:50:48 2024 +0300
Commit:     Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>
CommitDate: Thu Apr 11 21:59:57 2024 +0200

    regression-hotspots: optimise by running git log only once instead of
    
    eight thousand times. Takes running time from 10 hours to 30 seconds for me.
    The results are also more accurate: previously, a bug tracker ID in the commit
    message was accepted even when it appeared only after the first line.
    
    Change-Id: I75f77eb0e3f5c884f35d639608752225f5085c4c
    Reviewed-on: https://gerrit.libreoffice.org/c/dev-tools/+/166014
    Tested-by: Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>
    Reviewed-by: Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>

diff --git a/scripts/regression-hotspots.py b/scripts/regression-hotspots.py
index 6ea80ba6..8a4fb76d 100755
--- a/scripts/regression-hotspots.py
+++ b/scripts/regression-hotspots.py
@@ -8,6 +8,7 @@
 #
 # Uses https://github.com/gitpython-developers/GitPython
 # Results published in https://wiki.documentfoundation.org/Development/RegressionHotspots
+# Run in LibreOffice core directory. Shouldn't take more than a minute.
 
 import sys
 import re
@@ -18,7 +19,7 @@ from urllib.request import urlopen, URLError
 from io import BytesIO
 
 def get_fixed_regression_bugs():
-    url = 
'https://bugs.documentfoundation.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0'
+    url = 
'https://bugs.documentfoundation.org/buglist.cgi?columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&product=LibreOffice&resolution=FIXED&ctype=csv&human=0'
 
     ctx = ssl.create_default_context()
     ctx.check_hostname = False
@@ -57,9 +58,23 @@ if __name__ == '__main__':
     fixed_regression_ids = get_fixed_regression_bugs()
     sys.stderr.write('found %d fixed regressions: %s
' % (len(fixed_regression_ids), fixed_regression_ids))
 
-    for bug_id in fixed_regression_ids:
-        sys.stderr.write('working on bug %d
' % bug_id)
-        lognames = git.Git('.').execute(['git', 'log', 
'--grep=[fdo|tdf]#'+str(bug_id), '--pretty=tformat:', '--name-only'])
+    # build a dictionary of hashes and bug IDs from all commits targeting a report in FDO/TDF Bugzilla
+    gitbugs = {}
+    buglog = git.Git('.').execute(['git', 'log', '--grep=(fdo|tdf)#', '-E', 
'--oneline', '--since=1.10.2010'])
+    if buglog:
+        for line in buglog.split('
'):
+            githash = line.partition(' ')[0]
+            # the regex search will ignore any commits hit by the grep where fdo|tdf# occurred below
+            # the first line - this is desirable as the referred bug ID should appear in the subject line
+            bugid = re.search(r"(?:fdo|tdf)#([0-9]+)", line)
+            if bugid:
+                gitbugs[githash] = int(bugid.group(1))
+
+    # filter by the bug IDs we got from the Bugzilla query
+    regression_hashes = [key for key, value in gitbugs.items() if value in 
fixed_regression_ids]
+
+    for githash in regression_hashes:
+        lognames = git.Git('.').execute(['git', 'show', githash, 
'--pretty=tformat:', '--name-only'])
         if lognames:
             for filename in lognames.split('
'):
                 if not excluderegex.search(filename):

Reply via email to