Hello all,

After Gitk brought my shabby development machine (Core2Duo, 4 GB RAM, Ubuntu 
16.10, no swap to save the SSD) to its knees more often than I'm comfortable 
with, I decided to investigate the issue.

The investigation showed that my Git repo has a commit with a diff of some 
365'000 lines, and Gitk tries to display all of them, consuming more than 
1.5 GB of memory.

My proposed solution is to cut off diffs at 50'000 lines for display. This 
still consumes about 350 MB of RAM, which is a lot. The first 50'000 lines are 
shown, followed by a copyable message explaining how to view the full diff on 
the command line. Diffs shorter than this limit are displayed as before.

To test the waters on whether such a change is welcome, here's the patch as I 
currently use it. If this patch makes sense, I'll happily incorporate change 
requests and bring it more in line with Git's patch submission guidelines. The 
patch is made against git(k) version 2.9.3, the one shipped with the latest 
Ubuntu. Please also note that this is the first time I have written Tcl code, 
so the approach used might not follow Tcl best practices.

$ diff -uw /usr/bin/gitk.org /usr/bin/gitk
--- /usr/bin/gitk.org   2016-08-16 22:32:47.000000000 +0200
+++ /usr/bin/gitk       2016-11-04 20:06:14.805920404 +0100
@@ -7,6 +7,15 @@
 # and distributed under the terms of the GNU General Public Licence,
 # either version 2, or (at your option) any later version.
 
+# Markus: trying to limit memory consumption. It happened that
+#         complex commits led to more than 1.5 GB of memory usage.
+#
+# The problem was identified to be caused by extremely long diffs. The
+# commit leading to this research had some 365'000 lines of diff, consuming
+# these 1.5 GB when drawn into the canvas. The solution is to limit diffs to
+# 50'000 lines and skip the rest. In case of a cutoff, a CLI command for
+# getting the full diff is shown.
+
 package require Tk
 
 proc hasworktree {} {
@@ -7956,6 +7965,7 @@
 
 proc getblobdiffs {ids} {
     global blobdifffd diffids env
+    global parseddifflines
     global treediffs
     global diffcontext
     global ignorespace
@@ -7987,6 +7997,7 @@
     }
     fconfigure $bdf -blocking 0 -encoding binary -eofchar {}
     set blobdifffd($ids) $bdf
+    set parseddifflines 0
     initblobdiffvars
     filerun $bdf [list getblobdiffline $bdf $diffids]
 }
@@ -8063,20 +8074,34 @@
 
 proc getblobdiffline {bdf ids} {
     global diffids blobdifffd
+    global parseddifflines
     global ctext
 
     set nr 0
+    set maxlines 50000
     $ctext conf -state normal
     while {[incr nr] <= 1000 && [gets $bdf line] >= 0} {
+        incr parseddifflines
+        if {$parseddifflines >= $maxlines} {
+            break
+        }
        if {$ids != $diffids || $bdf != $blobdifffd($ids)} {
            catch {close $bdf}
            return 0
        }
        parseblobdiffline $ids $line
     }
+    if {$parseddifflines >= $maxlines} {
+        $ctext insert end "\n------------------" hunksep
+        $ctext insert end " Lines exceeding $maxlines skipped " hunksep
+        $ctext insert end "------------------\n\n" hunksep
+        $ctext insert end "To get a full diff, run\n\n" hunksep
+        $ctext insert end "  git diff-tree -p -C --cc $ids\n\n" hunksep
+        $ctext insert end "on the command line.\n" hunksep
+    }
     $ctext conf -state disabled
     blobdiffmaybeseehere [eof $bdf]
-    if {[eof $bdf]} {
+    if {[eof $bdf] || $parseddifflines >= $maxlines} {
        catch {close $bdf}
        return 0
     }
@@ -9093,6 +9118,7 @@
 
 proc diffcommits {a b} {
     global diffcontext diffids blobdifffd diffinhdr currdiffsubmod
+    global parseddifflines
 
     set tmpdir [gitknewtmpdir]
     set fna [file join $tmpdir "commit-[string range $a 0 7]"]
@@ -9114,6 +9140,7 @@
     set blobdifffd($diffids) $fd
     set diffinhdr 0
     set currdiffsubmod ""
+    set parseddifflines 0
     filerun $fd [list getblobdiffline $fd $diffids]
 }
 

Cheers,
Markus

-- 
- - - - - - - - - - - - - - - - - - -
Dipl. Ing. (FH) Markus Hitter
http://www.jump-ing.de/

Reply via email to