> which then causes the associated block to execute:
> 278                     char *rchg1 = dd1->rchg;
> 279                     long *rindex1 = dd1->rindex;
> 280
> 281                     for (; off1 < lim1; off1++)
> 282                             rchg1[rindex1[off1]] = 1;
> and then that's the end of the function!
> 308             return 0;
> 309     }

> It's completely relying on rchg to be filled with zeros before it's called :D 
> that's definitely the issue here. ... or it looks like it to me now ... 
> hopefully if i do that, then this assertion will pass and a new one will rise 
> later.

Ok! So indeed wiping rchg did make that assertion pass.

@@ -839,6 +842,9 @@ private:
     void do_diff()
     {
         auto xe = &this->xe;
+        // for now rchg is wiped as the git/xdiff algorithms assume this
+        std::memset(xe->xdf1.rchg - 1, 0, (size_t)(xe->xdf1.nrec + 2) * 
sizeof(*xe->xdf1.rchg));
+        std::memset(xe->xdf2.rchg - 1, 0, (size_t)(xe->xdf2.nrec + 2) * 
sizeof(*xe->xdf2.rchg));
         switch (XDF_DIFF_ALG(xp.flags)) {
         case XDF_PATIENCE_DIFF:
             mustbe0(xdl_do_patience_diff(&xp, xe));

The next bug was a coding mistake in one of the consume loops, I wasn't walking 
the linked list resulting in iterating the same node forever:
@@ -362,17 +362,20 @@ public:

             /* if the classifier entry hash been consumed, recover it, then 
classify the line. otherwise, classify the line, then update the line pointer 
in case old lines are consumed. */
             auto rchash_hi = (long) XDL_HASHLONG(crec->ha, cf->hbits);
-            auto * rcrec_link = &cf.consumed_rchash[(size_t)rchash_hi];
+            xdlclass_t ** prev;
             xdlclass_t * rcrec;
-            while (*rcrec_link) {
-                rcrec = *rcrec_link;
+            for (
+                prev = &cf.consumed_rchash[(size_t)rchash_hi];
+                (rcrec = *prev);
+                prev = &(*prev)->next
+            ) {
                 if (rcrec->ha == crec->ha && rcrec->size == crec->size) {
                     break;
                 };
             }
-            if (*rcrec_link) {
+            if (rcrec) {
                 assert(rcrec->next == nullptr && "hash collision in consumed 
lines; should check for existing matching entry before using consumed entry");
-                *rcrec_link = rcrec->next;
+                *prev = rcrec->next;

                 rcrec->line = crec->ptr;

That's two bugs fixed!

The next bug is a super-fun one: _a memory error_. I have so many memory checks 
going on for this code. Here we have it:

$ ./diff_xdiff                                                                  
                                                                                
                                                                                
                                                                                
  [28/1799]
diff_main: Null case.
WINDOW RESIZE0=>2
consuming rec ptr 0x7f2942e00a00
consuming rec ptr 0x502000000250
WINDOW RESIZE2=>4
consuming rec ptr 0x7f2942e00a02
consuming rec ptr 0x502000000312
WINDOW RESIZE4=>8
consuming rec ptr 0x7f2942e00a04
consuming rec ptr 0x502000000334
diff_main: Equality.
WINDOW RESIZE0=>2
WINDOW RESIZE2=>4
WINDOW RESIZE4=>8
consuming rec ptr 0x7f2942e01200
consuming rec ptr 0x502000000550
consuming rec ptr 0x7f2942e01202
consuming rec ptr 0x502000000552
WINDOW RESIZE8=>16
consuming rec ptr 0x7f2942e01204
=================================================================
==15016==ERROR: AddressSanitizer: heap-use-after-free on address 0x5070000002f8 
at pc 0x55d895dbd0a9 bp 0x7ffeba337450 sp 0x7ffeba337448
READ of size 8 at 0x5070000002f8 thread T0
    #0 0x55d895dbd0a8 in 
AsymmetricStreamingXDiff::assert_no_dangling_pointers() 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:765
    #1 0x55d895dbf402 in 
AsymmetricStreamingXDiff::extend_env(std::basic_string_view<char, 
std::char_traits<char> >) /home/karl3/projects/zinc/src/diff_xdiff.cpp:818
    #2 0x55d895d9667c in diff /home/karl3/projects/zinc/src/diff_xdiff.cpp:695
    #3 0x55d895dec067 in 
std::__n4861::coroutine_handle<zinc::__generator_promise_base<Diff, 
zinc::generator<Diff, Diff, zinc::use_allocator_arg> > >::resume() const 
/usr/include/c++/12/coroutine:244
    #4 0x55d895dddad3 in zinc::__generator_promise_base<Diff, 
zinc::generator<Diff, Diff, zinc::use_allocator_arg> >::resume() 
../include/zinc/__generator.hpp:548
    #5 0x55d895dcf32d in zinc::generator<Diff, Diff, 
zinc::use_allocator_arg>::iterator::operator++() 
../include/zinc/__generator.hpp:822
    #6 0x55d895d9b7d3 in diff_main 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1029
    #7 0x55d895d9dbc8 in test_diff_xdiff() 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1074
    #8 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108
    #9 0x7f2944a33d67 in __libc_start_call_main 
../sysdeps/nptl/libc_start_call_main.h:58
    #10 0x7f2944a33e24 in __libc_start_main_impl ../csu/libc-start.c:360
    #11 0x55d895d8c640 in _start 
(/home/karl3/projects/zinc/src/diff_xdiff+0xb4640) (BuildId: 
1dcce094fc42295839b19dfd2bae3a3a7afd2442)

0x5070000002f8 is located 56 bytes inside of 80-byte region 
[0x5070000002c0,0x507000000310)
freed by thread T0 here:
    #0 0x7f29458f3918 in free 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:52
    #1 0x55d895db5cb7 in DynamicXDFile::cha_consume_(long) 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:554
    #2 0x55d895db331e in DynamicXDFile::consume(long) 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:463
    #3 0x55d895d96edf in diff /home/karl3/projects/zinc/src/diff_xdiff.cpp:701
    #4 0x55d895dec067 in 
std::__n4861::coroutine_handle<zinc::__generator_promise_base<Diff, 
zinc::generator<Diff, Diff, zinc::use_allocator_arg> > >::resume() const 
/usr/include/c++/12/coroutine:244
    #5 0x55d895dddad3 in zinc::__generator_promise_base<Diff, 
zinc::generator<Diff, Diff, zinc::use_allocator_arg> >::resume() 
../include/zinc/__generator.hpp:548
    #6 0x55d895dcf32d in zinc::generator<Diff, Diff, 
zinc::use_allocator_arg>::iterator::operator++() 
../include/zinc/__generator.hpp:822
    #7 0x55d895d9b7d3 in diff_main 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1029
    #8 0x55d895d9dbc8 in test_diff_xdiff() 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1074
    #9 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108
    #10 0x7f2944a33d67 in __libc_start_call_main 
../sysdeps/nptl/libc_start_call_main.h:58

previously allocated by thread T0 here:
    #0 0x7f29458f4c77 in malloc 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
    #1 0x55d895e12542 in xdl_cha_alloc 
/home/karl3/projects/zinc/third_party/xdiff/xutils.c:101
    #2 0x55d895dae6d4 in DynamicXDFile::extend_pre(s_xpparam*, long, 
std::basic_string_view<char, std::char_traits<char> >) 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:357
    #3 0x55d895db947b in 
AsymmetricStreamingXDiff::AsymmetricStreamingXDiff(std::basic_string_view<char, 
std::char_traits<char> >, long, unsigned long) 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:668
    #4 0x55d895d9ab24 in diff_main 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1018
    #5 0x55d895d9dbc8 in test_diff_xdiff() 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1074
    #6 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108
    #7 0x7f2944a33d67 in __libc_start_call_main 
../sysdeps/nptl/libc_start_call_main.h:58

SUMMARY: AddressSanitizer: heap-use-after-free 
/home/karl3/projects/zinc/src/diff_xdiff.cpp:765 in 
AsymmetricStreamingXDiff::assert_no_dangling_pointers()

One of the fun things about these memory errors (now in these modern times i 
seem to have stumbled on when they actually get reported) is that there are 
three separate stack traces: one for violation, one for deallocation, and one 
for allocation.

Reply via email to