When running a command like "git show" or "git diff" in a partial clone,
batch all missing blobs to be fetched as one request.

This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
blobs", 2017-12-08), but for the diff machinery instead of unpack-trees.

Signed-off-by: Jonathan Tan <jonathanta...@google.com>
---
Here's an improvement for those having partial clones.

I couldn't find a good existing script for the test (one that checks how
diff interfaces with the object store would be ideal), so I created a
new one. Please let me know if there's a better place to put it.
---
 diff.c                        | 27 +++++++++++++
 t/t4067-diff-partial-clone.sh | 76 +++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100755 t/t4067-diff-partial-clone.sh

diff --git a/diff.c b/diff.c
index ec5c095199..0e08d05b14 100644
--- a/diff.c
+++ b/diff.c
@@ -25,6 +25,7 @@
 #include "packfile.h"
 #include "parse-options.h"
 #include "help.h"
+#include "fetch-object.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -6067,6 +6068,32 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
        if (o->color_moved)
                o->emitted_symbols = &esm;
 
+       if (repository_format_partial_clone) {
+               /*
+                * Prefetch the diff pairs that are about to be flushed.
+                */
+               struct oid_array to_fetch = OID_ARRAY_INIT;
+               int fetch_if_missing_store = fetch_if_missing;
+
+               fetch_if_missing = 0;
+               for (i = 0; i < q->nr; i++) {
+                       struct diff_filepair *p = q->queue[i];
+                       if (!check_pair_status(p))
+                               continue;
+                       if (p->one && p->one->oid_valid &&
+                           !has_object_file(&p->one->oid))
+                               oid_array_append(&to_fetch, &p->one->oid);
+                       if (p->two && p->two->oid_valid &&
+                           !has_object_file(&p->two->oid))
+                               oid_array_append(&to_fetch, &p->two->oid);
+               }
+               if (to_fetch.nr)
+                       fetch_objects(repository_format_partial_clone,
+                                     to_fetch.oid, to_fetch.nr);
+               fetch_if_missing = fetch_if_missing_store;
+               oid_array_clear(&to_fetch);
+       }
+
        for (i = 0; i < q->nr; i++) {
                struct diff_filepair *p = q->queue[i];
                if (check_pair_status(p))
diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
new file mode 100755
index 0000000000..38f03be114
--- /dev/null
+++ b/t/t4067-diff-partial-clone.sh
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+test_description='behavior of diff when reading objects in a partial clone'
+
+. ./test-lib.sh
+
+test_expect_success 'git show batches blobs' '
+       test_create_repo server &&
+       echo a >server/a &&
+       echo b >server/b &&
+       git -C server add a b &&
+       git -C server commit -m x &&
+
+       test_config -C server uploadpack.allowfilter 1 &&
+       test_config -C server uploadpack.allowanysha1inwant 1 &&
+       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+       # Ensure that there is exactly 1 negotiation by checking that there is
+       # only 1 "done" line sent. ("done" marks the end of negotiation.)
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
+       grep "git> done" trace >done_lines &&
+       test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff batches blobs' '
+       rm -rf server client trace &&
+
+       test_create_repo server &&
+       echo a >server/a &&
+       echo b >server/b &&
+       git -C server add a b &&
+       git -C server commit -m x &&
+       echo c >server/c &&
+       echo d >server/d &&
+       git -C server add c d &&
+       git -C server commit -m x &&
+
+       test_config -C server uploadpack.allowfilter 1 &&
+       test_config -C server uploadpack.allowanysha1inwant 1 &&
+       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+       # Ensure that there is exactly 1 negotiation by checking that there is
+       # only 1 "done" line sent. ("done" marks the end of negotiation.)
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+       grep "git> done" trace >done_lines &&
+       test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff skips same-OID blobs' '
+       rm -rf server client trace &&
+
+       test_create_repo server &&
+       echo a >server/a &&
+       echo b >server/b &&
+       git -C server add a b &&
+       git -C server commit -m x &&
+       echo another-a >server/a &&
+       git -C server add a &&
+       git -C server commit -m x &&
+
+       test_config -C server uploadpack.allowfilter 1 &&
+       test_config -C server uploadpack.allowanysha1inwant 1 &&
+       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+       echo a | git hash-object --stdin >hash-old-a &&
+       echo another-a | git hash-object --stdin >hash-new-a &&
+       echo b | git hash-object --stdin >hash-b &&
+
+       # Ensure that only a and another-a are fetched.
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+       grep "want $(cat hash-old-a)" trace &&
+       grep "want $(cat hash-new-a)" trace &&
+       ! grep "want $(cat hash-b)" trace
+'
+
+test_done
-- 
2.21.0.155.ge902e9bcae.dirty

Reply via email to