Thanks, everyone for the review.
Changes from v1:
- used test_when_finished (Szeder)
- used flag to inhibit fetching of missing objects (Dscho)
- moved the prefetch so that it also works if we request rename
detection, and included a test demonstrating that (not sure if that
was what Peff meant)
- used QUICK flag (I agree that the rescan is probably not that
valuable here)
Peff also suggested that I use an oidset instead of an oidarray in order
to eliminate duplicates, but that makes it difficult to interface with
fetch_objects(), which takes a pointer and a length (which is
convenient, because if we want to fetch a single object, we can just
point to it and use a length of 1). Maybe the best idea is to have
oidset maintain its own OID array, and have each hash entry store an
index instead of an OID. For now I added a NEEDSWORK.
Jonathan Tan (2):
sha1-file: support OBJECT_INFO_FOR_PREFETCH
diff: batch fetching of missing blobs
diff.c | 32 +++++++++++
object-store.h | 6 ++
sha1-file.c | 3 +-
t/t4067-diff-partial-clone.sh | 103 ++++++++++++++++++++++++++++++++++
unpack-trees.c | 17 +++---
5 files changed, 152 insertions(+), 9 deletions(-)
create mode 100755 t/t4067-diff-partial-clone.sh
Range-diff against v1:
-: ---------- > 1: 068861632b sha1-file: support OBJECT_INFO_FOR_PREFETCH
1: d1e604239b ! 2: 44de02e584 diff: batch fetching of missing blobs
@@ -9,12 +9,6 @@
blobs", 2017-12-08), but for another command.
Signed-off-by: Jonathan Tan <[email protected]>
- ---
- Here's an improvement for those having partial clones.
-
- I couldn't find a good place to place the test (a place that checks how
- diff interfaces with the object store would be ideal), so I created a
- new one. Let me know if there's a better place to put it.
diff --git a/diff.c b/diff.c
--- a/diff.c
@@ -28,38 +22,45 @@
#ifdef NO_FAST_WORKING_DIRECTORY
#define FAST_WORKING_DIRECTORY 0
@@
- if (o->color_moved)
- o->emitted_symbols = &esm;
+ QSORT(q->queue, q->nr, diffnamecmp);
+ }
++static void add_if_missing(struct oid_array *to_fetch,
++ const struct diff_filespec *filespec)
++{
++ if (filespec && filespec->oid_valid &&
++ oid_object_info_extended(the_repository, &filespec->oid, NULL,
++ OBJECT_INFO_FOR_PREFETCH))
++ oid_array_append(to_fetch, &filespec->oid);
++}
++
+ void diffcore_std(struct diff_options *options)
+ {
+ if (repository_format_partial_clone) {
+ /*
+ * Prefetch the diff pairs that are about to be flushed.
+ */
++ int i;
++ struct diff_queue_struct *q = &diff_queued_diff;
+ struct oid_array to_fetch = OID_ARRAY_INIT;
-+ int fetch_if_missing_store = fetch_if_missing;
+
-+ fetch_if_missing = 0;
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
-+ if (!check_pair_status(p))
-+ continue;
-+ if (p->one && p->one->oid_valid &&
-+ !has_object_file(&p->one->oid))
-+ oid_array_append(&to_fetch, &p->one->oid);
-+ if (p->two && p->two->oid_valid &&
-+ !has_object_file(&p->two->oid))
-+ oid_array_append(&to_fetch, &p->two->oid);
++ add_if_missing(&to_fetch, p->one);
++ add_if_missing(&to_fetch, p->two);
+ }
+ if (to_fetch.nr)
++ /*
++ * NEEDSWORK: Consider deduplicating the OIDs sent.
++ */
+ fetch_objects(repository_format_partial_clone,
+ to_fetch.oid, to_fetch.nr);
-+ fetch_if_missing = fetch_if_missing_store;
+ oid_array_clear(&to_fetch);
+ }
+
- for (i = 0; i < q->nr; i++) {
- struct diff_filepair *p = q->queue[i];
- if (check_pair_status(p))
+ /* NOTE please keep the following in sync with diff_tree_combined() */
+ if (options->skip_stat_unmatch)
+ diffcore_skip_stat_unmatch(options);
diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
new file mode 100755
@@ -73,6 +74,8 @@
+. ./test-lib.sh
+
+test_expect_success 'git show batches blobs' '
++ test_when_finished "rm -rf server client trace" &&
++
+ test_create_repo server &&
+ echo a >server/a &&
+ echo b >server/b &&
@@ -91,7 +94,7 @@
+'
+
+test_expect_success 'diff batches blobs' '
-+ rm -rf server client trace &&
++ test_when_finished "rm -rf server client trace" &&
+
+ test_create_repo server &&
+ echo a >server/a &&
@@ -115,7 +118,7 @@
+'
+
+test_expect_success 'diff skips same-OID blobs' '
-+ rm -rf server client trace &&
++ test_when_finished "rm -rf server client trace" &&
+
+ test_create_repo server &&
+ echo a >server/a &&
@@ -141,4 +144,29 @@
+ ! grep "want $(cat hash-b)" trace
+'
+
++test_expect_success 'diff with rename detection batches blobs' '
++ test_when_finished "rm -rf server client trace" &&
++
++ test_create_repo server &&
++ echo a >server/a &&
++ printf "b\nb\nb\nb\nb\n" >server/b &&
++ git -C server add a b &&
++ git -C server commit -m x &&
++ rm server/b &&
++ printf "b\nb\nb\nb\nbX\n" >server/c &&
++ git -C server add c &&
++ git -C server commit -a -m x &&
++
++ test_config -C server uploadpack.allowfilter 1 &&
++ test_config -C server uploadpack.allowanysha1inwant 1 &&
++ git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
++
++ # Ensure that there is exactly 1 negotiation by checking that there is
++ # only 1 "done" line sent. ("done" marks the end of negotiation.)
++ GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
++ grep "similarity index" out &&
++ grep "git> done" trace >done_lines &&
++ test_line_count = 1 done_lines
++'
++
+test_done
--
2.21.0.197.gd478713db0