From: Derrick Stolee <dsto...@microsoft.com>

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit-graph.c          | 248 +++++++++++++++++++++++++++++++++++++++-
 commit-graph.h          |   1 +
 t/t5318-commit-graph.sh |   2 +-
 3 files changed, 244 insertions(+), 7 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 5f6193277a..44448aabe4 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -40,6 +40,8 @@
 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
                        + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
 
+#define MAX_SPLIT_COMMITS 64000
+
 char *get_commit_graph_filename(const char *obj_dir)
 {
        return xstrfmt("%s/info/commit-graph", obj_dir);
@@ -619,9 +621,14 @@ struct write_commit_graph_context {
        unsigned long approx_nr_objects;
        struct progress *progress;
        int progress_done;
+       int num_commit_graphs_before;
+       int num_commit_graphs_after;
+       uint32_t new_num_commits_in_base;
+       struct commit_graph *new_base_graph;
        uint64_t progress_cnt;
        unsigned append:1,
-                report_progress:1;
+                report_progress:1,
+                split:1;
 };
 
 static void write_graph_chunk_fanout(struct hashfile *f,
@@ -691,6 +698,16 @@ static void write_graph_chunk_data(struct hashfile *f, int 
hash_len,
                                              ctx->commits.nr,
                                              commit_to_sha1);
 
+                       if (edge_value >= 0)
+                               edge_value += ctx->new_num_commits_in_base;
+                       else {
+                               uint32_t pos;
+                               if (find_commit_in_graph(parent->item,
+                                                        ctx->new_base_graph,
+                                                        &pos))
+                                       edge_value = pos;
+                       }
+
                        if (edge_value < 0)
                                BUG("missing parent %s for commit %s",
                                    oid_to_hex(&parent->item->object.oid),
@@ -711,6 +728,17 @@ static void write_graph_chunk_data(struct hashfile *f, int 
hash_len,
                                              ctx->commits.list,
                                              ctx->commits.nr,
                                              commit_to_sha1);
+
+                       if (edge_value >= 0)
+                               edge_value += ctx->new_num_commits_in_base;
+                       else {
+                               uint32_t pos;
+                               if (find_commit_in_graph(parent->item,
+                                                        ctx->new_base_graph,
+                                                        &pos))
+                                       edge_value = pos;
+                       }
+
                        if (edge_value < 0)
                                BUG("missing parent %s for commit %s",
                                    oid_to_hex(&parent->item->object.oid),
@@ -768,6 +796,16 @@ static void write_graph_chunk_extra_edges(struct hashfile 
*f,
                                                  ctx->commits.nr,
                                                  commit_to_sha1);
 
+                       if (edge_value >= 0)
+                               edge_value += ctx->new_num_commits_in_base;
+                       else {
+                               uint32_t pos;
+                               if (find_commit_in_graph(parent->item,
+                                                        ctx->new_base_graph,
+                                                        &pos))
+                                       edge_value = pos;
+                       }
+
                        if (edge_value < 0)
                                BUG("missing parent %s for commit %s",
                                    oid_to_hex(&parent->item->object.oid),
@@ -782,7 +820,7 @@ static void write_graph_chunk_extra_edges(struct hashfile 
*f,
        }
 }
 
-static int commit_compare(const void *_a, const void *_b)
+static int oid_compare(const void *_a, const void *_b)
 {
        const struct object_id *a = (const struct object_id *)_a;
        const struct object_id *b = (const struct object_id *)_b;
@@ -859,7 +897,13 @@ static void close_reachable(struct 
write_commit_graph_context *ctx)
                display_progress(ctx->progress, i + 1);
                commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
 
-               if (commit && !parse_commit_no_graph(commit))
+               if (!commit)
+                       continue;
+               if (ctx->split) {
+                       if (!parse_commit(commit) &&
+                           commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
+                               add_missing_parents(ctx, commit);
+               } else if (!parse_commit_no_graph(commit))
                        add_missing_parents(ctx, commit);
        }
        stop_progress(&ctx->progress);
@@ -1051,12 +1095,20 @@ static uint32_t count_distinct_commits(struct 
write_commit_graph_context *ctx)
                        _("Counting distinct commits in commit graph"),
                        ctx->oids.nr);
        display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
-       QSORT(ctx->oids.list, ctx->oids.nr, commit_compare);
+       QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
 
        for (i = 1; i < ctx->oids.nr; i++) {
                display_progress(ctx->progress, i + 1);
-               if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
+               if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) {
+                       if (ctx->split) {
+                               struct commit *c = lookup_commit(ctx->r, 
&ctx->oids.list[i]);
+
+                               if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH)
+                                       continue;
+                       }
+
                        count_distinct++;
+               }
        }
        stop_progress(&ctx->progress);
 
@@ -1079,7 +1131,13 @@ static void copy_oids_to_commits(struct 
write_commit_graph_context *ctx)
                if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
                        continue;
 
+               ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, 
ctx->commits.alloc);
                ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, 
&ctx->oids.list[i]);
+
+               if (ctx->split &&
+                   ctx->commits.list[ctx->commits.nr]->graph_pos != 
COMMIT_NOT_FROM_GRAPH)
+                       continue;
+
                parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
 
                for (parent = ctx->commits.list[ctx->commits.nr]->parents;
@@ -1105,7 +1163,13 @@ static int write_commit_graph_file(struct 
write_commit_graph_context *ctx)
        struct strbuf progress_title = STRBUF_INIT;
        int num_chunks = ctx->num_extra_edges ? 4 : 3;
 
-       ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
+       if (ctx->num_commit_graphs_after > 1)
+               ctx->graph_name = get_split_graph_filename(
+                                       ctx->obj_dir,
+                                       ctx->num_commit_graphs_after - 1);
+       else
+               ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
+
        if (safe_create_leading_directories(ctx->graph_name)) {
                UNLEAK(ctx->graph_name);
                error(_("unable to create leading directories of %s"),
@@ -1167,11 +1231,166 @@ static int write_commit_graph_file(struct 
write_commit_graph_context *ctx)
 
        close_commit_graph(ctx->r);
        finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
+
+       while (ctx->num_commit_graphs_before > ctx->num_commit_graphs_after) {
+               char *graph_name = get_split_graph_filename(
+                                       ctx->obj_dir,
+                                       --ctx->num_commit_graphs_before);
+               unlink(graph_name);
+               free(graph_name);
+       }
+
        commit_lock_file(&lk);
 
        return 0;
 }
 
+static size_t expected_commit_graph_size(size_t num_commits)
+{
+       return GRAPH_HEADER_SIZE + GRAPH_FANOUT_SIZE + 6 * 
GRAPH_CHUNKLOOKUP_WIDTH +
+               (num_commits + 1) * (GRAPH_DATA_WIDTH + the_hash_algo->rawsz);
+}
+
+static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
+{
+       struct commit_graph *g = ctx->r->objects->commit_graph;
+       uint32_t num_commits = ctx->commits.nr;
+       size_t expected_size = expected_commit_graph_size(num_commits);
+
+       ctx->num_commit_graphs_before = 0;
+       while (g) {
+               ctx->num_commit_graphs_before++;
+               g = g->base_graph;
+       }
+
+       g = ctx->r->objects->commit_graph;
+       ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
+
+       while (g && (g->data_len <= 2 * expected_size || num_commits > 
MAX_SPLIT_COMMITS)) {
+               num_commits += g->num_commits;
+               expected_size = expected_commit_graph_size(num_commits);
+               g = g->base_graph;
+               ctx->num_commit_graphs_after--;
+       }
+}
+
+static void merge_commit_graph(struct write_commit_graph_context *ctx,
+                              struct commit_graph *g)
+{
+       uint32_t i;
+       uint32_t offset = g->num_commits_in_base;
+
+       for (i = 0; i < g->num_commits; i++) {
+               struct object_id oid;
+               struct commit *result;
+
+               display_progress(ctx->progress, i + 1);
+
+               load_oid_from_graph(g, i + offset, &oid);
+
+               /* only add commits if they still exist in the repo */
+               result = lookup_commit_reference_gently(ctx->r, &oid, 1);
+
+               if (result) {
+                       ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, 
ctx->commits.alloc);
+                       ctx->commits.list[ctx->commits.nr] = result;
+                       ctx->commits.nr++;
+               }
+       }
+}
+
+static int commit_compare(const void *_a, const void *_b)
+{
+       const struct commit *a = *(const struct commit **)_a;
+       const struct commit *b = *(const struct commit **)_b;
+       return oidcmp(&a->object.oid, &b->object.oid);
+}
+
+static void deduplicate_commits(struct write_commit_graph_context *ctx)
+{
+       uint32_t i, num_parents, last_distinct = 0, duplicates = 0;
+       struct commit_list *parent;
+
+       if (ctx->report_progress)
+               ctx->progress = start_delayed_progress(
+                                       _("De-duplicating merged commits"),
+                                       ctx->commits.nr);
+
+       QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
+
+       ctx->num_extra_edges = 0;
+       for (i = 1; i < ctx->commits.nr; i++) {
+               display_progress(ctx->progress, i);
+
+               if (oideq(&ctx->commits.list[last_distinct]->object.oid,
+                         &ctx->commits.list[i]->object.oid)) {
+                       duplicates++;
+               } else {
+                       if (duplicates)
+                               ctx->commits.list[last_distinct + 1] = 
ctx->commits.list[i];
+                       last_distinct++;
+
+                       num_parents = 0;
+                       for (parent = ctx->commits.list[i]->parents; parent; 
parent = parent->next)
+                               num_parents++;
+
+                       if (num_parents > 2)
+                               ctx->num_extra_edges += num_parents - 2;
+               }
+       }
+
+       ctx->commits.nr -= duplicates;
+       stop_progress(&ctx->progress);
+}
+
+static void merge_commit_graphs(struct write_commit_graph_context *ctx)
+{
+       struct commit_graph *g = ctx->r->objects->commit_graph;
+       uint32_t current_graph_number = ctx->num_commit_graphs_before;
+       struct strbuf progress_title = STRBUF_INIT;
+
+       while (g && current_graph_number >= ctx->num_commit_graphs_after) {
+               current_graph_number--;
+
+               if (ctx->report_progress) {
+                       if (current_graph_number)
+                               strbuf_addf(&progress_title,
+                                           _("Merging commit-graph-%d"),
+                                           current_graph_number);
+                       else
+                               strbuf_addstr(&progress_title,
+                                             _("Merging commit-graph"));
+                       ctx->progress = 
start_delayed_progress(progress_title.buf, 0);
+               }
+
+               merge_commit_graph(ctx, g);
+               stop_progress(&ctx->progress);
+               strbuf_release(&progress_title);
+
+               g = g->base_graph;
+       }
+
+       if (g) {
+               ctx->new_base_graph = g;
+               ctx->new_num_commits_in_base = g->num_commits + 
g->num_commits_in_base;
+       }
+
+       deduplicate_commits(ctx);
+}
+
+static void collapse_all_commit_graphs(struct write_commit_graph_context *ctx)
+{
+       struct commit_graph *g = ctx->r->objects->commit_graph;
+
+       ctx->num_commit_graphs_after = 1;
+       ctx->num_commit_graphs_before = 0;
+
+       while (g) {
+               ctx->num_commit_graphs_before++;
+               g = g->base_graph;
+       }
+}
+
 int write_commit_graph(const char *obj_dir,
                       struct string_list *pack_indexes,
                       struct string_list *commit_hex,
@@ -1189,10 +1408,17 @@ int write_commit_graph(const char *obj_dir,
        ctx->obj_dir = obj_dir;
        ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0;
        ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0;
+       ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0;
+
+       if (ctx->split)
+               prepare_commit_graph(ctx->r);
 
        ctx->approx_nr_objects = approximate_object_count();
        ctx->oids.alloc = ctx->approx_nr_objects / 32;
 
+       if (ctx->split && ctx->oids.alloc > MAX_SPLIT_COMMITS)
+               ctx->oids.alloc = MAX_SPLIT_COMMITS;
+
        if (ctx->append) {
                prepare_commit_graph_one(ctx->r, ctx->obj_dir);
                if (ctx->r->objects->commit_graph)
@@ -1243,6 +1469,16 @@ int write_commit_graph(const char *obj_dir,
                goto cleanup;
        }
 
+       if (!ctx->commits.nr)
+               goto cleanup;
+
+       if (ctx->split) {
+               split_graph_merge_strategy(ctx);
+
+               merge_commit_graphs(ctx);
+       } else
+               collapse_all_commit_graphs(ctx);
+
        compute_generation_numbers(ctx);
 
        res = write_commit_graph_file(ctx);
diff --git a/commit-graph.h b/commit-graph.h
index 170920720d..7a39ae2278 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -71,6 +71,7 @@ int generation_numbers_enabled(struct repository *r);
 
 #define COMMIT_GRAPH_APPEND     (1 << 0)
 #define COMMIT_GRAPH_PROGRESS   (1 << 1)
+#define COMMIT_GRAPH_SPLIT      (1 << 2)
 
 int write_commit_graph_reachable(const char *obj_dir, unsigned int flags);
 int write_commit_graph(const char *obj_dir,
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index e80c1cac02..d621608500 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -20,7 +20,7 @@ test_expect_success 'verify graph with no graph file' '
 test_expect_success 'write graph with no packs' '
        cd "$TRASH_DIRECTORY/full" &&
        git commit-graph write --object-dir . &&
-       test_path_is_file info/commit-graph
+       test_path_is_missing info/commit-graph
 '
 
 test_expect_success 'create commits and repack' '
-- 
gitgitgadget

Reply via email to