On Fri, Aug 3, 2018 at 10:39 PM Nguyễn Thái Ngọc Duy <[email protected]> wrote:
>
> This is a micro optimization that probably only shines on repos with
> deep directory structure. Instead of allocating and freeing a new
> cache_entry in every iteration, we reuse the last one and only update
> the parts that are new each iteration.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
> Signed-off-by: Junio C Hamano <[email protected]>
> ---
> unpack-trees.c | 29 ++++++++++++++++++++---------
> 1 file changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/unpack-trees.c b/unpack-trees.c
> index ba3d2e947e..c8defc2015 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -694,6 +694,8 @@ static int traverse_by_cache_tree(int pos, int
> nr_entries, int nr_names,
> {
> struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
> struct unpack_trees_options *o = info->data;
> + struct cache_entry *tree_ce = NULL;
> + int ce_len = 0;
> int i, d;
>
> if (!o->merge)
> @@ -708,30 +710,39 @@ static int traverse_by_cache_tree(int pos, int
> nr_entries, int nr_names,
> * in the first place.
> */
> for (i = 0; i < nr_entries; i++) {
> - struct cache_entry *tree_ce;
> - int len, rc;
> + int new_ce_len, len, rc;
>
> src[0] = o->src_index->cache[pos + i];
>
> len = ce_namelen(src[0]);
> - tree_ce = xcalloc(1, cache_entry_size(len));
> + new_ce_len = cache_entry_size(len);
> +
> + if (new_ce_len > ce_len) {
> + new_ce_len <<= 1;
> + tree_ce = xrealloc(tree_ce, new_ce_len);
> + memset(tree_ce, 0, new_ce_len);
> + ce_len = new_ce_len;
> +
> + tree_ce->ce_flags = create_ce_flags(0);
> +
> + for (d = 1; d <= nr_names; d++)
> + src[d] = tree_ce;
> + }
>
> tree_ce->ce_mode = src[0]->ce_mode;
> - tree_ce->ce_flags = create_ce_flags(0);
> tree_ce->ce_namelen = len;
> oidcpy(&tree_ce->oid, &src[0]->oid);
> memcpy(tree_ce->name, src[0]->name, len + 1);
>
> - for (d = 1; d <= nr_names; d++)
> - src[d] = tree_ce;
> -
> rc = call_unpack_fn((const struct cache_entry * const *)src,
> o);
> - free(tree_ce);
> - if (rc < 0)
> + if (rc < 0) {
> + free(tree_ce);
> return rc;
> + }
>
> mark_ce_used(src[0], o);
> }
> + free(tree_ce);
> if (o->debug_unpack)
> printf("Unpacked %d entries from %s to %s using cache-tree\n",
> nr_entries,
> --
> 2.18.0.656.gda699b98b3
Seems reasonable, when we really do have to invoke call_unpack_fn.
I'm still curious if there are reasons why we couldn't just skip that
call (at least when o->fn is one of {oneway_merge, twoway_merge,
threeway_merge, bind_merge}), but I already brought that up in my
comments on patch 2.