Heiko Voigt <hvo...@hvoigt.net> writes:

> +static struct sha1_array *get_sha1s_from_list(struct string_list *submodules,
> +             const char *path)
> +{
> +     struct string_list_item *item;
> +     struct sha1_array *hashes;
> +
> +     item = string_list_insert(submodules, path);
> +     if (item->util)
> +             return (struct sha1_array *) item->util;
> +
> +     hashes = (struct sha1_array *) xmalloc(sizeof(struct sha1_array));
> +     /* NEEDSWORK: should we add an initializer function for
> +      * sha1_array ? */
> +     memset(hashes, 0, sizeof(struct sha1_array));
> +     item->util = hashes;


        /* NEEDSWORK: should we have SHA1_ARRAY_INIT etc.? */
        item->util = xcalloc(1, sizeof(struct sha1_array));

>  static void collect_submodules_from_diff(struct diff_queue_struct *q,
>                                        struct diff_options *options,
>                                        void *data)
>  {
>       int i;
> -     struct string_list *needs_pushing = data;
> +     struct string_list *submodules = data;
>  
>       for (i = 0; i < q->nr; i++) {
>               struct diff_filepair *p = q->queue[i];
> +             struct sha1_array *hashes;
>               if (!S_ISGITLINK(p->two->mode))
>                       continue;
> -             if (submodule_needs_pushing(p->two->path, p->two->oid.hash))
> -                     string_list_insert(needs_pushing, p->two->path);
> +             hashes = get_sha1s_from_list(submodules, p->two->path);
> +             sha1_array_append(hashes, p->two->oid.hash);
>       }
>  }

So the idea at this step is still to have each commit in the top-level
history inspected for any submodule change, but the result is
collected in a mapping (submodule -> [ list of submodule commits ]).
As we do not expect too many "oops, the old commit was better, so
let's revert and rebind the old one from the submodule" events in the
history of the top-level, appending and then running for-each-unique
is an efficient way to do it, compared to first checking whether we
already have a commit and then inserting only new ones to maintain
uniqueness.

Makes sense.

> @@ -582,14 +601,41 @@ static void find_unpushed_submodule_commits(struct commit *commit,
>       diff_tree_combined_merge(commit, 1, &rev);
>  }
>  
> +struct collect_submodule_from_sha1s_data {
> +     char *submodule_path;
> +     struct string_list *needs_pushing;
> +};
> +
> +static void collect_submodules_from_sha1s(const unsigned char sha1[20],
> +             void *data)
> +{
> +     struct collect_submodule_from_sha1s_data *me =
> +             (struct collect_submodule_from_sha1s_data *) data;
> +
> +     if (submodule_needs_pushing(me->submodule_path, sha1))
> +             string_list_insert(me->needs_pushing, me->submodule_path);
> +}

This is called from sha1_array_for_each_unique(), which iterates over
the submodule commit object names for one submodule, and so it ends up
calling submodule_needs_pushing() a number of times, which smells less
efficient than it could be.  You can ask

    rev-list <all the submodule commits to be pushed> --not --remotes

just once in the submodule repository.  I imagine that is what you'll
do in the next patch.

An obvious but much less efficient way to optimize this part would
be to see if me->needs_pushing already has me->submodule_path and,
if so, skip the call to submodule_needs_pushing(); but if you drop
the call that find_unpushed_submodules() makes to
sha1_array_for_each_unique() to walk new submodule commits one by
one, that would become irrelevant.

> +static void free_submodules_sha1s(struct string_list *submodules)
> +{
> +     int i;
> +     for (i = 0; i < submodules->nr; i++) {
> +             struct string_list_item *item = &submodules->items[i];
> +             struct sha1_array *hashes = (struct sha1_array *) item->util;
> +             sha1_array_clear(hashes);
> +     }
> +     string_list_clear(submodules, 1);
> +}
> +
>  int find_unpushed_submodules(unsigned char new_sha1[20],
>               const char *remotes_name, struct string_list *needs_pushing)
>  {
>       struct rev_info rev;
>       struct commit *commit;
>       const char *argv[] = {NULL, NULL, "--not", "NULL", NULL};
> -     int argc = ARRAY_SIZE(argv) - 1;
> +     int argc = ARRAY_SIZE(argv) - 1, i;
>       char *sha1_copy;
> +     struct string_list submodules = STRING_LIST_INIT_DUP;
>  
>       struct strbuf remotes_arg = STRBUF_INIT;
>  
> @@ -603,12 +649,23 @@ int find_unpushed_submodules(unsigned char new_sha1[20],
>               die("revision walk setup failed");
>  
>       while ((commit = get_revision(&rev)) != NULL)
> -             find_unpushed_submodule_commits(commit, needs_pushing);
> +             find_unpushed_submodule_commits(commit, &submodules);
>  
>       reset_revision_walk();
>       free(sha1_copy);
>       strbuf_release(&remotes_arg);
>  
> +     for (i = 0; i < submodules.nr; i++) {
> +             struct string_list_item *item = &submodules.items[i];
> +             struct collect_submodule_from_sha1s_data data;
> +             data.submodule_path = item->string;
> +             data.needs_pushing = needs_pushing;
> +             sha1_array_for_each_unique((struct sha1_array *) item->util,
> +                             collect_submodules_from_sha1s,
> +                             &data);
> +     }
> +     free_submodules_sha1s(&submodules);
> +
>       return needs_pushing->nr;
>  }

Reply via email to