Refreshing the index is usually very fast, but it can still take a
long time sometimes. Cold cache is one, or something else silly (*).
In this case, it's good to show something to let the user know "git
status" is not hanging, it's just busy doing something.

(*) I got called by my colleague because her "git status" took very
    long and looked pretty much like hanging. After a bit of strace,
    it looks to me that git was trying to rehash every single file,
    and this was a big repository. This process could take minutes.

    In this case, I think it was probably because she copied this
    repository to a new place and stat data did not match anymore. So
    git fell back to hashing.

Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
---
 I need to get this out of my head before I forget. I obviously think
 this is a good idea and could be done in more places, even just to
 deal with cold cache. The hint about "git status -uno" for example,
 could be accompanied by a progress bar for scanning for untracked
 files...

 Another note about rehashing files as part of refresh. We probably
 could do better by hashing in parallel. Or perhaps not because having
 a big lock around object database pretty much kills performance, and
 if I remember correctly none of my core was 100% consumed (i.e. CPU
 bottleneck as an indication that multithread is a good idea...)

 Anyway that's it! Weekend after a long week! I'll read mails and
 respond tomorrow.

 builtin/am.c     |  2 +-
 builtin/commit.c |  6 ++++--
 cache.h          |  7 +++++--
 preload-index.c  | 44 +++++++++++++++++++++++++++++++++++++++-----
 read-cache.c     | 10 ++++++++++
 sequencer.c      |  2 +-
 6 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index 5e866d17c7..22a93cfef3 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -2324,7 +2324,7 @@ int cmd_am(int argc, const char **argv, const char 
*prefix)
        /* Ensure a valid committer ident can be constructed */
        git_committer_info(IDENT_STRICT);
 
-       if (read_index_preload(&the_index, NULL) < 0)
+       if (read_index_preload(&the_index, NULL, 0) < 0)
                die(_("failed to read the index"));
 
        if (in_progress) {
diff --git a/builtin/commit.c b/builtin/commit.c
index 0d9828e29e..eaf639ece6 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1355,8 +1355,10 @@ int cmd_status(int argc, const char **argv, const char 
*prefix)
                       PATHSPEC_PREFER_FULL,
                       prefix, argv);
 
-       read_cache_preload(&s.pathspec);
-       refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, 
NULL, NULL);
+       read_index_preload(&the_index, &s.pathspec, REFRESH_PROGRESS);
+       refresh_index(&the_index,
+                     REFRESH_QUIET|REFRESH_UNMERGED|REFRESH_PROGRESS,
+                     &s.pathspec, NULL, NULL);
 
        if (use_optional_locks())
                fd = hold_locked_index(&index_lock, 0);
diff --git a/cache.h b/cache.h
index 4d014541ab..35da02be90 100644
--- a/cache.h
+++ b/cache.h
@@ -410,7 +410,7 @@ void validate_cache_entries(const struct index_state 
*istate);
 
 #define read_cache() read_index(&the_index)
 #define read_cache_from(path) read_index_from(&the_index, (path), 
(get_git_dir()))
-#define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec))
+#define read_cache_preload(pathspec) read_index_preload(&the_index, 
(pathspec), 0)
 #define is_cache_unborn() is_index_unborn(&the_index)
 #define read_cache_unmerged() read_index_unmerged(&the_index)
 #define discard_cache() discard_index(&the_index)
@@ -659,7 +659,9 @@ extern int daemonize(void);
 /* Initialize and use the cache information */
 struct lock_file;
 extern int read_index(struct index_state *);
-extern int read_index_preload(struct index_state *, const struct pathspec 
*pathspec);
+extern int read_index_preload(struct index_state *,
+                             const struct pathspec *pathspec,
+                             unsigned int refresh_flags);
 extern int do_read_index(struct index_state *istate, const char *path,
                         int must_exist); /* for testting only! */
 extern int read_index_from(struct index_state *, const char *path,
@@ -814,6 +816,7 @@ extern void fill_stat_cache_info(struct cache_entry *ce, 
struct stat *st);
 #define REFRESH_IGNORE_MISSING 0x0008  /* ignore non-existent */
 #define REFRESH_IGNORE_SUBMODULES      0x0010  /* ignore submodules */
 #define REFRESH_IN_PORCELAIN   0x0020  /* user friendly output, not "needs 
update" */
+#define REFRESH_PROGRESS       0x0040  /* show progress bar if stderr is tty */
 extern int refresh_index(struct index_state *, unsigned int flags, const 
struct pathspec *pathspec, char *seen, const char *header_msg);
 extern struct cache_entry *refresh_cache_entry(struct index_state *, struct 
cache_entry *, unsigned int);
 
diff --git a/preload-index.c b/preload-index.c
index 71cd2437a3..bf7dbed779 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -5,10 +5,12 @@
 #include "pathspec.h"
 #include "dir.h"
 #include "fsmonitor.h"
+#include "progress.h"
 
 #ifdef NO_PTHREADS
 static void preload_index(struct index_state *index,
-                         const struct pathspec *pathspec)
+                         const struct pathspec *pathspec,
+                         unsigned int refresh_flags)
 {
        ; /* nothing */
 }
@@ -25,16 +27,23 @@ static void preload_index(struct index_state *index,
 #define MAX_PARALLEL (20)
 #define THREAD_COST (500)
 
+struct progress_data {
+       unsigned long n;
+       struct progress *progress;
+       pthread_mutex_t mutex;
+};
+
 struct thread_data {
        pthread_t pthread;
        struct index_state *index;
        struct pathspec pathspec;
+       struct progress_data *progress;
        int offset, nr;
 };
 
 static void *preload_thread(void *_data)
 {
-       int nr;
+       int nr, last_nr;
        struct thread_data *p = _data;
        struct index_state *index = p->index;
        struct cache_entry **cep = index->cache + p->offset;
@@ -43,6 +52,7 @@ static void *preload_thread(void *_data)
        nr = p->nr;
        if (nr + p->offset > index->cache_nr)
                nr = index->cache_nr - p->offset;
+       last_nr = nr;
 
        do {
                struct cache_entry *ce = *cep++;
@@ -58,6 +68,15 @@ static void *preload_thread(void *_data)
                        continue;
                if (ce->ce_flags & CE_FSMONITOR_VALID)
                        continue;
+               if (p->progress && !(nr & 31)) {
+                       struct progress_data *pd = p->progress;
+
+                       pthread_mutex_lock(&pd->mutex);
+                       pd->n += last_nr - nr;
+                       display_progress(pd->progress, pd->n);
+                       pthread_mutex_unlock(&pd->mutex);
+                       last_nr = nr;
+               }
                if (!ce_path_match(index, ce, &p->pathspec, NULL))
                        continue;
                if (threaded_has_symlink_leading_path(&cache, ce->name, 
ce_namelen(ce)))
@@ -74,11 +93,13 @@ static void *preload_thread(void *_data)
 }
 
 static void preload_index(struct index_state *index,
-                         const struct pathspec *pathspec)
+                         const struct pathspec *pathspec,
+                         unsigned int refresh_flags)
 {
        int threads, i, work, offset;
        struct thread_data data[MAX_PARALLEL];
        uint64_t start = getnanotime();
+       struct progress_data pd;
 
        if (!core_preload_index)
                return;
@@ -93,6 +114,13 @@ static void preload_index(struct index_state *index,
        offset = 0;
        work = DIV_ROUND_UP(index->cache_nr, threads);
        memset(&data, 0, sizeof(data));
+
+       memset(&pd, 0, sizeof(pd));
+       if (refresh_flags & REFRESH_PROGRESS && isatty(2)) {
+               pd.progress = start_delayed_progress(_("Refreshing index"), 
index->cache_nr);
+               pthread_mutex_init(&pd.mutex, NULL);
+       }
+
        for (i = 0; i < threads; i++) {
                struct thread_data *p = data+i;
                p->index = index;
@@ -100,6 +128,8 @@ static void preload_index(struct index_state *index,
                        copy_pathspec(&p->pathspec, pathspec);
                p->offset = offset;
                p->nr = work;
+               if (pd.progress)
+                       p->progress = &pd;
                offset += work;
                if (pthread_create(&p->pthread, NULL, preload_thread, p))
                        die("unable to create threaded lstat");
@@ -109,15 +139,19 @@ static void preload_index(struct index_state *index,
                if (pthread_join(p->pthread, NULL))
                        die("unable to join threaded lstat");
        }
+       if (pd.progress)
+               stop_progress(&pd.progress);
+
        trace_performance_since(start, "preload index");
 }
 #endif
 
 int read_index_preload(struct index_state *index,
-                      const struct pathspec *pathspec)
+                      const struct pathspec *pathspec,
+                      unsigned int refresh_flags)
 {
        int retval = read_index(index);
 
-       preload_index(index, pathspec);
+       preload_index(index, pathspec, refresh_flags);
        return retval;
 }
diff --git a/read-cache.c b/read-cache.c
index 7b1354d759..e8725a5162 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -23,6 +23,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "fsmonitor.h"
+#include "progress.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1477,6 +1478,11 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
        const char *added_fmt;
        const char *unmerged_fmt;
        uint64_t start = getnanotime();
+       struct progress *progress = NULL;
+
+       if (flags & REFRESH_PROGRESS && isatty(2))
+               progress = start_delayed_progress(_("Refresh index"),
+                                                 istate->cache_nr);
 
        modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n");
        deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n");
@@ -1516,6 +1522,8 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
                new_entry = refresh_cache_ent(istate, ce, options, 
&cache_errno, &changed);
                if (new_entry == ce)
                        continue;
+               if (progress)
+                       display_progress(progress, i);
                if (!new_entry) {
                        const char *fmt;
 
@@ -1547,6 +1555,8 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
 
                replace_index_entry(istate, i, new_entry);
        }
+       if (progress)
+               stop_progress(&progress);
        trace_performance_since(start, "refresh index");
        return has_errors;
 }
diff --git a/sequencer.c b/sequencer.c
index dc2c58d464..e0cd17df70 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1909,7 +1909,7 @@ static int read_and_refresh_cache(struct replay_opts 
*opts)
 {
        struct lock_file index_lock = LOCK_INIT;
        int index_fd = hold_locked_index(&index_lock, 0);
-       if (read_index_preload(&the_index, NULL) < 0) {
+       if (read_index_preload(&the_index, NULL, 0) < 0) {
                rollback_lock_file(&index_lock);
                return error(_("git %s: failed to read the index"),
                        _(action_name(opts)));
-- 
2.19.0.rc0.337.ge906d732e7

Reply via email to