A non-basename pattern that does not contain /**/ can't match anything
outside the attached directory. Record its directory level and avoid
matching unless the pathname is also at the same directory level.
This optimization shines when there are a lot of non-basename patterns
in the root .gitignore and a big/deep worktree. Due to the cascading
rule of .gitignore, patterns in the root .gitignore are checked for
_all_ entries in the worktree.
before after
user 0m0.424s 0m0.365s
user 0m0.427s 0m0.366s
user 0m0.432s 0m0.374s
user 0m0.435s 0m0.374s
user 0m0.435s 0m0.377s
user 0m0.437s 0m0.381s
user 0m0.439s 0m0.381s
user 0m0.440s 0m0.383s
user 0m0.450s 0m0.384s
user 0m0.454s 0m0.384s
Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
---
attr.c | 3 ++-
dir.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++------------------
dir.h | 9 ++++++++-
3 files changed, 60 insertions(+), 20 deletions(-)
diff --git a/attr.c b/attr.c
index 1818ba5..7764ddd 100644
--- a/attr.c
+++ b/attr.c
@@ -254,7 +254,8 @@ static struct match_attr *parse_attr_line(const char *line,
const char *src,
parse_exclude_pattern(&res->u.pat.pattern,
&res->u.pat.patternlen,
&res->u.pat.flags,
- &res->u.pat.nowildcardlen);
+ &res->u.pat.nowildcardlen,
+ NULL);
if (res->u.pat.flags & EXC_FLAG_MUSTBEDIR)
res->u.pat.patternlen++;
if (res->u.pat.flags & EXC_FLAG_NEGATIVE) {
diff --git a/dir.c b/dir.c
index 880b5e6..de7a6ba 100644
--- a/dir.c
+++ b/dir.c
@@ -360,10 +360,12 @@ static int no_wildcard(const char *string)
void parse_exclude_pattern(const char **pattern,
int *patternlen,
int *flags,
- int *nowildcardlen)
+ int *nowildcardlen,
+ int *dirs_p)
{
const char *p = *pattern;
size_t i, len;
+ int dirs;
*flags = 0;
if (*p == '!') {
@@ -375,12 +377,15 @@ void parse_exclude_pattern(const char **pattern,
len--;
*flags |= EXC_FLAG_MUSTBEDIR;
}
- for (i = 0; i < len; i++) {
+ for (i = 0, dirs = 0; i < len; i++) {
if (p[i] == '/')
- break;
+ dirs++;
}
- if (i == len)
+ if (!dirs)
*flags |= EXC_FLAG_NODIR;
+ else if (*p == '/')
+ dirs--;
+
*nowildcardlen = simple_length(p);
/*
* we should have excluded the trailing slash from 'p' too,
@@ -393,6 +398,8 @@ void parse_exclude_pattern(const char **pattern,
*flags |= EXC_FLAG_ENDSWITH;
*pattern = p;
*patternlen = len;
+ if (dirs_p)
+ *dirs_p = dirs;
}
void add_exclude(const char *string, const char *base,
@@ -402,8 +409,9 @@ void add_exclude(const char *string, const char *base,
int patternlen;
int flags;
int nowildcardlen;
+ int dirs;
- parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
+ parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen,
&dirs);
if (flags & EXC_FLAG_MUSTBEDIR) {
char *s;
x = xmalloc(sizeof(*x) + patternlen + 1);
@@ -415,11 +423,26 @@ void add_exclude(const char *string, const char *base,
x = xmalloc(sizeof(*x));
x->pattern = string;
}
+ /*
+ * TODO: nowildcardlen < patternlen is stricter than
+ * necessary, mainly to exclude "**", which breaks directory
+ * boundaries. Patterns like "/foo-*" should be fine.
+ */
+ if ((flags & EXC_FLAG_NODIR) || nowildcardlen < patternlen)
+ dirs = -1;
+ else {
+ int i;
+ for (i = 0; i < baselen; i++) {
+ if (base[i] == '/')
+ dirs++;
+ }
+ }
x->patternlen = patternlen;
x->nowildcardlen = nowildcardlen;
x->base = base;
x->baselen = baselen;
x->flags = flags;
+ x->dirs = dirs;
x->srcpos = srcpos;
ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
el->excludes[el->nr++] = x;
@@ -701,7 +724,7 @@ int match_pathname(const char *pathname, int pathlen,
* matched, or NULL for undecided.
*/
static struct exclude *last_exclude_matching_from_list(const char *pathname,
- int pathlen,
+ int pathlen, int dirs,
const char *basename,
int *dtype,
struct exclude_list *el)
@@ -732,6 +755,9 @@ static struct exclude
*last_exclude_matching_from_list(const char *pathname,
continue;
}
+ if (dirs >= 0 && x->dirs >= 0 && x->dirs != dirs)
+ continue;
+
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
if (match_pathname(pathname, pathlen,
x->base, x->baselen ? x->baselen - 1 : 0,
@@ -750,7 +776,8 @@ int is_excluded_from_list(const char *pathname,
struct exclude_list *el)
{
struct exclude *exclude;
- exclude = last_exclude_matching_from_list(pathname, pathlen, basename,
dtype, el);
+ exclude = last_exclude_matching_from_list(pathname, pathlen, -1,
+ basename, dtype, el);
if (exclude)
return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
return -1; /* undecided */
@@ -765,6 +792,7 @@ int is_excluded_from_list(const char *pathname,
static struct exclude *last_exclude_matching(struct dir_struct *dir,
const char *pathname,
int pathlen,
+ int dirs,
int *dtype_p)
{
int i, j;
@@ -779,8 +807,8 @@ static struct exclude *last_exclude_matching(struct
dir_struct *dir,
group = &dir->exclude_list_group[i];
for (j = group->nr - 1; j >= 0; j--) {
exclude = last_exclude_matching_from_list(
- pathname, pathlen, basename, dtype_p,
- &group->el[j]);
+ pathname, pathlen, dir->dir_level,
+ basename, dtype_p, &group->el[j]);
if (exclude)
return exclude;
}
@@ -794,11 +822,11 @@ static struct exclude *last_exclude_matching(struct
dir_struct *dir,
* Returns 1 if true, otherwise 0.
*/
static int is_excluded(struct dir_struct *dir,
- const char *pathname, int pathlen,
+ const char *pathname, int pathlen, int dirs,
int *dtype_p)
{
struct exclude *exclude =
- last_exclude_matching(dir, pathname, pathlen, dtype_p);
+ last_exclude_matching(dir, pathname, pathlen, dirs, dtype_p);
if (exclude)
return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
return 0;
@@ -862,7 +890,7 @@ struct exclude *last_exclude_matching_path(struct
path_exclude_check *check,
int dt = DT_DIR;
exclude = last_exclude_matching(check->dir,
path->buf, path->len,
- &dt);
+ -1, &dt);
if (exclude) {
check->exclude = exclude;
return exclude;
@@ -874,7 +902,7 @@ struct exclude *last_exclude_matching_path(struct
path_exclude_check *check,
/* An entry in the index; cannot be a directory with subentries */
strbuf_setlen(path, 0);
- return last_exclude_matching(check->dir, name, namelen, dtype);
+ return last_exclude_matching(check->dir, name, namelen, -1, dtype);
}
/*
@@ -1248,11 +1276,11 @@ enum path_treatment {
};
static enum path_treatment treat_one_path(struct dir_struct *dir,
- struct strbuf *path,
+ struct strbuf *path, int dirs,
const struct path_simplify *simplify,
int dtype, struct dirent *de)
{
- int exclude = is_excluded(dir, path->buf, path->len, &dtype);
+ int exclude = is_excluded(dir, path->buf, path->len, dirs, &dtype);
if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
&& exclude_matches_pathspec(path->buf, path->len, simplify))
dir_add_ignored(dir, path->buf, path->len);
@@ -1310,7 +1338,7 @@ static enum path_treatment treat_path(struct dir_struct
*dir,
return path_ignored;
dtype = DTYPE(de);
- return treat_one_path(dir, path, simplify, dtype, de);
+ return treat_one_path(dir, path, -1, simplify, dtype, de);
}
/*
@@ -1338,6 +1366,7 @@ static int read_directory_recursive(struct dir_struct
*dir,
if (!fdir)
goto out;
+ dir->dir_level++;
while ((de = readdir(fdir)) != NULL) {
switch (treat_path(dir, de, &path, baselen, simplify)) {
case path_recurse:
@@ -1357,6 +1386,7 @@ static int read_directory_recursive(struct dir_struct
*dir,
}
closedir(fdir);
out:
+ dir->dir_level--;
strbuf_release(&path);
return contents;
@@ -1427,7 +1457,7 @@ static int treat_leading_path(struct dir_struct *dir,
break;
if (simplify_away(sb.buf, sb.len, simplify))
break;
- if (treat_one_path(dir, &sb, simplify,
+ if (treat_one_path(dir, &sb, -1, simplify,
DT_DIR, NULL) == path_ignored)
break; /* do not recurse into it */
if (len <= baselen) {
@@ -1447,8 +1477,10 @@ int read_directory(struct dir_struct *dir, const char
*path, int len, const char
return dir->nr;
simplify = create_simplify(pathspec);
- if (!len || treat_leading_path(dir, path, len, simplify))
+ if (!len || treat_leading_path(dir, path, len, simplify)) {
+ dir->dir_level = -1;
read_directory_recursive(dir, path, len, 0, simplify);
+ }
free_simplify(simplify);
qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *),
cmp_name);
diff --git a/dir.h b/dir.h
index 560ade4..c434f1c 100644
--- a/dir.h
+++ b/dir.h
@@ -45,6 +45,7 @@ struct exclude_list {
const char *base;
int baselen;
int flags;
+ int dirs;
/*
* Counting starts from 1 for line numbers in ignore files,
@@ -87,6 +88,8 @@ struct dir_struct {
/* Exclude info */
const char *exclude_per_dir;
+ int dir_level;
+
/*
* We maintain three groups of exclude pattern lists:
*
@@ -171,7 +174,11 @@ extern struct exclude_list *add_exclude_list(struct
dir_struct *dir,
extern int add_excludes_from_file_to_list(const char *fname, const char *base,
int baselen,
struct exclude_list *el, int
check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
-extern void parse_exclude_pattern(const char **string, int *patternlen, int
*flags, int *nowildcardlen);
+extern void parse_exclude_pattern(const char **string,
+ int *patternlen,
+ int *flags,
+ int *nowildcardlen,
+ int *dirs);
extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *el, int srcpos);
extern void clear_exclude_list(struct exclude_list *el);
--
1.8.1.2.536.gf441e6d
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html