[PATCH v4 1/3] t7063: use --force-untracked-cache to speed up a bit
When in the middle of t7063, we are sure untracked cache is supported, so we can use --force-untracked-cache to skip the support detection phase and save a few seconds. It's also good that --force-untracked-cache is exercised in the test suite. Signed-off-by: Nguyễn Thái Ngọc Duy --- t/t7063-status-untracked-cache.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index ff23f4e..744e922 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -402,7 +402,7 @@ test_expect_success 'set up sparse checkout' ' echo "done/[a-z]*" >.git/info/sparse-checkout && test_config core.sparsecheckout true && git checkout master && - git update-index --untracked-cache && + git update-index --force-untracked-cache && git status --porcelain >/dev/null && # prime the cache test_path_is_missing done/.gitignore && test_path_is_file done/one -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 0/3] dt/untracked-subdir
The first patch is a split from David's v3 patch because it should belong to dt/untracked-sparse. The second is basically David's v3. The third patch fixes untracked_cache_invalidate_path(). David fixed it in v2, but there's another bug in this code. David Turner (1): untracked-cache: fix subdirectory handling Nguyễn Thái Ngọc Duy (2): t7063: use --force-untracked-cache to speed up a bit untracked cache: fix entry invalidation dir.c | 82 +++--- t/t7063-status-untracked-cache.sh | 102 -- 2 files changed, 163 insertions(+), 21 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 2/3] untracked-cache: fix subdirectory handling
From: David Turner Previously, some calls lookup_untracked would pass a full path. But lookup_untracked assumes that the portion of the path up to and including to the untracked_cache_dir has been removed. So lookup_untracked would be looking in the untracked_cache for 'foo' for 'foo/bar' (instead of just looking for 'bar'). This would cause untracked cache corruption. Instead, treat_directory learns to track the base length of the parent directory, so that only the last path component is passed to lookup_untracked. Helped-by: Nguyễn Thái Ngọc Duy Signed-off-by: David Turner Signed-off-by: Nguyễn Thái Ngọc Duy --- dir.c | 14 t/t7063-status-untracked-cache.sh | 72 ++- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index e7b89fe..cd4ac77 100644 --- a/dir.c +++ b/dir.c @@ -1297,7 +1297,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len) */ static enum path_treatment treat_directory(struct dir_struct *dir, struct untracked_cache_dir *untracked, - const char *dirname, int len, int exclude, + const char *dirname, int len, int baselen, int exclude, const struct path_simplify *simplify) { /* The "len-1" is to strip the final '/' */ @@ -1324,7 +1324,8 @@ static enum path_treatment treat_directory(struct dir_struct *dir, if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) return exclude ? path_excluded : path_untracked; - untracked = lookup_untracked(dir->untracked, untracked, dirname, len); + untracked = lookup_untracked(dir->untracked, untracked, +dirname + baselen, len - baselen); return read_directory_recursive(dir, dirname, len, untracked, 1, simplify); } @@ -1444,6 +1445,7 @@ static int get_dtype(struct dirent *de, const char *path, int len) static enum path_treatment treat_one_path(struct dir_struct *dir, struct untracked_cache_dir *untracked, struct strbuf *path, + int baselen, const struct path_simplify *simplify, int dtype, struct dirent *de) { @@ -1495,8 +1497,8 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, return path_none; case DT_DIR: strbuf_addch(path, '/'); - return treat_directory(dir, untracked, path->buf, path->len, exclude, - simplify); + return treat_directory(dir, untracked, path->buf, path->len, + baselen, exclude, simplify); case DT_REG: case DT_LNK: return exclude ? path_excluded : path_untracked; @@ -1557,7 +1559,7 @@ static enum path_treatment treat_path(struct dir_struct *dir, return path_none; dtype = DTYPE(de); - return treat_one_path(dir, untracked, path, simplify, dtype, de); + return treat_one_path(dir, untracked, path, baselen, simplify, dtype, de); } static void add_untracked(struct untracked_cache_dir *dir, const char *name) @@ -1827,7 +1829,7 @@ static int treat_leading_path(struct dir_struct *dir, break; if (simplify_away(sb.buf, sb.len, simplify)) break; - if (treat_one_path(dir, NULL, &sb, simplify, + if (treat_one_path(dir, NULL, &sb, baselen, simplify, DT_DIR, NULL) == path_none) break; /* do not recurse into it */ if (len <= baselen) { diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index 744e922..22393b9 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -408,7 +408,8 @@ test_expect_success 'set up sparse checkout' ' test_path_is_file done/one ' -test_expect_success 'create files, some of which are gitignored' ' +test_expect_success 'create/modify files, some of which are gitignored' ' + echo two bis >done/two && echo three >done/three && # three is gitignored echo four >done/four && # four is gitignored at a higher level echo five >done/five # five is not gitignored @@ -420,6 +421,7 @@ test_expect_success 'test sparse status with untracked cache' ' GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ git status --porcelain >../status.actual && cat >../status.expect <../status.actual && cat >
[PATCH v4 3/3] untracked cache: fix entry invalidation
First, the current code in untracked_cache_invalidate_path() is wrong because it can only handle paths "a" or "a/b", not "a/b/c" because lookup_untracked() only looks for entries directly under the given directory. In the last case, it will look for the entry "b/c" in directory "a" instead. This means if you delete or add an entry in a subdirectory, untracked cache may become out of date because it does not invalidate properly. This is noticed by David Turner. The second problem is about invalidation inside a fully untracked/excluded directory. In this case we may have to invalidate back to root. See the comment block for detail. Signed-off-by: Nguyễn Thái Ngọc Duy --- dir.c | 68 --- t/t7063-status-untracked-cache.sh | 28 +++- 2 files changed, 83 insertions(+), 13 deletions(-) diff --git a/dir.c b/dir.c index cd4ac77..c1edabf 100644 --- a/dir.c +++ b/dir.c @@ -2616,23 +2616,67 @@ done2: return uc; } +static void invalidate_one_directory(struct untracked_cache *uc, +struct untracked_cache_dir *ucd) +{ + uc->dir_invalidated++; + ucd->valid = 0; + ucd->untracked_nr = 0; +} + +/* + * Normally when an entry is added or removed from a directory, + * invalidating that directory is enough. No need to touch its + * ancestors. When a directory is shown as "foo/bar/" in git-status + * however, deleting or adding an entry may have cascading effect. + * + * Say the "foo/bar/file" has become untracked, we need to tell the + * untracked_cache_dir of "foo" that "bar/" is not an untracked + * directory any more (because "bar" is managed by foo as an untracked + * "file"). + * + * Similarly, if "foo/bar/file" moves from untracked to tracked and it + * was the last untracked entry in the entire "foo", we should show + * "foo/" instead. Which means we have to invalidate past "bar" up to + * "foo". + * + * This function traverses all directories from root to leaf. If there + * is a chance of one of the above cases happening, we invalidate back + * to root. Otherwise we just invalidate the leaf. There may be a more + * sophisticated way than checking for SHOW_OTHER_DIRECTORIES to + * detect these cases and avoid unnecessary invalidation, for example, + * checking for the untracked entry named "bar/" in "foo", but for now + * stick to something safe and simple. + */ +static int invalidate_one_component(struct untracked_cache *uc, + struct untracked_cache_dir *dir, + const char *path, int len) +{ + const char *rest = strchr(path, '/'); + + if (rest) { + int component_len = rest - path; + struct untracked_cache_dir *d = + lookup_untracked(uc, dir, path, component_len); + int ret = + invalidate_one_component(uc, d, rest + 1, +len - (component_len + 1)); + if (ret) + invalidate_one_directory(uc, dir); + return ret; + } + + invalidate_one_directory(uc, dir); + return uc->dir_flags & DIR_SHOW_OTHER_DIRECTORIES; +} + void untracked_cache_invalidate_path(struct index_state *istate, const char *path) { - const char *sep; - struct untracked_cache_dir *d; if (!istate->untracked || !istate->untracked->root) return; - sep = strrchr(path, '/'); - if (sep) - d = lookup_untracked(istate->untracked, -istate->untracked->root, -path, sep - path); - else - d = istate->untracked->root; - istate->untracked->dir_invalidated++; - d->valid = 0; - d->untracked_nr = 0; + invalidate_one_component(istate->untracked, istate->untracked->root, +path, strlen(path)); } void untracked_cache_remove_from_index(struct index_state *istate, diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index 22393b9..37a24c1 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -375,7 +375,7 @@ EOF node creation: 0 gitignore invalidation: 0 directory invalidation: 0 -opendir: 1 +opendir: 2 EOF test_cmp ../trace.expect ../trace ' @@ -543,4 +543,30 @@ EOF test_cmp ../trace.expect ../trace ' +test_expect_success 'move entry in subdir from untracked to cached' ' + git add dtwo/two && + git status --porcelain >../status.actual && + cat >../status.expect <../status.actual && + cat >../status.expect <http://vger.kernel.org/majordomo-info.html
[PATCH v4 04/10] grep/icase: avoid kwsset on literal non-ascii strings
When we detect the pattern is just a literal string, we avoid heavy regex engine and use fast substring search implemented in kwsset.c. But kws uses git-ctype which is locale-independent so it does not know how to fold case properly outside ascii range. Let regcomp or pcre take care of this case instead. Slower, but accurate. Helped-by: René Scharfe Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 7 ++- t/t7812-grep-icase-non-ascii.sh (new +x) | 23 +++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100755 t/t7812-grep-icase-non-ascii.sh diff --git a/grep.c b/grep.c index bd32f66..d795b0e 100644 --- a/grep.c +++ b/grep.c @@ -4,6 +4,7 @@ #include "xdiff-interface.h" #include "diff.h" #include "diffcore.h" +#include "commit.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -398,12 +399,16 @@ static int is_fixed(const char *s, size_t len) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int icase_non_ascii; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + icase_non_ascii = + (opt->regflags & REG_ICASE || p->ignore_case) && + has_non_ascii(p->pattern); - if (is_fixed(p->pattern, p->patternlen)) + if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else if (opt->fixed) { p->fixed = 1; diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh new file mode 100755 index 000..6eff490 --- /dev/null +++ b/t/t7812-grep-icase-non-ascii.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +test_description='grep icase on non-English locales' + +. ./lib-gettext.sh + +test_expect_success GETTEXT_LOCALE 'setup' ' + printf "TILRAUN: Halló Heimur!" >file && + git add file && + LC_ALL="$is_IS_locale" && + export LC_ALL +' + +test_have_prereq GETTEXT_LOCALE && +test-regex "HALLÓ" "Halló" ICASE && +test_set_prereq REGEX_LOCALE + +test_expect_success REGEX_LOCALE 'grep literal string, no -F' ' + git grep -i "TILRAUN: Halló Heimur!" && + git grep -i "TILRAUN: HALLÓ HEIMUR!" +' + +test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 02/10] grep: break down an "if" stmt in preparation for next changes
Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index b58c7c6..bd32f66 100644 --- a/grep.c +++ b/grep.c @@ -403,9 +403,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) + if (is_fixed(p->pattern, p->patternlen)) p->fixed = 1; - else + else if (opt->fixed) { + p->fixed = 1; + } else p->fixed = 0; if (p->fixed) { -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 01/10] grep: allow -F -i combination
-F means "no regex", not "case sensitive" so it should not override -i Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index d04f440..2d392e9 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -806,7 +806,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (!opt.fixed && opt.ignore_case) + if (opt.ignore_case) opt.regflags |= REG_ICASE; compile_grep_patterns(&opt); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 03/10] test-regex: expose full regcomp() to the command line
Signed-off-by: Nguyễn Thái Ngọc Duy --- test-regex.c | 56 ++-- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/test-regex.c b/test-regex.c index 0dc598e..3b5641c 100644 --- a/test-regex.c +++ b/test-regex.c @@ -1,19 +1,63 @@ #include "git-compat-util.h" +#include "gettext.h" + +struct reg_flag { + const char *name; + int flag; +}; + +static struct reg_flag reg_flags[] = { + { "EXTENDED",REG_EXTENDED }, + { "NEWLINE", REG_NEWLINE}, + { "ICASE", REG_ICASE }, + { "NOTBOL", REG_NOTBOL }, +#ifdef REG_STARTEND + { "STARTEND",REG_STARTEND }, +#endif + { NULL, 0 } +}; int main(int argc, char **argv) { - char *pat = "[^={} \t]+"; - char *str = "={}\nfred"; + const char *pat; + const char *str; + int flags = 0; regex_t r; regmatch_t m[1]; - if (regcomp(&r, pat, REG_EXTENDED | REG_NEWLINE)) + if (argc == 1) { + /* special case, bug check */ + pat = "[^={} \t]+"; + str = "={}\nfred"; + flags = REG_EXTENDED | REG_NEWLINE; + } else { + argv++; + pat = *argv++; + str = *argv++; + while (*argv) { + struct reg_flag *rf; + for (rf = reg_flags; rf->name; rf++) + if (!strcmp(*argv, rf->name)) { + flags |= rf->flag; + break; + } + if (!rf->name) + die("do not recognize %s", *argv); + argv++; + } + git_setup_gettext(); + } + + if (regcomp(&r, pat, flags)) die("failed regcomp() for pattern '%s'", pat); - if (regexec(&r, str, 1, m, 0)) - die("no match of pattern '%s' to string '%s'", pat, str); + if (regexec(&r, str, 1, m, 0)) { + if (argc == 1) + die("no match of pattern '%s' to string '%s'", pat, str); + return 1; + } /* http://sourceware.org/bugzilla/show_bug.cgi?id=3957 */ - if (m[0].rm_so == 3) /* matches '\n' when it should not */ + if (argc == 1 && m[0].rm_so == 3) /* matches '\n' when it should not */ die("regex bug confirmed: re-build git with NO_REGEX=1"); exit(0); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 00/10] icase match on non-ascii
Sorry it took more than a month for a simple reroll. Free time (with energy left) is rare these days. v4 adds system regex's icase support detection and only runs tests in these cases. This should fix test failures on Windows where compat regex does not support icase. Diff from v3 below diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index d07fa20..a5475bb 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -11,7 +11,11 @@ test_expect_success GETTEXT_LOCALE 'setup' ' export LC_ALL ' -test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' ' +test_have_prereq GETTEXT_LOCALE && +test-regex "HALLÓ" "Halló" ICASE && +test_set_prereq REGEX_LOCALE + +test_expect_success REGEX_LOCALE 'grep literal string, no -F' ' git grep -i "TILRAUN: Halló Heimur!" && git grep -i "TILRAUN: HALLÓ HEIMUR!" ' @@ -31,7 +35,7 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' ' test_cmp expected actual ' -test_expect_success GETTEXT_LOCALE 'grep literal string, with -F' ' +test_expect_success REGEX_LOCALE 'grep literal string, with -F' ' git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null | grep fixed >debug1 && echo "fixedTILRAUN: Halló Heimur!" >expect1 && @@ -43,7 +47,7 @@ test_expect_success GETTEXT_LOCALE 'grep literal string, with -F' ' test_cmp expect2 debug2 ' -test_expect_success GETTEXT_LOCALE 'grep string with regex, with -F' ' +test_expect_success REGEX_LOCALE 'grep string with regex, with -F' ' printf "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file && git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 >/dev/null | @@ -57,7 +61,7 @@ test_expect_success GETTEXT_LOCALE 'grep string with regex, with -F' ' test_cmp expect2 debug2 ' -test_expect_success GETTEXT_LOCALE 'pickaxe -i on non-ascii' ' +test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' ' git commit -m first && git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual && echo first >expected && diff --git a/test-regex.c b/test-regex.c index 0dc598e..3b5641c 100644 --- a/test-regex.c +++ b/test-regex.c @@ -1,19 +1,63 @@ #include "git-compat-util.h" +#include "gettext.h" + +struct reg_flag { + const char *name; + int flag; +}; + +static struct reg_flag reg_flags[] = { + { "EXTENDED",REG_EXTENDED }, + { "NEWLINE", REG_NEWLINE}, + { "ICASE", REG_ICASE }, + { "NOTBOL", REG_NOTBOL }, +#ifdef REG_STARTEND + { "STARTEND",REG_STARTEND }, +#endif + { NULL, 0 } +}; int main(int argc, char **argv) { - char *pat = "[^={} \t]+"; - char *str = "={}\nfred"; + const char *pat; + const char *str; + int flags = 0; regex_t r; regmatch_t m[1]; - if (regcomp(&r, pat, REG_EXTENDED | REG_NEWLINE)) + if (argc == 1) { + /* special case, bug check */ + pat = "[^={} \t]+"; + str = "={}\nfred"; + flags = REG_EXTENDED | REG_NEWLINE; + } else { + argv++; + pat = *argv++; + str = *argv++; + while (*argv) { + struct reg_flag *rf; + for (rf = reg_flags; rf->name; rf++) + if (!strcmp(*argv, rf->name)) { + flags |= rf->flag; + break; + } + if (!rf->name) + die("do not recognize %s", *argv); + argv++; + } + git_setup_gettext(); + } + + if (regcomp(&r, pat, flags)) die("failed regcomp() for pattern '%s'", pat); - if (regexec(&r, str, 1, m, 0)) - die("no match of pattern '%s' to string '%s'", pat, str); + if (regexec(&r, str, 1, m, 0)) { + if (argc == 1) + die("no match of pattern '%s' to string '%s'", pat, str); + return 1; + } /* http://sourceware.org/bugzilla/show_bug.cgi?id=3957 */ - if (m[0].rm_so == 3) /* matches '\n' when it should not */ + if (argc == 1 && m[0].rm_so == 3) /* matches '\n' when it should not */ die("regex bug confirmed: re-build git with NO_REGEX=1"); exit(0); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 05/10] grep/icase: avoid kwsset when -F is specified
Similar to the previous commit, we can't use kws on icase search outside ascii range. But we can't simply pass the pattern to regcomp/pcre like the previous commit because it may contain regex special characters, so we need to quote the regex first. To avoid misquote traps that could lead to undefined behavior, we always stick to basic regex engine in this case. We don't need fancy features for grepping a literal string anyway. basic_regex_quote_buf() assumes that if the pattern is in a multibyte encoding, ascii chars must be unambiguously encoded as single bytes. This is true at least for UTF-8. For others, let's wait until people yell up. Chances are nobody uses multibyte, non utf-8 charsets any more.. Helped-by: René Scharfe Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 25 - quote.c | 37 + quote.h | 1 + t/t7812-grep-icase-non-ascii.sh | 26 ++ 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/grep.c b/grep.c index d795b0e..8fce54f 100644 --- a/grep.c +++ b/grep.c @@ -5,6 +5,7 @@ #include "diff.h" #include "diffcore.h" #include "commit.h" +#include "quote.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len) return 1; } +static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) +{ + struct strbuf sb = STRBUF_INIT; + int err; + + basic_regex_quote_buf(&sb, p->pattern); + err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED); + if (opt->debug) + fprintf(stderr, "fixed%s\n", sb.buf); + strbuf_release(&sb); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, 1024); + regfree(&p->regexp); + compile_regexp_failed(p, errbuf); + } +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int icase_non_ascii; @@ -411,7 +430,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else if (opt->fixed) { - p->fixed = 1; + p->fixed = !icase_non_ascii; + if (!p->fixed) { + compile_fixed_regexp(p, opt); + return; + } } else p->fixed = 0; diff --git a/quote.c b/quote.c index 7920e18..43a8057 100644 --- a/quote.c +++ b/quote.c @@ -439,3 +439,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src) } strbuf_addch(sb, '"'); } + +void basic_regex_quote_buf(struct strbuf *sb, const char *src) +{ + char c; + + if (*src == '^') { + /* only beginning '^' is special and needs quoting */ + strbuf_addch(sb, '\\'); + strbuf_addch(sb, *src++); + } + if (*src == '*') + /* beginning '*' is not special, no quoting */ + strbuf_addch(sb, *src++); + + while ((c = *src++)) { + switch (c) { + case '[': + case '.': + case '\\': + case '*': + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + case '$': + /* only the end '$' is special and needs quoting */ + if (*src == '\0') + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + default: + strbuf_addch(sb, c); + break; + } + } +} diff --git a/quote.h b/quote.h index 99e04d3..362d315 100644 --- a/quote.h +++ b/quote.h @@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char *prefix, extern void perl_quote_buf(struct strbuf *sb, const char *src); extern void python_quote_buf(struct strbuf *sb, const char *src); extern void tcl_quote_buf(struct strbuf *sb, const char *src); +extern void basic_regex_quote_buf(struct strbuf *sb, const char *src); #endif diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 6eff490..aba6b15 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -20,4 +20,30 @@ test_expect_success REGEX_LOCALE 'grep lite
[PATCH v4 06/10] grep/pcre: prepare locale-dependent tables for icase matching
The default tables are usually built with C locale and only suitable for LANG=C or similar. This should make case insensitive search work correctly for all single-byte charsets. Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 8 ++-- grep.h | 1 + t/t7813-grep-icase-iso.sh (new +x) | 19 +++ 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100755 t/t7813-grep-icase-iso.sh diff --git a/grep.c b/grep.c index 8fce54f..f0fbf99 100644 --- a/grep.c +++ b/grep.c @@ -324,11 +324,14 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) int erroffset; int options = PCRE_MULTILINE; - if (opt->ignore_case) + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) + p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; + } p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - NULL); + p->pcre_tables); if (!p->pcre_regexp) compile_regexp_failed(p, error); @@ -362,6 +365,7 @@ static void free_pcre_regexp(struct grep_pat *p) { pcre_free(p->pcre_regexp); pcre_free(p->pcre_extra_info); + pcre_free((void *)p->pcre_tables); } #else /* !USE_LIBPCRE */ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) diff --git a/grep.h b/grep.h index 95f197a..cee4357 100644 --- a/grep.h +++ b/grep.h @@ -48,6 +48,7 @@ struct grep_pat { regex_t regexp; pcre *pcre_regexp; pcre_extra *pcre_extra_info; + const unsigned char *pcre_tables; kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; diff --git a/t/t7813-grep-icase-iso.sh b/t/t7813-grep-icase-iso.sh new file mode 100755 index 000..efef7fb --- /dev/null +++ b/t/t7813-grep-icase-iso.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +test_description='grep icase on non-English locales' + +. ./lib-gettext.sh + +test_expect_success GETTEXT_ISO_LOCALE 'setup' ' + printf "TILRAUN: Hall� Heimur!" >file && + git add file && + LC_ALL="$is_IS_iso_locale" && + export LC_ALL +' + +test_expect_success GETTEXT_ISO_LOCALE,LIBPCRE 'grep pcre string' ' + git grep --perl-regexp -i "TILRAUN: H.ll� Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.LL� HEIMUR!" +' + +test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 07/10] gettext: add is_utf8_locale()
This function returns true if git is running under an UTF-8 locale. pcre in the next patch will need this. is_encoding_utf8() is used instead of strcmp() to catch both "utf-8" and "utf8" suffixes. When built with no gettext support, we peek in several env variables to detect UTF-8. pcre library might support utf-8 even if libc is built without locale support.. The peeking code is a copy from compat/regex/regcomp.c Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: Nguyễn Thái Ngọc Duy --- gettext.c | 24 ++-- gettext.h | 1 + 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/gettext.c b/gettext.c index a268a2c..db727ea 100644 --- a/gettext.c +++ b/gettext.c @@ -18,6 +18,8 @@ # endif #endif +static const char *charset; + /* * Guess the user's preferred languages from the value in LANGUAGE environment * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. @@ -65,7 +67,6 @@ static int test_vsnprintf(const char *fmt, ...) return ret; } -static const char *charset; static void init_gettext_charset(const char *domain) { /* @@ -172,8 +173,27 @@ int gettext_width(const char *s) { static int is_utf8 = -1; if (is_utf8 == -1) - is_utf8 = !strcmp(charset, "UTF-8"); + is_utf8 = is_utf8_locale(); return is_utf8 ? utf8_strwidth(s) : strlen(s); } #endif + +int is_utf8_locale(void) +{ +#ifdef NO_GETTEXT + if (!charset) { + const char *env = getenv("LC_ALL"); + if (!env || !*env) + env = getenv("LC_CTYPE"); + if (!env || !*env) + env = getenv("LANG"); + if (!env) + env = ""; + if (strchr(env, '.')) + env = strchr(env, '.') + 1; + charset = xstrdup(env); + } +#endif + return is_encoding_utf8(charset); +} diff --git a/gettext.h b/gettext.h index 33696a4..7eee64a 100644 --- a/gettext.h +++ b/gettext.h @@ -90,5 +90,6 @@ const char *Q_(const char *msgid, const char *plu, unsigned long n) #endif const char *get_preferred_languages(void); +extern int is_utf8_locale(void); #endif -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 08/10] grep/pcre: support utf-8
In the previous change in this function, we add locale support for single-byte encodings only. It looks like pcre only supports utf-* as multibyte encodings, the others are left in the cold (which is fine). We need to enable PCRE_UTF8 so pcre can find character boundary correctly. It's needed for case folding (when --ignore-case is used) or '*', '+' or similar syntax is used. The "has_non_ascii()" check is to be on the conservative side. If there's non-ascii in the pattern, the searched content could still be in utf-8, but we can treat it just like a byte stream and everything should work. If we force utf-8 based on locale only and pcre validates utf-8 and the file content is in non-utf8 encoding, things break. Noticed-by: Plamen Totev Helped-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 2 ++ t/t7812-grep-icase-non-ascii.sh | 15 +++ 2 files changed, 17 insertions(+) diff --git a/grep.c b/grep.c index f0fbf99..07621c1 100644 --- a/grep.c +++ b/grep.c @@ -329,6 +329,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE_UTF8; p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, p->pcre_tables); diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index aba6b15..8896410 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -20,6 +20,21 @@ test_expect_success REGEX_LOCALE 'grep literal string, no -F' ' git grep -i "TILRAUN: HALLÓ HEIMUR!" ' +test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 icase' ' + git grep --perl-regexp"TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!" +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' ' + printf "TILRAUN: Hallóó Heimur!" >file2 && + git add file2 && + git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual && + echo file >expected && + echo file2 >>expected && + test_cmp expected actual +' + test_expect_success REGEX_LOCALE 'grep literal string, with -F' ' git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null | grep fixed >debug1 && -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 10/10] diffcore-pickaxe: support case insensitive match on non-ascii
Similar to the "grep -F -i" case, we can't use kws on icase search outside ascii range, so we quote the string and pass it to regcomp as a basic regexp and let regex engine deal with case sensitivity. The new test is put in t7812 instead of t4209-log-pickaxe because lib-gettext.sh might cause problems elsewhere, probably.. Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- diffcore-pickaxe.c | 11 +++ t/t7812-grep-icase-non-ascii.sh | 7 +++ 2 files changed, 18 insertions(+) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 7a718fc..6946c15 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -7,6 +7,8 @@ #include "diffcore.h" #include "xdiff-interface.h" #include "kwset.h" +#include "commit.h" +#include "quote.h" typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two, struct diff_options *o, @@ -212,6 +214,15 @@ void diffcore_pickaxe(struct diff_options *o) cflags |= REG_ICASE; err = regcomp(®ex, needle, cflags); regexp = ®ex; + } else if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) && + has_non_ascii(needle)) { + struct strbuf sb = STRBUF_INIT; + int cflags = REG_NEWLINE | REG_ICASE; + + basic_regex_quote_buf(&sb, needle); + err = regcomp(®ex, sb.buf, cflags); + strbuf_release(&sb); + regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) ? tolower_trans_tbl : NULL); diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 8896410..a5475bb 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -61,4 +61,11 @@ test_expect_success REGEX_LOCALE 'grep string with regex, with -F' ' test_cmp expect2 debug2 ' +test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' ' + git commit -m first && + git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual && + echo first >expected && + test_cmp expected actual +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 09/10] diffcore-pickaxe: "share" regex error handling code
There's another regcomp code block coming in this function. By moving the error handling code out of this block, we don't have to add the same error handling code in the new block. Signed-off-by: Nguyễn Thái Ngọc Duy --- diffcore-pickaxe.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 185f86b..7a718fc 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o) int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; + int err = 0; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { - int err; int cflags = REG_EXTENDED | REG_NEWLINE; if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)) cflags |= REG_ICASE; err = regcomp(®ex, needle, cflags); - if (err) { - /* The POSIX.2 people are surely sick */ - char errbuf[1024]; - regerror(err, ®ex, errbuf, 1024); - regfree(®ex); - die("invalid regex: %s", errbuf); - } regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) @@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o) kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } + if (err) { + /* The POSIX.2 people are surely sick */ + char errbuf[1024]; + regerror(err, ®ex, errbuf, 1024); + regfree(®ex); + die("invalid regex: %s", errbuf); + } /* Might want to warn when both S and G are on; I don't care... */ pickaxe(&diff_queued_diff, o, regexp, kws, -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 1/5] path.c: delete an extra space
Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/path.c b/path.c index 95acbaf..a536ee3 100644 --- a/path.c +++ b/path.c @@ -431,7 +431,7 @@ const char *enter_repo(const char *path, int strict) } if (!suffix[i]) return NULL; - gitfile = read_gitfile(used_path) ; + gitfile = read_gitfile(used_path); if (gitfile) strcpy(used_path, gitfile); if (chdir(used_path)) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 2/5] enter_repo: avoid duplicating logic, use is_git_directory() instead
Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/path.c b/path.c index a536ee3..7340e11 100644 --- a/path.c +++ b/path.c @@ -441,8 +441,7 @@ const char *enter_repo(const char *path, int strict) else if (chdir(path)) return NULL; - if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 && - validate_headref("HEAD") == 0) { + if (is_git_directory(".")) { set_git_dir("."); check_repository_format(); return path; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 3/5] enter_repo: allow .git files in strict mode
Strict mode is about not guessing where .git is. If the user points to a .git file, we know exactly where the target .git dir will be. Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/path.c b/path.c index 7340e11..32d4ca6 100644 --- a/path.c +++ b/path.c @@ -438,8 +438,13 @@ const char *enter_repo(const char *path, int strict) return NULL; path = validated_path; } - else if (chdir(path)) - return NULL; + else { + const char *gitfile = read_gitfile(used_path); + if (gitfile) + path = gitfile; + if (chdir(path)) + return NULL; + } if (is_git_directory(".")) { set_git_dir("."); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 4/5] clone: allow --local from a linked checkout
Noticed-by: Bjørnar Snoksrud Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/clone.c | 6 -- t/t2025-worktree-add.sh | 5 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 578da85..836fb64 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -424,8 +424,10 @@ static void clone_local(const char *src_repo, const char *dest_repo) } else { struct strbuf src = STRBUF_INIT; struct strbuf dest = STRBUF_INIT; - strbuf_addf(&src, "%s/objects", src_repo); - strbuf_addf(&dest, "%s/objects", dest_repo); + get_common_dir(&src, src_repo); + get_common_dir(&dest, dest_repo); + strbuf_addstr(&src, "/objects"); + strbuf_addf(&dest, "/objects"); copy_or_link_directory(&src, &dest, src_repo, src.len); strbuf_release(&src); strbuf_release(&dest); diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh index 8267411..3694174 100755 --- a/t/t2025-worktree-add.sh +++ b/t/t2025-worktree-add.sh @@ -193,4 +193,9 @@ test_expect_success '"add" -B/--detach mutually exclusive' ' test_must_fail git worktree add -B poodle --detach bamboo master ' +test_expect_success 'local clone from linked checkout' ' + git clone --local here here-clone && + ( cd here-clone && git fsck ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 0/5] fix local clone from a linked checkout
On Wed, Jul 15, 2015 at 8:25 PM, Duy Nguyen wrote: > On Wed, Jul 15, 2015 at 11:40:18AM +0200, Bjørnar Snoksrud wrote: >> I reported this before, but now I have a nice topic to hang it on - >> >> I have re-reproduced the bug using a build from master as of today, >> using the new worktree commands. > > Something like the following patch should work if you need it now. > > Because this may conflict (in the test cases) with Eric's series to > move "git checkout --to" to "git worktree add", and because the next > release is already delayed to let "git worktree add" in, I think we > could keep this patch out of tree for now. I will split it up, add > tests and resubmit once the release is out. Please remind me if you > see nothing from me for too long. Here it is. Mostly the same as the previous patch except that the last patch is new. Nguyễn Thái Ngọc Duy (5): path.c: delete an extra space enter_repo: avoid duplicating logic, use is_git_directory() instead enter_repo: allow .git files in strict mode clone: allow --local from a linked checkout clone: better error when --reference is a linked checkout builtin/clone.c | 13 ++--- path.c | 14 +- t/t2025-worktree-add.sh | 5 + 3 files changed, 24 insertions(+), 8 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 5/5] clone: better error when --reference is a linked checkout
Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/clone.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/builtin/clone.c b/builtin/clone.c index 836fb64..7a010bb 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -294,9 +294,14 @@ static int add_one_reference(struct string_list_item *item, void *cb_data) char *ref_git_git = mkpathdup("%s/.git", ref_git); free(ref_git); ref_git = ref_git_git; - } else if (!is_directory(mkpath("%s/objects", ref_git))) + } else if (!is_directory(mkpath("%s/objects", ref_git))) { + struct strbuf sb = STRBUF_INIT; + if (get_common_dir(&sb, ref_git)) + die(_("reference repository '%s' as a linked checkout is not supported yet."), + item->string); die(_("reference repository '%s' is not a local repository."), item->string); + } if (!access(mkpath("%s/shallow", ref_git), F_OK)) die(_("reference repository '%s' is shallow"), item->string); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/8] Resurrect "diff-lib.c: adjust position of i-t-a entries in diff"
On Thu, Jun 25, 2015 at 8:07 PM, Junio C Hamano wrote: >>> Perhaps a good and safe way forward to resurrect what d95d728a >>> wanted to do is to first add an option to tell run_diff_index() and >>> run_diff_files() which behaviour the caller wants to see, add that >>> only to the caller in wt-status.c? Then incrementally pass that >>> option from more callsites that we are absolutely certain that want >>> this different worldview with respect to i-t-a? >> >> Agreed. > > OK. Perhaps then first I should do that revert and we'll > incrementally rebuild on top. Here comes the rebuild. This adds --shift-ita option (and an internal flag) to enable this behavior. Those only take the last two patches. The remaining is to make sure we handle i-t-a entries correctly in some commands. Nguyễn Thái Ngọc Duy (8): blame: remove obsolete comment Add and use convenient macro ce_intent_to_add() apply: fix adding new files on i-t-a entries apply: make sure check_preimage() does not leave empty file on error checkout(-index): do not checkout i-t-a entries grep: make it clear i-t-a entries are ignored diff.h: extend "flags" field to 64 bits because we're out of bits Resurrect "diff-lib.c: adjust position of i-t-a entries in diff" Documentation/diff-options.txt | 6 +++ builtin/apply.c| 13 --- builtin/blame.c| 5 --- builtin/checkout-index.c | 5 ++- builtin/checkout.c | 2 + builtin/commit.c | 2 +- builtin/grep.c | 2 +- builtin/rm.c | 2 +- cache-tree.c | 2 +- cache.h| 1 + diff-lib.c | 18 - diff.c | 4 +- diff.h | 9 +++-- read-cache.c | 4 +- t/t2203-add-intent.sh | 83 +- wt-status.c| 2 + 16 files changed, 134 insertions(+), 26 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/8] Add and use convenient macro ce_intent_to_add()
Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/rm.c | 2 +- cache-tree.c | 2 +- cache.h | 1 + read-cache.c | 4 ++-- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/builtin/rm.c b/builtin/rm.c index 80b972f..8829b09 100644 --- a/builtin/rm.c +++ b/builtin/rm.c @@ -211,7 +211,7 @@ static int check_local_mod(unsigned char *head, int index_only) * "intent to add" entry. */ if (local_changes && staged_changes) { - if (!index_only || !(ce->ce_flags & CE_INTENT_TO_ADD)) + if (!index_only || !ce_intent_to_add(ce)) string_list_append(&files_staged, name); } else if (!index_only) { diff --git a/cache-tree.c b/cache-tree.c index feace8b..c865bd2 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -377,7 +377,7 @@ static int update_one(struct cache_tree *it, * they are not part of generated trees. Invalidate up * to root to force cache-tree users to read elsewhere. */ - if (ce->ce_flags & CE_INTENT_TO_ADD) { + if (ce_intent_to_add(ce)) { to_invalidate = 1; continue; } diff --git a/cache.h b/cache.h index 4e25271..41d811c 100644 --- a/cache.h +++ b/cache.h @@ -241,6 +241,7 @@ static inline unsigned create_ce_flags(unsigned stage) #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE) #define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE) #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE) +#define ce_intent_to_add(ce) ((ce)->ce_flags & CE_INTENT_TO_ADD) #define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644) static inline unsigned int create_ce_mode(unsigned int mode) diff --git a/read-cache.c b/read-cache.c index 89dbc08..f849cbd 100644 --- a/read-cache.c +++ b/read-cache.c @@ -327,7 +327,7 @@ int ie_match_stat(const struct index_state *istate, * by definition never matches what is in the work tree until it * actually gets added. */ - if (ce->ce_flags & CE_INTENT_TO_ADD) + if (ce_intent_to_add(ce)) return DATA_CHANGED | TYPE_CHANGED | MODE_CHANGED; changed = ce_match_stat_basic(ce, st); @@ -1251,7 +1251,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, if (cache_errno == ENOENT) fmt = deleted_fmt; - else if (ce->ce_flags & CE_INTENT_TO_ADD) + else if (ce_intent_to_add(ce)) fmt = added_fmt; /* must be before other checks */ else if (changed & TYPE_CHANGED) fmt = typechange_fmt; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/8] apply: fix adding new files on i-t-a entries
Applying a patch that adds a file when that file is registered with "git add -N" will fail with message "already exists in index" because git-apply checks, sees those i-t-a entries and aborts. git-apply does not realize those are for bookkeeping only, they do not really exist in the index. This patch tightens the "exists in index" check, ignoring i-t-a entries. Reported-by: Patrick Higgins Reported-by: Bjørnar Snoksrud Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/apply.c | 9 + t/t2203-add-intent.sh | 13 + 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index 54aba4e..76b58a1 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -3553,10 +3553,11 @@ static int check_to_create(const char *new_name, int ok_if_exists) { struct stat nst; - if (check_index && - cache_name_pos(new_name, strlen(new_name)) >= 0 && - !ok_if_exists) - return EXISTS_IN_INDEX; + if (check_index && !ok_if_exists) { + int pos = cache_name_pos(new_name, strlen(new_name)); + if (pos >= 0 && !ce_intent_to_add(active_cache[pos])) + return EXISTS_IN_INDEX; + } if (cached) return 0; diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh index 2a4a749..bb5ef2b 100755 --- a/t/t2203-add-intent.sh +++ b/t/t2203-add-intent.sh @@ -82,5 +82,18 @@ test_expect_success 'cache-tree invalidates i-t-a paths' ' test_cmp expect actual ' +test_expect_success 'apply adds new file on i-t-a entry' ' + git init apply && + ( + cd apply && + echo newcontent >newfile && + git add newfile && + git diff --cached >patch && + rm .git/index && + git add -N newfile && + git apply --cached patch + ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/8] apply: make sure check_preimage() does not leave empty file on error
The test case probably describes the test scenario the best. We have a patch to modify some file but the base file is gone. Because check_preimage() finds an index entry with the same old_name, it tries to restore the on-disk base file with cached content with checkout_target() and move on. If this old_name is i-t-a, before this patch "apply -3" will call checkout_target() which will write an empty file (i-t-a entries always have "empty blob" SHA-1), then it'll fail at verify_index_match() (i-t-a entries are always "modified") and the operation is aborted. This empty file should not be created. Compare to the case where old_name does not exist in index, "apply -3" fails with a different error message "...: does not exist" but it does not touch worktree. This patch makes sure the same happens to i-t-a entries. load_current() shares the same code pattern (look up an index entry, if on-disk version is not found, restore using the cached version). Fix it too (even though it's not exercised in any test case) Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/apply.c | 4 ++-- t/t2203-add-intent.sh | 16 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index 76b58a1..b185c97 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -3348,7 +3348,7 @@ static int load_current(struct image *image, struct patch *patch) die("BUG: patch to %s is not a creation", patch->old_name); pos = cache_name_pos(name, strlen(name)); - if (pos < 0) + if (pos < 0 || ce_intent_to_add(active_cache[pos])) return error(_("%s: does not exist in index"), name); ce = active_cache[pos]; if (lstat(name, &st)) { @@ -3501,7 +3501,7 @@ static int check_preimage(struct patch *patch, struct cache_entry **ce, struct s if (check_index && !previous) { int pos = cache_name_pos(old_name, strlen(old_name)); - if (pos < 0) { + if (pos < 0 || ce_intent_to_add(active_cache[pos])) { if (patch->is_new < 0) goto is_new; return error(_("%s: does not exist in index"), old_name); diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh index bb5ef2b..96c8755 100755 --- a/t/t2203-add-intent.sh +++ b/t/t2203-add-intent.sh @@ -95,5 +95,21 @@ test_expect_success 'apply adds new file on i-t-a entry' ' ) ' +test_expect_success 'apply:check_preimage() not creating empty file' ' + git init check-preimage && + ( + cd check-preimage && + echo oldcontent >newfile && + git add newfile && + echo newcontent >newfile && + git diff >patch && + rm .git/index && + git add -N newfile && + rm newfile && + test_must_fail git apply -3 patch && + ! test -f newfile + ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/8] blame: remove obsolete comment
That "someday" in the comment happened two years later in b65982b (Optimize "diff-index --cached" using cache-tree - 2009-05-20) Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/blame.c | 5 - 1 file changed, 5 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 4db01c1..942f302 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2379,11 +2379,6 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ce->ce_mode = create_ce_mode(mode); add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); - /* -* We are not going to write this out, so this does not matter -* right now, but someday we might optimize diff-index --cached -* with cache-tree information. -*/ cache_tree_invalidate_path(&the_index, path); return commit; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/8] checkout(-index): do not checkout i-t-a entries
The cached blob of i-t-a entries are empty blob. By checkout, we delete the content we have. Don't do it. This is done higher up instead of inside checkout_entry() because we would have limited options in there: silently ignore, loudly ignore, die. At higher level we can do better reporting. For example, "git checkout -- foo" will complain that "foo" does not match pathspec, just like when "foo" is not registered with "git add -N" Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/checkout-index.c | 5 - builtin/checkout.c | 2 ++ t/t2203-add-intent.sh| 34 ++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index 8028c37..eca975d 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -56,7 +56,8 @@ static int checkout_file(const char *name, const char *prefix) while (pos < active_nr) { struct cache_entry *ce = active_cache[pos]; if (ce_namelen(ce) != namelen || - memcmp(ce->name, name, namelen)) + memcmp(ce->name, name, namelen) || + ce_intent_to_add(ce)) break; has_same_name = 1; pos++; @@ -99,6 +100,8 @@ static void checkout_all(const char *prefix, int prefix_length) if (ce_stage(ce) != checkout_stage && (CHECKOUT_ALL != checkout_stage || !ce_stage(ce))) continue; + if (ce_intent_to_add(ce)) + continue; if (prefix && *prefix && (ce_namelen(ce) <= prefix_length || memcmp(prefix, ce->name, prefix_length))) diff --git a/builtin/checkout.c b/builtin/checkout.c index e1403be..02889d4 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -300,6 +300,8 @@ static int checkout_paths(const struct checkout_opts *opts, * anything to this entry at all. */ continue; + if (ce_intent_to_add(ce)) + continue; /* * Either this entry came from the tree-ish we are * checking the paths out of, or we are checking out diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh index 96c8755..d0f36a4 100755 --- a/t/t2203-add-intent.sh +++ b/t/t2203-add-intent.sh @@ -111,5 +111,39 @@ test_expect_success 'apply:check_preimage() not creating empty file' ' ) ' +test_expect_success 'checkout ignores i-t-a' ' + git init checkout && + ( + cd checkout && + echo data >file && + git add -N file && + test_must_fail git checkout -- file && + echo data >expected && + test_cmp expected file + ) +' + +test_expect_success 'checkout-index ignores i-t-a' ' + ( + cd checkout && + git checkout-index file && + echo data >expected && + test_cmp expected file + ) +' + +test_expect_success 'checkout-index --all ignores i-t-a' ' + ( + cd checkout && + echo data >anotherfile && + git add anotherfile && + rm anotherfile && + git checkout-index --all && + echo data >expected && + test_cmp expected file && + test_cmp expected anotherfile + ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/8] grep: make it clear i-t-a entries are ignored
The expression "!S_ISREG(ce)" covers i-t-a entries as well because ce->ce_mode would be zero then. I could make a comment saying that, but it's probably better just to comment with code, in case i-t-a entry content changes in future. Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index d04f440..f508179 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -375,7 +375,7 @@ static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int for (nr = 0; nr < active_nr; nr++) { const struct cache_entry *ce = active_cache[nr]; - if (!S_ISREG(ce->ce_mode)) + if (!S_ISREG(ce->ce_mode) || ce_intent_to_add(ce)) continue; if (!ce_path_match(ce, pathspec, NULL)) continue; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/8] diff.h: extend "flags" field to 64 bits because we're out of bits
I renamed both "flags" and "touched_flags" fields while making this patch to make sure I was aware of how these flags were manipulated (besides DIFF_OPT* macros). So hopefully I didn't miss anything. Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/commit.c | 2 +- diff-lib.c | 4 ++-- diff.c | 2 +- diff.h | 8 +--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 4cbd5ff..95f7296 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -895,7 +895,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix, * submodules which were manually staged, which would * be really confusing. */ - int diff_flags = DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG; + diff_flags_t diff_flags = DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG; if (ignore_submodule_arg && !strcmp(ignore_submodule_arg, "all")) diff_flags |= DIFF_OPT_IGNORE_SUBMODULES; diff --git a/diff-lib.c b/diff-lib.c index 241a843..ae09034 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -71,7 +71,7 @@ static int match_stat_with_submodule(struct diff_options *diffopt, { int changed = ce_match_stat(ce, st, ce_option); if (S_ISGITLINK(ce->ce_mode)) { - unsigned orig_flags = diffopt->flags; + diff_flags_t orig_flags = diffopt->flags; if (!DIFF_OPT_TST(diffopt, OVERRIDE_SUBMODULE_CONFIG)) set_diffopt_flags_from_submodule_config(diffopt, ce->name); if (DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES)) @@ -516,7 +516,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) return 0; } -int index_differs_from(const char *def, int diff_flags) +int index_differs_from(const char *def, diff_flags_t diff_flags) { struct rev_info rev; struct setup_revision_opt opt; diff --git a/diff.c b/diff.c index 7deac90..2485870 100644 --- a/diff.c +++ b/diff.c @@ -4912,7 +4912,7 @@ int diff_can_quit_early(struct diff_options *opt) static int is_submodule_ignored(const char *path, struct diff_options *options) { int ignored = 0; - unsigned orig_flags = options->flags; + diff_flags_t orig_flags = options->flags; if (!DIFF_OPT_TST(options, OVERRIDE_SUBMODULE_CONFIG)) set_diffopt_flags_from_submodule_config(options, path); if (DIFF_OPT_TST(options, IGNORE_SUBMODULES)) diff --git a/diff.h b/diff.h index f7208ad..4241aa5 100644 --- a/diff.h +++ b/diff.h @@ -110,13 +110,15 @@ enum diff_words_type { DIFF_WORDS_COLOR }; +typedef uint64_t diff_flags_t; + struct diff_options { const char *orderfile; const char *pickaxe; const char *single_follow; const char *a_prefix, *b_prefix; - unsigned flags; - unsigned touched_flags; + diff_flags_t flags; + diff_flags_t touched_flags; /* diff-filter bits */ unsigned int filter; @@ -347,7 +349,7 @@ extern int diff_result_code(struct diff_options *, int); extern void diff_no_index(struct rev_info *, int, const char **, const char *); -extern int index_differs_from(const char *def, int diff_flags); +extern int index_differs_from(const char *def, diff_flags_t diff_flags); extern size_t fill_textconv(struct userdiff_driver *driver, struct diff_filespec *df, -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/8] diff.h: extend "flags" field to 64 bits because we're out of bits
I renamed both "flags" and "touched_flags" fields while making this patch to make sure I was aware of how these flags were manipulated (besides DIFF_OPT* macros). So hopefully I didn't miss anything. Signed-off-by: Nguyễn Thái Ngọc Duy --- Resend to the right recipients. I screwed send-email up.. builtin/commit.c | 2 +- diff-lib.c | 4 ++-- diff.c | 2 +- diff.h | 8 +--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 4cbd5ff..95f7296 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -895,7 +895,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix, * submodules which were manually staged, which would * be really confusing. */ - int diff_flags = DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG; + diff_flags_t diff_flags = DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG; if (ignore_submodule_arg && !strcmp(ignore_submodule_arg, "all")) diff_flags |= DIFF_OPT_IGNORE_SUBMODULES; diff --git a/diff-lib.c b/diff-lib.c index 241a843..ae09034 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -71,7 +71,7 @@ static int match_stat_with_submodule(struct diff_options *diffopt, { int changed = ce_match_stat(ce, st, ce_option); if (S_ISGITLINK(ce->ce_mode)) { - unsigned orig_flags = diffopt->flags; + diff_flags_t orig_flags = diffopt->flags; if (!DIFF_OPT_TST(diffopt, OVERRIDE_SUBMODULE_CONFIG)) set_diffopt_flags_from_submodule_config(diffopt, ce->name); if (DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES)) @@ -516,7 +516,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) return 0; } -int index_differs_from(const char *def, int diff_flags) +int index_differs_from(const char *def, diff_flags_t diff_flags) { struct rev_info rev; struct setup_revision_opt opt; diff --git a/diff.c b/diff.c index 7deac90..2485870 100644 --- a/diff.c +++ b/diff.c @@ -4912,7 +4912,7 @@ int diff_can_quit_early(struct diff_options *opt) static int is_submodule_ignored(const char *path, struct diff_options *options) { int ignored = 0; - unsigned orig_flags = options->flags; + diff_flags_t orig_flags = options->flags; if (!DIFF_OPT_TST(options, OVERRIDE_SUBMODULE_CONFIG)) set_diffopt_flags_from_submodule_config(options, path); if (DIFF_OPT_TST(options, IGNORE_SUBMODULES)) diff --git a/diff.h b/diff.h index f7208ad..4241aa5 100644 --- a/diff.h +++ b/diff.h @@ -110,13 +110,15 @@ enum diff_words_type { DIFF_WORDS_COLOR }; +typedef uint64_t diff_flags_t; + struct diff_options { const char *orderfile; const char *pickaxe; const char *single_follow; const char *a_prefix, *b_prefix; - unsigned flags; - unsigned touched_flags; + diff_flags_t flags; + diff_flags_t touched_flags; /* diff-filter bits */ unsigned int filter; @@ -347,7 +349,7 @@ extern int diff_result_code(struct diff_options *, int); extern void diff_no_index(struct rev_info *, int, const char **, const char *); -extern int index_differs_from(const char *def, int diff_flags); +extern int index_differs_from(const char *def, diff_flags_t diff_flags); extern size_t fill_textconv(struct userdiff_driver *driver, struct diff_filespec *df, -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/8] Resurrect "diff-lib.c: adjust position of i-t-a entries in diff"
The original commit d95d728aba06a34394d15466045cbdabdada58a2 was reverted in commit 78cc1a540ba127b13f2f3fd531777b57f3a9cd46 because we were (and still are) not ready for a new world order. A lot more investigation must be done to see what is impacted. See the 78cc1a5 for details. This patch takes a smaller and safer step. The new behavior is controlled by SHIFT_INTENT_TO_ADD flag. We can gradually move more diff users to the new behavior after we are sure it's safe to do so. This flag is exposed to outside as "--shift-ita". "git status" sets this flag unconditionally to display i-t-a entries correctly. See d95d728 for details about "git status" and i-t-a entries. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/diff-options.txt | 6 ++ diff-lib.c | 14 ++ diff.c | 2 ++ diff.h | 1 + t/t2203-add-intent.sh | 20 ++-- wt-status.c| 2 ++ 6 files changed, 43 insertions(+), 2 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index d56ca90..b69db29 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -556,5 +556,11 @@ endif::git-format-patch[] --no-prefix:: Do not show any source or destination prefix. +--shift-ita:: + By default entries added by "git add -N" appear as an existing + empty file in "git diff" and a new file in "git diff --cached". + This option makes the entry appear as a new file in "git diff" + and non-existent in "git diff --cached". + For more detailed explanation on these common options, see also linkgit:gitdiffcore[7]. diff --git a/diff-lib.c b/diff-lib.c index ae09034..2546f5a 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -212,6 +212,12 @@ int run_diff_files(struct rev_info *revs, unsigned int option) ce->sha1, !is_null_sha1(ce->sha1), ce->name, 0); continue; + } else if (DIFF_OPT_TST(&revs->diffopt, SHIFT_INTENT_TO_ADD) && + ce_intent_to_add(ce)) { + diff_addremove(&revs->diffopt, '+', ce->ce_mode, + EMPTY_BLOB_SHA1_BIN, 0, + ce->name, 0); + continue; } changed = match_stat_with_submodule(&revs->diffopt, ce, &st, @@ -376,6 +382,14 @@ static void do_oneway_diff(struct unpack_trees_options *o, struct rev_info *revs = o->unpack_data; int match_missing, cached; + /* i-t-a entries do not actually exist in the index */ + if (DIFF_OPT_TST(&revs->diffopt, SHIFT_INTENT_TO_ADD) && + idx && ce_intent_to_add(idx)) { + idx = NULL; + if (!tree) + return; /* nothing to diff.. */ + } + /* if the entry is not checked out, don't examine work tree */ cached = o->index_only || (idx && ((idx->ce_flags & CE_VALID) || ce_skip_worktree(idx))); diff --git a/diff.c b/diff.c index 2485870..e554cad 100644 --- a/diff.c +++ b/diff.c @@ -3890,6 +3890,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) return parse_submodule_opt(options, arg); else if (skip_prefix(arg, "--ws-error-highlight=", &arg)) return parse_ws_error_highlight(options, arg); + else if (!strcmp(arg, "--shift-ita")) + DIFF_OPT_SET(options, SHIFT_INTENT_TO_ADD); /* misc options */ else if (!strcmp(arg, "-z")) diff --git a/diff.h b/diff.h index 4241aa5..68a4400 100644 --- a/diff.h +++ b/diff.h @@ -92,6 +92,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data) #define DIFF_OPT_FUNCCONTEXT (1 << 29) #define DIFF_OPT_PICKAXE_IGNORE_CASE (1 << 30) #define DIFF_OPT_DEFAULT_FOLLOW_RENAMES (1 << 31) +#define DIFF_OPT_SHIFT_INTENT_TO_ADD (1ULL << 32) #define DIFF_OPT_TST(opts, flag)((opts)->flags & DIFF_OPT_##flag) #define DIFF_OPT_TOUCHED(opts, flag)((opts)->touched_flags & DIFF_OPT_##flag) diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh index d0f36a4..7c90eee 100755 --- a/t/t2203-add-intent.sh +++ b/t/t2203-add-intent.sh @@ -5,10 +5,24 @@ test_description='Intent to add' . ./test-lib.sh test_expect_success 'intent to add' ' + test_commit 1 && + git rm 1.t && + echo hello >1.t && echo hello >f
[PATCH v3] gc: save log from daemonized gc --auto and print it next time
While commit 9f673f9 (gc: config option for running --auto in background - 2014-02-08) helps reduce some complaints about 'gc --auto' hogging the terminal, it creates another set of problems. The latest in this set is, as the result of daemonizing, stderr is closed and all warnings are lost. This warning at the end of cmd_gc() is particularly important because it tells the user how to avoid "gc --auto" running repeatedly. Because stderr is closed, the user does not know, naturally they complain about 'gc --auto' wasting CPU. Daemonized gc now saves stderr to $GIT_DIR/gc.log. Following gc runs will not be daemonized and gc.log printed out until the user removes gc.log. Signed-off-by: Nguyễn Thái Ngọc Duy --- Let's try again. Compared to v2 [1], this version does not delete gc.log automatically any more. The user needs to take action, then delete gc.log to bring it background again. [1] http://thread.gmane.org/gmane.comp.version-control.git/266182/focus=266320 builtin/gc.c | 36 +++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index bcc75d9..00a83e1 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -43,9 +43,20 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT; static struct argv_array rerere = ARGV_ARRAY_INIT; static char *pidfile; +static struct strbuf log_filename = STRBUF_INIT; +static int daemonized; static void remove_pidfile(void) { + if (daemonized && log_filename.len) { + struct stat st; + + close(2); + if (stat(log_filename.buf, &st) || + !st.st_size || + rename(log_filename.buf, git_path("gc.log"))) + unlink(log_filename.buf); + } if (pidfile) unlink(pidfile); } @@ -330,13 +341,24 @@ int cmd_gc(int argc, const char **argv, const char *prefix) fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n")); } if (detach_auto) { + struct strbuf sb = STRBUF_INIT; + if (strbuf_read_file(&sb, git_path("gc.log"), 0) > 0) { + warning(_("last gc run reported:\n" + "%s\n" + "running in foreground until %s is removed"), + sb.buf, git_path("gc.log")); + detach_auto = 0; + } + strbuf_release(&sb); + } + if (detach_auto) { if (gc_before_repack()) return -1; /* * failure to daemonize is ok, we'll continue * in foreground */ - daemonize(); + daemonized = !daemonize(); } } else add_repack_all_option(); @@ -349,6 +371,18 @@ int cmd_gc(int argc, const char **argv, const char *prefix) name, (uintmax_t)pid); } + if (daemonized) { + int fd; + + strbuf_addstr(&log_filename, git_path("gc.log_XX")); + fd = xmkstemp(log_filename.buf); + if (fd >= 0) { + dup2(fd, 2); + close(fd); + } else + strbuf_release(&log_filename); + } + if (gc_before_repack()) return -1; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2] gitignore, re-inclusion fix
This is an old problem. I attempted once [1] and then was reminded [2] with some more comments on the original patch. Let's try again. The problem is this .gitignore currently does not work, but it should: /abc !/abc/def/ghi This patch fixes that by realizing that the last rule may re-include something in abc/def so it does not exclude "abc" and "abc/def" outright to give the last rule a chance to match. [1] http://article.gmane.org/gmane.comp.version-control.git/259870 [2] http://thread.gmane.org/gmane.comp.version-control.git/265901/focus=266227 Nguyễn Thái Ngọc Duy (2): dir.c: make last_exclude_matching_from_list() run til the end dir.c: don't exclude whole dir prematurely if neg pattern may match Documentation/gitignore.txt| 21 ++--- dir.c | 89 +++--- t/t3001-ls-files-others-exclude.sh | 20 + 3 files changed, 118 insertions(+), 12 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] dir.c: make last_exclude_matching_from_list() run til the end
Signed-off-by: Nguyễn Thái Ngọc Duy --- dir.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index c00c7e2..3a7630a 100644 --- a/dir.c +++ b/dir.c @@ -901,6 +901,7 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, int *dtype, struct exclude_list *el) { + struct exclude *exc = NULL; /* undecided */ int i; if (!el->nr) @@ -922,18 +923,22 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, if (match_basename(basename, pathlen - (basename - pathname), exclude, prefix, x->patternlen, - x->flags)) - return x; + x->flags)) { + exc = x; + break; + } continue; } assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); if (match_pathname(pathname, pathlen, x->base, x->baselen ? x->baselen - 1 : 0, - exclude, prefix, x->patternlen, x->flags)) - return x; + exclude, prefix, x->patternlen, x->flags)) { + exc = x; + break; + } } - return NULL; /* undecided */ + return exc; } /* -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] dir.c: don't exclude whole dir prematurely if neg pattern may match
If there is a pattern "!foo/bar", this patch makes it not exclude "foo" right away. This gives us a chance to examine "foo" and re-include "foo/bar". In order for it to detect that the directory under examination should not be excluded right away, in other words it is a parent directory of a negative pattern, the "directory path" of the negative pattern must be literal. Patterns like "!f?o/bar" can't stop "foo" from being excluded. Basename matching (i.e. "no slashes in the pattern") or must-be-dir matching (i.e. "trailing slash in the pattern") does not work well with this. For example, if we descend in "foo" and are examining "foo/abc", current code for "foo/" pattern will check if path "foo/abc", not "foo", is a directory. The same problem with basename matching. These may need big code reorg to make it work. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/gitignore.txt| 21 --- dir.c | 76 +- t/t3001-ls-files-others-exclude.sh | 20 ++ 3 files changed, 109 insertions(+), 8 deletions(-) diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index 473623d..889a72a 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -82,12 +82,9 @@ PATTERN FORMAT - An optional prefix "`!`" which negates the pattern; any matching file excluded by a previous pattern will become - included again. It is not possible to re-include a file if a parent - directory of that file is excluded. Git doesn't list excluded - directories for performance reasons, so any patterns on contained - files have no effect, no matter where they are defined. - Put a backslash ("`\`") in front of the first "`!`" for patterns - that begin with a literal "`!`", for example, "`\!important!.txt`". + included again. It is possible to re-include a file if a parent + directory of that file is excluded, with restrictions. See section + NOTES for detail. - If the pattern ends with a slash, it is removed for the purpose of the following description, but it would only find @@ -141,6 +138,18 @@ not tracked by Git remain untracked. To stop tracking a file that is currently tracked, use 'git rm --cached'. +To re-include a file when its parent directory is excluded, the +following conditions must match: + + - The directory part in the re-include rules must be literal (i.e. no + wildcards) + + - The rules to exclude the parent directory must not end with a + trailing slash. + + - The rules to exclude the parent directory must have at least one + slash. + EXAMPLES diff --git a/dir.c b/dir.c index 3a7630a..a1f711c 100644 --- a/dir.c +++ b/dir.c @@ -882,6 +882,25 @@ int match_pathname(const char *pathname, int pathlen, */ if (!patternlen && !namelen) return 1; + /* +* This can happen when we ignore some exclude rules +* on directories in other to see if negative rules +* may match. E.g. +* +* /abc +* !/abc/def/ghi +* +* The pattern of interest is "/abc". On the first +* try, we should match path "abc" with this pattern +* in the "if" statement right above, but the caller +* ignores it. +* +* On the second try with paths within "abc", +* e.g. "abc/xyz", we come here and try to match it +* with "/abc". +*/ + if (!patternlen && namelen && *name == '/') + return 1; } return fnmatch_icase_mem(pattern, patternlen, @@ -890,6 +909,48 @@ int match_pathname(const char *pathname, int pathlen, } /* + * Return non-zero if pathname is a directory and an ancestor of the + * literal path in a (negative) pattern. This is used to keep + * descending in "foo" and "foo/bar" when the pattern is + * "!foo/bar/.gitignore". "foo/notbar" will not be descended however. + */ +static int match_neg_path(const char *pathname, int pathlen, int *dtype, + const char *base, int baselen, + const char *pattern, int prefix, int patternlen, + int flags) +{ + assert((flags & EXC_FLAG_NEGATIVE) && !(flags & EXC_FLAG_NODIR)); + + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, pathname, pathlen); + if (*dtype != DT_DIR) + return 0; + + if (*pattern
[PATCH] setup: update the right file in multiple checkouts
This code is introduced in 23af91d (prune: strategies for linked checkouts - 2014-11-30), and it's supposed to implement this rule from that commit's message: - linked checkouts are supposed to keep its location in $R/gitdir up to date. The use case is auto fixup after a manual checkout move. Note the name, "$R/gitdir", not "$R/gitfile". Correct the path to be updated accordingly. While at there, make sure I/O errors are not silently dropped. Signed-off-by: Nguyễn Thái Ngọc Duy --- The code was right in v2 [1] and became "gitfile" since v3 [2]. I need to reconsider my code quality after this :( [1] http://article.gmane.org/gmane.comp.version-control.git/239299 [2] http://article.gmane.org/gmane.comp.version-control.git/242325 setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.c b/setup.c index 5f9f07d..64bf2b4 100644 --- a/setup.c +++ b/setup.c @@ -402,9 +402,9 @@ static void update_linked_gitdir(const char *gitfile, const char *gitdir) struct strbuf path = STRBUF_INIT; struct stat st; - strbuf_addf(&path, "%s/gitfile", gitdir); + strbuf_addf(&path, "%s/gitdir", gitdir); if (stat(path.buf, &st) || st.st_mtime + 24 * 3600 < time(NULL)) - write_file(path.buf, 0, "%s\n", gitfile); + write_file(path.buf, 1, "%s\n", gitfile); strbuf_release(&path); } -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 5/5] clone: better error when --reference is a linked checkout
Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/clone.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/builtin/clone.c b/builtin/clone.c index 39d4adf..3e14491 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -294,9 +294,14 @@ static int add_one_reference(struct string_list_item *item, void *cb_data) char *ref_git_git = mkpathdup("%s/.git", ref_git); free(ref_git); ref_git = ref_git_git; - } else if (!is_directory(mkpath("%s/objects", ref_git))) + } else if (!is_directory(mkpath("%s/objects", ref_git))) { + struct strbuf sb = STRBUF_INIT; + if (get_common_dir(&sb, ref_git)) + die(_("reference repository '%s' as a linked checkout is not supported yet."), + item->string); die(_("reference repository '%s' is not a local repository."), item->string); + } if (!access(mkpath("%s/shallow", ref_git), F_OK)) die(_("reference repository '%s' is shallow"), item->string); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 3/5] enter_repo: allow .git files in strict mode
Strict mode is about not guessing where .git is. If the user points to a .git file, we know exactly where the target .git dir will be. This is needed even in local clone case because transport.c code uses upload-pack for fetching remote refs. Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/path.c b/path.c index 7340e11..32d4ca6 100644 --- a/path.c +++ b/path.c @@ -438,8 +438,13 @@ const char *enter_repo(const char *path, int strict) return NULL; path = validated_path; } - else if (chdir(path)) - return NULL; + else { + const char *gitfile = read_gitfile(used_path); + if (gitfile) + path = gitfile; + if (chdir(path)) + return NULL; + } if (is_git_directory(".")) { set_git_dir("."); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 4/5] clone: allow --local from a linked checkout
Noticed-by: Bjørnar Snoksrud Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/clone.c | 6 -- t/t2025-worktree-add.sh | 5 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 578da85..39d4adf 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -424,8 +424,10 @@ static void clone_local(const char *src_repo, const char *dest_repo) } else { struct strbuf src = STRBUF_INIT; struct strbuf dest = STRBUF_INIT; - strbuf_addf(&src, "%s/objects", src_repo); - strbuf_addf(&dest, "%s/objects", dest_repo); + get_common_dir(&src, src_repo); + get_common_dir(&dest, dest_repo); + strbuf_addstr(&src, "/objects"); + strbuf_addstr(&dest, "/objects"); copy_or_link_directory(&src, &dest, src_repo, src.len); strbuf_release(&src); strbuf_release(&dest); diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh index 8267411..3694174 100755 --- a/t/t2025-worktree-add.sh +++ b/t/t2025-worktree-add.sh @@ -193,4 +193,9 @@ test_expect_success '"add" -B/--detach mutually exclusive' ' test_must_fail git worktree add -B poodle --detach bamboo master ' +test_expect_success 'local clone from linked checkout' ' + git clone --local here here-clone && + ( cd here-clone && git fsck ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 1/5] path.c: delete an extra space
Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/path.c b/path.c index 95acbaf..a536ee3 100644 --- a/path.c +++ b/path.c @@ -431,7 +431,7 @@ const char *enter_repo(const char *path, int strict) } if (!suffix[i]) return NULL; - gitfile = read_gitfile(used_path) ; + gitfile = read_gitfile(used_path); if (gitfile) strcpy(used_path, gitfile); if (chdir(used_path)) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 2/5] enter_repo: avoid duplicating logic, use is_git_directory() instead
Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/path.c b/path.c index a536ee3..7340e11 100644 --- a/path.c +++ b/path.c @@ -441,8 +441,7 @@ const char *enter_repo(const char *path, int strict) else if (chdir(path)) return NULL; - if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 && - validate_headref("HEAD") == 0) { + if (is_git_directory(".")) { set_git_dir("."); check_repository_format(); return path; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 0/5] fix local clone from a linked checkout
Fixes strbuf_addf in 4/5. Makes a note about the relation of enter_repo to this local clone in 3/5. Changes since v1 -- 8< -- diff --git a/builtin/clone.c b/builtin/clone.c index 7a010bb..3e14491 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -432,7 +432,7 @@ static void clone_local(const char *src_repo, const char *dest_repo) get_common_dir(&src, src_repo); get_common_dir(&dest, dest_repo); strbuf_addstr(&src, "/objects"); - strbuf_addf(&dest, "/objects"); + strbuf_addstr(&dest, "/objects"); copy_or_link_directory(&src, &dest, src_repo, src.len); strbuf_release(&src); strbuf_release(&dest); -- 8< -- Nguyễn Thái Ngọc Duy (5): path.c: delete an extra space enter_repo: avoid duplicating logic, use is_git_directory() instead enter_repo: allow .git files in strict mode clone: allow --local from a linked checkout clone: better error when --reference is a linked checkout builtin/clone.c | 13 ++--- path.c | 14 +- t/t2025-worktree-add.sh | 5 + 3 files changed, 24 insertions(+), 8 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 0/2] gitignore, re-inclusion fix
No code change. Explain why 1/2 is needed in the commit message. Nguyễn Thái Ngọc Duy (2): dir.c: make last_exclude_matching_from_list() run til the end dir.c: don't exclude whole dir prematurely if neg pattern may match Documentation/gitignore.txt| 21 ++--- dir.c | 89 +++--- t/t3001-ls-files-others-exclude.sh | 20 + 3 files changed, 118 insertions(+), 12 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 1/2] dir.c: make last_exclude_matching_from_list() run til the end
The next patch adds some post processing to the result value before it's returned to the caller. Make all branches reach the end of the function, so we can do it all in one place. Signed-off-by: Nguyễn Thái Ngọc Duy --- dir.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index c00c7e2..3a7630a 100644 --- a/dir.c +++ b/dir.c @@ -901,6 +901,7 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, int *dtype, struct exclude_list *el) { + struct exclude *exc = NULL; /* undecided */ int i; if (!el->nr) @@ -922,18 +923,22 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, if (match_basename(basename, pathlen - (basename - pathname), exclude, prefix, x->patternlen, - x->flags)) - return x; + x->flags)) { + exc = x; + break; + } continue; } assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); if (match_pathname(pathname, pathlen, x->base, x->baselen ? x->baselen - 1 : 0, - exclude, prefix, x->patternlen, x->flags)) - return x; + exclude, prefix, x->patternlen, x->flags)) { + exc = x; + break; + } } - return NULL; /* undecided */ + return exc; } /* -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 2/2] dir.c: don't exclude whole dir prematurely if neg pattern may match
If there is a pattern "!foo/bar", this patch makes it not exclude "foo" right away. This gives us a chance to examine "foo" and re-include "foo/bar". In order for it to detect that the directory under examination should not be excluded right away, in other words it is a parent directory of a negative pattern, the "directory path" of the negative pattern must be literal. Patterns like "!f?o/bar" can't stop "foo" from being excluded. Basename matching (i.e. "no slashes in the pattern") or must-be-dir matching (i.e. "trailing slash in the pattern") does not work well with this. For example, if we descend in "foo" and are examining "foo/abc", current code for "foo/" pattern will check if path "foo/abc", not "foo", is a directory. The same problem with basename matching. These may need big code reorg to make it work. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/gitignore.txt| 21 --- dir.c | 76 +- t/t3001-ls-files-others-exclude.sh | 20 ++ 3 files changed, 109 insertions(+), 8 deletions(-) diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index 473623d..889a72a 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -82,12 +82,9 @@ PATTERN FORMAT - An optional prefix "`!`" which negates the pattern; any matching file excluded by a previous pattern will become - included again. It is not possible to re-include a file if a parent - directory of that file is excluded. Git doesn't list excluded - directories for performance reasons, so any patterns on contained - files have no effect, no matter where they are defined. - Put a backslash ("`\`") in front of the first "`!`" for patterns - that begin with a literal "`!`", for example, "`\!important!.txt`". + included again. It is possible to re-include a file if a parent + directory of that file is excluded, with restrictions. See section + NOTES for detail. - If the pattern ends with a slash, it is removed for the purpose of the following description, but it would only find @@ -141,6 +138,18 @@ not tracked by Git remain untracked. To stop tracking a file that is currently tracked, use 'git rm --cached'. +To re-include a file when its parent directory is excluded, the +following conditions must be met: + + - The directory part in the re-include rules must be literal (i.e. no + wildcards) + + - The rules to exclude the parent directory must not end with a + trailing slash. + + - The rules to exclude the parent directory must have at least one + slash. + EXAMPLES diff --git a/dir.c b/dir.c index 3a7630a..a1f711c 100644 --- a/dir.c +++ b/dir.c @@ -882,6 +882,25 @@ int match_pathname(const char *pathname, int pathlen, */ if (!patternlen && !namelen) return 1; + /* +* This can happen when we ignore some exclude rules +* on directories in other to see if negative rules +* may match. E.g. +* +* /abc +* !/abc/def/ghi +* +* The pattern of interest is "/abc". On the first +* try, we should match path "abc" with this pattern +* in the "if" statement right above, but the caller +* ignores it. +* +* On the second try with paths within "abc", +* e.g. "abc/xyz", we come here and try to match it +* with "/abc". +*/ + if (!patternlen && namelen && *name == '/') + return 1; } return fnmatch_icase_mem(pattern, patternlen, @@ -890,6 +909,48 @@ int match_pathname(const char *pathname, int pathlen, } /* + * Return non-zero if pathname is a directory and an ancestor of the + * literal path in a (negative) pattern. This is used to keep + * descending in "foo" and "foo/bar" when the pattern is + * "!foo/bar/.gitignore". "foo/notbar" will not be descended however. + */ +static int match_neg_path(const char *pathname, int pathlen, int *dtype, + const char *base, int baselen, + const char *pattern, int prefix, int patternlen, + int flags) +{ + assert((flags & EXC_FLAG_NEGATIVE) && !(flags & EXC_FLAG_NODIR)); + + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, pathname, pathlen); + if (*dtype != DT_DIR) + return 0; + + if (*pattern
[PATCH v4] gc: save log from daemonized gc --auto and print it next time
While commit 9f673f9 (gc: config option for running --auto in background - 2014-02-08) helps reduce some complaints about 'gc --auto' hogging the terminal, it creates another set of problems. The latest in this set is, as the result of daemonizing, stderr is closed and all warnings are lost. This warning at the end of cmd_gc() is particularly important because it tells the user how to avoid "gc --auto" running repeatedly. Because stderr is closed, the user does not know, naturally they complain about 'gc --auto' wasting CPU. Daemonized gc now saves stderr to $GIT_DIR/gc.log. Following gc --auto will not run and gc.log printed out until the user removes gc.log. Signed-off-by: Nguyễn Thái Ngọc Duy --- When gc.log exists, gc --auto now simply exits builtin/gc.c | 33 - 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index bcc75d9..2c3aaeb 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -43,9 +43,20 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT; static struct argv_array rerere = ARGV_ARRAY_INIT; static char *pidfile; +static struct strbuf log_filename = STRBUF_INIT; +static int daemonized; static void remove_pidfile(void) { + if (daemonized && log_filename.len) { + struct stat st; + + close(2); + if (stat(log_filename.buf, &st) || + !st.st_size || + rename(log_filename.buf, git_path("gc.log"))) + unlink(log_filename.buf); + } if (pidfile) unlink(pidfile); } @@ -330,13 +341,21 @@ int cmd_gc(int argc, const char **argv, const char *prefix) fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n")); } if (detach_auto) { + struct strbuf sb = STRBUF_INIT; + if (strbuf_read_file(&sb, git_path("gc.log"), 0) > 0) + return error(_("last gc run reported:\n" + "%s\n" + "not running until %s is removed"), +sb.buf, git_path("gc.log")); + strbuf_release(&sb); + if (gc_before_repack()) return -1; /* * failure to daemonize is ok, we'll continue * in foreground */ - daemonize(); + daemonized = !daemonize(); } } else add_repack_all_option(); @@ -349,6 +368,18 @@ int cmd_gc(int argc, const char **argv, const char *prefix) name, (uintmax_t)pid); } + if (daemonized) { + int fd; + + strbuf_addstr(&log_filename, git_path("gc.log_XX")); + fd = xmkstemp(log_filename.buf); + if (fd >= 0) { + dup2(fd, 2); + close(fd); + } else + strbuf_release(&log_filename); + } + if (gc_before_repack()) return -1; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5] gc: save log from daemonized gc --auto and print it next time
While commit 9f673f9 (gc: config option for running --auto in background - 2014-02-08) helps reduce some complaints about 'gc --auto' hogging the terminal, it creates another set of problems. The latest in this set is, as the result of daemonizing, stderr is closed and all warnings are lost. This warning at the end of cmd_gc() is particularly important because it tells the user how to avoid "gc --auto" running repeatedly. Because stderr is closed, the user does not know, naturally they complain about 'gc --auto' wasting CPU. Daemonized gc now saves stderr to $GIT_DIR/gc.log. Following gc --auto will not run and gc.log printed out until the user removes gc.log. Signed-off-by: Nguyễn Thái Ngọc Duy --- This is the lock-based version. I'm sending an alternative that does not need atexit() with comparison between the two. builtin/gc.c | 56 +++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index bcc75d9..4e738fa 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -43,6 +43,7 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT; static struct argv_array rerere = ARGV_ARRAY_INIT; static char *pidfile; +static struct lock_file log_lock; static void remove_pidfile(void) { @@ -57,6 +58,28 @@ static void remove_pidfile_on_signal(int signo) raise(signo); } +static void process_log_file(void) +{ + struct stat st; + if (!fstat(log_lock.fd, &st) && st.st_size) + commit_lock_file(&log_lock); + else + rollback_lock_file(&log_lock); +} + +static void process_log_file_at_exit(void) +{ + fflush(stderr); + process_log_file(); +} + +static void process_log_file_on_signal(int signo) +{ + process_log_file(); + sigchain_pop(signo); + raise(signo); +} + static void git_config_date_string(const char *key, const char **output) { if (git_config_get_string_const(key, output)) @@ -253,6 +276,24 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid) return NULL; } +static int report_last_gc_error(void) +{ + struct strbuf sb = STRBUF_INIT; + int ret; + + ret = strbuf_read_file(&sb, git_path("gc.log"), 0); + if (ret > 0) + return error(_("The last gc run reported the following. " + "Please correct the root cause\n" + "and remove %s.\n" + "Automatic cleanup will not be performed " + "until the file is removed.\n\n" + "%s"), +git_path("gc.log"), sb.buf); + strbuf_release(&sb); + return 0; +} + static int gc_before_repack(void) { if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD)) @@ -274,6 +315,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) int force = 0; const char *name; pid_t pid; + int daemonized = 0; struct option builtin_gc_options[] = { OPT__QUIET(&quiet, N_("suppress progress reporting")), @@ -330,13 +372,16 @@ int cmd_gc(int argc, const char **argv, const char *prefix) fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n")); } if (detach_auto) { + if (report_last_gc_error()) + return -1; + if (gc_before_repack()) return -1; /* * failure to daemonize is ok, we'll continue * in foreground */ - daemonize(); + daemonized = !daemonize(); } } else add_repack_all_option(); @@ -349,6 +394,15 @@ int cmd_gc(int argc, const char **argv, const char *prefix) name, (uintmax_t)pid); } + if (daemonized) { + hold_lock_file_for_update(&log_lock, + git_path("gc.log"), + LOCK_DIE_ON_ERROR); + dup2(log_lock.fd, 2); + sigchain_push_common(process_log_file_on_signal); + atexit(process_log_file_at_exit); + } + if (gc_before_repack()) return -1; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Alt. PATCH v5] gc: save log from daemonized gc --auto and print it next time
While commit 9f673f9 (gc: config option for running --auto in background - 2014-02-08) helps reduce some complaints about 'gc --auto' hogging the terminal, it creates another set of problems. The latest in this set is, as the result of daemonizing, stderr is closed and all warnings are lost. This warning at the end of cmd_gc() is particularly important because it tells the user how to avoid "gc --auto" running repeatedly. Because stderr is closed, the user does not know, naturally they complain about 'gc --auto' wasting CPU. Daemonized gc now saves stderr to $GIT_DIR/gc.log. Following gc --auto will not run and gc.log printed out until the user removes gc.log. Signed-off-by: Nguyễn Thái Ngọc Duy --- The lock-based version has an advantage that the following gc runs will never see partial gc.log. But it requires some more hook at atexit() and maybe signal handler. This version avoids that, and gc.log can be kept even if gc is SIGKILL'd (unlikely because gc itself does not do anything that can upset the kernel), but then it's racy. I think I perfer the lock-based version. builtin/gc.c | 66 +++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index bcc75d9..3d42ef7 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -43,6 +43,8 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT; static struct argv_array rerere = ARGV_ARRAY_INIT; static char *pidfile; +static struct strbuf log_filename = STRBUF_INIT; +static int daemonized; static void remove_pidfile(void) { @@ -253,6 +255,41 @@ static const char *lock_repo_for_gc(int force, pid_t* ret_pid) return NULL; } +static int report_last_gc_error(void) +{ + struct strbuf sb = STRBUF_INIT; + int ret; + + if (!access(git_path("gc.pid"), R_OK)) + /* +* Either background gc is still running, or we have a stale +* gc.pid. +* +* If the former, it's not the right time to look at gc.log +* yet. We'll exit shortly after lock_repo_for_gc(). +* +* If the latter, we'll run gc --auto one more time. But +* because gc.log is appended, the old log (if exists) won't +* be lost. One extra gc run is not that big a deal to avoid. +*/ + return 0; + + ret = strbuf_read_file(&sb, git_path("gc.log"), 0); + if (ret > 0) + return error(_("The last gc run reported the following. " + "Please correct the root cause\n" + "and remove %s. Automatic cleanup will not " + "be performed\n" + "until the file is removed.\n\n" + "%s"), +git_path("gc.log"), sb.buf); + else if (!ret) + /* racy, but in the worst case we waste one more gc run */ + unlink(git_path("gc.log")); + strbuf_release(&sb); + return 0; +} + static int gc_before_repack(void) { if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD)) @@ -330,13 +367,16 @@ int cmd_gc(int argc, const char **argv, const char *prefix) fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n")); } if (detach_auto) { + if (report_last_gc_error()) + return -1; + if (gc_before_repack()) return -1; /* * failure to daemonize is ok, we'll continue * in foreground */ - daemonize(); + daemonized = !daemonize(); } } else add_repack_all_option(); @@ -349,6 +389,18 @@ int cmd_gc(int argc, const char **argv, const char *prefix) name, (uintmax_t)pid); } + if (daemonized) { + int fd; + + strbuf_git_path(&log_filename, "gc.log"); + fd = open(log_filename.buf, O_WRONLY | O_CREAT | O_APPEND, 0644); + if (fd >= 0) { + dup2(fd, 2); + close(fd); + } else + strbuf_release(&log_filename); + } + if (gc_before_repack()) return -1; @@ -376,6 +428,17 @@ int cmd_gc(int argc, const char **argv, const char *prefix) warning(_("There are too many unreachable loose objects; "
[PATCH/RFC] Add test_repo_expect_success for running tests in a new repository
This could be convenient when tests are independent from the rest in the same file. Normally we would do this test_expect_success '...' ' git init foo && ( cd foo &&
[PATCH v3 1/2] dir.c: make last_exclude_matching_from_list() run til the end
The next patch adds some post processing to the result value before it's returned to the caller. Keep all branches reach the end of the function, so we can do it all in one place. Signed-off-by: Nguyễn Thái Ngọc Duy --- dir.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index c00c7e2..3a7630a 100644 --- a/dir.c +++ b/dir.c @@ -901,6 +901,7 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, int *dtype, struct exclude_list *el) { + struct exclude *exc = NULL; /* undecided */ int i; if (!el->nr) @@ -922,18 +923,22 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, if (match_basename(basename, pathlen - (basename - pathname), exclude, prefix, x->patternlen, - x->flags)) - return x; + x->flags)) { + exc = x; + break; + } continue; } assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); if (match_pathname(pathname, pathlen, x->base, x->baselen ? x->baselen - 1 : 0, - exclude, prefix, x->patternlen, x->flags)) - return x; + exclude, prefix, x->patternlen, x->flags)) { + exc = x; + break; + } } - return NULL; /* undecided */ + return exc; } /* -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 0/2] gitignore, re-inclusion fix
Changes since v2 -- 8< -- diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index 889a72a..79a1948 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -82,9 +82,12 @@ PATTERN FORMAT - An optional prefix "`!`" which negates the pattern; any matching file excluded by a previous pattern will become - included again. It is possible to re-include a file if a parent - directory of that file is excluded, with restrictions. See section - NOTES for detail. + included again. + Put a backslash ("`\`") in front of the first "`!`" for patterns + that begin with a literal "`!`", for example, "`\!important!.txt`". + It is possible to re-include a file if a parent directory of that + file is excluded if certain conditions are met. See section NOTES + for detail. - If the pattern ends with a slash, it is removed for the purpose of the following description, but it would only find @@ -138,8 +141,11 @@ not tracked by Git remain untracked. To stop tracking a file that is currently tracked, use 'git rm --cached'. -To re-include a file when its parent directory is excluded, the -following conditions must be met: +To re-include files or directories when their parent directory is +excluded, the following conditions must be met: + + - The rules to exclude a directory and re-include a subset back must + be in the same .gitignore file. - The directory part in the re-include rules must be literal (i.e. no wildcards) diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index 9de49a6..da257c0 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -310,9 +310,14 @@ test_expect_success 'negative patterns' ' ( cd reinclude && cat >.gitignore <<-\EOF && + /fooo /foo !foo/bar/bar EOF + mkdir fooo && + cat >fooo/.gitignore <<-\EOF && + !/* + EOF mkdir -p foo/bar && touch abc foo/def foo/bar/ghi foo/bar/bar && git ls-files -o --exclude-standard >../actual && -- 8< -- Nguyễn Thái Ngọc Duy (2): dir.c: make last_exclude_matching_from_list() run til the end dir.c: don't exclude whole dir prematurely if neg pattern may match Documentation/gitignore.txt| 23 -- dir.c | 89 +++--- t/t3001-ls-files-others-exclude.sh | 25 +++ 3 files changed, 127 insertions(+), 10 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 2/2] dir.c: don't exclude whole dir prematurely if neg pattern may match
If there is a pattern "!foo/bar", this patch makes it not exclude "foo" right away. This gives us a chance to examine "foo" and re-include "foo/bar". In order for it to detect that the directory under examination should not be excluded right away, in other words it is a parent directory of a negative pattern, the "directory path" of the negative pattern must be literal. Patterns like "!f?o/bar" can't stop "foo" from being excluded. Basename matching (i.e. "no slashes in the pattern") or must-be-dir matching (i.e. "trailing slash in the pattern") does not work well with this. For example, if we descend in "foo" and are examining "foo/abc", current code for "foo/" pattern will check if path "foo/abc", not "foo", is a directory. The same problem with basename matching. These may need big code reorg to make it work. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/gitignore.txt| 23 ++-- dir.c | 76 +- t/t3001-ls-files-others-exclude.sh | 25 + 3 files changed, 118 insertions(+), 6 deletions(-) diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index 473623d..79a1948 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -82,12 +82,12 @@ PATTERN FORMAT - An optional prefix "`!`" which negates the pattern; any matching file excluded by a previous pattern will become - included again. It is not possible to re-include a file if a parent - directory of that file is excluded. Git doesn't list excluded - directories for performance reasons, so any patterns on contained - files have no effect, no matter where they are defined. + included again. Put a backslash ("`\`") in front of the first "`!`" for patterns that begin with a literal "`!`", for example, "`\!important!.txt`". + It is possible to re-include a file if a parent directory of that + file is excluded if certain conditions are met. See section NOTES + for detail. - If the pattern ends with a slash, it is removed for the purpose of the following description, but it would only find @@ -141,6 +141,21 @@ not tracked by Git remain untracked. To stop tracking a file that is currently tracked, use 'git rm --cached'. +To re-include files or directories when their parent directory is +excluded, the following conditions must be met: + + - The rules to exclude a directory and re-include a subset back must + be in the same .gitignore file. + + - The directory part in the re-include rules must be literal (i.e. no + wildcards) + + - The rules to exclude the parent directory must not end with a + trailing slash. + + - The rules to exclude the parent directory must have at least one + slash. + EXAMPLES diff --git a/dir.c b/dir.c index 3a7630a..a1f711c 100644 --- a/dir.c +++ b/dir.c @@ -882,6 +882,25 @@ int match_pathname(const char *pathname, int pathlen, */ if (!patternlen && !namelen) return 1; + /* +* This can happen when we ignore some exclude rules +* on directories in other to see if negative rules +* may match. E.g. +* +* /abc +* !/abc/def/ghi +* +* The pattern of interest is "/abc". On the first +* try, we should match path "abc" with this pattern +* in the "if" statement right above, but the caller +* ignores it. +* +* On the second try with paths within "abc", +* e.g. "abc/xyz", we come here and try to match it +* with "/abc". +*/ + if (!patternlen && namelen && *name == '/') + return 1; } return fnmatch_icase_mem(pattern, patternlen, @@ -890,6 +909,48 @@ int match_pathname(const char *pathname, int pathlen, } /* + * Return non-zero if pathname is a directory and an ancestor of the + * literal path in a (negative) pattern. This is used to keep + * descending in "foo" and "foo/bar" when the pattern is + * "!foo/bar/.gitignore". "foo/notbar" will not be descended however. + */ +static int match_neg_path(const char *pathname, int pathlen, int *dtype, + const char *base, int baselen, + const char *pattern, int prefix, int patternlen, + int flags) +{ + assert((flags & EXC_FLAG_NEGATIVE) && !(flags & EXC_FLAG_NODIR));
[PATCH] ls-remote.txt: delete unsupported option
-u has never been supported, but it was mentioned since 0a2bb55 (git ls-remote: make usage string match manpage - 2008-11-11). Nobody has complained about it for seven years, it's probably safe to say nobody cares. So let's remove "-u" in documents instead of adding code to support it. While at there, fix --upload-pack syntax too. Signed-off-by: Nguyễn Thái Ngọc Duy --- Documentation/git-ls-remote.txt | 3 +-- builtin/ls-remote.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/git-ls-remote.txt b/Documentation/git-ls-remote.txt index 2e22915..d510c05 100644 --- a/Documentation/git-ls-remote.txt +++ b/Documentation/git-ls-remote.txt @@ -9,7 +9,7 @@ git-ls-remote - List references in a remote repository SYNOPSIS [verse] -'git ls-remote' [--heads] [--tags] [-u | --upload-pack ] +'git ls-remote' [--heads] [--tags] [--upload-pack=] [--exit-code] [...] DESCRIPTION @@ -29,7 +29,6 @@ OPTIONS both, references stored in refs/heads and refs/tags are displayed. --u :: --upload-pack=:: Specify the full path of 'git-upload-pack' on the remote host. This allows listing references from repositories accessed via diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 4554dbc..5e9d545 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -4,7 +4,7 @@ #include "remote.h" static const char ls_remote_usage[] = -"git ls-remote [--heads] [--tags] [-u | --upload-pack ]\n" +"git ls-remote [--heads] [--tags] [--upload-pack=]\n" " [-q | --quiet] [--exit-code] [--get-url] [ [...]]"; /* -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 0/6] fix local clone from a linked checkout
This fixes another embarassing bug in 4/6 and adds tests to make sure it works. Changes since v3 -- 8< -- diff --git a/path.c b/path.c index 32d4ca6..a346134 100644 --- a/path.c +++ b/path.c @@ -439,7 +439,7 @@ const char *enter_repo(const char *path, int strict) path = validated_path; } else { - const char *gitfile = read_gitfile(used_path); + const char *gitfile = read_gitfile(path); if (gitfile) path = gitfile; if (chdir(path)) diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh index 9393322..9670e8c 100755 --- a/t/t0002-gitfile.sh +++ b/t/t0002-gitfile.sh @@ -116,4 +116,46 @@ test_expect_success 'setup_git_dir twice in subdir' ' ) ' +test_expect_success 'enter_repo non-strict mode' ' + test_create_repo enter_repo && + ( + cd enter_repo && + test_tick && + test_commit foo && + mv .git .realgit && + echo "gitdir: .realgit" >.git + ) && + git ls-remote enter_repo >actual && + cat >expected <<-\EOF && + 946e985ab20de757ca5b872b16d64e92ff3803a9HEAD + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo + EOF + test_cmp expected actual +' + +test_expect_success 'enter_repo linked checkout' ' + ( + cd enter_repo && + git worktree add ../foo refs/tags/foo + ) && + git ls-remote foo >actual && + cat >expected <<-\EOF && + 946e985ab20de757ca5b872b16d64e92ff3803a9HEAD + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo + EOF + test_cmp expected actual +' + +test_expect_success 'enter_repo strict mode' ' + git ls-remote --upload-pack="git upload-pack --strict" foo/.git >actual && + cat >expected <<-\EOF && + 946e985ab20de757ca5b872b16d64e92ff3803a9HEAD + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo + EOF + test_cmp expected actual +' + test_done -- 8< -- Nguyễn Thái Ngọc Duy (6): path.c: delete an extra space t0002: add test for enter_repo(), non-strict mode enter_repo: avoid duplicating logic, use is_git_directory() instead enter_repo: allow .git files in strict mode clone: allow --local from a linked checkout clone: better error when --reference is a linked checkout builtin/clone.c | 13 ++--- path.c | 14 +- t/t0002-gitfile.sh | 42 ++ t/t2025-worktree-add.sh | 5 + 4 files changed, 66 insertions(+), 8 deletions(-) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 6/6] clone: better error when --reference is a linked checkout
Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/clone.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/builtin/clone.c b/builtin/clone.c index 39d4adf..3e14491 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -294,9 +294,14 @@ static int add_one_reference(struct string_list_item *item, void *cb_data) char *ref_git_git = mkpathdup("%s/.git", ref_git); free(ref_git); ref_git = ref_git_git; - } else if (!is_directory(mkpath("%s/objects", ref_git))) + } else if (!is_directory(mkpath("%s/objects", ref_git))) { + struct strbuf sb = STRBUF_INIT; + if (get_common_dir(&sb, ref_git)) + die(_("reference repository '%s' as a linked checkout is not supported yet."), + item->string); die(_("reference repository '%s' is not a local repository."), item->string); + } if (!access(mkpath("%s/shallow", ref_git), F_OK)) die(_("reference repository '%s' is shallow"), item->string); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 5/6] clone: allow --local from a linked checkout
Noticed-by: Bjørnar Snoksrud Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/clone.c | 6 -- t/t2025-worktree-add.sh | 5 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 578da85..39d4adf 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -424,8 +424,10 @@ static void clone_local(const char *src_repo, const char *dest_repo) } else { struct strbuf src = STRBUF_INIT; struct strbuf dest = STRBUF_INIT; - strbuf_addf(&src, "%s/objects", src_repo); - strbuf_addf(&dest, "%s/objects", dest_repo); + get_common_dir(&src, src_repo); + get_common_dir(&dest, dest_repo); + strbuf_addstr(&src, "/objects"); + strbuf_addstr(&dest, "/objects"); copy_or_link_directory(&src, &dest, src_repo, src.len); strbuf_release(&src); strbuf_release(&dest); diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh index 8267411..3694174 100755 --- a/t/t2025-worktree-add.sh +++ b/t/t2025-worktree-add.sh @@ -193,4 +193,9 @@ test_expect_success '"add" -B/--detach mutually exclusive' ' test_must_fail git worktree add -B poodle --detach bamboo master ' +test_expect_success 'local clone from linked checkout' ' + git clone --local here here-clone && + ( cd here-clone && git fsck ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 3/6] enter_repo: avoid duplicating logic, use is_git_directory() instead
It matters for linked checkouts where 'refs' directory won't be available in $GIT_DIR. is_git_directory() knows about $GIT_COMMON_DIR and can handle this case. Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 3 +-- t/t0002-gitfile.sh | 14 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/path.c b/path.c index a536ee3..7340e11 100644 --- a/path.c +++ b/path.c @@ -441,8 +441,7 @@ const char *enter_repo(const char *path, int strict) else if (chdir(path)) return NULL; - if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 && - validate_headref("HEAD") == 0) { + if (is_git_directory(".")) { set_git_dir("."); check_repository_format(); return path; diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh index 545bfe2..2e709cc 100755 --- a/t/t0002-gitfile.sh +++ b/t/t0002-gitfile.sh @@ -134,4 +134,18 @@ test_expect_success 'enter_repo non-strict mode' ' test_cmp expected actual ' +test_expect_success 'enter_repo linked checkout' ' + ( + cd enter_repo && + git worktree add ../foo refs/tags/foo + ) && + git ls-remote foo >actual && + cat >expected <<-\EOF && + 946e985ab20de757ca5b872b16d64e92ff3803a9HEAD + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo + EOF + test_cmp expected actual +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 2/6] t0002: add test for enter_repo(), non-strict mode
Signed-off-by: Nguyễn Thái Ngọc Duy --- t/t0002-gitfile.sh | 18 ++ 1 file changed, 18 insertions(+) diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh index 9393322..545bfe2 100755 --- a/t/t0002-gitfile.sh +++ b/t/t0002-gitfile.sh @@ -116,4 +116,22 @@ test_expect_success 'setup_git_dir twice in subdir' ' ) ' +test_expect_success 'enter_repo non-strict mode' ' + test_create_repo enter_repo && + ( + cd enter_repo && + test_tick && + test_commit foo && + mv .git .realgit && + echo "gitdir: .realgit" >.git + ) && + git ls-remote enter_repo >actual && + cat >expected <<-\EOF && + 946e985ab20de757ca5b872b16d64e92ff3803a9HEAD + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo + EOF + test_cmp expected actual +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 4/6] enter_repo: allow .git files in strict mode
Strict mode is about not guessing where .git is. If the user points to a .git file, we know exactly where the target .git dir will be. This makes it possible to serve .git files as repository on the server side. This may be needed even in local clone case because transport.c code uses upload-pack for fetching remote refs. But right now the clone/transport code goes with non-strict. Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 9 +++-- t/t0002-gitfile.sh | 10 ++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/path.c b/path.c index 7340e11..a346134 100644 --- a/path.c +++ b/path.c @@ -438,8 +438,13 @@ const char *enter_repo(const char *path, int strict) return NULL; path = validated_path; } - else if (chdir(path)) - return NULL; + else { + const char *gitfile = read_gitfile(path); + if (gitfile) + path = gitfile; + if (chdir(path)) + return NULL; + } if (is_git_directory(".")) { set_git_dir("."); diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh index 2e709cc..9670e8c 100755 --- a/t/t0002-gitfile.sh +++ b/t/t0002-gitfile.sh @@ -148,4 +148,14 @@ test_expect_success 'enter_repo linked checkout' ' test_cmp expected actual ' +test_expect_success 'enter_repo strict mode' ' + git ls-remote --upload-pack="git upload-pack --strict" foo/.git >actual && + cat >expected <<-\EOF && + 946e985ab20de757ca5b872b16d64e92ff3803a9HEAD + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master + 946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo + EOF + test_cmp expected actual +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 1/6] path.c: delete an extra space
Signed-off-by: Nguyễn Thái Ngọc Duy --- path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/path.c b/path.c index 95acbaf..a536ee3 100644 --- a/path.c +++ b/path.c @@ -431,7 +431,7 @@ const char *enter_repo(const char *path, int strict) } if (!suffix[i]) return NULL; - gitfile = read_gitfile(used_path) ; + gitfile = read_gitfile(used_path); if (gitfile) strcpy(used_path, gitfile); if (chdir(used_path)) -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] read-cache: fix untracked cache invalidation when split-index is used
Before this change, t7063.17 fails. The actual action though happens at t7063.16 where the entry "two" is added back to index after being removed in the .13. Here we expect a directory invalidate at .16 and none at .17 where untracked cache is refreshed. But things do not go as expected when GIT_TEST_SPLIT_INDEX is set. The different behavior that happens at .16 when split index is used: the entry "two", when deleted at .13, is simply marked "deleted". When .16 executes, the entry resurfaces from the version in base index. This happens in merge_base_index() where add_index_entry() is called to add "two" back from the base index. This is where the bug comes from. The add_index_entry() is called with ADD_CACHE_KEEP_CACHE_TREE flag because this version of "two" is not new, it does not break either cache-tree or untracked cache. The code should check this flag and not invalidate untracked cache. This causes a second invalidation violates test expectation. The fix is obvious. Noticed-by: Thomas Gummerer Signed-off-by: Nguyễn Thái Ngọc Duy --- PS. perhaps I should rename ADD_CACHE_KEEP_CACHE_TREE to ADD_CACHE_KEEP_CACHE, or add a new flag .._KEEP_UNTRACKED_CACHE to avoid confusion. On Sun, Jun 7, 2015 at 1:20 PM, Junio C Hamano wrote: > Thomas Gummerer writes: > >> When running the test suite with GIT_TEST_SPLIT_INDEX set, tests 17-18 >> in t7063 fail. Unset GIT_TEST_SPLIT_INDEX at the beginning of the test, >> in order to fix it. > > That is not fixing but sweeping the problem under the rug, is it? Indeed. > Duy, untracked-cache is a fairly new toy and I wouldn't be surprised > if it has un-thought-out interaction with split-index which is also > a fairly new exotic toy. As both are from you, can you take a look > at it? Still a bit slow to address, but I marked Thomas' mail for investigation when it came. > We may want to make it easier to run tests with TEST-SPLIT-INDEX, if > we envision that the feature will bring us sufficient benefit and we > would eventually want to encourage its use to more people. As it > stands now, only people who are curious enough opt into trying it > out by exporting the environment, which would be done by a tiny > minority of developers and users. Untracked cache, split index and the last part (*) all aim at a smaller user audience with large work trees. They are not really what a common user needs, but I hope they do have users. We could make the test suite run twice, a normal run and a test-split-index run. But I'm not sure if it really justifies doubling test time. We will have to deal with this situation when/if pack-v4 is merged because we would want to exercise both v3 and v4 as much as possible. (*) the last part should keep index read time small enough even if the index is very large and avoid lstat() at refresh time with the help from watchman. read-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index 723d48d..309ccc7 100644 --- a/read-cache.c +++ b/read-cache.c @@ -999,7 +999,8 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e } pos = -pos-1; - untracked_cache_add_to_index(istate, ce->name); + if (!(option & ADD_CACHE_KEEP_CACHE_TREE)) + untracked_cache_add_to_index(istate, ce->name); /* * Inserting a merged entry ("stage 0") into the index -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] checkout: don't check worktrees when not necessary
When --patch or pathspecs are passed to git checkout, the working tree will not be switching branch, so there's no need to check if the branch that we are running checkout on is already checked out. Original-patch-by: Spencer Baugh Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/checkout.c | 23 +++ t/t2025-checkout-to.sh | 8 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index 9b49f0e..e227f64 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1110,7 +1110,6 @@ static int parse_branchname_arg(int argc, const char **argv, { struct tree **source_tree = &opts->source_tree; const char **new_branch = &opts->new_branch; - int force_detach = opts->force_detach; int argcount = 0; unsigned char branch_rev[20]; const char *arg; @@ -1231,17 +1230,6 @@ static int parse_branchname_arg(int argc, const char **argv, else new->path = NULL; /* not an existing branch */ - if (new->path && !force_detach && !*new_branch) { - unsigned char sha1[20]; - int flag; - char *head_ref = resolve_refdup("HEAD", 0, sha1, &flag); - if (head_ref && - (!(flag & REF_ISSYMREF) || strcmp(head_ref, new->path)) && - !opts->ignore_other_worktrees) - check_linked_checkouts(new); - free(head_ref); - } - new->commit = lookup_commit_reference_gently(rev, 1); if (!new->commit) { /* not a commit */ @@ -1321,6 +1309,17 @@ static int checkout_branch(struct checkout_opts *opts, die(_("Cannot switch branch to a non-commit '%s'"), new->name); + if (new->path && !opts->force_detach && !opts->new_branch) { + unsigned char sha1[20]; + int flag; + char *head_ref = resolve_refdup("HEAD", 0, sha1, &flag); + if (head_ref && + (!(flag & REF_ISSYMREF) || strcmp(head_ref, new->path)) && + !opts->ignore_other_worktrees) + check_linked_checkouts(new); + free(head_ref); + } + if (opts->new_worktree) return prepare_linked_checkout(opts, new); diff --git a/t/t2025-checkout-to.sh b/t/t2025-checkout-to.sh index f8e4df4..a8d9336 100755 --- a/t/t2025-checkout-to.sh +++ b/t/t2025-checkout-to.sh @@ -28,6 +28,14 @@ test_expect_success 'checkout --to refuses to checkout locked branch' ' ! test -d .git/worktrees/zere ' +test_expect_success 'checking out paths not complaining about linked checkouts' ' + ( + cd existing_empty && + echo dirty >>init.t && + git checkout master -- init.t + ) +' + test_expect_success 'checkout --to a new worktree' ' git rev-parse HEAD >expect && git checkout --detach --to here master && -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] strbuf: stop out-of-boundary warnings from Coverity
It usually goes like this strbuf sb = STRBUF_INIT; if (!strncmp(sb.buf, "foo", 3)) printf("%s", sb.buf + 3); Coverity thinks that printf() can be executed, and because initial sb.buf only has one character (from strbuf_slopbuf), sb.buf + 3 is out of bound. What it does not recognize is strbuf_slopbuf[0] is always (*) zero. We always do some string comparison before jumping ahead to "sb.buf + 3" and those operations will stop out of bound accesses. Just make strbuf_slopbuf[] large enough to keep Coverity happy. If it's happy, we'll have cleaner defect list and better chances of spotting true defects. (*) It's not entirely wrong though. Somebody may do sb.buf[0] = 'f' right after variable declaration and ruin all unused strbuf. Signed-off-by: Nguyễn Thái Ngọc Duy --- There are lots of false warnings like this from Coverity. I just wanted to kill them off so we can spot more serious problems easier. I can't really verify that this patch shuts off those warnings because scan.coverity.com policy does not allow forks. I had another patch that avoids corrupting strbuf_slopbuf, by putting it to .rodata section. The patch is more invasive though, because this statement buf.buf[buf.len] = '\0' now has to make sure buf.buf is not strbuf_slopbuf. It feels safer, but probably not enough to justify the change. strbuf.c | 5 + 1 file changed, 5 insertions(+) diff --git a/strbuf.c b/strbuf.c index 0d4f4e5..0d7c3cf 100644 --- a/strbuf.c +++ b/strbuf.c @@ -16,7 +16,12 @@ int starts_with(const char *str, const char *prefix) * buf is non NULL and ->buf is NUL terminated even for a freshly * initialized strbuf. */ +#ifndef __COVERITY__ char strbuf_slopbuf[1]; +#else +/* Stop so many incorrect out-of-boundary warnings from Coverity */ +char strbuf_slopbuf[64]; +#endif void strbuf_init(struct strbuf *sb, size_t hint) { -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] apply: fix adding new files on i-t-a entries
Since d95d728 (diff-lib.c: adjust position of i-t-a entries in diff - 2015-03-16), a normal "git diff" on i-t-a entries would produce a diff that _adds_ those files, not just adding content to existing and empty files like before. This is correct. Unfortunately, applying such a patch on the same repository that has the same i-t-a entries will fail with message "already exists in index" because git-apply checks, sees those i-t-a entries and aborts. git-apply does not realize those are for bookkeeping only, they do not really exist in the index. This patch tightens the "exists in index" check, ignoring i-t-a entries. For fixing the above problem, only the change in check_to_create() is needed. For other changes, - load_current(), reporting "not exists in index" is better than "does not match index" - check_preimage(), similar to load_current(), but it may also use ce_mode from i-t-a entry which is always zero - get_current_sha1(), or actually build_fake_ancestor(), we should not add i-t-a entries to the temporary index, at least not without also adding i-t-a flag back Since "git add -p" and "git am" use "git apply" underneath, they are broken too before this patch and fixed now. Reported-by: Patrick Higgins Reported-by: Bjørnar Snoksrud Signed-off-by: Nguyễn Thái Ngọc Duy --- I think I'm opening a can of worms with d95d728. There's nothing wrong with that patch per se, but with this issue popping up, I need to go over all {cache,index}_name_pos call sites and see what would be the sensible behavior when i-t-a entries are involved. So far blame, rm and checkout-entry and "checkout " are on my to-think-or-fix list. But this patch can get in early to fix a real regression instead of waiting for one big series. A lot more discussions will be had before that series gets in good shape. builtin/apply.c | 8 cache.h | 2 ++ read-cache.c | 12 t/t2203-add-intent.sh | 10 ++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index 146be97..4f813ac 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -3344,7 +3344,7 @@ static int load_current(struct image *image, struct patch *patch) if (!patch->is_new) die("BUG: patch to %s is not a creation", patch->old_name); - pos = cache_name_pos(name, strlen(name)); + pos = exists_in_cache(name, strlen(name)); if (pos < 0) return error(_("%s: does not exist in index"), name); ce = active_cache[pos]; @@ -3497,7 +3497,7 @@ static int check_preimage(struct patch *patch, struct cache_entry **ce, struct s } if (check_index && !previous) { - int pos = cache_name_pos(old_name, strlen(old_name)); + int pos = exists_in_cache(old_name, strlen(old_name)); if (pos < 0) { if (patch->is_new < 0) goto is_new; @@ -3551,7 +3551,7 @@ static int check_to_create(const char *new_name, int ok_if_exists) struct stat nst; if (check_index && - cache_name_pos(new_name, strlen(new_name)) >= 0 && + exists_in_cache(new_name, strlen(new_name)) >= 0 && !ok_if_exists) return EXISTS_IN_INDEX; if (cached) @@ -3824,7 +3824,7 @@ static int get_current_sha1(const char *path, unsigned char *sha1) if (read_cache() < 0) return -1; - pos = cache_name_pos(path, strlen(path)); + pos = exists_in_cache(path, strlen(path)); if (pos < 0) return -1; hashcpy(sha1, active_cache[pos]->sha1); diff --git a/cache.h b/cache.h index 571c98f..6a44cb6 100644 --- a/cache.h +++ b/cache.h @@ -341,6 +341,7 @@ extern void free_name_hash(struct index_state *istate); #define discard_cache() discard_index(&the_index) #define unmerged_cache() unmerged_index(&the_index) #define cache_name_pos(name, namelen) index_name_pos(&the_index,(name),(namelen)) +#define exists_in_cache(name, namelen) exists_in_index(&the_index,(name),(namelen)) #define add_cache_entry(ce, option) add_index_entry(&the_index, (ce), (option)) #define rename_cache_entry_at(pos, new_name) rename_index_entry_at(&the_index, (pos), (new_name)) #define remove_cache_entry_at(pos) remove_index_entry_at(&the_index, (pos)) @@ -512,6 +513,7 @@ extern int verify_path(const char *path); extern struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen); extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase); extern int index_name_pos(const struct index_state *, const char *name, int namelen); +ex
[PATCH] blame: remove obsolete comment
That "someday" in the comment happened two years later in b65982b (Optimize "diff-index --cached" using cache-tree - 2009-05-20) Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/blame.c | 5 - 1 file changed, 5 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index b3e948e..b077bb6 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2371,11 +2371,6 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ce->ce_mode = create_ce_mode(mode); add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); - /* -* We are not going to write this out, so this does not matter -* right now, but someday we might optimize diff-index --cached -* with cache-tree information. -*/ cache_tree_invalidate_path(&the_index, path); return commit; -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] setup: set env $GIT_WORK_TREE when work tree is set, like $GIT_DIR
In the test case, we run setup_git_dir_gently() the first time to read $GIT_DIR/config so that we can resolve aliases. We'll enter setup_discovered_git_dir() and may or may not call set_git_dir() near the end of the function, depending on whether the detected git dir is ".git" or not. This set_git_dir() will set env var $GIT_DIR. For normal repo, git dir detected via setup_discovered_git_dir() will be ".git", and set_git_dir() is not called. If .git file is used however, the git dir can't be ".git" and set_git_dir() is called and $GIT_DIR set. This is the key of this problem. If we expand an alias (or autocorrect command names), then setup_git_dir_gently() is run the second time. If $GIT_DIR is not set in the first run, we run the same setup_discovered_git_dir() as before. Nothing to see. If it is, however, we'll enter setup_explicit_git_dir() this time. This is where the "fun" is. The legacy behavior is, if $GIT_WORK_TREE is not set but $GIT_DIR is, cwd is chosen as worktree's top. If you happen to stand at worktree's top when you do this, all is well. If you are in a subdir "foo/bar" (real worktree's top is "foo"), this behavior bites you: your detected worktree is now "foo/bar", but the first run correctly detected worktree as "foo". You get "internal error: work tree has already been set" as a result. Bottom line is, when $GIT_DIR is set, $GIT_WORK_TREE should be set too unless there's no work tree. But setting $GIT_WORK_TREE inside set_git_dir() may backfire. We don't know at that point if work tree is already configured by the caller. So set it when work tree is detected. It does not harm if $GIT_WORK_TREE is set while $GIT_DIR is not. Reported-by: Bjørnar Snoksrud Signed-off-by: Nguyễn Thái Ngọc Duy --- environment.c | 2 ++ t/t0002-gitfile.sh | 17 + 2 files changed, 19 insertions(+) diff --git a/environment.c b/environment.c index 61c685b..8f1b249 100644 --- a/environment.c +++ b/environment.c @@ -231,6 +231,8 @@ void set_git_work_tree(const char *new_work_tree) } git_work_tree_initialized = 1; work_tree = xstrdup(real_path(new_work_tree)); + if (setenv(GIT_WORK_TREE_ENVIRONMENT, work_tree, 1)) + error("Could not set GIT_WORK_TREE to '%s'", work_tree); } const char *get_git_work_tree(void) diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh index 37e9396..9393322 100755 --- a/t/t0002-gitfile.sh +++ b/t/t0002-gitfile.sh @@ -99,4 +99,21 @@ test_expect_success 'check rev-list' ' test "$SHA" = "$(git rev-list HEAD)" ' +test_expect_success 'setup_git_dir twice in subdir' ' + git init sgd && + ( + cd sgd && + git config alias.lsfi ls-files && + mv .git .realgit && + echo "gitdir: .realgit" >.git && + mkdir subdir && + cd subdir && + >foo && + git add foo && + git lsfi >actual && + echo foo >expected && + test_cmp expected actual + ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] worktree: new place for "git prune --worktrees"
Commit 23af91d (prune: strategies for linked checkouts - 2014-11-30) adds "--worktrees" to "git prune" without realizing that "git prune" is for object database only. This patch moves the same functionality to a new command "git worktree". Signed-off-by: Nguyễn Thái Ngọc Duy --- In future I probably move the big block of text in git-checkout.txt to git-worktree.txt and add "git worktree list". But let's start with something small and simple before "git prune --worktrees" is shipped out. .gitignore | 1 + Documentation/git-prune.txt | 3 - Documentation/git-worktree.txt (new) | 48 + Makefile | 1 + builtin.h| 1 + builtin/gc.c | 2 +- builtin/prune.c | 99 -- builtin/worktree.c (new) | 133 +++ command-list.txt | 1 + git.c| 1 + t/t2026-prune-linked-checkouts.sh| 22 +++--- 11 files changed, 198 insertions(+), 114 deletions(-) create mode 100644 Documentation/git-worktree.txt create mode 100644 builtin/worktree.c diff --git a/.gitignore b/.gitignore index 422c538..a685ec1 100644 --- a/.gitignore +++ b/.gitignore @@ -171,6 +171,7 @@ /git-verify-tag /git-web--browse /git-whatchanged +/git-worktree /git-write-tree /git-core-*/?* /gitweb/GITWEB-BUILD-OPTIONS diff --git a/Documentation/git-prune.txt b/Documentation/git-prune.txt index 1cf3bed..7a493c8 100644 --- a/Documentation/git-prune.txt +++ b/Documentation/git-prune.txt @@ -48,9 +48,6 @@ OPTIONS --expire :: Only expire loose objects older than . ---worktrees:: - Prune dead working tree information in $GIT_DIR/worktrees. - ...:: In addition to objects reachable from any of our references, keep objects diff --git a/Documentation/git-worktree.txt b/Documentation/git-worktree.txt new file mode 100644 index 000..b9072e0 --- /dev/null +++ b/Documentation/git-worktree.txt @@ -0,0 +1,48 @@ +git-worktree(1) +=== + +NAME + +git-worktree - Manage multiple worktrees + + +SYNOPSIS + +[verse] +'git worktree prune' [-n] [-v] [--expire ] + +DESCRIPTION +--- + +Manage multiple worktrees attached to the same repository. These are +created by the command `git checkout --to`. + +COMMANDS + +prune:: + +Prune working tree information in $GIT_DIR/worktrees. + +OPTIONS +--- + +-n:: +--dry-run:: + Do not remove anything; just report what it would + remove. + +-v:: +--verbose:: + Report all removals. + +--expire :: + Only expire unused worktrees older than . + +SEE ALSO + + +linkgit:git-checkout[1] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 149f1c7..89785f6 100644 --- a/Makefile +++ b/Makefile @@ -909,6 +909,7 @@ BUILTIN_OBJS += builtin/var.o BUILTIN_OBJS += builtin/verify-commit.o BUILTIN_OBJS += builtin/verify-pack.o BUILTIN_OBJS += builtin/verify-tag.o +BUILTIN_OBJS += builtin/worktree.o BUILTIN_OBJS += builtin/write-tree.o GITLIBS = $(LIB_FILE) $(XDIFF_LIB) diff --git a/builtin.h b/builtin.h index b87df70..9e04f97 100644 --- a/builtin.h +++ b/builtin.h @@ -133,6 +133,7 @@ extern int cmd_verify_commit(int argc, const char **argv, const char *prefix); extern int cmd_verify_tag(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_whatchanged(int argc, const char **argv, const char *prefix); +extern int cmd_worktree(int argc, const char **argv, const char *prefix); extern int cmd_write_tree(int argc, const char **argv, const char *prefix); extern int cmd_verify_pack(int argc, const char **argv, const char *prefix); extern int cmd_show_ref(int argc, const char **argv, const char *prefix); diff --git a/builtin/gc.c b/builtin/gc.c index 36fe333..4957c39 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -293,7 +293,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) argv_array_pushl(&reflog, "reflog", "expire", "--all", NULL); argv_array_pushl(&repack, "repack", "-d", "-l", NULL); argv_array_pushl(&prune, "prune", "--expire", NULL); - argv_array_pushl(&prune_worktrees, "prune", "--worktrees", "--expire", NULL); + argv_array_pushl(&prune_worktrees, "worktree", "prune", "--expire", NULL); argv_array_pushl(&rerere, "rerere", "gc", NULL); gc_config(); diff --git a/builtin/prune.c b/builtin/prune.c index 0c73246..10b03d3 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -6,7 +6,6 @@ #include "re
[PATCH] Avoid the need of "--" when wildcard pathspec is used
When "--" is lacking from the command line and a command can take both revs and paths, the idea is if an argument can be seen as both an extended SHA-1 and a path, then "--" is required or git refuses to continue. It's currently implemented as: (1) if an argument is rev, then it must not exist in worktree (2) else, it must exist in worktree (3) else, "--" is required. These rules work for literal paths, but when wildcard pathspec is involved, it always requires the user to add "--" because it fails (2) and (1) is never met. This patch prefers wildcard over extended sha-1 syntax that includes wildcards, so that we can specify wildcards to select paths in worktree without "--" most of the time. If wildcards are found in extended sha-1 syntax, then "--" is _always_ required. Because ref names don't allow wildcards, you can only hit that "needs '--'" new rule if you use ":/", "^{/}" or ":wildcards/in/literal/paths". So it's really rare. The rules after this patch become: (1) if an arg is a rev, then it must either exist in worktree or not a wild card (2) else, it either exists in worktree or is a wild card (3) else, "--" is required. Signed-off-by: Nguyễn Thái Ngọc Duy --- setup.c | 2 ++ t/t2019-checkout-ambiguous-ref.sh | 26 ++ 2 files changed, 28 insertions(+) diff --git a/setup.c b/setup.c index 82c0cc2..f7cb93b 100644 --- a/setup.c +++ b/setup.c @@ -143,6 +143,8 @@ int check_filename(const char *prefix, const char *arg) name = arg + 2; } else if (!no_wildcard(arg)) return 1; + else if (!no_wildcard(arg)) + return 1; else if (prefix) name = prefix_filename(prefix, strlen(prefix), arg); else diff --git a/t/t2019-checkout-ambiguous-ref.sh b/t/t2019-checkout-ambiguous-ref.sh index b99d519..e5ceba3 100755 --- a/t/t2019-checkout-ambiguous-ref.sh +++ b/t/t2019-checkout-ambiguous-ref.sh @@ -56,4 +56,30 @@ test_expect_success VAGUENESS_SUCCESS 'checkout reports switch to branch' ' test_i18ngrep ! "^HEAD is now at" stderr ' +test_expect_success 'wildcard ambiguation' ' + git init ambi && + ( + cd ambi && + echo a >a.c && + git add a.c && + echo b >a.c && + git checkout "*.c" && + echo a >expect && + test_cmp expect a.c + ) +' + +test_expect_success 'wildcard ambiguation (2)' ' + git init ambi2 && + ( + cd ambi2 && + echo a >"*.c" && + git add . && + test_must_fail git show :"*.c" && + git show :"*.c" -- >actual && + echo a >expect && + test_cmp expect actual + ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] Add tests for wildcard "path vs ref" disambiguation
Commit 28fcc0b (pathspec: avoid the need of "--" when wildcard is used - 2015-05-02) changes how the disambiguation rules work. This patch adds some tests to demonstrate, basically, if wildcard characters are in an argument: - if the argument is valid extended sha-1 syntax, "--" must be used - otherwise the argument is considered a path, even without "--" And wildcard can appear in extended sha-1 syntax, either as part of regex in ":/" or as the literal path in ":". The latter case is less likely to happen in real world. But if you do ":/" a lot, you may need to type "--" more. Signed-off-by: Nguyễn Thái Ngọc Duy --- v2 just adds tests as the code is already in 'master' for some time. t/t2019-checkout-ambiguous-ref.sh | 26 ++ 1 file changed, 26 insertions(+) diff --git a/t/t2019-checkout-ambiguous-ref.sh b/t/t2019-checkout-ambiguous-ref.sh index b99d519..e5ceba3 100755 --- a/t/t2019-checkout-ambiguous-ref.sh +++ b/t/t2019-checkout-ambiguous-ref.sh @@ -56,4 +56,30 @@ test_expect_success VAGUENESS_SUCCESS 'checkout reports switch to branch' ' test_i18ngrep ! "^HEAD is now at" stderr ' +test_expect_success 'wildcard ambiguation, paths win' ' + git init ambi && + ( + cd ambi && + echo a >a.c && + git add a.c && + echo b >a.c && + git checkout "*.c" && + echo a >expect && + test_cmp expect a.c + ) +' + +test_expect_success 'wildcard ambiguation, refs lose' ' + git init ambi2 && + ( + cd ambi2 && + echo a >"*.c" && + git add . && + test_must_fail git show :"*.c" && + git show :"*.c" -- >actual && + echo a >expect && + test_cmp expect actual + ) +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] grep: use regcomp() for icase search with non-ascii patterns
Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/grep.c b/grep.c index b58c7c6..48db15a 100644 --- a/grep.c +++ b/grep.c @@ -378,7 +378,7 @@ static void free_pcre_regexp(struct grep_pat *p) } #endif /* !USE_LIBPCRE */ -static int is_fixed(const char *s, size_t len) +static int is_fixed(const char *s, size_t len, int ignore_icase) { size_t i; @@ -391,6 +391,13 @@ static int is_fixed(const char *s, size_t len) for (i = 0; i < len; i++) { if (is_regex_special(s[i])) return 0; + /* +* The builtin substring search can only deal with case +* insensitivity in ascii range. If there is something outside +* of that range, fall back to regcomp. +*/ + if (ignore_icase && (unsigned char)s[i] >= 128) + return 0; } return 1; @@ -398,18 +405,19 @@ static int is_fixed(const char *s, size_t len) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int ignore_icase = opt->regflags & REG_ICASE || p->ignore_case; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) + if (opt->fixed || is_fixed(p->pattern, p->patternlen, ignore_icase)) p->fixed = 1; else p->fixed = 0; if (p->fixed) { - if (opt->regflags & REG_ICASE || p->ignore_case) + if (ignore_case) p->kws = kwsalloc(tolower_trans_tbl); else p->kws = kwsalloc(NULL); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 4/9] grep/icase: avoid kwsset when -F is specified
Similar to the previous commit, we can't use kws on icase search outside ascii range. But we can't simply pass the pattern to regcomp/pcre like the previous commit because it may contain regex special characters, so we need to quote the regex first. To avoid misquote traps that could lead to undefined behavior, we always stick to basic regex engine in this case. We don't need fancy features for grepping a literal string anyway. basic_regex_quote_buf() assumes that if the pattern is in a multibyte encoding, ascii chars must be unambiguously encoded as single bytes. This is true at least for UTF-8. For others, let's wait until people yell up. Chances are nobody uses multibyte, non utf-8 charsets any more.. Helped-by: René Scharfe Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 25 - quote.c | 37 + quote.h | 1 + t/t7812-grep-icase-non-ascii.sh | 26 ++ 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/grep.c b/grep.c index d795b0e..8fce54f 100644 --- a/grep.c +++ b/grep.c @@ -5,6 +5,7 @@ #include "diff.h" #include "diffcore.h" #include "commit.h" +#include "quote.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len) return 1; } +static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) +{ + struct strbuf sb = STRBUF_INIT; + int err; + + basic_regex_quote_buf(&sb, p->pattern); + err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED); + if (opt->debug) + fprintf(stderr, "fixed%s\n", sb.buf); + strbuf_release(&sb); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, 1024); + regfree(&p->regexp); + compile_regexp_failed(p, errbuf); + } +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int icase_non_ascii; @@ -411,7 +430,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else if (opt->fixed) { - p->fixed = 1; + p->fixed = !icase_non_ascii; + if (!p->fixed) { + compile_fixed_regexp(p, opt); + return; + } } else p->fixed = 0; diff --git a/quote.c b/quote.c index 7920e18..43a8057 100644 --- a/quote.c +++ b/quote.c @@ -439,3 +439,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src) } strbuf_addch(sb, '"'); } + +void basic_regex_quote_buf(struct strbuf *sb, const char *src) +{ + char c; + + if (*src == '^') { + /* only beginning '^' is special and needs quoting */ + strbuf_addch(sb, '\\'); + strbuf_addch(sb, *src++); + } + if (*src == '*') + /* beginning '*' is not special, no quoting */ + strbuf_addch(sb, *src++); + + while ((c = *src++)) { + switch (c) { + case '[': + case '.': + case '\\': + case '*': + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + case '$': + /* only the end '$' is special and needs quoting */ + if (*src == '\0') + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + default: + strbuf_addch(sb, c); + break; + } + } +} diff --git a/quote.h b/quote.h index 99e04d3..362d315 100644 --- a/quote.h +++ b/quote.h @@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char *prefix, extern void perl_quote_buf(struct strbuf *sb, const char *src); extern void python_quote_buf(struct strbuf *sb, const char *src); extern void tcl_quote_buf(struct strbuf *sb, const char *src); +extern void basic_regex_quote_buf(struct strbuf *sb, const char *src); #endif diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 63a2630..c945589 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -16,4 +16,30 @@ test_expect_success GETTEXT_LOCALE 'grep litera
[PATCH v2 0/9] icase match on non-ascii
This series fix case insensitive matching on non-ascii charsets. "grep -i", "grep -F -i", "grep --pcre-regexp -i" and "log -i -S" are fixed. Side note, I almost added the third has_non_ascii() function. Maybe we should consider merging the two existing has_non_ascii() functions back, or rename one to something else. Patch 5 is "funny". The patch itself is in iso-8859-1, but my name in the commit message is in utf-8. Let's see how it goes on the apply side. We may need to fix something in this area.. Nguyễn Thái Ngọc Duy (9): grep: allow -F -i combination grep: break down an "if" stmt in preparation for next changes grep/icase: avoid kwsset on literal non-ascii strings grep/icase: avoid kwsset when -F is specified grep/pcre: prepare locale-dependent tables for icase matching gettext: add is_utf8_locale() grep/pcre: support utf-8 diffcore-pickaxe: "share" regex error handling code diffcore-pickaxe: support case insensitive match on non-ascii builtin/grep.c | 2 +- diffcore-pickaxe.c | 27 ++- gettext.c| 7 +++- gettext.h| 5 +++ grep.c | 43 --- grep.h | 1 + quote.c | 37 quote.h | 1 + t/t7812-grep-icase-non-ascii.sh (new +x) | 58 t/t7813-grep-icase-non-ascii.sh (new +x) | 19 +++ 10 files changed, 186 insertions(+), 14 deletions(-) create mode 100755 t/t7812-grep-icase-non-ascii.sh create mode 100755 t/t7813-grep-icase-non-ascii.sh -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 3/9] grep/icase: avoid kwsset on literal non-ascii strings
When we detect the pattern is just a literal string, we avoid heavy regex engine and use fast substring search implemented in kwsset.c. But kws uses git-ctype which is locale-independent so it does not know how to fold case properly outside ascii range. Let regcomp or pcre take care of this case instead. Slower, but accurate. Helped-by: René Scharfe Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 7 ++- t/t7812-grep-icase-non-ascii.sh (new +x) | 19 +++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100755 t/t7812-grep-icase-non-ascii.sh diff --git a/grep.c b/grep.c index bd32f66..d795b0e 100644 --- a/grep.c +++ b/grep.c @@ -4,6 +4,7 @@ #include "xdiff-interface.h" #include "diff.h" #include "diffcore.h" +#include "commit.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -398,12 +399,16 @@ static int is_fixed(const char *s, size_t len) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int icase_non_ascii; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + icase_non_ascii = + (opt->regflags & REG_ICASE || p->ignore_case) && + has_non_ascii(p->pattern); - if (is_fixed(p->pattern, p->patternlen)) + if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else if (opt->fixed) { p->fixed = 1; diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh new file mode 100755 index 000..63a2630 --- /dev/null +++ b/t/t7812-grep-icase-non-ascii.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +test_description='grep icase on non-English locales' + +. ./lib-gettext.sh + +test_expect_success GETTEXT_LOCALE 'setup' ' + printf "TILRAUN: Halló Heimur!" >file && + git add file && + LC_ALL="$is_IS_locale" && + export LC_ALL +' + +test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' ' + git grep -i "TILRAUN: Halló Heimur!" && + git grep -i "TILRAUN: HALLÓ HEIMUR!" +' + +test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 9/9] diffcore-pickaxe: support case insensitive match on non-ascii
Similar to the "grep -F -i" case, we can't use kws on icase search outside ascii range, quote we quote the string and pass it to regcomp as a basic regexp and let regex engine deal with case sensitivity. The new test is put in t7812 instead of t4209-log-pickaxe because lib-gettext.sh might cause problems elsewhere, probably.. Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- diffcore-pickaxe.c | 11 +++ t/t7812-grep-icase-non-ascii.sh | 7 +++ 2 files changed, 18 insertions(+) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 7a718fc..6946c15 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -7,6 +7,8 @@ #include "diffcore.h" #include "xdiff-interface.h" #include "kwset.h" +#include "commit.h" +#include "quote.h" typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two, struct diff_options *o, @@ -212,6 +214,15 @@ void diffcore_pickaxe(struct diff_options *o) cflags |= REG_ICASE; err = regcomp(®ex, needle, cflags); regexp = ®ex; + } else if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) && + has_non_ascii(needle)) { + struct strbuf sb = STRBUF_INIT; + int cflags = REG_NEWLINE | REG_ICASE; + + basic_regex_quote_buf(&sb, needle); + err = regcomp(®ex, sb.buf, cflags); + strbuf_release(&sb); + regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) ? tolower_trans_tbl : NULL); diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 1306cc0..20a39d0 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -48,4 +48,11 @@ test_expect_success GETTEXT_LOCALE 'grep string with regex, with -F' ' test_cmp expect2 debug2 ' +test_expect_success GETTEXT_LOCALE 'pickaxe -i on non-ascii' ' + git commit -m first && + git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual && + echo first >expected && + test_cmp expected actual +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 2/9] grep: break down an "if" stmt in preparation for next changes
Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index b58c7c6..bd32f66 100644 --- a/grep.c +++ b/grep.c @@ -403,9 +403,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) + if (is_fixed(p->pattern, p->patternlen)) p->fixed = 1; - else + else if (opt->fixed) { + p->fixed = 1; + } else p->fixed = 0; if (p->fixed) { -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 6/9] gettext: add is_utf8_locale()
This function returns true if git is running under an UTF-8 locale. pcre in the next patch will need this. Signed-off-by: Nguyễn Thái Ngọc Duy --- gettext.c | 7 ++- gettext.h | 5 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/gettext.c b/gettext.c index 7378ba2..601bc80 100644 --- a/gettext.c +++ b/gettext.c @@ -166,12 +166,17 @@ void git_setup_gettext(void) textdomain("git"); } +int is_utf8_locale(void) +{ + return !strcmp(charset, "UTF-8"); +} + /* return the number of columns of string 's' in current locale */ int gettext_width(const char *s) { static int is_utf8 = -1; if (is_utf8 == -1) - is_utf8 = !strcmp(charset, "UTF-8"); + is_utf8 = is_utf8_locale(); return is_utf8 ? utf8_strwidth(s) : strlen(s); } diff --git a/gettext.h b/gettext.h index 33696a4..5e733d4 100644 --- a/gettext.h +++ b/gettext.h @@ -31,6 +31,7 @@ #ifndef NO_GETTEXT extern void git_setup_gettext(void); extern int gettext_width(const char *s); +extern int is_utf8_locale(void); #else static inline void git_setup_gettext(void) { @@ -39,6 +40,10 @@ static inline int gettext_width(const char *s) { return strlen(s); } +static inline int is_utf8_locale(void) +{ + return 0; +} #endif #ifdef GETTEXT_POISON -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 7/9] grep/pcre: support utf-8
In the previous change in this function, we add locale support for single-byte encodings only. It looks like pcre only supports utf-* as multibyte encodings, the others are left in the cold (which is fine). We need to enable PCRE_UTF8 so pcre can parse the string correctly before folding case. Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 2 ++ t/t7812-grep-icase-non-ascii.sh | 6 ++ 2 files changed, 8 insertions(+) diff --git a/grep.c b/grep.c index c79aa70..7c9e437 100644 --- a/grep.c +++ b/grep.c @@ -326,6 +326,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) if (opt->ignore_case) { p->pcre_tables = pcre_maketables(); + if (is_utf8_locale()) + options |= PCRE_UTF8; options |= PCRE_CASELESS; } diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index c945589..1306cc0 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -16,6 +16,12 @@ test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' ' git grep -i "TILRAUN: HALLÓ HEIMUR!" ' +test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre string' ' + git grep --perl-regexp"TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!" +' + test_expect_success GETTEXT_LOCALE 'grep literal string, with -F' ' git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null | grep fixed >debug1 && -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 1/9] grep: allow -F -i combination
-F means "no regex", not "case sensitive" so it should not override -i Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index d04f440..2d392e9 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -806,7 +806,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (!opt.fixed && opt.ignore_case) + if (opt.ignore_case) opt.regflags |= REG_ICASE; compile_grep_patterns(&opt); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 8/9] diffcore-pickaxe: "share" regex error handling code
There's another regcomp code block coming in this function. By moving the error handling code out of this block, we don't have to add the same error handling code in the new block. Signed-off-by: Nguyễn Thái Ngọc Duy --- diffcore-pickaxe.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 185f86b..7a718fc 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o) int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; + int err = 0; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { - int err; int cflags = REG_EXTENDED | REG_NEWLINE; if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)) cflags |= REG_ICASE; err = regcomp(®ex, needle, cflags); - if (err) { - /* The POSIX.2 people are surely sick */ - char errbuf[1024]; - regerror(err, ®ex, errbuf, 1024); - regfree(®ex); - die("invalid regex: %s", errbuf); - } regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) @@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o) kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } + if (err) { + /* The POSIX.2 people are surely sick */ + char errbuf[1024]; + regerror(err, ®ex, errbuf, 1024); + regfree(®ex); + die("invalid regex: %s", errbuf); + } /* Might want to warn when both S and G are on; I don't care... */ pickaxe(&diff_queued_diff, o, regexp, kws, -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 5/9] grep/pcre: prepare locale-dependent tables for icase matching
The default tables are usually built with C locale and only suitable for LANG=C or similar. This should make case insensitive search work correctly for all single-byte charsets. Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 7 +-- grep.h | 1 + t/t7813-grep-icase-non-ascii.sh (new +x) | 19 +++ 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100755 t/t7813-grep-icase-non-ascii.sh diff --git a/grep.c b/grep.c index 8fce54f..c79aa70 100644 --- a/grep.c +++ b/grep.c @@ -324,11 +324,13 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) int erroffset; int options = PCRE_MULTILINE; - if (opt->ignore_case) + if (opt->ignore_case) { + p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; + } p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - NULL); + p->pcre_tables); if (!p->pcre_regexp) compile_regexp_failed(p, error); @@ -362,6 +364,7 @@ static void free_pcre_regexp(struct grep_pat *p) { pcre_free(p->pcre_regexp); pcre_free(p->pcre_extra_info); + pcre_free((void *)p->pcre_tables); } #else /* !USE_LIBPCRE */ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) diff --git a/grep.h b/grep.h index 95f197a..cee4357 100644 --- a/grep.h +++ b/grep.h @@ -48,6 +48,7 @@ struct grep_pat { regex_t regexp; pcre *pcre_regexp; pcre_extra *pcre_extra_info; + const unsigned char *pcre_tables; kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; diff --git a/t/t7813-grep-icase-non-ascii.sh b/t/t7813-grep-icase-non-ascii.sh new file mode 100755 index 000..efef7fb --- /dev/null +++ b/t/t7813-grep-icase-non-ascii.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +test_description='grep icase on non-English locales' + +. ./lib-gettext.sh + +test_expect_success GETTEXT_ISO_LOCALE 'setup' ' + printf "TILRAUN: Hall� Heimur!" >file && + git add file && + LC_ALL="$is_IS_iso_locale" && + export LC_ALL +' + +test_expect_success GETTEXT_ISO_LOCALE,LIBPCRE 'grep pcre string' ' + git grep --perl-regexp -i "TILRAUN: H.ll� Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.LL� HEIMUR!" +' + +test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] pack-objects: rename the field "type" to "real_type"
This is to avoid the too generic name "type" and harmonize with the naming in index-pack. There's a subtle difference though: real_type in index-pack is what the upper level see, no delta types (after delta resolution). But real_type in pack-objects is the type to be written in the pack, delta types are fine (it's actually markers for reused deltas) Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/pack-objects.c | 36 ++-- pack-bitmap-write.c| 6 +++--- pack-objects.h | 2 +- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 80fe8c7..e03bf3e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -244,7 +244,7 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent struct git_istream *st = NULL; if (!usable_delta) { - if (entry->type == OBJ_BLOB && + if (entry->real_type == OBJ_BLOB && entry->size > big_file_threshold && (st = open_istream(entry->idx.sha1, &type, &size, NULL)) != NULL) buf = NULL; @@ -348,7 +348,7 @@ static unsigned long write_reuse_object(struct sha1file *f, struct object_entry struct pack_window *w_curs = NULL; struct revindex_entry *revidx; off_t offset; - enum object_type type = entry->type; + enum object_type type = entry->real_type; unsigned long datalen; unsigned char header[10], dheader[10]; unsigned hdrlen; @@ -452,11 +452,11 @@ static unsigned long write_object(struct sha1file *f, to_reuse = 0; /* explicit */ else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA) + else if (entry->real_type == OBJ_REF_DELTA || entry->real_type == OBJ_OFS_DELTA) /* check_object() decided it for us ... */ to_reuse = usable_delta; /* ... but pack split may override that */ - else if (entry->type != entry->in_pack_type) + else if (entry->real_type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ else if (entry->delta) to_reuse = 0; /* we want to pack afresh */ @@ -676,8 +676,8 @@ static struct object_entry **compute_write_order(void) * And then all remaining commits and tags. */ for (i = last_untagged; i < to_pack.nr_objects; i++) { - if (objects[i].type != OBJ_COMMIT && - objects[i].type != OBJ_TAG) + if (objects[i].real_type != OBJ_COMMIT && + objects[i].real_type != OBJ_TAG) continue; add_to_write_order(wo, &wo_end, &objects[i]); } @@ -686,7 +686,7 @@ static struct object_entry **compute_write_order(void) * And then all the trees. */ for (i = last_untagged; i < to_pack.nr_objects; i++) { - if (objects[i].type != OBJ_TREE) + if (objects[i].real_type != OBJ_TREE) continue; add_to_write_order(wo, &wo_end, &objects[i]); } @@ -994,7 +994,7 @@ static void create_object_entry(const unsigned char *sha1, entry = packlist_alloc(&to_pack, sha1, index_pos); entry->hash = hash; if (type) - entry->type = type; + entry->real_type = type; if (exclude) entry->preferred_base = 1; else @@ -1355,9 +1355,9 @@ static void check_object(struct object_entry *entry) switch (entry->in_pack_type) { default: /* Not a delta hence we've already got all we need. */ - entry->type = entry->in_pack_type; + entry->real_type = entry->in_pack_type; entry->in_pack_header_size = used; - if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB) + if (entry->real_type < OBJ_COMMIT || entry->real_type > OBJ_BLOB) goto give_up; unuse_pack(&w_curs); return; @@ -1411,7 +1411,7 @@ static void check_object(struct object_entry *entry) * deltify other objects against, in order to avoid * circular deltas. */ - entry->type = entry->in_pack_type; + entry->real_type = entry->in_pack_type;
[PATCH 1/2] index-pack: rename the field "type" to "in_pack_type"
We have two types in this code: in-pack and canonical. "in_pack_type" makes it clearer than plain "type". Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/index-pack.c | 42 +- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 48fa472..797e571 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -19,7 +19,7 @@ struct object_entry { struct pack_idx_entry idx; unsigned long size; unsigned char hdr_size; - signed char type; + signed char in_pack_type; signed char real_type; }; @@ -493,7 +493,7 @@ static void *unpack_raw_entry(struct object_entry *obj, p = fill(1); c = *p; use(1); - obj->type = (c >> 4) & 7; + obj->in_pack_type = (c >> 4) & 7; size = (c & 15); shift = 4; while (c & 0x80) { @@ -505,7 +505,7 @@ static void *unpack_raw_entry(struct object_entry *obj, } obj->size = size; - switch (obj->type) { + switch (obj->in_pack_type) { case OBJ_REF_DELTA: hashcpy(ref_sha1, fill(20)); use(20); @@ -534,11 +534,11 @@ static void *unpack_raw_entry(struct object_entry *obj, case OBJ_TAG: break; default: - bad_object(obj->idx.offset, _("unknown object type %d"), obj->type); + bad_object(obj->idx.offset, _("unknown object type %d"), obj->in_pack_type); } obj->hdr_size = consumed_bytes - obj->idx.offset; - data = unpack_entry_data(obj->idx.offset, obj->size, obj->type, sha1); + data = unpack_entry_data(obj->idx.offset, obj->size, obj->in_pack_type, sha1); obj->idx.crc32 = input_crc32; return data; } @@ -631,7 +631,7 @@ static int find_ofs_delta(const off_t offset, enum object_type type) int cmp; cmp = compare_ofs_delta_bases(offset, delta->offset, - type, objects[delta->obj_no].type); + type, objects[delta->obj_no].in_pack_type); if (!cmp) return next; if (cmp < 0) { @@ -685,7 +685,7 @@ static int find_ref_delta(const unsigned char *sha1, enum object_type type) int cmp; cmp = compare_ref_delta_bases(sha1, delta->sha1, - type, objects[delta->obj_no].type); + type, objects[delta->obj_no].in_pack_type); if (!cmp) return next; if (cmp < 0) { @@ -759,7 +759,7 @@ static int check_collison(struct object_entry *entry) enum object_type type; unsigned long size; - if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB) + if (entry->size <= big_file_threshold || entry->in_pack_type != OBJ_BLOB) return -1; memset(&data, 0, sizeof(data)); @@ -767,7 +767,7 @@ static int check_collison(struct object_entry *entry) data.st = open_istream(entry->idx.sha1, &type, &size, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (size != entry->size || type != entry->in_pack_type) die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(entry->idx.sha1)); unpack_data(entry, compare_objects, &data); @@ -891,7 +891,7 @@ static void *get_base_data(struct base_data *c) struct base_data **delta = NULL; int delta_nr = 0, delta_alloc = 0; - while (is_delta_type(c->obj->type) && !c->data) { + while (is_delta_type(c->obj->in_pack_type) && !c->data) { ALLOC_GROW(delta, delta_nr + 1, delta_alloc); delta[delta_nr++] = c; c = c->base; @@ -1085,7 +1085,7 @@ static void *threaded_second_pass(void *data) counter_unlock(); work_lock(); while (nr_dispatched < nr_objects && - is_delta_type(objects[nr_dispatched].type)) + is_delta_type(objects[nr_dispatched].in_pack_type)) nr_dispatched++; if (nr_dispatched >= nr_objects) { work_unlock(); @@ -1121,12 +1121,12 @@ static void parse_pack_objects(unsigned char *sha1) struct object_entry *obj = &objects[i]; void *data = unpack_raw_entry(obj, &ofs_delta->offset,
[PATCH v3 0/9] icase match on non-ascii
Compared to v2 - fix grep/pcre on utf-8 even in case is sensitive - peek at $LANG and friends anyway for utf-8 detection even when gettext support is not built in git - s/quote we quote/so we quote/ in 9/9 - rename t7813, s/non-ascii/iso/ Nguyễn Thái Ngọc Duy (9): grep: allow -F -i combination grep: break down an "if" stmt in preparation for next changes grep/icase: avoid kwsset on literal non-ascii strings grep/icase: avoid kwsset when -F is specified grep/pcre: prepare locale-dependent tables for icase matching gettext: add is_utf8_locale() grep/pcre: support utf-8 diffcore-pickaxe: "share" regex error handling code diffcore-pickaxe: support case insensitive match on non-ascii builtin/grep.c | 2 +- diffcore-pickaxe.c | 27 + gettext.c| 24 +++- gettext.h| 1 + grep.c | 44 +++-- grep.h | 1 + quote.c | 37 ++ quote.h | 1 + t/t7812-grep-icase-non-ascii.sh (new +x) | 67 t/t7813-grep-icase-iso.sh (new +x) | 19 + 10 files changed, 208 insertions(+), 15 deletions(-) create mode 100755 t/t7812-grep-icase-non-ascii.sh create mode 100755 t/t7813-grep-icase-iso.sh -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 1/9] grep: allow -F -i combination
-F means "no regex", not "case sensitive" so it should not override -i Signed-off-by: Nguyễn Thái Ngọc Duy --- builtin/grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index d04f440..2d392e9 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -806,7 +806,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (!opt.fixed && opt.ignore_case) + if (opt.ignore_case) opt.regflags |= REG_ICASE; compile_grep_patterns(&opt); -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 2/9] grep: break down an "if" stmt in preparation for next changes
Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index b58c7c6..bd32f66 100644 --- a/grep.c +++ b/grep.c @@ -403,9 +403,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) + if (is_fixed(p->pattern, p->patternlen)) p->fixed = 1; - else + else if (opt->fixed) { + p->fixed = 1; + } else p->fixed = 0; if (p->fixed) { -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 4/9] grep/icase: avoid kwsset when -F is specified
Similar to the previous commit, we can't use kws on icase search outside ascii range. But we can't simply pass the pattern to regcomp/pcre like the previous commit because it may contain regex special characters, so we need to quote the regex first. To avoid misquote traps that could lead to undefined behavior, we always stick to basic regex engine in this case. We don't need fancy features for grepping a literal string anyway. basic_regex_quote_buf() assumes that if the pattern is in a multibyte encoding, ascii chars must be unambiguously encoded as single bytes. This is true at least for UTF-8. For others, let's wait until people yell up. Chances are nobody uses multibyte, non utf-8 charsets any more.. Helped-by: René Scharfe Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 25 - quote.c | 37 + quote.h | 1 + t/t7812-grep-icase-non-ascii.sh | 26 ++ 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/grep.c b/grep.c index d795b0e..8fce54f 100644 --- a/grep.c +++ b/grep.c @@ -5,6 +5,7 @@ #include "diff.h" #include "diffcore.h" #include "commit.h" +#include "quote.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len) return 1; } +static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) +{ + struct strbuf sb = STRBUF_INIT; + int err; + + basic_regex_quote_buf(&sb, p->pattern); + err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED); + if (opt->debug) + fprintf(stderr, "fixed%s\n", sb.buf); + strbuf_release(&sb); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, 1024); + regfree(&p->regexp); + compile_regexp_failed(p, errbuf); + } +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int icase_non_ascii; @@ -411,7 +430,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else if (opt->fixed) { - p->fixed = 1; + p->fixed = !icase_non_ascii; + if (!p->fixed) { + compile_fixed_regexp(p, opt); + return; + } } else p->fixed = 0; diff --git a/quote.c b/quote.c index 7920e18..43a8057 100644 --- a/quote.c +++ b/quote.c @@ -439,3 +439,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src) } strbuf_addch(sb, '"'); } + +void basic_regex_quote_buf(struct strbuf *sb, const char *src) +{ + char c; + + if (*src == '^') { + /* only beginning '^' is special and needs quoting */ + strbuf_addch(sb, '\\'); + strbuf_addch(sb, *src++); + } + if (*src == '*') + /* beginning '*' is not special, no quoting */ + strbuf_addch(sb, *src++); + + while ((c = *src++)) { + switch (c) { + case '[': + case '.': + case '\\': + case '*': + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + case '$': + /* only the end '$' is special and needs quoting */ + if (*src == '\0') + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + default: + strbuf_addch(sb, c); + break; + } + } +} diff --git a/quote.h b/quote.h index 99e04d3..362d315 100644 --- a/quote.h +++ b/quote.h @@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char *prefix, extern void perl_quote_buf(struct strbuf *sb, const char *src); extern void python_quote_buf(struct strbuf *sb, const char *src); extern void tcl_quote_buf(struct strbuf *sb, const char *src); +extern void basic_regex_quote_buf(struct strbuf *sb, const char *src); #endif diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 63a2630..c945589 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -16,4 +16,30 @@ test_expect_success GETTEXT_LOCALE 'grep litera
[PATCH v3 3/9] grep/icase: avoid kwsset on literal non-ascii strings
When we detect the pattern is just a literal string, we avoid heavy regex engine and use fast substring search implemented in kwsset.c. But kws uses git-ctype which is locale-independent so it does not know how to fold case properly outside ascii range. Let regcomp or pcre take care of this case instead. Slower, but accurate. Helped-by: René Scharfe Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 7 ++- t/t7812-grep-icase-non-ascii.sh (new +x) | 19 +++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100755 t/t7812-grep-icase-non-ascii.sh diff --git a/grep.c b/grep.c index bd32f66..d795b0e 100644 --- a/grep.c +++ b/grep.c @@ -4,6 +4,7 @@ #include "xdiff-interface.h" #include "diff.h" #include "diffcore.h" +#include "commit.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -398,12 +399,16 @@ static int is_fixed(const char *s, size_t len) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int icase_non_ascii; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + icase_non_ascii = + (opt->regflags & REG_ICASE || p->ignore_case) && + has_non_ascii(p->pattern); - if (is_fixed(p->pattern, p->patternlen)) + if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else if (opt->fixed) { p->fixed = 1; diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh new file mode 100755 index 000..63a2630 --- /dev/null +++ b/t/t7812-grep-icase-non-ascii.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +test_description='grep icase on non-English locales' + +. ./lib-gettext.sh + +test_expect_success GETTEXT_LOCALE 'setup' ' + printf "TILRAUN: Halló Heimur!" >file && + git add file && + LC_ALL="$is_IS_locale" && + export LC_ALL +' + +test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' ' + git grep -i "TILRAUN: Halló Heimur!" && + git grep -i "TILRAUN: HALLÓ HEIMUR!" +' + +test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 5/9] grep/pcre: prepare locale-dependent tables for icase matching
The default tables are usually built with C locale and only suitable for LANG=C or similar. This should make case insensitive search work correctly for all single-byte charsets. Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 8 ++-- grep.h | 1 + t/t7813-grep-icase-iso.sh (new +x) | 19 +++ 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100755 t/t7813-grep-icase-iso.sh diff --git a/grep.c b/grep.c index 8fce54f..f0fbf99 100644 --- a/grep.c +++ b/grep.c @@ -324,11 +324,14 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) int erroffset; int options = PCRE_MULTILINE; - if (opt->ignore_case) + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) + p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; + } p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - NULL); + p->pcre_tables); if (!p->pcre_regexp) compile_regexp_failed(p, error); @@ -362,6 +365,7 @@ static void free_pcre_regexp(struct grep_pat *p) { pcre_free(p->pcre_regexp); pcre_free(p->pcre_extra_info); + pcre_free((void *)p->pcre_tables); } #else /* !USE_LIBPCRE */ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) diff --git a/grep.h b/grep.h index 95f197a..cee4357 100644 --- a/grep.h +++ b/grep.h @@ -48,6 +48,7 @@ struct grep_pat { regex_t regexp; pcre *pcre_regexp; pcre_extra *pcre_extra_info; + const unsigned char *pcre_tables; kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; diff --git a/t/t7813-grep-icase-iso.sh b/t/t7813-grep-icase-iso.sh new file mode 100755 index 000..efef7fb --- /dev/null +++ b/t/t7813-grep-icase-iso.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +test_description='grep icase on non-English locales' + +. ./lib-gettext.sh + +test_expect_success GETTEXT_ISO_LOCALE 'setup' ' + printf "TILRAUN: Hall� Heimur!" >file && + git add file && + LC_ALL="$is_IS_iso_locale" && + export LC_ALL +' + +test_expect_success GETTEXT_ISO_LOCALE,LIBPCRE 'grep pcre string' ' + git grep --perl-regexp -i "TILRAUN: H.ll� Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.LL� HEIMUR!" +' + +test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 7/9] grep/pcre: support utf-8
In the previous change in this function, we add locale support for single-byte encodings only. It looks like pcre only supports utf-* as multibyte encodings, the others are left in the cold (which is fine). We need to enable PCRE_UTF8 so pcre can find character boundary correctly. It's needed for case folding (when --ignore-case is used) or '*', '+' or similar syntax is used. The "has_non_ascii()" check is to be on the conservative side. If there's non-ascii in the pattern, the searched content could still be in utf-8, but we can treat it just like a byte stream and everything should work. If we force utf-8 based on locale only and pcre validates utf-8 and the file content is in non-utf8 encoding, things break. Noticed-by: Plamen Totev Helped-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 2 ++ t/t7812-grep-icase-non-ascii.sh | 15 +++ 2 files changed, 17 insertions(+) diff --git a/grep.c b/grep.c index f0fbf99..07621c1 100644 --- a/grep.c +++ b/grep.c @@ -329,6 +329,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE_UTF8; p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, p->pcre_tables); diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index c945589..e861a15 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -16,6 +16,21 @@ test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' ' git grep -i "TILRAUN: HALLÓ HEIMUR!" ' +test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 icase' ' + git grep --perl-regexp"TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!" +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' ' + printf "TILRAUN: Hallóó Heimur!" >file2 && + git add file2 && + git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual && + echo file >expected && + echo file2 >>expected && + test_cmp expected actual +' + test_expect_success GETTEXT_LOCALE 'grep literal string, with -F' ' git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null | grep fixed >debug1 && -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 6/9] gettext: add is_utf8_locale()
This function returns true if git is running under an UTF-8 locale. pcre in the next patch will need this. is_encoding_utf8() is used instead of strcmp() to catch both "utf-8" and "utf8" suffixes. When built with no gettext support, we peek in several env variables to detect UTF-8. pcre library might support utf-8 even if libc or git is built without locale support.. The peeking code is a copy from compat/regex/regcomp.c. Helped-by: Torsten Bögershausen Signed-off-by: Nguyễn Thái Ngọc Duy --- gettext.c | 24 ++-- gettext.h | 1 + 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/gettext.c b/gettext.c index 7378ba2..aaf1688 100644 --- a/gettext.c +++ b/gettext.c @@ -18,6 +18,8 @@ # endif #endif +static const char *charset; + /* * Guess the user's preferred languages from the value in LANGUAGE environment * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. @@ -65,7 +67,6 @@ static int test_vsnprintf(const char *fmt, ...) return ret; } -static const char *charset; static void init_gettext_charset(const char *domain) { /* @@ -171,8 +172,27 @@ int gettext_width(const char *s) { static int is_utf8 = -1; if (is_utf8 == -1) - is_utf8 = !strcmp(charset, "UTF-8"); + is_utf8 = is_utf8_locale(); return is_utf8 ? utf8_strwidth(s) : strlen(s); } #endif + +int is_utf8_locale(void) +{ +#ifdef NO_GETTEXT + if (!charset) { + const char *env = getenv("LC_ALL"); + if (!env || !*env) + env = getenv("LC_CTYPE"); + if (!env || !*env) + env = getenv("LANG"); + if (!env) + env = ""; + if (strchr(env, '.')) + env = strchr(env, '.') + 1; + charset = xstrdup(env); + } +#endif + return is_encoding_utf8(charset); +} diff --git a/gettext.h b/gettext.h index 33696a4..7eee64a 100644 --- a/gettext.h +++ b/gettext.h @@ -90,5 +90,6 @@ const char *Q_(const char *msgid, const char *plu, unsigned long n) #endif const char *get_preferred_languages(void); +extern int is_utf8_locale(void); #endif -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 8/9] diffcore-pickaxe: "share" regex error handling code
There's another regcomp code block coming in this function. By moving the error handling code out of this block, we don't have to add the same error handling code in the new block. Signed-off-by: Nguyễn Thái Ngọc Duy --- diffcore-pickaxe.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 185f86b..7a718fc 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o) int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; + int err = 0; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { - int err; int cflags = REG_EXTENDED | REG_NEWLINE; if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)) cflags |= REG_ICASE; err = regcomp(®ex, needle, cflags); - if (err) { - /* The POSIX.2 people are surely sick */ - char errbuf[1024]; - regerror(err, ®ex, errbuf, 1024); - regfree(®ex); - die("invalid regex: %s", errbuf); - } regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) @@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o) kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } + if (err) { + /* The POSIX.2 people are surely sick */ + char errbuf[1024]; + regerror(err, ®ex, errbuf, 1024); + regfree(®ex); + die("invalid regex: %s", errbuf); + } /* Might want to warn when both S and G are on; I don't care... */ pickaxe(&diff_queued_diff, o, regexp, kws, -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 9/9] diffcore-pickaxe: support case insensitive match on non-ascii
Similar to the "grep -F -i" case, we can't use kws on icase search outside ascii range, so we quote the string and pass it to regcomp as a basic regexp and let regex engine deal with case sensitivity. The new test is put in t7812 instead of t4209-log-pickaxe because lib-gettext.sh might cause problems elsewhere, probably.. Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- diffcore-pickaxe.c | 11 +++ t/t7812-grep-icase-non-ascii.sh | 7 +++ 2 files changed, 18 insertions(+) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 7a718fc..6946c15 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -7,6 +7,8 @@ #include "diffcore.h" #include "xdiff-interface.h" #include "kwset.h" +#include "commit.h" +#include "quote.h" typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two, struct diff_options *o, @@ -212,6 +214,15 @@ void diffcore_pickaxe(struct diff_options *o) cflags |= REG_ICASE; err = regcomp(®ex, needle, cflags); regexp = ®ex; + } else if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) && + has_non_ascii(needle)) { + struct strbuf sb = STRBUF_INIT; + int cflags = REG_NEWLINE | REG_ICASE; + + basic_regex_quote_buf(&sb, needle); + err = regcomp(®ex, sb.buf, cflags); + strbuf_release(&sb); + regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) ? tolower_trans_tbl : NULL); diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index e861a15..d07fa20 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -57,4 +57,11 @@ test_expect_success GETTEXT_LOCALE 'grep string with regex, with -F' ' test_cmp expect2 debug2 ' +test_expect_success GETTEXT_LOCALE 'pickaxe -i on non-ascii' ' + git commit -m first && + git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual && + echo first >expected && + test_cmp expected actual +' + test_done -- 2.3.0.rc1.137.g477eb31 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 00/11] nd/icase updates
This update drops 1/12, which is an unnecessary change, and changes a couple of echo/printf to test_write_lines. One of those echo uses backlashes and causes problems with Debian dash. Interdiff therefore is not really interesting diff --git a/builtin/grep.c b/builtin/grep.c index 46c5ba1..8c516a9 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -809,7 +809,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (opt.ignore_case) + if (!opt.fixed && opt.ignore_case) opt.regflags |= REG_ICASE; compile_grep_patterns(&opt); diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 4176625..169fd8d 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -5,7 +5,7 @@ test_description='grep icase on non-English locales' . ./lib-gettext.sh test_expect_success GETTEXT_LOCALE 'setup' ' - printf "TILRAUN: Halló Heimur!" >file && + test_write_lines "TILRAUN: Halló Heimur!" >file && git add file && LC_ALL="$is_IS_locale" && export LC_ALL @@ -27,7 +27,7 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 icase' ' ' test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' ' - printf "TILRAUN: Hallóó Heimur!" >file2 && + test_write_lines "TILRAUN: Hallóó Heimur!" >file2 && git add file2 && git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual && echo file >expected && @@ -38,26 +38,26 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' ' test_expect_success REGEX_LOCALE 'grep literal string, with -F' ' git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null | grep fixed >debug1 && - echo "fixed TILRAUN: Halló Heimur!" >expect1 && + test_write_lines "fixed TILRAUN: Halló Heimur!" >expect1 && test_cmp expect1 debug1 && git grep --debug -i -F "TILRAUN: HALLÓ HEIMUR!" 2>&1 >/dev/null | grep fixed >debug2 && - echo "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 && + test_write_lines "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 && test_cmp expect2 debug2 ' test_expect_success REGEX_LOCALE 'grep string with regex, with -F' ' - printf "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file && + test_write_lines "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file && git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 >/dev/null | grep fixed >debug1 && - echo "fixed \\^*TILR^AUN:\\.\\* Halló \$He\\[]imur!\\\$" >expect1 && + test_write_lines "fixed \\^*TILR^AUN:\\.\\* Halló \$He\\[]imur!\\\$" >expect1 && test_cmp expect1 debug1 && git grep --debug -i -F "^*TILR^AUN:.* \\HALLÓ \$HE[]IMUR!\$" 2>&1 >/dev/null | grep fixed >debug2 && - echo "fixed \\^*TILR^AUN:\\.\\* HALLÓ \$HE\\[]IMUR!\\\$" >expect2 && + test_write_lines "fixed \\^*TILR^AUN:\\.\\* HALLÓ \$HE\\[]IMUR!\\\$" >expect2 && test_cmp expect2 debug2 ' Nguyễn Thái Ngọc Duy (11): grep: break down an "if" stmt in preparation for next changes test-regex: isolate the bug test code test-regex: expose full regcomp() to the command line grep/icase: avoid kwsset on literal non-ascii strings grep/icase: avoid kwsset when -F is specified grep/pcre: prepare locale-dependent tables for icase matching gettext: add is_utf8_locale() grep/pcre: support utf-8 diffcore-pickaxe: "share" regex error handling code diffcore-pickaxe: support case insensitive match on non-ascii grep.c: reuse "icase" variable diffcore-pickaxe.c | 27 gettext.c| 24 ++- gettext.h| 1 + grep.c | 47 + grep.h | 1 + quote.c | 37 + quote.h | 1 + t/t0070-fundamental.sh | 2 +- t/t7812-grep-icase-non-ascii.sh (new +x) | 71 t/t7813-grep-icase-iso.sh (new +x) | 19 + test-regex.c | 59 +- 11 files changed, 269 insertions(+), 20 deletions(-) create mode 100755 t/t7812-grep-icase-non-ascii.sh create mode 100755 t/t7813-grep-icase-iso.sh -- 2.8.2.526.g02eed6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/11] grep: break down an "if" stmt in preparation for next changes
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- grep.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/grep.c b/grep.c index 7b2b96a..609f218 100644 --- a/grep.c +++ b/grep.c @@ -403,7 +403,9 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) + if (opt->fixed) { + p->fixed = 1; + } else if (is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else p->fixed = 0; -- 2.8.2.526.g02eed6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/11] grep/icase: avoid kwsset when -F is specified
Similar to the previous commit, we can't use kws on icase search outside ascii range. But we can't simply pass the pattern to regcomp/pcre like the previous commit because it may contain regex special characters, so we need to quote the regex first. To avoid misquote traps that could lead to undefined behavior, we always stick to basic regex engine in this case. We don't need fancy features for grepping a literal string anyway. basic_regex_quote_buf() assumes that if the pattern is in a multibyte encoding, ascii chars must be unambiguously encoded as single bytes. This is true at least for UTF-8. For others, let's wait until people yell up. Chances are nobody uses multibyte, non utf-8 charsets anymore. Noticed-by: Plamen Totev Helped-by: René Scharfe Helped-by: Eric Sunshine Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- grep.c | 25 - quote.c | 37 + quote.h | 1 + t/t7812-grep-icase-non-ascii.sh | 26 ++ 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/grep.c b/grep.c index f192727..6de58f3 100644 --- a/grep.c +++ b/grep.c @@ -5,6 +5,7 @@ #include "diff.h" #include "diffcore.h" #include "commit.h" +#include "quote.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len) return 1; } +static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) +{ + struct strbuf sb = STRBUF_INIT; + int err; + + basic_regex_quote_buf(&sb, p->pattern); + err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED); + if (opt->debug) + fprintf(stderr, "fixed %s\n", sb.buf); + strbuf_release(&sb); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, sizeof(errbuf)); + regfree(&p->regexp); + compile_regexp_failed(p, errbuf); + } +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int icase, ascii_only; @@ -408,7 +427,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) ascii_only = !has_non_ascii(p->pattern); if (opt->fixed) { - p->fixed = 1; + p->fixed = !icase || ascii_only; + if (!p->fixed) { + compile_fixed_regexp(p, opt); + return; + } } else if ((!icase || ascii_only) && is_fixed(p->pattern, p->patternlen)) p->fixed = 1; diff --git a/quote.c b/quote.c index fe884d2..c67adb7 100644 --- a/quote.c +++ b/quote.c @@ -440,3 +440,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src) } strbuf_addch(sb, '"'); } + +void basic_regex_quote_buf(struct strbuf *sb, const char *src) +{ + char c; + + if (*src == '^') { + /* only beginning '^' is special and needs quoting */ + strbuf_addch(sb, '\\'); + strbuf_addch(sb, *src++); + } + if (*src == '*') + /* beginning '*' is not special, no quoting */ + strbuf_addch(sb, *src++); + + while ((c = *src++)) { + switch (c) { + case '[': + case '.': + case '\\': + case '*': + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + case '$': + /* only the end '$' is special and needs quoting */ + if (*src == '\0') + strbuf_addch(sb, '\\'); + strbuf_addch(sb, c); + break; + + default: + strbuf_addch(sb, c); + break; + } + } +} diff --git a/quote.h b/quote.h index 99e04d3..362d315 100644 --- a/quote.h +++ b/quote.h @@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char *prefix, extern void perl_quote_buf(struct strbuf *sb, const char *src); extern void python_quote_buf(struct strbuf *sb, const char *src); extern void tcl_quote_buf(struct strbuf *sb, const char *src); +extern void basic_regex_quote_buf(struct strbuf *sb, const char *src); #endif diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index b78a774..1929809 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t781
[PATCH 04/11] grep/icase: avoid kwsset on literal non-ascii strings
When we detect the pattern is just a literal string, we avoid heavy regex engine and use fast substring search implemented in kwsset.c. But kws uses git-ctype which is locale-independent so it does not know how to fold case properly outside ascii range. Let regcomp or pcre take care of this case instead. Slower, but accurate. Noticed-by: Plamen Totev Helped-by: René Scharfe Helped-by: Eric Sunshine Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- grep.c | 7 ++- t/t7812-grep-icase-non-ascii.sh (new +x) | 23 +++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100755 t/t7812-grep-icase-non-ascii.sh diff --git a/grep.c b/grep.c index 609f218..f192727 100644 --- a/grep.c +++ b/grep.c @@ -4,6 +4,7 @@ #include "xdiff-interface.h" #include "diff.h" #include "diffcore.h" +#include "commit.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -398,14 +399,18 @@ static int is_fixed(const char *s, size_t len) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int icase, ascii_only; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + icase = opt->regflags & REG_ICASE || p->ignore_case; + ascii_only = !has_non_ascii(p->pattern); if (opt->fixed) { p->fixed = 1; - } else if (is_fixed(p->pattern, p->patternlen)) + } else if ((!icase || ascii_only) && + is_fixed(p->pattern, p->patternlen)) p->fixed = 1; else p->fixed = 0; diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh new file mode 100755 index 000..b78a774 --- /dev/null +++ b/t/t7812-grep-icase-non-ascii.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +test_description='grep icase on non-English locales' + +. ./lib-gettext.sh + +test_expect_success GETTEXT_LOCALE 'setup' ' + test_write_lines "TILRAUN: Halló Heimur!" >file && + git add file && + LC_ALL="$is_IS_locale" && + export LC_ALL +' + +test_have_prereq GETTEXT_LOCALE && +test-regex "HALLÓ" "Halló" ICASE && +test_set_prereq REGEX_LOCALE + +test_expect_success REGEX_LOCALE 'grep literal string, no -F' ' + git grep -i "TILRAUN: Halló Heimur!" && + git grep -i "TILRAUN: HALLÓ HEIMUR!" +' + +test_done -- 2.8.2.526.g02eed6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/11] test-regex: isolate the bug test code
This is in preparation to turn test-regex into some generic regex testing command. Helped-by: Eric Sunshine Helped-by: Ramsay Jones Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- t/t0070-fundamental.sh | 2 +- test-regex.c | 12 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh index 5ed69a6..991ed2a 100755 --- a/t/t0070-fundamental.sh +++ b/t/t0070-fundamental.sh @@ -31,7 +31,7 @@ test_expect_success 'git_mkstemps_mode does not fail if fd 0 is not open' ' test_expect_success 'check for a bug in the regex routines' ' # if this test fails, re-build git with NO_REGEX=1 - test-regex + test-regex --bug ' test_done diff --git a/test-regex.c b/test-regex.c index 0dc598e..67a1a65 100644 --- a/test-regex.c +++ b/test-regex.c @@ -1,6 +1,6 @@ #include "git-compat-util.h" -int main(int argc, char **argv) +static int test_regex_bug(void) { char *pat = "[^={} \t]+"; char *str = "={}\nfred"; @@ -16,5 +16,13 @@ int main(int argc, char **argv) if (m[0].rm_so == 3) /* matches '\n' when it should not */ die("regex bug confirmed: re-build git with NO_REGEX=1"); - exit(0); + return 0; +} + +int main(int argc, char **argv) +{ + if (argc == 2 && !strcmp(argv[1], "--bug")) + return test_regex_bug(); + else + usage("test-regex --bug"); } -- 2.8.2.526.g02eed6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/11] grep/pcre: support utf-8
In the previous change in this function, we add locale support for single-byte encodings only. It looks like pcre only supports utf-* as multibyte encodings, the others are left in the cold (which is fine). We need to enable PCRE_UTF8 so pcre can find character boundary correctly. It's needed for case folding (when --ignore-case is used) or '*', '+' or similar syntax is used. The "has_non_ascii()" check is to be on the conservative side. If there's non-ascii in the pattern, the searched content could still be in utf-8, but we can treat it just like a byte stream and everything should work. If we force utf-8 based on locale only and pcre validates utf-8 and the file content is in non-utf8 encoding, things break. Noticed-by: Plamen Totev Helped-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- grep.c | 2 ++ t/t7812-grep-icase-non-ascii.sh | 15 +++ 2 files changed, 17 insertions(+) diff --git a/grep.c b/grep.c index 22f4d99..6e99b01 100644 --- a/grep.c +++ b/grep.c @@ -329,6 +329,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE_UTF8; p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, p->pcre_tables); diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 1929809..08ae4c9 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -20,6 +20,21 @@ test_expect_success REGEX_LOCALE 'grep literal string, no -F' ' git grep -i "TILRAUN: HALLÓ HEIMUR!" ' +test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 icase' ' + git grep --perl-regexp"TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" && + git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!" +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' ' + test_write_lines "TILRAUN: Hallóó Heimur!" >file2 && + git add file2 && + git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual && + echo file >expected && + echo file2 >>expected && + test_cmp expected actual +' + test_expect_success REGEX_LOCALE 'grep literal string, with -F' ' git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null | grep fixed >debug1 && -- 2.8.2.526.g02eed6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/11] diffcore-pickaxe: "share" regex error handling code
There's another regcomp code block coming in this function. By moving the error handling code out of this block, we don't have to add the same error handling code in the new block. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 7715c13..69c6567 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o) int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; + int err = 0; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { - int err; int cflags = REG_EXTENDED | REG_NEWLINE; if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)) cflags |= REG_ICASE; err = regcomp(®ex, needle, cflags); - if (err) { - /* The POSIX.2 people are surely sick */ - char errbuf[1024]; - regerror(err, ®ex, errbuf, 1024); - regfree(®ex); - die("invalid regex: %s", errbuf); - } regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) @@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o) kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } + if (err) { + /* The POSIX.2 people are surely sick */ + char errbuf[1024]; + regerror(err, ®ex, errbuf, 1024); + regfree(®ex); + die("invalid regex: %s", errbuf); + } /* Might want to warn when both S and G are on; I don't care... */ pickaxe(&diff_queued_diff, o, regexp, kws, -- 2.8.2.526.g02eed6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/11] test-regex: expose full regcomp() to the command line
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- test-regex.c | 51 +-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/test-regex.c b/test-regex.c index 67a1a65..eff26f5 100644 --- a/test-regex.c +++ b/test-regex.c @@ -1,4 +1,21 @@ #include "git-compat-util.h" +#include "gettext.h" + +struct reg_flag { + const char *name; + int flag; +}; + +static struct reg_flag reg_flags[] = { + { "EXTENDED",REG_EXTENDED }, + { "NEWLINE", REG_NEWLINE}, + { "ICASE", REG_ICASE }, + { "NOTBOL", REG_NOTBOL }, +#ifdef REG_STARTEND + { "STARTEND",REG_STARTEND }, +#endif + { NULL, 0 } +}; static int test_regex_bug(void) { @@ -21,8 +38,38 @@ static int test_regex_bug(void) int main(int argc, char **argv) { + const char *pat; + const char *str; + int flags = 0; + regex_t r; + regmatch_t m[1]; + if (argc == 2 && !strcmp(argv[1], "--bug")) return test_regex_bug(); - else - usage("test-regex --bug"); + else if (argc < 3) + usage("test-regex --bug\n" + "test-regex []"); + + argv++; + pat = *argv++; + str = *argv++; + while (*argv) { + struct reg_flag *rf; + for (rf = reg_flags; rf->name; rf++) + if (!strcmp(*argv, rf->name)) { + flags |= rf->flag; + break; + } + if (!rf->name) + die("do not recognize %s", *argv); + argv++; + } + git_setup_gettext(); + + if (regcomp(&r, pat, flags)) + die("failed regcomp() for pattern '%s'", pat); + if (regexec(&r, str, 1, m, 0)) + return 1; + + return 0; } -- 2.8.2.526.g02eed6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html