[PATCH v4 1/3] t7063: use --force-untracked-cache to speed up a bit

2015-08-19 Thread Nguyễn Thái Ngọc Duy
When in the middle of t7063, we are sure untracked cache is supported,
so we can use --force-untracked-cache to skip the support detection
phase and save a few seconds. It's also good that --force-untracked-cache
is exercised in the test suite.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 t/t7063-status-untracked-cache.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t7063-status-untracked-cache.sh 
b/t/t7063-status-untracked-cache.sh
index ff23f4e..744e922 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -402,7 +402,7 @@ test_expect_success 'set up sparse checkout' '
echo "done/[a-z]*" >.git/info/sparse-checkout &&
test_config core.sparsecheckout true &&
git checkout master &&
-   git update-index --untracked-cache &&
+   git update-index --force-untracked-cache &&
git status --porcelain >/dev/null && # prime the cache
test_path_is_missing done/.gitignore &&
test_path_is_file done/one
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 0/3] dt/untracked-subdir

2015-08-19 Thread Nguyễn Thái Ngọc Duy
The first patch is a split from David's v3 patch because it should
belong to dt/untracked-sparse. The second is basically David's v3.
The third patch fixes untracked_cache_invalidate_path(). David fixed
it in v2, but there's another bug in this code.

David Turner (1):
  untracked-cache: fix subdirectory handling

Nguyễn Thái Ngọc Duy (2):
  t7063: use --force-untracked-cache to speed up a bit
  untracked cache: fix entry invalidation

 dir.c |  82 +++---
 t/t7063-status-untracked-cache.sh | 102 --
 2 files changed, 163 insertions(+), 21 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 2/3] untracked-cache: fix subdirectory handling

2015-08-19 Thread Nguyễn Thái Ngọc Duy
From: David Turner 

Previously, some calls to lookup_untracked would pass a full path.  But
lookup_untracked assumes that the portion of the path up to and
including the untracked_cache_dir has been removed.  So
lookup_untracked would be looking in the untracked_cache entry for
'foo' for 'foo/bar' (instead of just looking for 'bar').  This would
cause untracked cache corruption.

Instead, treat_directory learns to track the base length of the parent
directory, so that only the last path component is passed to
lookup_untracked.

Helped-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: David Turner 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 dir.c | 14 
 t/t7063-status-untracked-cache.sh | 72 ++-
 2 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/dir.c b/dir.c
index e7b89fe..cd4ac77 100644
--- a/dir.c
+++ b/dir.c
@@ -1297,7 +1297,7 @@ static enum exist_status directory_exists_in_index(const 
char *dirname, int len)
  */
 static enum path_treatment treat_directory(struct dir_struct *dir,
struct untracked_cache_dir *untracked,
-   const char *dirname, int len, int exclude,
+   const char *dirname, int len, int baselen, int exclude,
const struct path_simplify *simplify)
 {
/* The "len-1" is to strip the final '/' */
@@ -1324,7 +1324,8 @@ static enum path_treatment treat_directory(struct 
dir_struct *dir,
if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
return exclude ? path_excluded : path_untracked;
 
-   untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
+   untracked = lookup_untracked(dir->untracked, untracked,
+dirname + baselen, len - baselen);
return read_directory_recursive(dir, dirname, len,
untracked, 1, simplify);
 }
@@ -1444,6 +1445,7 @@ static int get_dtype(struct dirent *de, const char *path, 
int len)
 static enum path_treatment treat_one_path(struct dir_struct *dir,
  struct untracked_cache_dir *untracked,
  struct strbuf *path,
+ int baselen,
  const struct path_simplify *simplify,
  int dtype, struct dirent *de)
 {
@@ -1495,8 +1497,8 @@ static enum path_treatment treat_one_path(struct 
dir_struct *dir,
return path_none;
case DT_DIR:
strbuf_addch(path, '/');
-   return treat_directory(dir, untracked, path->buf, path->len, 
exclude,
-   simplify);
+   return treat_directory(dir, untracked, path->buf, path->len,
+  baselen, exclude, simplify);
case DT_REG:
case DT_LNK:
return exclude ? path_excluded : path_untracked;
@@ -1557,7 +1559,7 @@ static enum path_treatment treat_path(struct dir_struct 
*dir,
return path_none;
 
dtype = DTYPE(de);
-   return treat_one_path(dir, untracked, path, simplify, dtype, de);
+   return treat_one_path(dir, untracked, path, baselen, simplify, dtype, 
de);
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -1827,7 +1829,7 @@ static int treat_leading_path(struct dir_struct *dir,
break;
if (simplify_away(sb.buf, sb.len, simplify))
break;
-   if (treat_one_path(dir, NULL, &sb, simplify,
+   if (treat_one_path(dir, NULL, &sb, baselen, simplify,
   DT_DIR, NULL) == path_none)
break; /* do not recurse into it */
if (len <= baselen) {
diff --git a/t/t7063-status-untracked-cache.sh 
b/t/t7063-status-untracked-cache.sh
index 744e922..22393b9 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -408,7 +408,8 @@ test_expect_success 'set up sparse checkout' '
test_path_is_file done/one
 '
 
-test_expect_success 'create files, some of which are gitignored' '
+test_expect_success 'create/modify files, some of which are gitignored' '
+   echo two bis >done/two &&
echo three >done/three && # three is gitignored
echo four >done/four && # four is gitignored at a higher level
echo five >done/five # five is not gitignored
@@ -420,6 +421,7 @@ test_expect_success 'test sparse status with untracked 
cache' '
GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
git status --porcelain >../status.actual &&
cat >../status.expect <../status.actual &&
cat >

[PATCH v4 3/3] untracked cache: fix entry invalidation

2015-08-19 Thread Nguyễn Thái Ngọc Duy
First, the current code in untracked_cache_invalidate_path() is wrong:
it can only handle paths "a" or "a/b", not "a/b/c", because
lookup_untracked() only looks for entries directly under the given
directory. In the last case, it will look for the entry "b/c" in
directory "a" instead. This means if you delete or add an entry in a
subdirectory, untracked cache may become out of date because it does not
invalidate properly. This was noticed by David Turner.

The second problem is about invalidation inside a fully untracked/excluded
directory. In this case we may have to invalidate back to root. See the
comment block for detail.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 dir.c | 68 ---
 t/t7063-status-untracked-cache.sh | 28 +++-
 2 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/dir.c b/dir.c
index cd4ac77..c1edabf 100644
--- a/dir.c
+++ b/dir.c
@@ -2616,23 +2616,67 @@ done2:
return uc;
 }
 
+static void invalidate_one_directory(struct untracked_cache *uc,
+struct untracked_cache_dir *ucd)
+{
+   uc->dir_invalidated++;
+   ucd->valid = 0;
+   ucd->untracked_nr = 0;
+}
+
+/*
+ * Normally when an entry is added or removed from a directory,
+ * invalidating that directory is enough. No need to touch its
+ * ancestors. When a directory is shown as "foo/bar/" in git-status
+ * however, deleting or adding an entry may have cascading effect.
+ *
+ * Say the "foo/bar/file" has become untracked, we need to tell the
+ * untracked_cache_dir of "foo" that "bar/" is not an untracked
+ * directory any more (because "bar" is managed by foo as an untracked
+ * "file").
+ *
+ * Similarly, if "foo/bar/file" moves from untracked to tracked and it
+ * was the last untracked entry in the entire "foo", we should show
+ * "foo/" instead. Which means we have to invalidate past "bar" up to
+ * "foo".
+ *
+ * This function traverses all directories from root to leaf. If there
+ * is a chance of one of the above cases happening, we invalidate back
+ * to root. Otherwise we just invalidate the leaf. There may be a more
+ * sophisticated way than checking for SHOW_OTHER_DIRECTORIES to
+ * detect these cases and avoid unnecessary invalidation, for example,
+ * checking for the untracked entry named "bar/" in "foo", but for now
+ * stick to something safe and simple.
+ */
+static int invalidate_one_component(struct untracked_cache *uc,
+   struct untracked_cache_dir *dir,
+   const char *path, int len)
+{
+   const char *rest = strchr(path, '/');
+
+   if (rest) {
+   int component_len = rest - path;
+   struct untracked_cache_dir *d =
+   lookup_untracked(uc, dir, path, component_len);
+   int ret =
+   invalidate_one_component(uc, d, rest + 1,
+len - (component_len + 1));
+   if (ret)
+   invalidate_one_directory(uc, dir);
+   return ret;
+   }
+
+   invalidate_one_directory(uc, dir);
+   return uc->dir_flags & DIR_SHOW_OTHER_DIRECTORIES;
+}
+
 void untracked_cache_invalidate_path(struct index_state *istate,
 const char *path)
 {
-   const char *sep;
-   struct untracked_cache_dir *d;
if (!istate->untracked || !istate->untracked->root)
return;
-   sep = strrchr(path, '/');
-   if (sep)
-   d = lookup_untracked(istate->untracked,
-istate->untracked->root,
-path, sep - path);
-   else
-   d = istate->untracked->root;
-   istate->untracked->dir_invalidated++;
-   d->valid = 0;
-   d->untracked_nr = 0;
+   invalidate_one_component(istate->untracked, istate->untracked->root,
+path, strlen(path));
 }
 
 void untracked_cache_remove_from_index(struct index_state *istate,
diff --git a/t/t7063-status-untracked-cache.sh 
b/t/t7063-status-untracked-cache.sh
index 22393b9..37a24c1 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -375,7 +375,7 @@ EOF
 node creation: 0
 gitignore invalidation: 0
 directory invalidation: 0
-opendir: 1
+opendir: 2
 EOF
test_cmp ../trace.expect ../trace
 '
@@ -543,4 +543,30 @@ EOF
test_cmp ../trace.expect ../trace
 '
 
+test_expect_success 'move entry in subdir from untracked to cached' '
+   git add dtwo/two &&
+   git status --porcelain >../status.actual &&
+   cat >../status.expect <../status.actual &&
+   cat >../status.expect <http://vger.kernel.org/majordomo-info.html


[PATCH v4 04/10] grep/icase: avoid kwsset on literal non-ascii strings

2015-08-21 Thread Nguyễn Thái Ngọc Duy
When we detect that the pattern is just a literal string, we avoid the
heavy regex engine and use the fast substring search implemented in
kwsset.c. But kws uses git-ctype, which is locale-independent, so it does
not know how to fold case properly outside the ascii range. Let regcomp
or pcre take care of this case instead. Slower, but accurate.

Helped-by: René Scharfe 
Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c   |  7 ++-
 t/t7812-grep-icase-non-ascii.sh (new +x) | 23 +++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100755 t/t7812-grep-icase-non-ascii.sh

diff --git a/grep.c b/grep.c
index bd32f66..d795b0e 100644
--- a/grep.c
+++ b/grep.c
@@ -4,6 +4,7 @@
 #include "xdiff-interface.h"
 #include "diff.h"
 #include "diffcore.h"
+#include "commit.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -398,12 +399,16 @@ static int is_fixed(const char *s, size_t len)
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
+   int icase_non_ascii;
int err;
 
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
+   icase_non_ascii =
+   (opt->regflags & REG_ICASE || p->ignore_case) &&
+   has_non_ascii(p->pattern);
 
-   if (is_fixed(p->pattern, p->patternlen))
+   if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else if (opt->fixed) {
p->fixed = 1;
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
new file mode 100755
index 000..6eff490
--- /dev/null
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_LOCALE 'setup' '
+   printf "TILRAUN: Halló Heimur!" >file &&
+   git add file &&
+   LC_ALL="$is_IS_locale" &&
+   export LC_ALL
+'
+
+test_have_prereq GETTEXT_LOCALE &&
+test-regex "HALLÓ" "Halló" ICASE &&
+test_set_prereq REGEX_LOCALE
+
+test_expect_success REGEX_LOCALE 'grep literal string, no -F' '
+   git grep -i "TILRAUN: Halló Heimur!" &&
+   git grep -i "TILRAUN: HALLÓ HEIMUR!"
+'
+
+test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 02/10] grep: break down an "if" stmt in preparation for next changes

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index b58c7c6..bd32f66 100644
--- a/grep.c
+++ b/grep.c
@@ -403,9 +403,11 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
 
-   if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+   if (is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
-   else
+   else if (opt->fixed) {
+   p->fixed = 1;
+   } else
p->fixed = 0;
 
if (p->fixed) {
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 01/10] grep: allow -F -i combination

2015-08-21 Thread Nguyễn Thái Ngọc Duy
-F means "no regex", not "case sensitive", so it should not override -i.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/grep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index d04f440..2d392e9 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -806,7 +806,7 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
 
if (!opt.pattern_list)
die(_("no pattern given."));
-   if (!opt.fixed && opt.ignore_case)
+   if (opt.ignore_case)
opt.regflags |= REG_ICASE;
 
compile_grep_patterns(&opt);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 03/10] test-regex: expose full regcomp() to the command line

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 test-regex.c | 56 ++--
 1 file changed, 50 insertions(+), 6 deletions(-)

diff --git a/test-regex.c b/test-regex.c
index 0dc598e..3b5641c 100644
--- a/test-regex.c
+++ b/test-regex.c
@@ -1,19 +1,63 @@
 #include "git-compat-util.h"
+#include "gettext.h"
+
+struct reg_flag {
+   const char *name;
+   int flag;
+};
+
+static struct reg_flag reg_flags[] = {
+   { "EXTENDED",REG_EXTENDED   },
+   { "NEWLINE", REG_NEWLINE},
+   { "ICASE",   REG_ICASE  },
+   { "NOTBOL",  REG_NOTBOL },
+#ifdef REG_STARTEND
+   { "STARTEND",REG_STARTEND   },
+#endif
+   { NULL, 0 }
+};
 
 int main(int argc, char **argv)
 {
-   char *pat = "[^={} \t]+";
-   char *str = "={}\nfred";
+   const char *pat;
+   const char *str;
+   int flags = 0;
regex_t r;
regmatch_t m[1];
 
-   if (regcomp(&r, pat, REG_EXTENDED | REG_NEWLINE))
+   if (argc == 1) {
+   /* special case, bug check */
+   pat = "[^={} \t]+";
+   str = "={}\nfred";
+   flags = REG_EXTENDED | REG_NEWLINE;
+   } else {
+   argv++;
+   pat = *argv++;
+   str = *argv++;
+   while (*argv) {
+   struct reg_flag *rf;
+   for (rf = reg_flags; rf->name; rf++)
+   if (!strcmp(*argv, rf->name)) {
+   flags |= rf->flag;
+   break;
+   }
+   if (!rf->name)
+   die("do not recognize %s", *argv);
+   argv++;
+   }
+   git_setup_gettext();
+   }
+
+   if (regcomp(&r, pat, flags))
die("failed regcomp() for pattern '%s'", pat);
-   if (regexec(&r, str, 1, m, 0))
-   die("no match of pattern '%s' to string '%s'", pat, str);
+   if (regexec(&r, str, 1, m, 0)) {
+   if (argc == 1)
+   die("no match of pattern '%s' to string '%s'", pat, 
str);
+   return 1;
+   }
 
/* http://sourceware.org/bugzilla/show_bug.cgi?id=3957  */
-   if (m[0].rm_so == 3) /* matches '\n' when it should not */
+   if (argc == 1 && m[0].rm_so == 3) /* matches '\n' when it should not */
die("regex bug confirmed: re-build git with NO_REGEX=1");
 
exit(0);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 00/10] icase match on non-ascii

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Sorry it took more than a month for a simple reroll. Free time (with
energy left) is rare these days. v4 adds detection of the system regex's
icase support and only runs the tests when it is available. This should
fix test failures on Windows where compat regex does not support icase.

Diff from v3 below

diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index d07fa20..a5475bb 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -11,7 +11,11 @@ test_expect_success GETTEXT_LOCALE 'setup' '
export LC_ALL
 '
 
-test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' '
+test_have_prereq GETTEXT_LOCALE &&
+test-regex "HALLÓ" "Halló" ICASE &&
+test_set_prereq REGEX_LOCALE
+
+test_expect_success REGEX_LOCALE 'grep literal string, no -F' '
git grep -i "TILRAUN: Halló Heimur!" &&
git grep -i "TILRAUN: HALLÓ HEIMUR!"
 '
@@ -31,7 +35,7 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 
string with "+"' '
test_cmp expected actual
 '
 
-test_expect_success GETTEXT_LOCALE 'grep literal string, with -F' '
+test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
git grep --debug -i -F "TILRAUN: Halló Heimur!"  2>&1 >/dev/null |
 grep fixed >debug1 &&
echo "fixedTILRAUN: Halló Heimur!" >expect1 &&
@@ -43,7 +47,7 @@ test_expect_success GETTEXT_LOCALE 'grep literal string, with 
-F' '
test_cmp expect2 debug2
 '
 
-test_expect_success GETTEXT_LOCALE 'grep string with regex, with -F' '
+test_expect_success REGEX_LOCALE 'grep string with regex, with -F' '
printf "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file &&
 
git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 
>/dev/null |
@@ -57,7 +61,7 @@ test_expect_success GETTEXT_LOCALE 'grep string with regex, 
with -F' '
test_cmp expect2 debug2
 '
 
-test_expect_success GETTEXT_LOCALE 'pickaxe -i on non-ascii' '
+test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' '
git commit -m first &&
git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual &&
echo first >expected &&
diff --git a/test-regex.c b/test-regex.c
index 0dc598e..3b5641c 100644
--- a/test-regex.c
+++ b/test-regex.c
@@ -1,19 +1,63 @@
 #include "git-compat-util.h"
+#include "gettext.h"
+
+struct reg_flag {
+   const char *name;
+   int flag;
+};
+
+static struct reg_flag reg_flags[] = {
+   { "EXTENDED",REG_EXTENDED   },
+   { "NEWLINE", REG_NEWLINE},
+   { "ICASE",   REG_ICASE  },
+   { "NOTBOL",  REG_NOTBOL },
+#ifdef REG_STARTEND
+   { "STARTEND",REG_STARTEND   },
+#endif
+   { NULL, 0 }
+};
 
 int main(int argc, char **argv)
 {
-   char *pat = "[^={} \t]+";
-   char *str = "={}\nfred";
+   const char *pat;
+   const char *str;
+   int flags = 0;
regex_t r;
regmatch_t m[1];
 
-   if (regcomp(&r, pat, REG_EXTENDED | REG_NEWLINE))
+   if (argc == 1) {
+   /* special case, bug check */
+   pat = "[^={} \t]+";
+   str = "={}\nfred";
+   flags = REG_EXTENDED | REG_NEWLINE;
+   } else {
+   argv++;
+   pat = *argv++;
+   str = *argv++;
+   while (*argv) {
+   struct reg_flag *rf;
+   for (rf = reg_flags; rf->name; rf++)
+   if (!strcmp(*argv, rf->name)) {
+   flags |= rf->flag;
+   break;
+   }
+   if (!rf->name)
+   die("do not recognize %s", *argv);
+   argv++;
+   }
+   git_setup_gettext();
+   }
+
+   if (regcomp(&r, pat, flags))
die("failed regcomp() for pattern '%s'", pat);
-   if (regexec(&r, str, 1, m, 0))
-   die("no match of pattern '%s' to string '%s'", pat, str);
+   if (regexec(&r, str, 1, m, 0)) {
+   if (argc == 1)
+   die("no match of pattern '%s' to string '%s'", pat, 
str);
+   return 1;
+   }
 
/* http://sourceware.org/bugzilla/show_bug.cgi?id=3957  */
-   if (m[0].rm_so == 3) /* matches '\n' when it should not */
+   if (argc == 1 && m[0].rm_so == 3) /* matches '\n' when it should not */
die("regex bug confirmed: re-build git with NO_REGEX=1");
 
exit(0);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 05/10] grep/icase: avoid kwsset when -F is specified

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Similar to the previous commit, we can't use kws on icase search
outside ascii range. But we can't simply pass the pattern to
regcomp/pcre like the previous commit because it may contain regex
special characters, so we need to quote the regex first.

To avoid misquote traps that could lead to undefined behavior, we
always stick to the basic regex engine in this case. We don't need fancy
features for grepping a literal string anyway.

basic_regex_quote_buf() assumes that if the pattern is in a multibyte
encoding, ascii chars must be unambiguously encoded as single
bytes. This is true at least for UTF-8. For others, let's wait until
people complain. Chances are nobody uses multibyte, non utf-8 charsets
any more.

Helped-by: René Scharfe 
Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c  | 25 -
 quote.c | 37 +
 quote.h |  1 +
 t/t7812-grep-icase-non-ascii.sh | 26 ++
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index d795b0e..8fce54f 100644
--- a/grep.c
+++ b/grep.c
@@ -5,6 +5,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "commit.h"
+#include "quote.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len)
return 1;
 }
 
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int err;
+
+   basic_regex_quote_buf(&sb, p->pattern);
+   err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED);
+   if (opt->debug)
+   fprintf(stderr, "fixed%s\n", sb.buf);
+   strbuf_release(&sb);
+   if (err) {
+   char errbuf[1024];
+   regerror(err, &p->regexp, errbuf, 1024);
+   regfree(&p->regexp);
+   compile_regexp_failed(p, errbuf);
+   }
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
int icase_non_ascii;
@@ -411,7 +430,11 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else if (opt->fixed) {
-   p->fixed = 1;
+   p->fixed = !icase_non_ascii;
+   if (!p->fixed) {
+   compile_fixed_regexp(p, opt);
+   return;
+   }
} else
p->fixed = 0;
 
diff --git a/quote.c b/quote.c
index 7920e18..43a8057 100644
--- a/quote.c
+++ b/quote.c
@@ -439,3 +439,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src)
}
strbuf_addch(sb, '"');
 }
+
+void basic_regex_quote_buf(struct strbuf *sb, const char *src)
+{
+   char c;
+
+   if (*src == '^') {
+   /* only beginning '^' is special and needs quoting */
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, *src++);
+   }
+   if (*src == '*')
+   /* beginning '*' is not special, no quoting */
+   strbuf_addch(sb, *src++);
+
+   while ((c = *src++)) {
+   switch (c) {
+   case '[':
+   case '.':
+   case '\\':
+   case '*':
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   case '$':
+   /* only the end '$' is special and needs quoting */
+   if (*src == '\0')
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   default:
+   strbuf_addch(sb, c);
+   break;
+   }
+   }
+}
diff --git a/quote.h b/quote.h
index 99e04d3..362d315 100644
--- a/quote.h
+++ b/quote.h
@@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char 
*prefix,
 extern void perl_quote_buf(struct strbuf *sb, const char *src);
 extern void python_quote_buf(struct strbuf *sb, const char *src);
 extern void tcl_quote_buf(struct strbuf *sb, const char *src);
+extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
 
 #endif
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index 6eff490..aba6b15 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -20,4 +20,30 @@ test_expect_success REGEX_LOCALE 'grep lite

[PATCH v4 06/10] grep/pcre: prepare locale-dependent tables for icase matching

2015-08-21 Thread Nguyễn Thái Ngọc Duy
The default tables are usually built with C locale and only suitable
for LANG=C or similar.  This should make case insensitive search work
correctly for all single-byte charsets.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c |  8 ++--
 grep.h |  1 +
 t/t7813-grep-icase-iso.sh (new +x) | 19 +++
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100755 t/t7813-grep-icase-iso.sh

diff --git a/grep.c b/grep.c
index 8fce54f..f0fbf99 100644
--- a/grep.c
+++ b/grep.c
@@ -324,11 +324,14 @@ static void compile_pcre_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
int erroffset;
int options = PCRE_MULTILINE;
 
-   if (opt->ignore_case)
+   if (opt->ignore_case) {
+   if (has_non_ascii(p->pattern))
+   p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
+   }
 
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
-   NULL);
+ p->pcre_tables);
if (!p->pcre_regexp)
compile_regexp_failed(p, error);
 
@@ -362,6 +365,7 @@ static void free_pcre_regexp(struct grep_pat *p)
 {
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
+   pcre_free((void *)p->pcre_tables);
 }
 #else /* !USE_LIBPCRE */
 static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
diff --git a/grep.h b/grep.h
index 95f197a..cee4357 100644
--- a/grep.h
+++ b/grep.h
@@ -48,6 +48,7 @@ struct grep_pat {
regex_t regexp;
pcre *pcre_regexp;
pcre_extra *pcre_extra_info;
+   const unsigned char *pcre_tables;
kwset_t kws;
unsigned fixed:1;
unsigned ignore_case:1;
diff --git a/t/t7813-grep-icase-iso.sh b/t/t7813-grep-icase-iso.sh
new file mode 100755
index 000..efef7fb
--- /dev/null
+++ b/t/t7813-grep-icase-iso.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_ISO_LOCALE 'setup' '
+   printf "TILRAUN: Hall� Heimur!" >file &&
+   git add file &&
+   LC_ALL="$is_IS_iso_locale" &&
+   export LC_ALL
+'
+
+test_expect_success GETTEXT_ISO_LOCALE,LIBPCRE 'grep pcre string' '
+   git grep --perl-regexp -i "TILRAUN: H.ll� Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.LL� HEIMUR!"
+'
+
+test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 07/10] gettext: add is_utf8_locale()

2015-08-21 Thread Nguyễn Thái Ngọc Duy
This function returns true if git is running under a UTF-8
locale. pcre in the next patch will need this.

is_encoding_utf8() is used instead of strcmp() to catch both "utf-8"
and "utf8" suffixes.

When built with no gettext support, we peek in several env variables
to detect UTF-8. pcre library might support utf-8 even if libc is
built without locale support. The peeking code is a copy from
compat/regex/regcomp.c.

Helped-by: Ævar Arnfjörð Bjarmason 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 gettext.c | 24 ++--
 gettext.h |  1 +
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/gettext.c b/gettext.c
index a268a2c..db727ea 100644
--- a/gettext.c
+++ b/gettext.c
@@ -18,6 +18,8 @@
 #  endif
 #endif
 
+static const char *charset;
+
 /*
  * Guess the user's preferred languages from the value in LANGUAGE environment
  * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
@@ -65,7 +67,6 @@ static int test_vsnprintf(const char *fmt, ...)
return ret;
 }
 
-static const char *charset;
 static void init_gettext_charset(const char *domain)
 {
/*
@@ -172,8 +173,27 @@ int gettext_width(const char *s)
 {
static int is_utf8 = -1;
if (is_utf8 == -1)
-   is_utf8 = !strcmp(charset, "UTF-8");
+   is_utf8 = is_utf8_locale();
 
return is_utf8 ? utf8_strwidth(s) : strlen(s);
 }
 #endif
+
+int is_utf8_locale(void)
+{
+#ifdef NO_GETTEXT
+   if (!charset) {
+   const char *env = getenv("LC_ALL");
+   if (!env || !*env)
+   env = getenv("LC_CTYPE");
+   if (!env || !*env)
+   env = getenv("LANG");
+   if (!env)
+   env = "";
+   if (strchr(env, '.'))
+   env = strchr(env, '.') + 1;
+   charset = xstrdup(env);
+   }
+#endif
+   return is_encoding_utf8(charset);
+}
diff --git a/gettext.h b/gettext.h
index 33696a4..7eee64a 100644
--- a/gettext.h
+++ b/gettext.h
@@ -90,5 +90,6 @@ const char *Q_(const char *msgid, const char *plu, unsigned 
long n)
 #endif
 
 const char *get_preferred_languages(void);
+extern int is_utf8_locale(void);
 
 #endif
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 08/10] grep/pcre: support utf-8

2015-08-21 Thread Nguyễn Thái Ngọc Duy
In the previous change in this function, we added locale support for
single-byte encodings only. It looks like pcre only supports utf-* as
multibyte encodings, the others are left out in the cold (which is
fine).

We need to enable PCRE_UTF8 so pcre can find character boundaries
correctly. It's needed for case folding (when --ignore-case is used)
or when '*', '+' or similar syntax is used.

The "has_non_ascii()" check is to be on the conservative side. If
there's no non-ascii in the pattern, the searched content could still be
in utf-8, but we can treat it just like a byte stream and everything
should work. If we force utf-8 based on locale only and pcre validates
utf-8 and the file content is in non-utf8 encoding, things break.

Noticed-by: Plamen Totev 
Helped-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c  |  2 ++
 t/t7812-grep-icase-non-ascii.sh | 15 +++
 2 files changed, 17 insertions(+)

diff --git a/grep.c b/grep.c
index f0fbf99..07621c1 100644
--- a/grep.c
+++ b/grep.c
@@ -329,6 +329,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
}
+   if (is_utf8_locale() && has_non_ascii(p->pattern))
+   options |= PCRE_UTF8;
 
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
  p->pcre_tables);
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index aba6b15..8896410 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -20,6 +20,21 @@ test_expect_success REGEX_LOCALE 'grep literal string, no 
-F' '
git grep -i "TILRAUN: HALLÓ HEIMUR!"
 '
 
+test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 icase' '
+   git grep --perl-regexp "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
+'
+
+test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' '
+   printf "TILRAUN: Hallóó Heimur!" >file2 &&
+   git add file2 &&
+   git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual &&
+   echo file >expected &&
+   echo file2 >>expected &&
+   test_cmp expected actual
+'
+
 test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
git grep --debug -i -F "TILRAUN: Halló Heimur!"  2>&1 >/dev/null |
 grep fixed >debug1 &&
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 10/10] diffcore-pickaxe: support case insensitive match on non-ascii

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Similar to the "grep -F -i" case, we can't use kws on icase search
outside ascii range, so we quote the string and pass it to regcomp as
a basic regexp and let regex engine deal with case sensitivity.

The new test is put in t7812 instead of t4209-log-pickaxe because
lib-gettext.sh might cause problems elsewhere, probably.

Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 diffcore-pickaxe.c  | 11 +++
 t/t7812-grep-icase-non-ascii.sh |  7 +++
 2 files changed, 18 insertions(+)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 7a718fc..6946c15 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -7,6 +7,8 @@
 #include "diffcore.h"
 #include "xdiff-interface.h"
 #include "kwset.h"
+#include "commit.h"
+#include "quote.h"
 
 typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
  struct diff_options *o,
@@ -212,6 +214,15 @@ void diffcore_pickaxe(struct diff_options *o)
cflags |= REG_ICASE;
err = regcomp(&regex, needle, cflags);
regexp = &regex;
+   } else if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) &&
+  has_non_ascii(needle)) {
+   struct strbuf sb = STRBUF_INIT;
+   int cflags = REG_NEWLINE | REG_ICASE;
+
+   basic_regex_quote_buf(&sb, needle);
+   err = regcomp(&regex, sb.buf, cflags);
+   strbuf_release(&sb);
+   regexp = &regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
   ? tolower_trans_tbl : NULL);
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index 8896410..a5475bb 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -61,4 +61,11 @@ test_expect_success REGEX_LOCALE 'grep string with regex, 
with -F' '
test_cmp expect2 debug2
 '
 
+test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' '
+   git commit -m first &&
+   git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual &&
+   echo first >expected &&
+   test_cmp expected actual
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 09/10] diffcore-pickaxe: "share" regex error handling code

2015-08-21 Thread Nguyễn Thái Ngọc Duy
There's another regcomp code block coming in this function. By moving
the error handling code out of this block, we don't have to add the
same error handling code in the new block.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 diffcore-pickaxe.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 185f86b..7a718fc 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o)
int opts = o->pickaxe_opts;
regex_t regex, *regexp = NULL;
kwset_t kws = NULL;
+   int err = 0;
 
if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
-   int err;
int cflags = REG_EXTENDED | REG_NEWLINE;
if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
cflags |= REG_ICASE;
err = regcomp(&regex, needle, cflags);
-   if (err) {
-   /* The POSIX.2 people are surely sick */
-   char errbuf[1024];
-   regerror(err, &regex, errbuf, 1024);
-   regfree(&regex);
-   die("invalid regex: %s", errbuf);
-   }
regexp = &regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
@@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o)
kwsincr(kws, needle, strlen(needle));
kwsprep(kws);
}
+   if (err) {
+   /* The POSIX.2 people are surely sick */
+   char errbuf[1024];
+   regerror(err, &regex, errbuf, 1024);
+   regfree(&regex);
+   die("invalid regex: %s", errbuf);
+   }
 
/* Might want to warn when both S and G are on; I don't care... */
pickaxe(&diff_queued_diff, o, regexp, kws,
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/5] path.c: delete an extra space

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/path.c b/path.c
index 95acbaf..a536ee3 100644
--- a/path.c
+++ b/path.c
@@ -431,7 +431,7 @@ const char *enter_repo(const char *path, int strict)
}
if (!suffix[i])
return NULL;
-   gitfile = read_gitfile(used_path) ;
+   gitfile = read_gitfile(used_path);
if (gitfile)
strcpy(used_path, gitfile);
if (chdir(used_path))
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/5] enter_repo: avoid duplicating logic, use is_git_directory() instead

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/path.c b/path.c
index a536ee3..7340e11 100644
--- a/path.c
+++ b/path.c
@@ -441,8 +441,7 @@ const char *enter_repo(const char *path, int strict)
else if (chdir(path))
return NULL;
 
-   if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 &&
-   validate_headref("HEAD") == 0) {
+   if (is_git_directory(".")) {
set_git_dir(".");
check_repository_format();
return path;
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/5] enter_repo: allow .git files in strict mode

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Strict mode is about not guessing where .git is. If the user points to a
.git file, we know exactly where the target .git dir will be.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/path.c b/path.c
index 7340e11..32d4ca6 100644
--- a/path.c
+++ b/path.c
@@ -438,8 +438,13 @@ const char *enter_repo(const char *path, int strict)
return NULL;
path = validated_path;
}
-   else if (chdir(path))
-   return NULL;
+   else {
+   const char *gitfile = read_gitfile(used_path);
+   if (gitfile)
+   path = gitfile;
+   if (chdir(path))
+   return NULL;
+   }
 
if (is_git_directory(".")) {
set_git_dir(".");
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 4/5] clone: allow --local from a linked checkout

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Noticed-by: Bjørnar Snoksrud 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/clone.c | 6 --
 t/t2025-worktree-add.sh | 5 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 578da85..836fb64 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -424,8 +424,10 @@ static void clone_local(const char *src_repo, const char 
*dest_repo)
} else {
struct strbuf src = STRBUF_INIT;
struct strbuf dest = STRBUF_INIT;
-   strbuf_addf(&src, "%s/objects", src_repo);
-   strbuf_addf(&dest, "%s/objects", dest_repo);
+   get_common_dir(&src, src_repo);
+   get_common_dir(&dest, dest_repo);
+   strbuf_addstr(&src, "/objects");
+   strbuf_addf(&dest, "/objects");
copy_or_link_directory(&src, &dest, src_repo, src.len);
strbuf_release(&src);
strbuf_release(&dest);
diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh
index 8267411..3694174 100755
--- a/t/t2025-worktree-add.sh
+++ b/t/t2025-worktree-add.sh
@@ -193,4 +193,9 @@ test_expect_success '"add" -B/--detach mutually exclusive' '
test_must_fail git worktree add -B poodle --detach bamboo master
 '
 
+test_expect_success 'local clone from linked checkout' '
+   git clone --local here here-clone &&
+   ( cd here-clone && git fsck )
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 0/5] fix local clone from a linked checkout

2015-08-21 Thread Nguyễn Thái Ngọc Duy
On Wed, Jul 15, 2015 at 8:25 PM, Duy Nguyen  wrote:
> On Wed, Jul 15, 2015 at 11:40:18AM +0200, Bjørnar Snoksrud wrote:
>> I reported this before, but now I have a nice topic to hang it on -
>>
>> I have re-reproduced the bug using a build from master as of today,
>> using the new worktree commands.
>
> Something like the following patch should work if you need it now.
>
> Because this may conflict (in the test cases) with Eric's series to
> move "git checkout --to" to "git worktree add", and because the next
> release is already delayed to let "git worktree add" in, I think we
> could keep this patch out of tree for now. I will split it up, add
> tests and resubmit once the release is out. Please remind me if you
> see nothing from me for too long.

Here it is. Mostly the same as the previous patch except that the last
patch is new.

Nguyễn Thái Ngọc Duy (5):
  path.c: delete an extra space
  enter_repo: avoid duplicating logic, use is_git_directory() instead
  enter_repo: allow .git files in strict mode
  clone: allow --local from a linked checkout
  clone: better error when --reference is a linked checkout

 builtin/clone.c | 13 ++---
 path.c  | 14 +-
 t/t2025-worktree-add.sh |  5 +
 3 files changed, 24 insertions(+), 8 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 5/5] clone: better error when --reference is a linked checkout

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/clone.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 836fb64..7a010bb 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -294,9 +294,14 @@ static int add_one_reference(struct string_list_item 
*item, void *cb_data)
char *ref_git_git = mkpathdup("%s/.git", ref_git);
free(ref_git);
ref_git = ref_git_git;
-   } else if (!is_directory(mkpath("%s/objects", ref_git)))
+   } else if (!is_directory(mkpath("%s/objects", ref_git))) {
+   struct strbuf sb = STRBUF_INIT;
+   if (get_common_dir(&sb, ref_git))
+   die(_("reference repository '%s' as a linked checkout 
is not supported yet."),
+   item->string);
die(_("reference repository '%s' is not a local repository."),
item->string);
+   }
 
if (!access(mkpath("%s/shallow", ref_git), F_OK))
die(_("reference repository '%s' is shallow"), item->string);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/8] Resurrect "diff-lib.c: adjust position of i-t-a entries in diff"

2015-08-21 Thread Nguyễn Thái Ngọc Duy
On Thu, Jun 25, 2015 at 8:07 PM, Junio C Hamano  wrote:
>>> Perhaps a good and safe way forward to resurrect what d95d728a
>>> wanted to do is to first add an option to tell run_diff_index() and
>>> run_diff_files() which behaviour the caller wants to see, add that
>>> only to the caller in wt-status.c?  Then incrementally pass that
>>> option from more callsites that we are absolutely certain that want
>>> this different worldview with respect to i-t-a?
>>
>> Agreed.
>
> OK.  Perhaps then first I should do that revert and we'll
> incrementally rebuild on top.

Here comes the rebuild. This adds a --shift-ita option (and an internal
flag) to enable this behavior; that only takes the last two patches.
The remaining patches make sure we handle i-t-a entries correctly in
some commands.

Nguyễn Thái Ngọc Duy (8):
  blame: remove obsolete comment
  Add and use convenient macro ce_intent_to_add()
  apply: fix adding new files on i-t-a entries
  apply: make sure check_preimage() does not leave empty file on error
  checkout(-index): do not checkout i-t-a entries
  grep: make it clear i-t-a entries are ignored
  diff.h: extend "flags" field to 64 bits because we're out of bits
  Resurrect "diff-lib.c: adjust position of i-t-a entries in diff"

 Documentation/diff-options.txt |  6 +++
 builtin/apply.c| 13 ---
 builtin/blame.c|  5 ---
 builtin/checkout-index.c   |  5 ++-
 builtin/checkout.c |  2 +
 builtin/commit.c   |  2 +-
 builtin/grep.c |  2 +-
 builtin/rm.c   |  2 +-
 cache-tree.c   |  2 +-
 cache.h|  1 +
 diff-lib.c | 18 -
 diff.c |  4 +-
 diff.h |  9 +++--
 read-cache.c   |  4 +-
 t/t2203-add-intent.sh  | 83 +-
 wt-status.c|  2 +
 16 files changed, 134 insertions(+), 26 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/8] Add and use convenient macro ce_intent_to_add()

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/rm.c | 2 +-
 cache-tree.c | 2 +-
 cache.h  | 1 +
 read-cache.c | 4 ++--
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/builtin/rm.c b/builtin/rm.c
index 80b972f..8829b09 100644
--- a/builtin/rm.c
+++ b/builtin/rm.c
@@ -211,7 +211,7 @@ static int check_local_mod(unsigned char *head, int 
index_only)
 * "intent to add" entry.
 */
if (local_changes && staged_changes) {
-   if (!index_only || !(ce->ce_flags & CE_INTENT_TO_ADD))
+   if (!index_only || !ce_intent_to_add(ce))
string_list_append(&files_staged, name);
}
else if (!index_only) {
diff --git a/cache-tree.c b/cache-tree.c
index feace8b..c865bd2 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -377,7 +377,7 @@ static int update_one(struct cache_tree *it,
 * they are not part of generated trees. Invalidate up
 * to root to force cache-tree users to read elsewhere.
 */
-   if (ce->ce_flags & CE_INTENT_TO_ADD) {
+   if (ce_intent_to_add(ce)) {
to_invalidate = 1;
continue;
}
diff --git a/cache.h b/cache.h
index 4e25271..41d811c 100644
--- a/cache.h
+++ b/cache.h
@@ -241,6 +241,7 @@ static inline unsigned create_ce_flags(unsigned stage)
 #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
 #define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE)
 #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
+#define ce_intent_to_add(ce) ((ce)->ce_flags & CE_INTENT_TO_ADD)
 
 #define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
 static inline unsigned int create_ce_mode(unsigned int mode)
diff --git a/read-cache.c b/read-cache.c
index 89dbc08..f849cbd 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -327,7 +327,7 @@ int ie_match_stat(const struct index_state *istate,
 * by definition never matches what is in the work tree until it
 * actually gets added.
 */
-   if (ce->ce_flags & CE_INTENT_TO_ADD)
+   if (ce_intent_to_add(ce))
return DATA_CHANGED | TYPE_CHANGED | MODE_CHANGED;
 
changed = ce_match_stat_basic(ce, st);
@@ -1251,7 +1251,7 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
 
if (cache_errno == ENOENT)
fmt = deleted_fmt;
-   else if (ce->ce_flags & CE_INTENT_TO_ADD)
+   else if (ce_intent_to_add(ce))
fmt = added_fmt; /* must be before other checks 
*/
else if (changed & TYPE_CHANGED)
fmt = typechange_fmt;
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] apply: fix adding new files on i-t-a entries

2015-08-21 Thread Nguyễn Thái Ngọc Duy
Applying a patch that adds a file when that file is registered with "git
add -N" will fail with message "already exists in index" because
git-apply checks, sees those i-t-a entries and aborts. git-apply does
not realize those are for bookkeeping only, they do not really exist in
the index. This patch tightens the "exists in index" check, ignoring
i-t-a entries.

Reported-by: Patrick Higgins 
Reported-by: Bjørnar Snoksrud 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/apply.c   |  9 +
 t/t2203-add-intent.sh | 13 +
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 54aba4e..76b58a1 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -3553,10 +3553,11 @@ static int check_to_create(const char *new_name, int 
ok_if_exists)
 {
struct stat nst;
 
-   if (check_index &&
-   cache_name_pos(new_name, strlen(new_name)) >= 0 &&
-   !ok_if_exists)
-   return EXISTS_IN_INDEX;
+   if (check_index && !ok_if_exists) {
+   int pos = cache_name_pos(new_name, strlen(new_name));
+   if (pos >= 0 && !ce_intent_to_add(active_cache[pos]))
+   return EXISTS_IN_INDEX;
+   }
if (cached)
return 0;
 
diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh
index 2a4a749..bb5ef2b 100755
--- a/t/t2203-add-intent.sh
+++ b/t/t2203-add-intent.sh
@@ -82,5 +82,18 @@ test_expect_success 'cache-tree invalidates i-t-a paths' '
test_cmp expect actual
 '
 
+test_expect_success 'apply adds new file on i-t-a entry' '
+   git init apply &&
+   (
+   cd apply &&
+   echo newcontent >newfile &&
+   git add newfile &&
+   git diff --cached >patch &&
+   rm .git/index &&
+   git add -N newfile &&
+   git apply --cached patch
+   )
+'
+
 test_done
 
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/8] apply: make sure check_preimage() does not leave empty file on error

2015-08-21 Thread Nguyễn Thái Ngọc Duy
The test case probably describes the test scenario the best. We have a
patch to modify some file but the base file is gone. Because
check_preimage() finds an index entry with the same old_name, it tries
to restore the on-disk base file with cached content with
checkout_target() and move on.

If this old_name is i-t-a, before this patch "apply -3" will call
checkout_target() which will write an empty file (i-t-a entries always
have "empty blob" SHA-1), then it'll fail at verify_index_match()
(i-t-a entries are always "modified") and the operation is aborted.

This empty file should not be created. Compare to the case where
old_name does not exist in index, "apply -3" fails with a different
error message "...: does not exist" but it does not touch
worktree. This patch makes sure the same happens to i-t-a entries.

load_current() shares the same code pattern (look up an index entry,
if on-disk version is not found, restore using the cached
version). Fix it too (even though it's not exercised in any test case).

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/apply.c   |  4 ++--
 t/t2203-add-intent.sh | 16 
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 76b58a1..b185c97 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -3348,7 +3348,7 @@ static int load_current(struct image *image, struct patch 
*patch)
die("BUG: patch to %s is not a creation", patch->old_name);
 
pos = cache_name_pos(name, strlen(name));
-   if (pos < 0)
+   if (pos < 0 || ce_intent_to_add(active_cache[pos]))
return error(_("%s: does not exist in index"), name);
ce = active_cache[pos];
if (lstat(name, &st)) {
@@ -3501,7 +3501,7 @@ static int check_preimage(struct patch *patch, struct 
cache_entry **ce, struct s
 
if (check_index && !previous) {
int pos = cache_name_pos(old_name, strlen(old_name));
-   if (pos < 0) {
+   if (pos < 0 || ce_intent_to_add(active_cache[pos])) {
if (patch->is_new < 0)
goto is_new;
return error(_("%s: does not exist in index"), 
old_name);
diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh
index bb5ef2b..96c8755 100755
--- a/t/t2203-add-intent.sh
+++ b/t/t2203-add-intent.sh
@@ -95,5 +95,21 @@ test_expect_success 'apply adds new file on i-t-a entry' '
)
 '
 
+test_expect_success 'apply:check_preimage() not creating empty file' '
+   git init check-preimage &&
+   (
+   cd check-preimage &&
+   echo oldcontent >newfile &&
+   git add newfile &&
+   echo newcontent >newfile &&
+   git diff >patch &&
+   rm .git/index &&
+   git add -N newfile &&
+   rm newfile &&
+   test_must_fail git apply -3 patch &&
+   ! test -f newfile
+   )
+'
+
 test_done
 
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/8] blame: remove obsolete comment

2015-08-21 Thread Nguyễn Thái Ngọc Duy
That "someday" in the comment happened two years later in
b65982b (Optimize "diff-index --cached" using cache-tree - 2009-05-20)

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/blame.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 4db01c1..942f302 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2379,11 +2379,6 @@ static struct commit *fake_working_tree_commit(struct 
diff_options *opt,
ce->ce_mode = create_ce_mode(mode);
add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 
-   /*
-* We are not going to write this out, so this does not matter
-* right now, but someday we might optimize diff-index --cached
-* with cache-tree information.
-*/
cache_tree_invalidate_path(&the_index, path);
 
return commit;
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] checkout(-index): do not checkout i-t-a entries

2015-08-21 Thread Nguyễn Thái Ngọc Duy
The cached blobs of i-t-a entries are empty blobs. By checking them out,
we would delete the content we have. Don't do it.

This is done higher up instead of inside checkout_entry() because we
would have limited options in there: silently ignore, loudly ignore,
die. At higher level we can do better reporting. For example, "git
checkout -- foo" will complain that "foo" does not match pathspec, just
like when "foo" is not registered with "git add -N".

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/checkout-index.c |  5 -
 builtin/checkout.c   |  2 ++
 t/t2203-add-intent.sh| 34 ++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c
index 8028c37..eca975d 100644
--- a/builtin/checkout-index.c
+++ b/builtin/checkout-index.c
@@ -56,7 +56,8 @@ static int checkout_file(const char *name, const char *prefix)
while (pos < active_nr) {
struct cache_entry *ce = active_cache[pos];
if (ce_namelen(ce) != namelen ||
-   memcmp(ce->name, name, namelen))
+   memcmp(ce->name, name, namelen) ||
+   ce_intent_to_add(ce))
break;
has_same_name = 1;
pos++;
@@ -99,6 +100,8 @@ static void checkout_all(const char *prefix, int 
prefix_length)
if (ce_stage(ce) != checkout_stage
&& (CHECKOUT_ALL != checkout_stage || !ce_stage(ce)))
continue;
+   if (ce_intent_to_add(ce))
+   continue;
if (prefix && *prefix &&
(ce_namelen(ce) <= prefix_length ||
 memcmp(prefix, ce->name, prefix_length)))
diff --git a/builtin/checkout.c b/builtin/checkout.c
index e1403be..02889d4 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -300,6 +300,8 @@ static int checkout_paths(const struct checkout_opts *opts,
 * anything to this entry at all.
 */
continue;
+   if (ce_intent_to_add(ce))
+   continue;
/*
 * Either this entry came from the tree-ish we are
 * checking the paths out of, or we are checking out
diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh
index 96c8755..d0f36a4 100755
--- a/t/t2203-add-intent.sh
+++ b/t/t2203-add-intent.sh
@@ -111,5 +111,39 @@ test_expect_success 'apply:check_preimage() not creating 
empty file' '
)
 '
 
+test_expect_success 'checkout ignores i-t-a' '
+   git init checkout &&
+   (
+   cd checkout &&
+   echo data >file &&
+   git add -N file &&
+   test_must_fail git checkout -- file &&
+   echo data >expected &&
+   test_cmp expected file
+   )
+'
+
+test_expect_success 'checkout-index ignores i-t-a' '
+   (
+   cd checkout &&
+   git checkout-index file &&
+   echo data >expected &&
+   test_cmp expected file
+   )
+'
+
+test_expect_success 'checkout-index --all ignores i-t-a' '
+   (
+   cd checkout &&
+   echo data >anotherfile &&
+   git add anotherfile &&
+   rm anotherfile &&
+   git checkout-index --all &&
+   echo data >expected &&
+   test_cmp expected file &&
+   test_cmp expected anotherfile
+   )
+'
+
 test_done
 
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/8] grep: make it clear i-t-a entries are ignored

2015-08-21 Thread Nguyễn Thái Ngọc Duy
The expression "!S_ISREG(ce->ce_mode)" covers i-t-a entries as well because
ce->ce_mode would be zero then. I could make a comment saying that, but
it's probably better just to comment with code, in case i-t-a entry
content changes in the future.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/grep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index d04f440..f508179 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -375,7 +375,7 @@ static int grep_cache(struct grep_opt *opt, const struct 
pathspec *pathspec, int
 
for (nr = 0; nr < active_nr; nr++) {
const struct cache_entry *ce = active_cache[nr];
-   if (!S_ISREG(ce->ce_mode))
+   if (!S_ISREG(ce->ce_mode) || ce_intent_to_add(ce))
continue;
if (!ce_path_match(ce, pathspec, NULL))
continue;
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/8] diff.h: extend "flags" field to 64 bits because we're out of bits

2015-08-21 Thread Nguyễn Thái Ngọc Duy
I renamed both "flags" and "touched_flags" fields while making this
patch to make sure I was aware of how these flags were manipulated
(besides DIFF_OPT* macros). So hopefully I didn't miss anything.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/commit.c | 2 +-
 diff-lib.c   | 4 ++--
 diff.c   | 2 +-
 diff.h   | 8 +---
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 4cbd5ff..95f7296 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -895,7 +895,7 @@ static int prepare_to_commit(const char *index_file, const 
char *prefix,
 * submodules which were manually staged, which would
 * be really confusing.
 */
-   int diff_flags = DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG;
+   diff_flags_t diff_flags = 
DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG;
if (ignore_submodule_arg &&
!strcmp(ignore_submodule_arg, "all"))
diff_flags |= DIFF_OPT_IGNORE_SUBMODULES;
diff --git a/diff-lib.c b/diff-lib.c
index 241a843..ae09034 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -71,7 +71,7 @@ static int match_stat_with_submodule(struct diff_options 
*diffopt,
 {
int changed = ce_match_stat(ce, st, ce_option);
if (S_ISGITLINK(ce->ce_mode)) {
-   unsigned orig_flags = diffopt->flags;
+   diff_flags_t orig_flags = diffopt->flags;
if (!DIFF_OPT_TST(diffopt, OVERRIDE_SUBMODULE_CONFIG))
set_diffopt_flags_from_submodule_config(diffopt, 
ce->name);
if (DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES))
@@ -516,7 +516,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct 
diff_options *opt)
return 0;
 }
 
-int index_differs_from(const char *def, int diff_flags)
+int index_differs_from(const char *def, diff_flags_t diff_flags)
 {
struct rev_info rev;
struct setup_revision_opt opt;
diff --git a/diff.c b/diff.c
index 7deac90..2485870 100644
--- a/diff.c
+++ b/diff.c
@@ -4912,7 +4912,7 @@ int diff_can_quit_early(struct diff_options *opt)
 static int is_submodule_ignored(const char *path, struct diff_options *options)
 {
int ignored = 0;
-   unsigned orig_flags = options->flags;
+   diff_flags_t orig_flags = options->flags;
if (!DIFF_OPT_TST(options, OVERRIDE_SUBMODULE_CONFIG))
set_diffopt_flags_from_submodule_config(options, path);
if (DIFF_OPT_TST(options, IGNORE_SUBMODULES))
diff --git a/diff.h b/diff.h
index f7208ad..4241aa5 100644
--- a/diff.h
+++ b/diff.h
@@ -110,13 +110,15 @@ enum diff_words_type {
DIFF_WORDS_COLOR
 };
 
+typedef uint64_t diff_flags_t;
+
 struct diff_options {
const char *orderfile;
const char *pickaxe;
const char *single_follow;
const char *a_prefix, *b_prefix;
-   unsigned flags;
-   unsigned touched_flags;
+   diff_flags_t flags;
+   diff_flags_t touched_flags;
 
/* diff-filter bits */
unsigned int filter;
@@ -347,7 +349,7 @@ extern int diff_result_code(struct diff_options *, int);
 
 extern void diff_no_index(struct rev_info *, int, const char **, const char *);
 
-extern int index_differs_from(const char *def, int diff_flags);
+extern int index_differs_from(const char *def, diff_flags_t diff_flags);
 
 extern size_t fill_textconv(struct userdiff_driver *driver,
struct diff_filespec *df,
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/8] diff.h: extend "flags" field to 64 bits because we're out of bits

2015-08-21 Thread Nguyễn Thái Ngọc Duy
I renamed both "flags" and "touched_flags" fields while making this
patch to make sure I was aware of how these flags were manipulated
(besides DIFF_OPT* macros). So hopefully I didn't miss anything.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Resend to the right recipients. I screwed send-email up..

 builtin/commit.c | 2 +-
 diff-lib.c   | 4 ++--
 diff.c   | 2 +-
 diff.h   | 8 +---
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 4cbd5ff..95f7296 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -895,7 +895,7 @@ static int prepare_to_commit(const char *index_file, const 
char *prefix,
 * submodules which were manually staged, which would
 * be really confusing.
 */
-   int diff_flags = DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG;
+   diff_flags_t diff_flags = 
DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG;
if (ignore_submodule_arg &&
!strcmp(ignore_submodule_arg, "all"))
diff_flags |= DIFF_OPT_IGNORE_SUBMODULES;
diff --git a/diff-lib.c b/diff-lib.c
index 241a843..ae09034 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -71,7 +71,7 @@ static int match_stat_with_submodule(struct diff_options 
*diffopt,
 {
int changed = ce_match_stat(ce, st, ce_option);
if (S_ISGITLINK(ce->ce_mode)) {
-   unsigned orig_flags = diffopt->flags;
+   diff_flags_t orig_flags = diffopt->flags;
if (!DIFF_OPT_TST(diffopt, OVERRIDE_SUBMODULE_CONFIG))
set_diffopt_flags_from_submodule_config(diffopt, 
ce->name);
if (DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES))
@@ -516,7 +516,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct 
diff_options *opt)
return 0;
 }
 
-int index_differs_from(const char *def, int diff_flags)
+int index_differs_from(const char *def, diff_flags_t diff_flags)
 {
struct rev_info rev;
struct setup_revision_opt opt;
diff --git a/diff.c b/diff.c
index 7deac90..2485870 100644
--- a/diff.c
+++ b/diff.c
@@ -4912,7 +4912,7 @@ int diff_can_quit_early(struct diff_options *opt)
 static int is_submodule_ignored(const char *path, struct diff_options *options)
 {
int ignored = 0;
-   unsigned orig_flags = options->flags;
+   diff_flags_t orig_flags = options->flags;
if (!DIFF_OPT_TST(options, OVERRIDE_SUBMODULE_CONFIG))
set_diffopt_flags_from_submodule_config(options, path);
if (DIFF_OPT_TST(options, IGNORE_SUBMODULES))
diff --git a/diff.h b/diff.h
index f7208ad..4241aa5 100644
--- a/diff.h
+++ b/diff.h
@@ -110,13 +110,15 @@ enum diff_words_type {
DIFF_WORDS_COLOR
 };
 
+typedef uint64_t diff_flags_t;
+
 struct diff_options {
const char *orderfile;
const char *pickaxe;
const char *single_follow;
const char *a_prefix, *b_prefix;
-   unsigned flags;
-   unsigned touched_flags;
+   diff_flags_t flags;
+   diff_flags_t touched_flags;
 
/* diff-filter bits */
unsigned int filter;
@@ -347,7 +349,7 @@ extern int diff_result_code(struct diff_options *, int);
 
 extern void diff_no_index(struct rev_info *, int, const char **, const char *);
 
-extern int index_differs_from(const char *def, int diff_flags);
+extern int index_differs_from(const char *def, diff_flags_t diff_flags);
 
 extern size_t fill_textconv(struct userdiff_driver *driver,
struct diff_filespec *df,
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/8] Resurrect "diff-lib.c: adjust position of i-t-a entries in diff"

2015-08-21 Thread Nguyễn Thái Ngọc Duy
The original commit d95d728aba06a34394d15466045cbdabdada58a2 was
reverted in commit 78cc1a540ba127b13f2f3fd531777b57f3a9cd46 because we
were (and still are) not ready for a new world order. A lot more
investigation must be done to see what is impacted. See 78cc1a5 for
details.

This patch takes a smaller and safer step. The new behavior is
controlled by SHIFT_INTENT_TO_ADD flag. We can gradually move more diff
users to the new behavior after we are sure it's safe to do so. This
flag is exposed to outside as "--shift-ita".

"git status" sets this flag unconditionally to display i-t-a entries
correctly. See d95d728 for details about "git status" and i-t-a entries.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/diff-options.txt |  6 ++
 diff-lib.c | 14 ++
 diff.c |  2 ++
 diff.h |  1 +
 t/t2203-add-intent.sh  | 20 ++--
 wt-status.c|  2 ++
 6 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index d56ca90..b69db29 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -556,5 +556,11 @@ endif::git-format-patch[]
 --no-prefix::
Do not show any source or destination prefix.
 
+--shift-ita::
+   By default entries added by "git add -N" appear as an existing
+   empty file in "git diff" and a new file in "git diff --cached".
+   This option makes the entry appear as a new file in "git diff"
+   and non-existent in "git diff --cached".
+
 For more detailed explanation on these common options, see also
 linkgit:gitdiffcore[7].
diff --git a/diff-lib.c b/diff-lib.c
index ae09034..2546f5a 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -212,6 +212,12 @@ int run_diff_files(struct rev_info *revs, unsigned int 
option)
   ce->sha1, 
!is_null_sha1(ce->sha1),
   ce->name, 0);
continue;
+   } else if (DIFF_OPT_TST(&revs->diffopt, 
SHIFT_INTENT_TO_ADD) &&
+  ce_intent_to_add(ce)) {
+   diff_addremove(&revs->diffopt, '+', ce->ce_mode,
+  EMPTY_BLOB_SHA1_BIN, 0,
+  ce->name, 0);
+   continue;
}
 
changed = match_stat_with_submodule(&revs->diffopt, ce, 
&st,
@@ -376,6 +382,14 @@ static void do_oneway_diff(struct unpack_trees_options *o,
struct rev_info *revs = o->unpack_data;
int match_missing, cached;
 
+   /* i-t-a entries do not actually exist in the index */
+   if (DIFF_OPT_TST(&revs->diffopt, SHIFT_INTENT_TO_ADD) &&
+   idx && ce_intent_to_add(idx)) {
+   idx = NULL;
+   if (!tree)
+   return; /* nothing to diff.. */
+   }
+
/* if the entry is not checked out, don't examine work tree */
cached = o->index_only ||
(idx && ((idx->ce_flags & CE_VALID) || ce_skip_worktree(idx)));
diff --git a/diff.c b/diff.c
index 2485870..e554cad 100644
--- a/diff.c
+++ b/diff.c
@@ -3890,6 +3890,8 @@ int diff_opt_parse(struct diff_options *options, const 
char **av, int ac)
return parse_submodule_opt(options, arg);
else if (skip_prefix(arg, "--ws-error-highlight=", &arg))
return parse_ws_error_highlight(options, arg);
+   else if (!strcmp(arg, "--shift-ita"))
+   DIFF_OPT_SET(options, SHIFT_INTENT_TO_ADD);
 
/* misc options */
else if (!strcmp(arg, "-z"))
diff --git a/diff.h b/diff.h
index 4241aa5..68a4400 100644
--- a/diff.h
+++ b/diff.h
@@ -92,6 +92,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct 
diff_options *opt, void *data)
 #define DIFF_OPT_FUNCCONTEXT (1 << 29)
 #define DIFF_OPT_PICKAXE_IGNORE_CASE (1 << 30)
 #define DIFF_OPT_DEFAULT_FOLLOW_RENAMES (1 << 31)
+#define DIFF_OPT_SHIFT_INTENT_TO_ADD (1ULL << 32)
 
 #define DIFF_OPT_TST(opts, flag)((opts)->flags & DIFF_OPT_##flag)
 #define DIFF_OPT_TOUCHED(opts, flag)((opts)->touched_flags & 
DIFF_OPT_##flag)
diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh
index d0f36a4..7c90eee 100755
--- a/t/t2203-add-intent.sh
+++ b/t/t2203-add-intent.sh
@@ -5,10 +5,24 @@ test_description='Intent to add'
 . ./test-lib.sh
 
 test_expect_success 'intent to add' '
+   test_commit 1 &&
+   git rm 1.t &&
+   echo hello >1.t &&
echo hello >f

[PATCH v3] gc: save log from daemonized gc --auto and print it next time

2015-08-21 Thread Nguyễn Thái Ngọc Duy
While commit 9f673f9 (gc: config option for running --auto in
background - 2014-02-08) helps reduce some complaints about 'gc
--auto' hogging the terminal, it creates another set of problems.

The latest in this set is that, as a result of daemonizing, stderr is
closed and all warnings are lost. The warning at the end of cmd_gc()
is particularly important because it tells the user how to avoid "gc
--auto" running repeatedly. Because stderr is closed, the user never
sees it, and naturally they complain about 'gc --auto' wasting CPU.

Daemonized gc now saves stderr to $GIT_DIR/gc.log. Subsequent gc runs
will not be daemonized, and will print gc.log, until the user removes
gc.log.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Let's try again. Compared to v2 [1], this version does not delete
 gc.log automatically any more. The user needs to take action, then
 delete gc.log to make gc run in the background again.

 [1] http://thread.gmane.org/gmane.comp.version-control.git/266182/focus=266320

 builtin/gc.c | 36 +++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index bcc75d9..00a83e1 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -43,9 +43,20 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT;
 static struct argv_array rerere = ARGV_ARRAY_INIT;
 
 static char *pidfile;
+static struct strbuf log_filename = STRBUF_INIT;
+static int daemonized;
 
 static void remove_pidfile(void)
 {
+   if (daemonized && log_filename.len) {
+   struct stat st;
+
+   close(2);
+   if (stat(log_filename.buf, &st) ||
+   !st.st_size ||
+   rename(log_filename.buf, git_path("gc.log")))
+   unlink(log_filename.buf);
+   }
if (pidfile)
unlink(pidfile);
 }
@@ -330,13 +341,24 @@ int cmd_gc(int argc, const char **argv, const char 
*prefix)
fprintf(stderr, _("See \"git help gc\" for manual 
housekeeping.\n"));
}
if (detach_auto) {
+   struct strbuf sb = STRBUF_INIT;
+   if (strbuf_read_file(&sb, git_path("gc.log"), 0) > 0) {
+   warning(_("last gc run reported:\n"
+ "%s\n"
+ "running in foreground until %s is 
removed"),
+   sb.buf, git_path("gc.log"));
+   detach_auto = 0;
+   }
+   strbuf_release(&sb);
+   }
+   if (detach_auto) {
if (gc_before_repack())
return -1;
/*
 * failure to daemonize is ok, we'll continue
 * in foreground
 */
-   daemonize();
+   daemonized = !daemonize();
}
} else
add_repack_all_option();
@@ -349,6 +371,18 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
name, (uintmax_t)pid);
}
 
+   if (daemonized) {
+   int fd;
+
+   strbuf_addstr(&log_filename, git_path("gc.log_XX"));
+   fd = xmkstemp(log_filename.buf);
+   if (fd >= 0) {
+   dup2(fd, 2);
+   close(fd);
+   } else
+   strbuf_release(&log_filename);
+   }
+
if (gc_before_repack())
return -1;
 
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] gitignore, re-inclusion fix

2015-08-23 Thread Nguyễn Thái Ngọc Duy
This is an old problem. I attempted once [1] and then was reminded [2]
with some more comments on the original patch. Let's try again.

The problem is this .gitignore currently does not work, but it should:

/abc
!/abc/def/ghi

This series fixes that by recognizing that the last rule may re-include
something in abc/def, so "abc" and "abc/def" are not excluded outright,
which gives the last rule a chance to match.

[1] http://article.gmane.org/gmane.comp.version-control.git/259870
[2] http://thread.gmane.org/gmane.comp.version-control.git/265901/focus=266227

Nguyễn Thái Ngọc Duy (2):
  dir.c: make last_exclude_matching_from_list() run til the end
  dir.c: don't exclude whole dir prematurely if neg pattern may match

 Documentation/gitignore.txt| 21 ++---
 dir.c  | 89 +++---
 t/t3001-ls-files-others-exclude.sh | 20 +
 3 files changed, 118 insertions(+), 12 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] dir.c: make last_exclude_matching_from_list() run til the end

2015-08-23 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 dir.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/dir.c b/dir.c
index c00c7e2..3a7630a 100644
--- a/dir.c
+++ b/dir.c
@@ -901,6 +901,7 @@ static struct exclude 
*last_exclude_matching_from_list(const char *pathname,
   int *dtype,
   struct exclude_list *el)
 {
+   struct exclude *exc = NULL; /* undecided */
int i;
 
if (!el->nr)
@@ -922,18 +923,22 @@ static struct exclude 
*last_exclude_matching_from_list(const char *pathname,
if (match_basename(basename,
   pathlen - (basename - pathname),
   exclude, prefix, x->patternlen,
-  x->flags))
-   return x;
+  x->flags)) {
+   exc = x;
+   break;
+   }
continue;
}
 
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
if (match_pathname(pathname, pathlen,
   x->base, x->baselen ? x->baselen - 1 : 0,
-  exclude, prefix, x->patternlen, x->flags))
-   return x;
+  exclude, prefix, x->patternlen, x->flags)) {
+   exc = x;
+   break;
+   }
}
-   return NULL; /* undecided */
+   return exc;
 }
 
 /*
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] dir.c: don't exclude whole dir prematurely if neg pattern may match

2015-08-23 Thread Nguyễn Thái Ngọc Duy
If there is a pattern "!foo/bar", this patch makes it not exclude "foo"
right away. This gives us a chance to examine "foo" and re-include
"foo/bar".

In order for it to detect that the directory under examination should
not be excluded right away, in other words it is a parent directory of a
negative pattern, the "directory path" of the negative pattern must be
literal. Patterns like "!f?o/bar" can't stop "foo" from being excluded.

Basename matching (i.e. "no slashes in the pattern") or must-be-dir
matching (i.e. "trailing slash in the pattern") does not work well with
this. For example, if we descend into "foo" and are examining "foo/abc",
the current code for the "foo/" pattern will check whether path
"foo/abc", not "foo", is a directory. Basename matching has the same
problem. Both may need a big code reorg to work.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/gitignore.txt| 21 ---
 dir.c  | 76 +-
 t/t3001-ls-files-others-exclude.sh | 20 ++
 3 files changed, 109 insertions(+), 8 deletions(-)

diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index 473623d..889a72a 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -82,12 +82,9 @@ PATTERN FORMAT
 
  - An optional prefix "`!`" which negates the pattern; any
matching file excluded by a previous pattern will become
-   included again. It is not possible to re-include a file if a parent
-   directory of that file is excluded. Git doesn't list excluded
-   directories for performance reasons, so any patterns on contained
-   files have no effect, no matter where they are defined.
-   Put a backslash ("`\`") in front of the first "`!`" for patterns
-   that begin with a literal "`!`", for example, "`\!important!.txt`".
+   included again. It is possible to re-include a file if a parent
+   directory of that file is excluded, with restrictions. See section
+   NOTES for detail.
 
  - If the pattern ends with a slash, it is removed for the
purpose of the following description, but it would only find
@@ -141,6 +138,18 @@ not tracked by Git remain untracked.
 To stop tracking a file that is currently tracked, use
 'git rm --cached'.
 
+To re-include a file when its parent directory is excluded, the
+following conditions must be met:
+
+ - The directory part in the re-include rules must be literal (i.e. no
+   wildcards)
+
+ - The rules to exclude the parent directory must not end with a
+   trailing slash.
+
+ - The rules to exclude the parent directory must have at least one
+   slash.
+
 EXAMPLES
 
 
diff --git a/dir.c b/dir.c
index 3a7630a..a1f711c 100644
--- a/dir.c
+++ b/dir.c
@@ -882,6 +882,25 @@ int match_pathname(const char *pathname, int pathlen,
 */
if (!patternlen && !namelen)
return 1;
+   /*
+* This can happen when we ignore some exclude rules
+* on directories in order to see if negative rules
+* may match. E.g.
+*
+* /abc
+* !/abc/def/ghi
+*
+* The pattern of interest is "/abc". On the first
+* try, we should match path "abc" with this pattern
+* in the "if" statement right above, but the caller
+* ignores it.
+*
+* On the second try with paths within "abc",
+* e.g. "abc/xyz", we come here and try to match it
+* with "/abc".
+*/
+   if (!patternlen && namelen && *name == '/')
+   return 1;
}
 
return fnmatch_icase_mem(pattern, patternlen,
@@ -890,6 +909,48 @@ int match_pathname(const char *pathname, int pathlen,
 }
 
 /*
+ * Return non-zero if pathname is a directory and an ancestor of the
+ * literal path in a (negative) pattern. This is used to keep
+ * descending in "foo" and "foo/bar" when the pattern is
+ * "!foo/bar/.gitignore". "foo/notbar" will not be descended however.
+ */
+static int match_neg_path(const char *pathname, int pathlen, int *dtype,
+ const char *base, int baselen,
+ const char *pattern, int prefix, int patternlen,
+ int flags)
+{
+   assert((flags & EXC_FLAG_NEGATIVE) && !(flags & EXC_FLAG_NODIR));
+
+   if (*dtype == DT_UNKNOWN)
+   *dtype = get_dtype(NULL, pathname, pathlen);
+   if (*dtype != DT_DIR)
+   return 0;
+
+   if (*pattern

[PATCH] setup: update the right file in multiple checkouts

2015-08-25 Thread Nguyễn Thái Ngọc Duy
This code was introduced in 23af91d (prune: strategies for linked
checkouts - 2014-11-30), and it's supposed to implement this rule from
that commit's message:

 - linked checkouts are supposed to keep its location in $R/gitdir up
   to date. The use case is auto fixup after a manual checkout move.

Note the name, "$R/gitdir", not "$R/gitfile". Correct the path to be
updated accordingly.

While at it, make sure I/O errors are not silently dropped.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 The code was right in v2 [1] and became "gitfile" since v3 [2]. I
 need to reconsider my code quality after this :(

 [1] http://article.gmane.org/gmane.comp.version-control.git/239299
 [2] http://article.gmane.org/gmane.comp.version-control.git/242325

 setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.c b/setup.c
index 5f9f07d..64bf2b4 100644
--- a/setup.c
+++ b/setup.c
@@ -402,9 +402,9 @@ static void update_linked_gitdir(const char *gitfile, const 
char *gitdir)
struct strbuf path = STRBUF_INIT;
struct stat st;
 
-   strbuf_addf(&path, "%s/gitfile", gitdir);
+   strbuf_addf(&path, "%s/gitdir", gitdir);
if (stat(path.buf, &st) || st.st_mtime + 24 * 3600 < time(NULL))
-   write_file(path.buf, 0, "%s\n", gitfile);
+   write_file(path.buf, 1, "%s\n", gitfile);
strbuf_release(&path);
 }
 
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 5/5] clone: better error when --reference is a linked checkout

2015-09-12 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/clone.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 39d4adf..3e14491 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -294,9 +294,14 @@ static int add_one_reference(struct string_list_item 
*item, void *cb_data)
char *ref_git_git = mkpathdup("%s/.git", ref_git);
free(ref_git);
ref_git = ref_git_git;
-   } else if (!is_directory(mkpath("%s/objects", ref_git)))
+   } else if (!is_directory(mkpath("%s/objects", ref_git))) {
+   struct strbuf sb = STRBUF_INIT;
+   if (get_common_dir(&sb, ref_git))
+   die(_("reference repository '%s' as a linked checkout 
is not supported yet."),
+   item->string);
die(_("reference repository '%s' is not a local repository."),
item->string);
+   }
 
if (!access(mkpath("%s/shallow", ref_git), F_OK))
die(_("reference repository '%s' is shallow"), item->string);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 3/5] enter_repo: allow .git files in strict mode

2015-09-12 Thread Nguyễn Thái Ngọc Duy
Strict mode is about not guessing where .git is. If the user points to a
.git file, we know exactly where the target .git dir will be.

This is needed even in local clone case because transport.c code uses
upload-pack for fetching remote refs.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/path.c b/path.c
index 7340e11..32d4ca6 100644
--- a/path.c
+++ b/path.c
@@ -438,8 +438,13 @@ const char *enter_repo(const char *path, int strict)
return NULL;
path = validated_path;
}
-   else if (chdir(path))
-   return NULL;
+   else {
+   const char *gitfile = read_gitfile(used_path);
+   if (gitfile)
+   path = gitfile;
+   if (chdir(path))
+   return NULL;
+   }
 
if (is_git_directory(".")) {
set_git_dir(".");
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 4/5] clone: allow --local from a linked checkout

2015-09-12 Thread Nguyễn Thái Ngọc Duy
Noticed-by: Bjørnar Snoksrud 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/clone.c | 6 --
 t/t2025-worktree-add.sh | 5 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 578da85..39d4adf 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -424,8 +424,10 @@ static void clone_local(const char *src_repo, const char 
*dest_repo)
} else {
struct strbuf src = STRBUF_INIT;
struct strbuf dest = STRBUF_INIT;
-   strbuf_addf(&src, "%s/objects", src_repo);
-   strbuf_addf(&dest, "%s/objects", dest_repo);
+   get_common_dir(&src, src_repo);
+   get_common_dir(&dest, dest_repo);
+   strbuf_addstr(&src, "/objects");
+   strbuf_addstr(&dest, "/objects");
copy_or_link_directory(&src, &dest, src_repo, src.len);
strbuf_release(&src);
strbuf_release(&dest);
diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh
index 8267411..3694174 100755
--- a/t/t2025-worktree-add.sh
+++ b/t/t2025-worktree-add.sh
@@ -193,4 +193,9 @@ test_expect_success '"add" -B/--detach mutually exclusive' '
test_must_fail git worktree add -B poodle --detach bamboo master
 '
 
+test_expect_success 'local clone from linked checkout' '
+   git clone --local here here-clone &&
+   ( cd here-clone && git fsck )
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/5] path.c: delete an extra space

2015-09-12 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/path.c b/path.c
index 95acbaf..a536ee3 100644
--- a/path.c
+++ b/path.c
@@ -431,7 +431,7 @@ const char *enter_repo(const char *path, int strict)
}
if (!suffix[i])
return NULL;
-   gitfile = read_gitfile(used_path) ;
+   gitfile = read_gitfile(used_path);
if (gitfile)
strcpy(used_path, gitfile);
if (chdir(used_path))
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/5] enter_repo: avoid duplicating logic, use is_git_directory() instead

2015-09-12 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/path.c b/path.c
index a536ee3..7340e11 100644
--- a/path.c
+++ b/path.c
@@ -441,8 +441,7 @@ const char *enter_repo(const char *path, int strict)
else if (chdir(path))
return NULL;
 
-   if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 &&
-   validate_headref("HEAD") == 0) {
+   if (is_git_directory(".")) {
set_git_dir(".");
check_repository_format();
return path;
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/5] fix local clone from a linked checkout

2015-09-12 Thread Nguyễn Thái Ngọc Duy
Fixes strbuf_addf in 4/5. Makes a note about the relation of
enter_repo to this local clone in 3/5. Changes since v1

-- 8< --
diff --git a/builtin/clone.c b/builtin/clone.c
index 7a010bb..3e14491 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -432,7 +432,7 @@ static void clone_local(const char *src_repo, const char 
*dest_repo)
get_common_dir(&src, src_repo);
get_common_dir(&dest, dest_repo);
strbuf_addstr(&src, "/objects");
-   strbuf_addf(&dest, "/objects");
+   strbuf_addstr(&dest, "/objects");
copy_or_link_directory(&src, &dest, src_repo, src.len);
strbuf_release(&src);
    strbuf_release(&dest);
-- 8< --

Nguyễn Thái Ngọc Duy (5):
  path.c: delete an extra space
  enter_repo: avoid duplicating logic, use is_git_directory() instead
  enter_repo: allow .git files in strict mode
  clone: allow --local from a linked checkout
  clone: better error when --reference is a linked checkout

 builtin/clone.c | 13 ++---
 path.c  | 14 +-
 t/t2025-worktree-add.sh |  5 +
 3 files changed, 24 insertions(+), 8 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 0/2] gitignore, re-inclusion fix

2015-09-12 Thread Nguyễn Thái Ngọc Duy
No code change. Explain why 1/2 is needed in the commit message.

Nguyễn Thái Ngọc Duy (2):
  dir.c: make last_exclude_matching_from_list() run til the end
  dir.c: don't exclude whole dir prematurely if neg pattern may match

 Documentation/gitignore.txt| 21 ++---
 dir.c  | 89 +++---
 t/t3001-ls-files-others-exclude.sh | 20 +
 3 files changed, 118 insertions(+), 12 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/2] dir.c: make last_exclude_matching_from_list() run til the end

2015-09-12 Thread Nguyễn Thái Ngọc Duy
The next patch adds some post processing to the result value before it's
returned to the caller. Make all branches reach the end of the function,
so we can do it all in one place.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 dir.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/dir.c b/dir.c
index c00c7e2..3a7630a 100644
--- a/dir.c
+++ b/dir.c
@@ -901,6 +901,7 @@ static struct exclude 
*last_exclude_matching_from_list(const char *pathname,
   int *dtype,
   struct exclude_list *el)
 {
+   struct exclude *exc = NULL; /* undecided */
int i;
 
if (!el->nr)
@@ -922,18 +923,22 @@ static struct exclude 
*last_exclude_matching_from_list(const char *pathname,
if (match_basename(basename,
   pathlen - (basename - pathname),
   exclude, prefix, x->patternlen,
-  x->flags))
-   return x;
+  x->flags)) {
+   exc = x;
+   break;
+   }
continue;
}
 
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
if (match_pathname(pathname, pathlen,
   x->base, x->baselen ? x->baselen - 1 : 0,
-  exclude, prefix, x->patternlen, x->flags))
-   return x;
+  exclude, prefix, x->patternlen, x->flags)) {
+   exc = x;
+   break;
+   }
}
-   return NULL; /* undecided */
+   return exc;
 }
 
 /*
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/2] dir.c: don't exclude whole dir prematurely if neg pattern may match

2015-09-12 Thread Nguyễn Thái Ngọc Duy
If there is a pattern "!foo/bar", this patch makes it not exclude "foo"
right away. This gives us a chance to examine "foo" and re-include
"foo/bar".

In order for it to detect that the directory under examination should
not be excluded right away, in other words it is a parent directory of a
negative pattern, the "directory path" of the negative pattern must be
literal. Patterns like "!f?o/bar" can't stop "foo" from being excluded.

Basename matching (i.e. "no slashes in the pattern") or must-be-dir
matching (i.e. "trailing slash in the pattern") does not work well with
this. For example, if we descend into "foo" and are examining "foo/abc",
the current code for the "foo/" pattern will check whether path
"foo/abc", not "foo", is a directory. Basename matching has the same
problem. Both may need a big code reorg to work.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/gitignore.txt| 21 ---
 dir.c  | 76 +-
 t/t3001-ls-files-others-exclude.sh | 20 ++
 3 files changed, 109 insertions(+), 8 deletions(-)

diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index 473623d..889a72a 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -82,12 +82,9 @@ PATTERN FORMAT
 
  - An optional prefix "`!`" which negates the pattern; any
matching file excluded by a previous pattern will become
-   included again. It is not possible to re-include a file if a parent
-   directory of that file is excluded. Git doesn't list excluded
-   directories for performance reasons, so any patterns on contained
-   files have no effect, no matter where they are defined.
-   Put a backslash ("`\`") in front of the first "`!`" for patterns
-   that begin with a literal "`!`", for example, "`\!important!.txt`".
+   included again. It is possible to re-include a file if a parent
+   directory of that file is excluded, with restrictions. See section
+   NOTES for detail.
 
  - If the pattern ends with a slash, it is removed for the
purpose of the following description, but it would only find
@@ -141,6 +138,18 @@ not tracked by Git remain untracked.
 To stop tracking a file that is currently tracked, use
 'git rm --cached'.
 
+To re-include a file when its parent directory is excluded, the
+following conditions must be met:
+
+ - The directory part in the re-include rules must be literal (i.e. no
+   wildcards)
+
+ - The rules to exclude the parent directory must not end with a
+   trailing slash.
+
+ - The rules to exclude the parent directory must have at least one
+   slash.
+
 EXAMPLES
 
 
diff --git a/dir.c b/dir.c
index 3a7630a..a1f711c 100644
--- a/dir.c
+++ b/dir.c
@@ -882,6 +882,25 @@ int match_pathname(const char *pathname, int pathlen,
 */
if (!patternlen && !namelen)
return 1;
+   /*
+* This can happen when we ignore some exclude rules
+* on directories in order to see if negative rules
+* may match. E.g.
+*
+* /abc
+* !/abc/def/ghi
+*
+* The pattern of interest is "/abc". On the first
+* try, we should match path "abc" with this pattern
+* in the "if" statement right above, but the caller
+* ignores it.
+*
+* On the second try with paths within "abc",
+* e.g. "abc/xyz", we come here and try to match it
+* with "/abc".
+*/
+   if (!patternlen && namelen && *name == '/')
+   return 1;
}
 
return fnmatch_icase_mem(pattern, patternlen,
@@ -890,6 +909,48 @@ int match_pathname(const char *pathname, int pathlen,
 }
 
 /*
+ * Return non-zero if pathname is a directory and an ancestor of the
+ * literal path in a (negative) pattern. This is used to keep
+ * descending in "foo" and "foo/bar" when the pattern is
+ * "!foo/bar/.gitignore". "foo/notbar" will not be descended however.
+ */
+static int match_neg_path(const char *pathname, int pathlen, int *dtype,
+ const char *base, int baselen,
+ const char *pattern, int prefix, int patternlen,
+ int flags)
+{
+   assert((flags & EXC_FLAG_NEGATIVE) && !(flags & EXC_FLAG_NODIR));
+
+   if (*dtype == DT_UNKNOWN)
+   *dtype = get_dtype(NULL, pathname, pathlen);
+   if (*dtype != DT_DIR)
+   return 0;
+
+   if (*pattern

[PATCH v4] gc: save log from daemonized gc --auto and print it next time

2015-09-12 Thread Nguyễn Thái Ngọc Duy
While commit 9f673f9 (gc: config option for running --auto in
background - 2014-02-08) helps reduce some complaints about 'gc
--auto' hogging the terminal, it creates another set of problems.

The latest in this set is that, as a result of daemonizing, stderr is
closed and all warnings are lost. The warning at the end of cmd_gc()
is particularly important because it tells the user how to avoid "gc
--auto" running repeatedly. Because stderr is closed, the user never
sees it, and naturally they complain about 'gc --auto' wasting CPU.

Daemonized gc now saves stderr to $GIT_DIR/gc.log. Subsequent gc --auto
runs will not do any work and will print gc.log instead, until the user
removes gc.log.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 When gc.log exists, gc --auto now simply exits

 builtin/gc.c | 33 -
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index bcc75d9..2c3aaeb 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -43,9 +43,20 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT;
 static struct argv_array rerere = ARGV_ARRAY_INIT;
 
 static char *pidfile;
+static struct strbuf log_filename = STRBUF_INIT;
+static int daemonized;
 
 static void remove_pidfile(void)
 {
+   if (daemonized && log_filename.len) {
+   struct stat st;
+
+   close(2);
+   if (stat(log_filename.buf, &st) ||
+   !st.st_size ||
+   rename(log_filename.buf, git_path("gc.log")))
+   unlink(log_filename.buf);
+   }
if (pidfile)
unlink(pidfile);
 }
@@ -330,13 +341,21 @@ int cmd_gc(int argc, const char **argv, const char 
*prefix)
fprintf(stderr, _("See \"git help gc\" for manual 
housekeeping.\n"));
}
if (detach_auto) {
+   struct strbuf sb = STRBUF_INIT;
+   if (strbuf_read_file(&sb, git_path("gc.log"), 0) > 0)
+   return error(_("last gc run reported:\n"
+  "%s\n"
+  "not running until %s is 
removed"),
+sb.buf, git_path("gc.log"));
+   strbuf_release(&sb);
+
if (gc_before_repack())
return -1;
/*
 * failure to daemonize is ok, we'll continue
 * in foreground
 */
-   daemonize();
+   daemonized = !daemonize();
}
} else
add_repack_all_option();
@@ -349,6 +368,18 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
name, (uintmax_t)pid);
}
 
+   if (daemonized) {
+   int fd;
+
+   strbuf_addstr(&log_filename, git_path("gc.log_XX"));
+   fd = xmkstemp(log_filename.buf);
+   if (fd >= 0) {
+   dup2(fd, 2);
+   close(fd);
+   } else
+   strbuf_release(&log_filename);
+   }
+
if (gc_before_repack())
return -1;
 
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5] gc: save log from daemonized gc --auto and print it next time

2015-09-18 Thread Nguyễn Thái Ngọc Duy
While commit 9f673f9 (gc: config option for running --auto in
background - 2014-02-08) helps reduce some complaints about 'gc
--auto' hogging the terminal, it creates another set of problems.

The latest in this set is, as the result of daemonizing, stderr is
closed and all warnings are lost. This warning at the end of cmd_gc()
is particularly important because it tells the user how to avoid "gc
--auto" running repeatedly. Because stderr is closed, the user does
not know, naturally they complain about 'gc --auto' wasting CPU.

Daemonized gc now saves stderr to $GIT_DIR/gc.log. Subsequent runs of
gc --auto will not do any work and will print gc.log instead, until the
user removes gc.log.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 This is the lock-based version. I'm sending an alternative that does
 not need atexit() with comparison between the two.

 builtin/gc.c | 56 +++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index bcc75d9..4e738fa 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -43,6 +43,7 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT;
 static struct argv_array rerere = ARGV_ARRAY_INIT;
 
 static char *pidfile;
+static struct lock_file log_lock;
 
 static void remove_pidfile(void)
 {
@@ -57,6 +58,28 @@ static void remove_pidfile_on_signal(int signo)
raise(signo);
 }
 
+static void process_log_file(void)
+{
+   struct stat st;
+   if (!fstat(log_lock.fd, &st) && st.st_size)
+   commit_lock_file(&log_lock);
+   else
+   rollback_lock_file(&log_lock);
+}
+
+static void process_log_file_at_exit(void)
+{
+   fflush(stderr);
+   process_log_file();
+}
+
+static void process_log_file_on_signal(int signo)
+{
+   process_log_file();
+   sigchain_pop(signo);
+   raise(signo);
+}
+
 static void git_config_date_string(const char *key, const char **output)
 {
if (git_config_get_string_const(key, output))
@@ -253,6 +276,24 @@ static const char *lock_repo_for_gc(int force, pid_t* 
ret_pid)
return NULL;
 }
 
+static int report_last_gc_error(void)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int ret;
+
+   ret = strbuf_read_file(&sb, git_path("gc.log"), 0);
+   if (ret > 0)
+   return error(_("The last gc run reported the following. "
+  "Please correct the root cause\n"
+  "and remove %s.\n"
+  "Automatic cleanup will not be performed "
+  "until the file is removed.\n\n"
+  "%s"),
+git_path("gc.log"), sb.buf);
+   strbuf_release(&sb);
+   return 0;
+}
+
 static int gc_before_repack(void)
 {
if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD))
@@ -274,6 +315,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
int force = 0;
const char *name;
pid_t pid;
+   int daemonized = 0;
 
struct option builtin_gc_options[] = {
OPT__QUIET(&quiet, N_("suppress progress reporting")),
@@ -330,13 +372,16 @@ int cmd_gc(int argc, const char **argv, const char 
*prefix)
fprintf(stderr, _("See \"git help gc\" for manual 
housekeeping.\n"));
}
if (detach_auto) {
+   if (report_last_gc_error())
+   return -1;
+
if (gc_before_repack())
return -1;
/*
 * failure to daemonize is ok, we'll continue
 * in foreground
 */
-   daemonize();
+   daemonized = !daemonize();
}
} else
add_repack_all_option();
@@ -349,6 +394,15 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
name, (uintmax_t)pid);
}
 
+   if (daemonized) {
+   hold_lock_file_for_update(&log_lock,
+ git_path("gc.log"),
+ LOCK_DIE_ON_ERROR);
+   dup2(log_lock.fd, 2);
+   sigchain_push_common(process_log_file_on_signal);
+   atexit(process_log_file_at_exit);
+   }
+
if (gc_before_repack())
return -1;
 
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Alt. PATCH v5] gc: save log from daemonized gc --auto and print it next time

2015-09-18 Thread Nguyễn Thái Ngọc Duy
While commit 9f673f9 (gc: config option for running --auto in
background - 2014-02-08) helps reduce some complaints about 'gc
--auto' hogging the terminal, it creates another set of problems.

The latest in this set is, as the result of daemonizing, stderr is
closed and all warnings are lost. This warning at the end of cmd_gc()
is particularly important because it tells the user how to avoid "gc
--auto" running repeatedly. Because stderr is closed, the user does
not know, naturally they complain about 'gc --auto' wasting CPU.

Daemonized gc now saves stderr to $GIT_DIR/gc.log. Subsequent runs of
gc --auto will not do any work and will print gc.log instead, until the
user removes gc.log.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 The lock-based version has an advantage that the following gc runs
 will never see partial gc.log. But it requires some more hook at
 atexit() and maybe signal handler.
 
 This version avoids that, and gc.log can be kept even if gc is
 SIGKILL'd (unlikely because gc itself does not do anything that can
 upset the kernel), but then it's racy.

 I think I prefer the lock-based version.

 builtin/gc.c | 66 +++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index bcc75d9..3d42ef7 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -43,6 +43,8 @@ static struct argv_array prune_worktrees = ARGV_ARRAY_INIT;
 static struct argv_array rerere = ARGV_ARRAY_INIT;
 
 static char *pidfile;
+static struct strbuf log_filename = STRBUF_INIT;
+static int daemonized;
 
 static void remove_pidfile(void)
 {
@@ -253,6 +255,41 @@ static const char *lock_repo_for_gc(int force, pid_t* 
ret_pid)
return NULL;
 }
 
+static int report_last_gc_error(void)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int ret;
+
+   if (!access(git_path("gc.pid"), R_OK))
+   /*
+* Either background gc is still running, or we have a stale
+* gc.pid.
+*
+* If the former, it's not the right time to look at gc.log
+* yet. We'll exit shortly after lock_repo_for_gc().
+*
+* If the latter, we'll run gc --auto one more time. But
+* because gc.log is appended, the old log (if exists) won't
+* be lost. One extra gc run is not that big a deal to avoid.
+*/
+   return 0;
+
+   ret = strbuf_read_file(&sb, git_path("gc.log"), 0);
+   if (ret > 0)
+   return error(_("The last gc run reported the following. "
+  "Please correct the root cause\n"
+  "and remove %s. Automatic cleanup will not "
+  "be performed\n"
+  "until the file is removed.\n\n"
+  "%s"),
+git_path("gc.log"), sb.buf);
+   else if (!ret)
+   /* racy, but in the worst case we waste one more gc run */
+   unlink(git_path("gc.log"));
+   strbuf_release(&sb);
+   return 0;
+}
+
 static int gc_before_repack(void)
 {
if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD))
@@ -330,13 +367,16 @@ int cmd_gc(int argc, const char **argv, const char 
*prefix)
fprintf(stderr, _("See \"git help gc\" for manual 
housekeeping.\n"));
}
if (detach_auto) {
+   if (report_last_gc_error())
+   return -1;
+
if (gc_before_repack())
return -1;
/*
 * failure to daemonize is ok, we'll continue
 * in foreground
 */
-   daemonize();
+   daemonized = !daemonize();
}
} else
add_repack_all_option();
@@ -349,6 +389,18 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
name, (uintmax_t)pid);
}
 
+   if (daemonized) {
+   int fd;
+
+   strbuf_git_path(&log_filename, "gc.log");
+   fd = open(log_filename.buf, O_WRONLY | O_CREAT | O_APPEND, 
0644);
+   if (fd >= 0) {
+   dup2(fd, 2);
+   close(fd);
+   } else
+   strbuf_release(&log_filename);
+   }
+
if (gc_before_repack())
return -1;
 
@@ -376,6 +428,17 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
warning(_("There are too many unreachable loose objects; "
   

[PATCH/RFC] Add test_repo_expect_success for running tests in a new repository

2015-09-19 Thread Nguyễn Thái Ngọc Duy
This could be convenient when tests are independent from the rest in the
same file. Normally we would do this

test_expect_success '...' '
git init foo &&
(
cd foo &&

[PATCH v3 1/2] dir.c: make last_exclude_matching_from_list() run til the end

2015-09-21 Thread Nguyễn Thái Ngọc Duy
The next patch adds some post processing to the result value before it's
returned to the caller. Make all branches reach the end of the function,
so we can do it all in one place.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 dir.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/dir.c b/dir.c
index c00c7e2..3a7630a 100644
--- a/dir.c
+++ b/dir.c
@@ -901,6 +901,7 @@ static struct exclude 
*last_exclude_matching_from_list(const char *pathname,
   int *dtype,
   struct exclude_list *el)
 {
+   struct exclude *exc = NULL; /* undecided */
int i;
 
if (!el->nr)
@@ -922,18 +923,22 @@ static struct exclude 
*last_exclude_matching_from_list(const char *pathname,
if (match_basename(basename,
   pathlen - (basename - pathname),
   exclude, prefix, x->patternlen,
-  x->flags))
-   return x;
+  x->flags)) {
+   exc = x;
+   break;
+   }
continue;
}
 
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
if (match_pathname(pathname, pathlen,
   x->base, x->baselen ? x->baselen - 1 : 0,
-  exclude, prefix, x->patternlen, x->flags))
-   return x;
+  exclude, prefix, x->patternlen, x->flags)) {
+   exc = x;
+   break;
+   }
}
-   return NULL; /* undecided */
+   return exc;
 }
 
 /*
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/2] gitignore, re-inclusion fix

2015-09-21 Thread Nguyễn Thái Ngọc Duy
Changes since v2

-- 8< --
diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index 889a72a..79a1948 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -82,9 +82,12 @@ PATTERN FORMAT
 
  - An optional prefix "`!`" which negates the pattern; any
matching file excluded by a previous pattern will become
-   included again. It is possible to re-include a file if a parent
-   directory of that file is excluded, with restrictions. See section
-   NOTES for detail.
+   included again.
+   Put a backslash ("`\`") in front of the first "`!`" for patterns
+   that begin with a literal "`!`", for example, "`\!important!.txt`".
+   It is possible to re-include a file if a parent directory of that
+   file is excluded if certain conditions are met. See section NOTES
+   for detail.
 
  - If the pattern ends with a slash, it is removed for the
purpose of the following description, but it would only find
@@ -138,8 +141,11 @@ not tracked by Git remain untracked.
 To stop tracking a file that is currently tracked, use
 'git rm --cached'.
 
-To re-include a file when its parent directory is excluded, the
-following conditions must be met:
+To re-include files or directories when their parent directory is
+excluded, the following conditions must be met:
+
+ - The rules to exclude a directory and re-include a subset back must
+   be in the same .gitignore file.
 
  - The directory part in the re-include rules must be literal (i.e. no
wildcards)
diff --git a/t/t3001-ls-files-others-exclude.sh 
b/t/t3001-ls-files-others-exclude.sh
index 9de49a6..da257c0 100755
--- a/t/t3001-ls-files-others-exclude.sh
+++ b/t/t3001-ls-files-others-exclude.sh
@@ -310,9 +310,14 @@ test_expect_success 'negative patterns' '
(
cd reinclude &&
cat >.gitignore <<-\EOF &&
+   /fooo
/foo
!foo/bar/bar
EOF
+   mkdir fooo &&
+   cat >fooo/.gitignore <<-\EOF &&
+   !/*
+   EOF
mkdir -p foo/bar &&
    touch abc foo/def foo/bar/ghi foo/bar/bar &&
git ls-files -o --exclude-standard >../actual &&
-- 8< --

Nguyễn Thái Ngọc Duy (2):
  dir.c: make last_exclude_matching_from_list() run til the end
  dir.c: don't exclude whole dir prematurely if neg pattern may match

 Documentation/gitignore.txt| 23 --
 dir.c  | 89 +++---
 t/t3001-ls-files-others-exclude.sh | 25 +++
 3 files changed, 127 insertions(+), 10 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/2] dir.c: don't exclude whole dir prematurely if neg pattern may match

2015-09-21 Thread Nguyễn Thái Ngọc Duy
If there is a pattern "!foo/bar", this patch makes it not exclude "foo"
right away. This gives us a chance to examine "foo" and re-include
"foo/bar".

In order for it to detect that the directory under examination should
not be excluded right away, in other words it is a parent directory of a
negative pattern, the "directory path" of the negative pattern must be
literal. Patterns like "!f?o/bar" can't stop "foo" from being excluded.

Basename matching (i.e. "no slashes in the pattern") or must-be-dir
matching (i.e. "trailing slash in the pattern") does not work well with
this. For example, if we descend in "foo" and are examining "foo/abc",
current code for "foo/" pattern will check if path "foo/abc", not "foo",
is a directory. The same problem with basename matching. These may need
big code reorg to make it work.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/gitignore.txt| 23 ++--
 dir.c  | 76 +-
 t/t3001-ls-files-others-exclude.sh | 25 +
 3 files changed, 118 insertions(+), 6 deletions(-)

diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index 473623d..79a1948 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -82,12 +82,12 @@ PATTERN FORMAT
 
  - An optional prefix "`!`" which negates the pattern; any
matching file excluded by a previous pattern will become
-   included again. It is not possible to re-include a file if a parent
-   directory of that file is excluded. Git doesn't list excluded
-   directories for performance reasons, so any patterns on contained
-   files have no effect, no matter where they are defined.
+   included again.
Put a backslash ("`\`") in front of the first "`!`" for patterns
that begin with a literal "`!`", for example, "`\!important!.txt`".
+   It is possible to re-include a file if a parent directory of that
+   file is excluded if certain conditions are met. See section NOTES
+   for detail.
 
  - If the pattern ends with a slash, it is removed for the
purpose of the following description, but it would only find
@@ -141,6 +141,21 @@ not tracked by Git remain untracked.
 To stop tracking a file that is currently tracked, use
 'git rm --cached'.
 
+To re-include files or directories when their parent directory is
+excluded, the following conditions must be met:
+
+ - The rules to exclude a directory and re-include a subset back must
+   be in the same .gitignore file.
+
+ - The directory part in the re-include rules must be literal (i.e. no
+   wildcards)
+
+ - The rules to exclude the parent directory must not end with a
+   trailing slash.
+
+ - The rules to exclude the parent directory must have at least one
+   slash.
+
 EXAMPLES
 
 
diff --git a/dir.c b/dir.c
index 3a7630a..a1f711c 100644
--- a/dir.c
+++ b/dir.c
@@ -882,6 +882,25 @@ int match_pathname(const char *pathname, int pathlen,
 */
if (!patternlen && !namelen)
return 1;
+   /*
+* This can happen when we ignore some exclude rules
+* on directories in other to see if negative rules
+* may match. E.g.
+*
+* /abc
+* !/abc/def/ghi
+*
+* The pattern of interest is "/abc". On the first
+* try, we should match path "abc" with this pattern
+* in the "if" statement right above, but the caller
+* ignores it.
+*
+* On the second try with paths within "abc",
+* e.g. "abc/xyz", we come here and try to match it
+* with "/abc".
+*/
+   if (!patternlen && namelen && *name == '/')
+   return 1;
}
 
return fnmatch_icase_mem(pattern, patternlen,
@@ -890,6 +909,48 @@ int match_pathname(const char *pathname, int pathlen,
 }
 
 /*
+ * Return non-zero if pathname is a directory and an ancestor of the
+ * literal path in a (negative) pattern. This is used to keep
+ * descending in "foo" and "foo/bar" when the pattern is
+ * "!foo/bar/.gitignore". "foo/notbar" will not be descended however.
+ */
+static int match_neg_path(const char *pathname, int pathlen, int *dtype,
+ const char *base, int baselen,
+ const char *pattern, int prefix, int patternlen,
+ int flags)
+{
+   assert((flags & EXC_FLAG_NEGATIVE) && !(flags & EXC_FLAG_NODIR));

[PATCH] ls-remote.txt: delete unsupported option

2015-09-28 Thread Nguyễn Thái Ngọc Duy
-u <exec> has never been supported, but it was mentioned since
0a2bb55 (git ls-remote: make usage string match manpage -
2008-11-11). Nobody has complained about it for seven years, it's
probably safe to say nobody cares. So let's remove "-u" in documents
instead of adding code to support it.

While at it, fix --upload-pack syntax too.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 Documentation/git-ls-remote.txt | 3 +--
 builtin/ls-remote.c | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-ls-remote.txt b/Documentation/git-ls-remote.txt
index 2e22915..d510c05 100644
--- a/Documentation/git-ls-remote.txt
+++ b/Documentation/git-ls-remote.txt
@@ -9,7 +9,7 @@ git-ls-remote - List references in a remote repository
 SYNOPSIS
 
 [verse]
-'git ls-remote' [--heads] [--tags]  [-u  | --upload-pack ]
+'git ls-remote' [--heads] [--tags]  [--upload-pack=]
  [--exit-code]  [...]
 
 DESCRIPTION
@@ -29,7 +29,6 @@ OPTIONS
both, references stored in refs/heads and refs/tags are
displayed.
 
--u ::
 --upload-pack=::
Specify the full path of 'git-upload-pack' on the remote
host. This allows listing references from repositories accessed via
diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c
index 4554dbc..5e9d545 100644
--- a/builtin/ls-remote.c
+++ b/builtin/ls-remote.c
@@ -4,7 +4,7 @@
 #include "remote.h"
 
 static const char ls_remote_usage[] =
-"git ls-remote [--heads] [--tags]  [-u  | --upload-pack ]\n"
+"git ls-remote [--heads] [--tags]  [--upload-pack=]\n"
 " [-q | --quiet] [--exit-code] [--get-url] [ 
[...]]";
 
 /*
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 0/6] fix local clone from a linked checkout

2015-09-28 Thread Nguyễn Thái Ngọc Duy
This fixes another embarrassing bug in 4/6 and adds tests to make sure
it works. Changes since v3

-- 8< --
diff --git a/path.c b/path.c
index 32d4ca6..a346134 100644
--- a/path.c
+++ b/path.c
@@ -439,7 +439,7 @@ const char *enter_repo(const char *path, int strict)
path = validated_path;
}
else {
-   const char *gitfile = read_gitfile(used_path);
+   const char *gitfile = read_gitfile(path);
if (gitfile)
path = gitfile;
if (chdir(path))
diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh
index 9393322..9670e8c 100755
--- a/t/t0002-gitfile.sh
+++ b/t/t0002-gitfile.sh
@@ -116,4 +116,46 @@ test_expect_success 'setup_git_dir twice in subdir' '
)
 '
 
+test_expect_success 'enter_repo non-strict mode' '
+   test_create_repo enter_repo &&
+   (
+   cd enter_repo &&
+   test_tick &&
+   test_commit foo &&
+   mv .git .realgit &&
+   echo "gitdir: .realgit" >.git
+   ) &&
+   git ls-remote enter_repo >actual &&
+   cat >expected <<-\EOF &&
+   946e985ab20de757ca5b872b16d64e92ff3803a9HEAD
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo
+   EOF
+   test_cmp expected actual
+'
+
+test_expect_success 'enter_repo linked checkout' '
+   (
+   cd enter_repo &&
+   git worktree add  ../foo refs/tags/foo
+   ) &&
+   git ls-remote foo >actual &&
+   cat >expected <<-\EOF &&
+   946e985ab20de757ca5b872b16d64e92ff3803a9HEAD
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo
+   EOF
+   test_cmp expected actual
+'
+
+test_expect_success 'enter_repo strict mode' '
+   git ls-remote --upload-pack="git upload-pack --strict" foo/.git >actual 
&&
+   cat >expected <<-\EOF &&
+   946e985ab20de757ca5b872b16d64e92ff3803a9HEAD
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo
+   EOF
+   test_cmp expected actual
+'
+
 test_done
-- 8< --

Nguyễn Thái Ngọc Duy (6):
  path.c: delete an extra space
  t0002: add test for enter_repo(), non-strict mode
  enter_repo: avoid duplicating logic, use is_git_directory() instead
  enter_repo: allow .git files in strict mode
  clone: allow --local from a linked checkout
  clone: better error when --reference is a linked checkout

 builtin/clone.c | 13 ++---
 path.c  | 14 +-
 t/t0002-gitfile.sh  | 42 ++
 t/t2025-worktree-add.sh |  5 +
 4 files changed, 66 insertions(+), 8 deletions(-)

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 6/6] clone: better error when --reference is a linked checkout

2015-09-28 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/clone.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 39d4adf..3e14491 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -294,9 +294,14 @@ static int add_one_reference(struct string_list_item 
*item, void *cb_data)
char *ref_git_git = mkpathdup("%s/.git", ref_git);
free(ref_git);
ref_git = ref_git_git;
-   } else if (!is_directory(mkpath("%s/objects", ref_git)))
+   } else if (!is_directory(mkpath("%s/objects", ref_git))) {
+   struct strbuf sb = STRBUF_INIT;
+   if (get_common_dir(&sb, ref_git))
+   die(_("reference repository '%s' as a linked checkout 
is not supported yet."),
+   item->string);
die(_("reference repository '%s' is not a local repository."),
item->string);
+   }
 
if (!access(mkpath("%s/shallow", ref_git), F_OK))
die(_("reference repository '%s' is shallow"), item->string);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 5/6] clone: allow --local from a linked checkout

2015-09-28 Thread Nguyễn Thái Ngọc Duy
Noticed-by: Bjørnar Snoksrud 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/clone.c | 6 --
 t/t2025-worktree-add.sh | 5 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 578da85..39d4adf 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -424,8 +424,10 @@ static void clone_local(const char *src_repo, const char 
*dest_repo)
} else {
struct strbuf src = STRBUF_INIT;
struct strbuf dest = STRBUF_INIT;
-   strbuf_addf(&src, "%s/objects", src_repo);
-   strbuf_addf(&dest, "%s/objects", dest_repo);
+   get_common_dir(&src, src_repo);
+   get_common_dir(&dest, dest_repo);
+   strbuf_addstr(&src, "/objects");
+   strbuf_addstr(&dest, "/objects");
copy_or_link_directory(&src, &dest, src_repo, src.len);
strbuf_release(&src);
strbuf_release(&dest);
diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh
index 8267411..3694174 100755
--- a/t/t2025-worktree-add.sh
+++ b/t/t2025-worktree-add.sh
@@ -193,4 +193,9 @@ test_expect_success '"add" -B/--detach mutually exclusive' '
test_must_fail git worktree add -B poodle --detach bamboo master
 '
 
+test_expect_success 'local clone from linked checkout' '
+   git clone --local here here-clone &&
+   ( cd here-clone && git fsck )
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 3/6] enter_repo: avoid duplicating logic, use is_git_directory() instead

2015-09-28 Thread Nguyễn Thái Ngọc Duy
It matters for linked checkouts where 'refs' directory won't be
available in $GIT_DIR. is_git_directory() knows about $GIT_COMMON_DIR
and can handle this case.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c |  3 +--
 t/t0002-gitfile.sh | 14 ++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/path.c b/path.c
index a536ee3..7340e11 100644
--- a/path.c
+++ b/path.c
@@ -441,8 +441,7 @@ const char *enter_repo(const char *path, int strict)
else if (chdir(path))
return NULL;
 
-   if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 &&
-   validate_headref("HEAD") == 0) {
+   if (is_git_directory(".")) {
set_git_dir(".");
check_repository_format();
return path;
diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh
index 545bfe2..2e709cc 100755
--- a/t/t0002-gitfile.sh
+++ b/t/t0002-gitfile.sh
@@ -134,4 +134,18 @@ test_expect_success 'enter_repo non-strict mode' '
test_cmp expected actual
 '
 
+test_expect_success 'enter_repo linked checkout' '
+   (
+   cd enter_repo &&
+   git worktree add  ../foo refs/tags/foo
+   ) &&
+   git ls-remote foo >actual &&
+   cat >expected <<-\EOF &&
+   946e985ab20de757ca5b872b16d64e92ff3803a9HEAD
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo
+   EOF
+   test_cmp expected actual
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 2/6] t0002: add test for enter_repo(), non-strict mode

2015-09-28 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 t/t0002-gitfile.sh | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh
index 9393322..545bfe2 100755
--- a/t/t0002-gitfile.sh
+++ b/t/t0002-gitfile.sh
@@ -116,4 +116,22 @@ test_expect_success 'setup_git_dir twice in subdir' '
)
 '
 
+test_expect_success 'enter_repo non-strict mode' '
+   test_create_repo enter_repo &&
+   (
+   cd enter_repo &&
+   test_tick &&
+   test_commit foo &&
+   mv .git .realgit &&
+   echo "gitdir: .realgit" >.git
+   ) &&
+   git ls-remote enter_repo >actual &&
+   cat >expected <<-\EOF &&
+   946e985ab20de757ca5b872b16d64e92ff3803a9HEAD
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo
+   EOF
+   test_cmp expected actual
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 4/6] enter_repo: allow .git files in strict mode

2015-09-28 Thread Nguyễn Thái Ngọc Duy
Strict mode is about not guessing where .git is. If the user points to a
.git file, we know exactly where the target .git dir will be. This makes
it possible to serve .git files as repository on the server side.

This may be needed even in local clone case because transport.c code
uses upload-pack for fetching remote refs. But right now the
clone/transport code goes with non-strict.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c |  9 +++--
 t/t0002-gitfile.sh | 10 ++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/path.c b/path.c
index 7340e11..a346134 100644
--- a/path.c
+++ b/path.c
@@ -438,8 +438,13 @@ const char *enter_repo(const char *path, int strict)
return NULL;
path = validated_path;
}
-   else if (chdir(path))
-   return NULL;
+   else {
+   const char *gitfile = read_gitfile(path);
+   if (gitfile)
+   path = gitfile;
+   if (chdir(path))
+   return NULL;
+   }
 
if (is_git_directory(".")) {
set_git_dir(".");
diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh
index 2e709cc..9670e8c 100755
--- a/t/t0002-gitfile.sh
+++ b/t/t0002-gitfile.sh
@@ -148,4 +148,14 @@ test_expect_success 'enter_repo linked checkout' '
test_cmp expected actual
 '
 
+test_expect_success 'enter_repo strict mode' '
+   git ls-remote --upload-pack="git upload-pack --strict" foo/.git >actual 
&&
+   cat >expected <<-\EOF &&
+   946e985ab20de757ca5b872b16d64e92ff3803a9HEAD
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/heads/master
+   946e985ab20de757ca5b872b16d64e92ff3803a9refs/tags/foo
+   EOF
+   test_cmp expected actual
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 1/6] path.c: delete an extra space

2015-09-28 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 path.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/path.c b/path.c
index 95acbaf..a536ee3 100644
--- a/path.c
+++ b/path.c
@@ -431,7 +431,7 @@ const char *enter_repo(const char *path, int strict)
}
if (!suffix[i])
return NULL;
-   gitfile = read_gitfile(used_path) ;
+   gitfile = read_gitfile(used_path);
if (gitfile)
strcpy(used_path, gitfile);
if (chdir(used_path))
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] read-cache: fix untracked cache invalidation when split-index is used

2015-06-07 Thread Nguyễn Thái Ngọc Duy
Before this change, t7063.17 fails. The actual action though happens at
t7063.16 where the entry "two" is added back to index after being
removed at .13. Here we expect a directory invalidation at .16 and
none at .17 where untracked cache is refreshed. But things do not go as
expected when GIT_TEST_SPLIT_INDEX is set.

The different behavior that happens at .16 when split index is used: the
entry "two", when deleted at .13, is simply marked "deleted". When .16
executes, the entry resurfaces from the version in base index. This
happens in merge_base_index() where add_index_entry() is called to add
"two" back from the base index.

This is where the bug comes from. The add_index_entry() is called with
ADD_CACHE_KEEP_CACHE_TREE flag because this version of "two" is not new,
it does not break either cache-tree or untracked cache. The code should
check this flag and not invalidate untracked cache. Without that check, a
second invalidation happens, which violates the test expectation. The fix
is obvious.

Noticed-by: Thomas Gummerer 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 PS. perhaps I should rename ADD_CACHE_KEEP_CACHE_TREE to
 ADD_CACHE_KEEP_CACHE, or add a new flag .._KEEP_UNTRACKED_CACHE to
 avoid confusion.

 On Sun, Jun 7, 2015 at 1:20 PM, Junio C Hamano  wrote:
 > Thomas Gummerer  writes:
 >
 >> When running the test suite with GIT_TEST_SPLIT_INDEX set, tests 17-18
 >> in t7063 fail.  Unset GIT_TEST_SPLIT_INDEX at the beginning of the test,
 >> in order to fix it.
 >
 > That is not fixing but sweeping the problem under the rug, is it?

 Indeed.

 > Duy, untracked-cache is a fairly new toy and I wouldn't be surprised
 > if it has un-thought-out interaction with split-index which is also
 > a fairly new exotic toy.  As both are from you, can you take a look
 > at it?

 Still a bit slow to address, but I marked Thomas' mail for
 investigation when it came.

 > We may want to make it easier to run tests with TEST-SPLIT-INDEX, if
 > we envision that the feature will bring us sufficient benefit and we
 > would eventually want to encourage its use to more people.  As it
 > stands now, only people who are curious enough opt into trying it
 > out by exporting the environment, which would be done by a tiny
 > minority of developers and users.

 Untracked cache, split index and the last part (*) all aim at a
 smaller user audience with large work trees. They are not really what
 a common user needs, but I hope they do have users.
 
 We could make the test suite run twice, a normal run and a
 test-split-index run. But I'm not sure if it really justifies
 doubling test time. We will have to deal with this situation when/if
 pack-v4 is merged because we would want to exercise both v3 and v4 as
 much as possible.

 (*) the last part should keep index read time small enough even if
 the index is very large and avoid lstat() at refresh time with
 the help from watchman.

 read-cache.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index 723d48d..309ccc7 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -999,7 +999,8 @@ static int add_index_entry_with_check(struct index_state 
*istate, struct cache_e
}
pos = -pos-1;
 
-   untracked_cache_add_to_index(istate, ce->name);
+   if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
+   untracked_cache_add_to_index(istate, ce->name);
 
/*
 * Inserting a merged entry ("stage 0") into the index
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] checkout: don't check worktrees when not necessary

2015-06-12 Thread Nguyễn Thái Ngọc Duy
When --patch or pathspecs are passed to git checkout, the working tree
will not be switching branch, so there's no need to check if the branch
that we are running checkout on is already checked out.

Original-patch-by: Spencer Baugh 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/checkout.c | 23 +++
 t/t2025-checkout-to.sh |  8 
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 9b49f0e..e227f64 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -1110,7 +1110,6 @@ static int parse_branchname_arg(int argc, const char 
**argv,
 {
struct tree **source_tree = &opts->source_tree;
const char **new_branch = &opts->new_branch;
-   int force_detach = opts->force_detach;
int argcount = 0;
unsigned char branch_rev[20];
const char *arg;
@@ -1231,17 +1230,6 @@ static int parse_branchname_arg(int argc, const char 
**argv,
else
new->path = NULL; /* not an existing branch */
 
-   if (new->path && !force_detach && !*new_branch) {
-   unsigned char sha1[20];
-   int flag;
-   char *head_ref = resolve_refdup("HEAD", 0, sha1, &flag);
-   if (head_ref &&
-   (!(flag & REF_ISSYMREF) || strcmp(head_ref, new->path)) &&
-   !opts->ignore_other_worktrees)
-   check_linked_checkouts(new);
-   free(head_ref);
-   }
-
new->commit = lookup_commit_reference_gently(rev, 1);
if (!new->commit) {
/* not a commit */
@@ -1321,6 +1309,17 @@ static int checkout_branch(struct checkout_opts *opts,
die(_("Cannot switch branch to a non-commit '%s'"),
new->name);
 
+   if (new->path && !opts->force_detach && !opts->new_branch) {
+   unsigned char sha1[20];
+   int flag;
+   char *head_ref = resolve_refdup("HEAD", 0, sha1, &flag);
+   if (head_ref &&
+   (!(flag & REF_ISSYMREF) || strcmp(head_ref, new->path)) &&
+   !opts->ignore_other_worktrees)
+   check_linked_checkouts(new);
+   free(head_ref);
+   }
+
if (opts->new_worktree)
return prepare_linked_checkout(opts, new);
 
diff --git a/t/t2025-checkout-to.sh b/t/t2025-checkout-to.sh
index f8e4df4..a8d9336 100755
--- a/t/t2025-checkout-to.sh
+++ b/t/t2025-checkout-to.sh
@@ -28,6 +28,14 @@ test_expect_success 'checkout --to refuses to checkout 
locked branch' '
! test -d .git/worktrees/zere
 '
 
+test_expect_success 'checking out paths not complaining about linked 
checkouts' '
+   (
+   cd existing_empty &&
+   echo dirty >>init.t &&
+   git checkout master -- init.t
+   )
+'
+
 test_expect_success 'checkout --to a new worktree' '
git rev-parse HEAD >expect &&
git checkout --detach --to here master &&
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] strbuf: stop out-of-boundary warnings from Coverity

2015-06-17 Thread Nguyễn Thái Ngọc Duy
It usually goes like this

strbuf sb = STRBUF_INIT;
if (!strncmp(sb.buf, "foo", 3))
   printf("%s", sb.buf + 3);

Coverity thinks that printf() can be executed, and because initial
sb.buf only has one character (from strbuf_slopbuf), sb.buf + 3 is out
of bound. What it does not recognize is strbuf_slopbuf[0] is always (*)
zero. We always do some string comparison before jumping ahead to
"sb.buf + 3" and those operations will stop out of bound accesses.

Just make strbuf_slopbuf[] large enough to keep Coverity happy. If it's
happy, we'll have cleaner defect list and better chances of spotting
true defects.

(*) It's not entirely wrong though. Somebody may do sb.buf[0] = 'f'
right after variable declaration and ruin all unused strbuf.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 There are lots of false warnings like this from Coverity. I just
 wanted to kill them off so we can spot more serious problems easier.
 I can't really verify that this patch shuts off those warnings
 because scan.coverity.com policy does not allow forks.

 I had another patch that avoids corrupting strbuf_slopbuf, by putting
 it to .rodata section. The patch is more invasive though, because
 this statement buf.buf[buf.len] = '\0' now has to make sure buf.buf
 is not strbuf_slopbuf. It feels safer, but probably not enough to
 justify the change.

 strbuf.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/strbuf.c b/strbuf.c
index 0d4f4e5..0d7c3cf 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -16,7 +16,12 @@ int starts_with(const char *str, const char *prefix)
  * buf is non NULL and ->buf is NUL terminated even for a freshly
  * initialized strbuf.
  */
+#ifndef __COVERITY__
 char strbuf_slopbuf[1];
+#else
+/* Stop so many incorrect out-of-boundary warnings from Coverity */
+char strbuf_slopbuf[64];
+#endif
 
 void strbuf_init(struct strbuf *sb, size_t hint)
 {
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] apply: fix adding new files on i-t-a entries

2015-06-23 Thread Nguyễn Thái Ngọc Duy
Since d95d728 (diff-lib.c: adjust position of i-t-a entries in diff -
2015-03-16), a normal "git diff" on i-t-a entries would produce a diff
that _adds_ those files, not just adding content to existing and empty
files like before.

This is correct. Unfortunately, applying such a patch on the same
repository that has the same i-t-a entries will fail with message
"already exists in index" because git-apply checks, sees those i-t-a
entries and aborts. git-apply does not realize those are for
bookkeeping only, they do not really exist in the index.

This patch tightens the "exists in index" check, ignoring i-t-a
entries. For fixing the above problem, only the change in
check_to_create() is needed. For other changes,

 - load_current(), reporting "not exists in index" is better than "does
   not match index"

 - check_preimage(), similar to load_current(), but it may also use
   ce_mode from i-t-a entry which is always zero

 - get_current_sha1(), or actually build_fake_ancestor(), we should not
   add i-t-a entries to the temporary index, at least not without also
   adding i-t-a flag back

Since "git add -p" and "git am" use "git apply" underneath, they are
broken too before this patch and fixed now.

Reported-by: Patrick Higgins 
Reported-by: Bjørnar Snoksrud 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 I think I'm opening a can of worms with d95d728. There's nothing
 wrong with that patch per se, but with this issue popping up, I need
 to go over all {cache,index}_name_pos call sites and see what would be
 the sensible behavior when i-t-a entries are involved.
 
 So far blame, rm and checkout-entry and "checkout " are on my
 to-think-or-fix list. But this patch can get in early to fix a real
 regression instead of waiting for one big series. A lot more
 discussions will be had before that series gets in good shape.

 builtin/apply.c   |  8 
 cache.h   |  2 ++
 read-cache.c  | 12 
 t/t2203-add-intent.sh | 10 ++
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 146be97..4f813ac 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -3344,7 +3344,7 @@ static int load_current(struct image *image, struct patch 
*patch)
if (!patch->is_new)
die("BUG: patch to %s is not a creation", patch->old_name);
 
-   pos = cache_name_pos(name, strlen(name));
+   pos = exists_in_cache(name, strlen(name));
if (pos < 0)
return error(_("%s: does not exist in index"), name);
ce = active_cache[pos];
@@ -3497,7 +3497,7 @@ static int check_preimage(struct patch *patch, struct 
cache_entry **ce, struct s
}
 
if (check_index && !previous) {
-   int pos = cache_name_pos(old_name, strlen(old_name));
+   int pos = exists_in_cache(old_name, strlen(old_name));
if (pos < 0) {
if (patch->is_new < 0)
goto is_new;
@@ -3551,7 +3551,7 @@ static int check_to_create(const char *new_name, int 
ok_if_exists)
struct stat nst;
 
if (check_index &&
-   cache_name_pos(new_name, strlen(new_name)) >= 0 &&
+   exists_in_cache(new_name, strlen(new_name)) >= 0 &&
!ok_if_exists)
return EXISTS_IN_INDEX;
if (cached)
@@ -3824,7 +3824,7 @@ static int get_current_sha1(const char *path, unsigned 
char *sha1)
 
if (read_cache() < 0)
return -1;
-   pos = cache_name_pos(path, strlen(path));
+   pos = exists_in_cache(path, strlen(path));
if (pos < 0)
return -1;
hashcpy(sha1, active_cache[pos]->sha1);
diff --git a/cache.h b/cache.h
index 571c98f..6a44cb6 100644
--- a/cache.h
+++ b/cache.h
@@ -341,6 +341,7 @@ extern void free_name_hash(struct index_state *istate);
 #define discard_cache() discard_index(&the_index)
 #define unmerged_cache() unmerged_index(&the_index)
 #define cache_name_pos(name, namelen) 
index_name_pos(&the_index,(name),(namelen))
+#define exists_in_cache(name, namelen) 
exists_in_index(&the_index,(name),(namelen))
 #define add_cache_entry(ce, option) add_index_entry(&the_index, (ce), (option))
 #define rename_cache_entry_at(pos, new_name) rename_index_entry_at(&the_index, 
(pos), (new_name))
 #define remove_cache_entry_at(pos) remove_index_entry_at(&the_index, (pos))
@@ -512,6 +513,7 @@ extern int verify_path(const char *path);
 extern struct cache_entry *index_dir_exists(struct index_state *istate, const 
char *name, int namelen);
 extern struct cache_entry *index_file_exists(struct index_state *istate, const 
char *name, int namelen, int igncase);
 extern int index_name_pos(const struct index_state *, const char *name, int 
namelen);
+ex

[PATCH] blame: remove obsolete comment

2015-06-25 Thread Nguyễn Thái Ngọc Duy
That "someday" in the comment happened two years later in
b65982b (Optimize "diff-index --cached" using cache-tree - 2009-05-20)

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/blame.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index b3e948e..b077bb6 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2371,11 +2371,6 @@ static struct commit *fake_working_tree_commit(struct 
diff_options *opt,
ce->ce_mode = create_ce_mode(mode);
add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 
-   /*
-* We are not going to write this out, so this does not matter
-* right now, but someday we might optimize diff-index --cached
-* with cache-tree information.
-*/
cache_tree_invalidate_path(&the_index, path);
 
return commit;
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] setup: set env $GIT_WORK_TREE when work tree is set, like $GIT_DIR

2015-06-26 Thread Nguyễn Thái Ngọc Duy
In the test case, we run setup_git_dir_gently() the first time to read
$GIT_DIR/config so that we can resolve aliases. We'll enter
setup_discovered_git_dir() and may or may not call set_git_dir() near
the end of the function, depending on whether the detected git dir is
".git" or not. This set_git_dir() will set env var $GIT_DIR.

For normal repo, git dir detected via setup_discovered_git_dir() will be
".git", and set_git_dir() is not called. If .git file is used however,
the git dir can't be ".git" and set_git_dir() is called and $GIT_DIR
set. This is the key of this problem.

If we expand an alias (or autocorrect command names), then
setup_git_dir_gently() is run the second time. If $GIT_DIR is not set in
the first run, we run the same setup_discovered_git_dir() as before.
Nothing to see. If it is, however, we'll enter setup_explicit_git_dir()
this time.

This is where the "fun" is. The legacy behavior is, if $GIT_WORK_TREE is
not set but $GIT_DIR is, cwd is chosen as worktree's top. If you happen
to stand at worktree's top when you do this, all is well. If you are in
a subdir "foo/bar" (real worktree's top is "foo"), this behavior bites
you: your detected worktree is now "foo/bar", but the first run
correctly detected worktree as "foo". You get "internal error: work tree
has already been set" as a result.

Bottom line is, when $GIT_DIR is set, $GIT_WORK_TREE should be set too
unless there's no work tree. But setting $GIT_WORK_TREE inside
set_git_dir() may backfire. We don't know at that point if work tree is
already configured by the caller. So set it when work tree is
detected. It does no harm if $GIT_WORK_TREE is set while $GIT_DIR is
not.

Reported-by: Bjørnar Snoksrud 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 environment.c  |  2 ++
 t/t0002-gitfile.sh | 17 +
 2 files changed, 19 insertions(+)

diff --git a/environment.c b/environment.c
index 61c685b..8f1b249 100644
--- a/environment.c
+++ b/environment.c
@@ -231,6 +231,8 @@ void set_git_work_tree(const char *new_work_tree)
}
git_work_tree_initialized = 1;
work_tree = xstrdup(real_path(new_work_tree));
+   if (setenv(GIT_WORK_TREE_ENVIRONMENT, work_tree, 1))
+   error("Could not set GIT_WORK_TREE to '%s'", work_tree);
 }
 
 const char *get_git_work_tree(void)
diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh
index 37e9396..9393322 100755
--- a/t/t0002-gitfile.sh
+++ b/t/t0002-gitfile.sh
@@ -99,4 +99,21 @@ test_expect_success 'check rev-list' '
test "$SHA" = "$(git rev-list HEAD)"
 '
 
+test_expect_success 'setup_git_dir twice in subdir' '
+   git init sgd &&
+   (
+   cd sgd &&
+   git config alias.lsfi ls-files &&
+   mv .git .realgit &&
+   echo "gitdir: .realgit" >.git &&
+   mkdir subdir &&
+   cd subdir &&
+   >foo &&
+   git add foo &&
+   git lsfi >actual &&
+   echo foo >expected &&
+   test_cmp expected actual
+   )
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] worktree: new place for "git prune --worktrees"

2015-06-29 Thread Nguyễn Thái Ngọc Duy
Commit 23af91d (prune: strategies for linked checkouts - 2014-11-30)
adds "--worktrees" to "git prune" without realizing that "git prune" is
for object database only. This patch moves the same functionality to a
new command "git worktree".

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 In the future I will probably move the big block of text in git-checkout.txt to
 git-worktree.txt and add "git worktree list". But let's start with
 something small and simple before "git prune --worktrees" is shipped
 out.

 .gitignore   |   1 +
 Documentation/git-prune.txt  |   3 -
 Documentation/git-worktree.txt (new) |  48 +
 Makefile |   1 +
 builtin.h|   1 +
 builtin/gc.c |   2 +-
 builtin/prune.c  |  99 --
 builtin/worktree.c (new) | 133 +++
 command-list.txt |   1 +
 git.c|   1 +
 t/t2026-prune-linked-checkouts.sh|  22 +++---
 11 files changed, 198 insertions(+), 114 deletions(-)
 create mode 100644 Documentation/git-worktree.txt
 create mode 100644 builtin/worktree.c

diff --git a/.gitignore b/.gitignore
index 422c538..a685ec1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,6 +171,7 @@
 /git-verify-tag
 /git-web--browse
 /git-whatchanged
+/git-worktree
 /git-write-tree
 /git-core-*/?*
 /gitweb/GITWEB-BUILD-OPTIONS
diff --git a/Documentation/git-prune.txt b/Documentation/git-prune.txt
index 1cf3bed..7a493c8 100644
--- a/Documentation/git-prune.txt
+++ b/Documentation/git-prune.txt
@@ -48,9 +48,6 @@ OPTIONS
 --expire <time>::
Only expire loose objects older than <time>.
 
---worktrees::
-   Prune dead working tree information in $GIT_DIR/worktrees.
-
 ...::
In addition to objects
reachable from any of our references, keep objects
diff --git a/Documentation/git-worktree.txt b/Documentation/git-worktree.txt
new file mode 100644
index 000..b9072e0
--- /dev/null
+++ b/Documentation/git-worktree.txt
@@ -0,0 +1,48 @@
+git-worktree(1)
+===
+
+NAME
+
+git-worktree - Manage multiple worktrees
+
+
+SYNOPSIS
+
+[verse]
+'git worktree prune' [-n] [-v] [--expire ]
+
+DESCRIPTION
+---
+
+Manage multiple worktrees attached to the same repository. These are
+created by the command `git checkout --to`.
+
+COMMANDS
+
+prune::
+
+Prune working tree information in $GIT_DIR/worktrees.
+
+OPTIONS
+---
+
+-n::
+--dry-run::
+   Do not remove anything; just report what it would
+   remove.
+
+-v::
+--verbose::
+   Report all removals.
+
+--expire <time>::
+   Only expire unused worktrees older than <time>.
+
+SEE ALSO
+
+
+linkgit:git-checkout[1]
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index 149f1c7..89785f6 100644
--- a/Makefile
+++ b/Makefile
@@ -909,6 +909,7 @@ BUILTIN_OBJS += builtin/var.o
 BUILTIN_OBJS += builtin/verify-commit.o
 BUILTIN_OBJS += builtin/verify-pack.o
 BUILTIN_OBJS += builtin/verify-tag.o
+BUILTIN_OBJS += builtin/worktree.o
 BUILTIN_OBJS += builtin/write-tree.o
 
 GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
diff --git a/builtin.h b/builtin.h
index b87df70..9e04f97 100644
--- a/builtin.h
+++ b/builtin.h
@@ -133,6 +133,7 @@ extern int cmd_verify_commit(int argc, const char **argv, 
const char *prefix);
 extern int cmd_verify_tag(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_whatchanged(int argc, const char **argv, const char *prefix);
+extern int cmd_worktree(int argc, const char **argv, const char *prefix);
 extern int cmd_write_tree(int argc, const char **argv, const char *prefix);
 extern int cmd_verify_pack(int argc, const char **argv, const char *prefix);
 extern int cmd_show_ref(int argc, const char **argv, const char *prefix);
diff --git a/builtin/gc.c b/builtin/gc.c
index 36fe333..4957c39 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -293,7 +293,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
argv_array_pushl(&reflog, "reflog", "expire", "--all", NULL);
argv_array_pushl(&repack, "repack", "-d", "-l", NULL);
argv_array_pushl(&prune, "prune", "--expire", NULL);
-   argv_array_pushl(&prune_worktrees, "prune", "--worktrees", "--expire", 
NULL);
+   argv_array_pushl(&prune_worktrees, "worktree", "prune", "--expire", 
NULL);
argv_array_pushl(&rerere, "rerere", "gc", NULL);
 
gc_config();
diff --git a/builtin/prune.c b/builtin/prune.c
index 0c73246..10b03d3 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -6,7 +6,6 @@
 #include "re

[PATCH] Avoid the need of "--" when wildcard pathspec is used

2015-06-30 Thread Nguyễn Thái Ngọc Duy
When "--" is lacking from the command line and a command can take both
revs and paths, the idea is if an argument can be seen as both an
extended SHA-1 and a path, then "--" is required or git refuses to
continue. It's currently implemented as:

(1) if an argument is rev, then it must not exist in worktree

(2) else, it must exist in worktree

(3) else, "--" is required.

These rules work for literal paths, but when wildcard pathspec is
involved, it always requires the user to add "--" because it fails (2)
and (1) is never met.

This patch prefers wildcard over extended sha-1 syntax that includes
wildcards, so that we can specify wildcards to select paths in worktree
without "--" most of the time. If wildcards are found in extended sha-1
syntax, then "--" is _always_ required.

Because ref names don't allow wildcards, you can only hit that
"needs '--'" new rule if you use ":/", "^{/}" or
":wildcards/in/literal/paths". So it's really rare.

The rules after this patch become:

(1) if an arg is a rev, then it must neither exist in worktree nor be
    a wild card

(2) else, it either exists in worktree or is a wild card

(3) else, "--" is required.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 setup.c   |  2 ++
 t/t2019-checkout-ambiguous-ref.sh | 26 ++
 2 files changed, 28 insertions(+)

diff --git a/setup.c b/setup.c
index 82c0cc2..f7cb93b 100644
--- a/setup.c
+++ b/setup.c
@@ -143,6 +143,8 @@ int check_filename(const char *prefix, const char *arg)
name = arg + 2;
} else if (!no_wildcard(arg))
return 1;
+   else if (!no_wildcard(arg))
+   return 1;
else if (prefix)
name = prefix_filename(prefix, strlen(prefix), arg);
else
diff --git a/t/t2019-checkout-ambiguous-ref.sh 
b/t/t2019-checkout-ambiguous-ref.sh
index b99d519..e5ceba3 100755
--- a/t/t2019-checkout-ambiguous-ref.sh
+++ b/t/t2019-checkout-ambiguous-ref.sh
@@ -56,4 +56,30 @@ test_expect_success VAGUENESS_SUCCESS 'checkout reports 
switch to branch' '
test_i18ngrep ! "^HEAD is now at" stderr
 '
 
+test_expect_success 'wildcard ambiguation' '
+   git init ambi &&
+   (
+   cd ambi &&
+   echo a >a.c &&
+   git add a.c &&
+   echo b >a.c &&
+   git checkout "*.c" &&
+   echo a >expect &&
+   test_cmp expect a.c
+   )
+'
+
+test_expect_success 'wildcard ambiguation (2)' '
+   git init ambi2 &&
+   (
+   cd ambi2 &&
+   echo a >"*.c" &&
+   git add . &&
+   test_must_fail git show :"*.c" &&
+   git show :"*.c" -- >actual &&
+   echo a >expect &&
+   test_cmp expect actual
+   )
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Add tests for wildcard "path vs ref" disambiguation

2015-07-01 Thread Nguyễn Thái Ngọc Duy
Commit 28fcc0b (pathspec: avoid the need of "--" when wildcard is used -
2015-05-02) changes how the disambiguation rules work. This patch adds
some tests to demonstrate, basically, if wildcard characters are in an
argument:

 - if the argument is valid extended sha-1 syntax, "--" must be used
 - otherwise the argument is considered a path, even without "--"

And wildcard can appear in extended sha-1 syntax, either as part of
regex in ":/" or as the literal path in ":". The latter
case is less likely to happen in real world. But if you do ":/" a lot,
you may need to type "--" more.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 v2 just adds tests as the code is already in 'master' for some time.

 t/t2019-checkout-ambiguous-ref.sh | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/t/t2019-checkout-ambiguous-ref.sh 
b/t/t2019-checkout-ambiguous-ref.sh
index b99d519..e5ceba3 100755
--- a/t/t2019-checkout-ambiguous-ref.sh
+++ b/t/t2019-checkout-ambiguous-ref.sh
@@ -56,4 +56,30 @@ test_expect_success VAGUENESS_SUCCESS 'checkout reports 
switch to branch' '
test_i18ngrep ! "^HEAD is now at" stderr
 '
 
+test_expect_success 'wildcard ambiguation, paths win' '
+   git init ambi &&
+   (
+   cd ambi &&
+   echo a >a.c &&
+   git add a.c &&
+   echo b >a.c &&
+   git checkout "*.c" &&
+   echo a >expect &&
+   test_cmp expect a.c
+   )
+'
+
+test_expect_success 'wildcard ambiguation, refs lose' '
+   git init ambi2 &&
+   (
+   cd ambi2 &&
+   echo a >"*.c" &&
+   git add . &&
+   test_must_fail git show :"*.c" &&
+   git show :"*.c" -- >actual &&
+   echo a >expect &&
+   test_cmp expect actual
+   )
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] grep: use regcomp() for icase search with non-ascii patterns

2015-07-06 Thread Nguyễn Thái Ngọc Duy
Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index b58c7c6..48db15a 100644
--- a/grep.c
+++ b/grep.c
@@ -378,7 +378,7 @@ static void free_pcre_regexp(struct grep_pat *p)
 }
 #endif /* !USE_LIBPCRE */
 
-static int is_fixed(const char *s, size_t len)
+static int is_fixed(const char *s, size_t len, int ignore_icase)
 {
size_t i;
 
@@ -391,6 +391,13 @@ static int is_fixed(const char *s, size_t len)
for (i = 0; i < len; i++) {
if (is_regex_special(s[i]))
return 0;
+   /*
+* The builtin substring search can only deal with case
+* insensitivity in ascii range. If there is something outside
+* of that range, fall back to regcomp.
+*/
+   if (ignore_icase && (unsigned char)s[i] >= 128)
+   return 0;
}
 
return 1;
@@ -398,18 +405,19 @@ static int is_fixed(const char *s, size_t len)
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
+   int ignore_icase = opt->regflags & REG_ICASE || p->ignore_case;
int err;
 
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
 
-   if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+   if (opt->fixed || is_fixed(p->pattern, p->patternlen, ignore_icase))
p->fixed = 1;
else
p->fixed = 0;
 
if (p->fixed) {
-   if (opt->regflags & REG_ICASE || p->ignore_case)
+   if (ignore_icase)
p->kws = kwsalloc(tolower_trans_tbl);
else
p->kws = kwsalloc(NULL);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 4/9] grep/icase: avoid kwsset when -F is specified

2015-07-08 Thread Nguyễn Thái Ngọc Duy
Similar to the previous commit, we can't use kws on icase search
outside ascii range. But we can't simply pass the pattern to
regcomp/pcre like the previous commit because it may contain regex
special characters, so we need to quote the regex first.

To avoid misquote traps that could lead to undefined behavior, we
always stick to basic regex engine in this case. We don't need fancy
features for grepping a literal string anyway.

basic_regex_quote_buf() assumes that if the pattern is in a multibyte
encoding, ascii chars must be unambiguously encoded as single
bytes. This is true at least for UTF-8. For others, let's wait until
people yell up. Chances are nobody uses multibyte, non utf-8 charsets
any more..

Helped-by: René Scharfe 
Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c  | 25 -
 quote.c | 37 +
 quote.h |  1 +
 t/t7812-grep-icase-non-ascii.sh | 26 ++
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index d795b0e..8fce54f 100644
--- a/grep.c
+++ b/grep.c
@@ -5,6 +5,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "commit.h"
+#include "quote.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len)
return 1;
 }
 
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int err;
+
+   basic_regex_quote_buf(&sb, p->pattern);
+   err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED);
+   if (opt->debug)
+   fprintf(stderr, "fixed%s\n", sb.buf);
+   strbuf_release(&sb);
+   if (err) {
+   char errbuf[1024];
+   regerror(err, &p->regexp, errbuf, 1024);
+   regfree(&p->regexp);
+   compile_regexp_failed(p, errbuf);
+   }
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
int icase_non_ascii;
@@ -411,7 +430,11 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else if (opt->fixed) {
-   p->fixed = 1;
+   p->fixed = !icase_non_ascii;
+   if (!p->fixed) {
+   compile_fixed_regexp(p, opt);
+   return;
+   }
} else
p->fixed = 0;
 
diff --git a/quote.c b/quote.c
index 7920e18..43a8057 100644
--- a/quote.c
+++ b/quote.c
@@ -439,3 +439,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src)
}
strbuf_addch(sb, '"');
 }
+
+void basic_regex_quote_buf(struct strbuf *sb, const char *src)
+{
+   char c;
+
+   if (*src == '^') {
+   /* only beginning '^' is special and needs quoting */
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, *src++);
+   }
+   if (*src == '*')
+   /* beginning '*' is not special, no quoting */
+   strbuf_addch(sb, *src++);
+
+   while ((c = *src++)) {
+   switch (c) {
+   case '[':
+   case '.':
+   case '\\':
+   case '*':
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   case '$':
+   /* only the end '$' is special and needs quoting */
+   if (*src == '\0')
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   default:
+   strbuf_addch(sb, c);
+   break;
+   }
+   }
+}
diff --git a/quote.h b/quote.h
index 99e04d3..362d315 100644
--- a/quote.h
+++ b/quote.h
@@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char 
*prefix,
 extern void perl_quote_buf(struct strbuf *sb, const char *src);
 extern void python_quote_buf(struct strbuf *sb, const char *src);
 extern void tcl_quote_buf(struct strbuf *sb, const char *src);
+extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
 
 #endif
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index 63a2630..c945589 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -16,4 +16,30 @@ test_expect_success GETTEXT_LOCALE 'grep litera

[PATCH v2 0/9] icase match on non-ascii

2015-07-08 Thread Nguyễn Thái Ngọc Duy
This series fixes case insensitive matching on non-ascii charsets.
"grep -i", "grep -F -i", "grep --pcre-regexp -i" and "log -i -S" are
fixed.

Side note, I almost added the third has_non_ascii() function. Maybe we
should consider merging the two existing has_non_ascii() functions
back, or rename one to something else.

Patch 5 is "funny". The patch itself is in iso-8859-1, but my name in
the commit message is in utf-8. Let's see how it goes on the apply
side. We may need to fix something in this area..

Nguyễn Thái Ngọc Duy (9):
  grep: allow -F -i combination
  grep: break down an "if" stmt in preparation for next changes
  grep/icase: avoid kwsset on literal non-ascii strings
  grep/icase: avoid kwsset when -F is specified
  grep/pcre: prepare locale-dependent tables for icase matching
  gettext: add is_utf8_locale()
  grep/pcre: support utf-8
  diffcore-pickaxe: "share" regex error handling code
  diffcore-pickaxe: support case insensitive match on non-ascii

 builtin/grep.c   |  2 +-
 diffcore-pickaxe.c   | 27 ++-
 gettext.c|  7 +++-
 gettext.h|  5 +++
 grep.c   | 43 ---
 grep.h   |  1 +
 quote.c  | 37 
 quote.h  |  1 +
 t/t7812-grep-icase-non-ascii.sh (new +x) | 58 
 t/t7813-grep-icase-non-ascii.sh (new +x) | 19 +++
 10 files changed, 186 insertions(+), 14 deletions(-)
 create mode 100755 t/t7812-grep-icase-non-ascii.sh
 create mode 100755 t/t7813-grep-icase-non-ascii.sh

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/9] grep/icase: avoid kwsset on literal non-ascii strings

2015-07-08 Thread Nguyễn Thái Ngọc Duy
When we detect the pattern is just a literal string, we avoid heavy
regex engine and use fast substring search implemented in kwsset.c.
But kws uses git-ctype which is locale-independent so it does not know
how to fold case properly outside ascii range. Let regcomp or pcre
take care of this case instead. Slower, but accurate.

Helped-by: René Scharfe 
Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c   |  7 ++-
 t/t7812-grep-icase-non-ascii.sh (new +x) | 19 +++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100755 t/t7812-grep-icase-non-ascii.sh

diff --git a/grep.c b/grep.c
index bd32f66..d795b0e 100644
--- a/grep.c
+++ b/grep.c
@@ -4,6 +4,7 @@
 #include "xdiff-interface.h"
 #include "diff.h"
 #include "diffcore.h"
+#include "commit.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -398,12 +399,16 @@ static int is_fixed(const char *s, size_t len)
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
+   int icase_non_ascii;
int err;
 
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
+   icase_non_ascii =
+   (opt->regflags & REG_ICASE || p->ignore_case) &&
+   has_non_ascii(p->pattern);
 
-   if (is_fixed(p->pattern, p->patternlen))
+   if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else if (opt->fixed) {
p->fixed = 1;
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
new file mode 100755
index 000..63a2630
--- /dev/null
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_LOCALE 'setup' '
+   printf "TILRAUN: Halló Heimur!" >file &&
+   git add file &&
+   LC_ALL="$is_IS_locale" &&
+   export LC_ALL
+'
+
+test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' '
+   git grep -i "TILRAUN: Halló Heimur!" &&
+   git grep -i "TILRAUN: HALLÓ HEIMUR!"
+'
+
+test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 9/9] diffcore-pickaxe: support case insensitive match on non-ascii

2015-07-08 Thread Nguyễn Thái Ngọc Duy
Similar to the "grep -F -i" case, we can't use kws on icase search
outside ascii range, so we quote the string and pass it to regcomp
as a basic regexp and let regex engine deal with case sensitivity.

The new test is put in t7812 instead of t4209-log-pickaxe because
lib-gettext.sh might cause problems elsewhere.

Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 diffcore-pickaxe.c  | 11 +++
 t/t7812-grep-icase-non-ascii.sh |  7 +++
 2 files changed, 18 insertions(+)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 7a718fc..6946c15 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -7,6 +7,8 @@
 #include "diffcore.h"
 #include "xdiff-interface.h"
 #include "kwset.h"
+#include "commit.h"
+#include "quote.h"
 
 typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
  struct diff_options *o,
@@ -212,6 +214,15 @@ void diffcore_pickaxe(struct diff_options *o)
cflags |= REG_ICASE;
err = regcomp(&regex, needle, cflags);
regexp = &regex;
+   } else if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) &&
+  has_non_ascii(needle)) {
+   struct strbuf sb = STRBUF_INIT;
+   int cflags = REG_NEWLINE | REG_ICASE;
+
+   basic_regex_quote_buf(&sb, needle);
+   err = regcomp(&regex, sb.buf, cflags);
+   strbuf_release(&sb);
+   regexp = &regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
   ? tolower_trans_tbl : NULL);
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index 1306cc0..20a39d0 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -48,4 +48,11 @@ test_expect_success GETTEXT_LOCALE 'grep string with regex, 
with -F' '
test_cmp expect2 debug2
 '
 
+test_expect_success GETTEXT_LOCALE 'pickaxe -i on non-ascii' '
+   git commit -m first &&
+   git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual &&
+   echo first >expected &&
+   test_cmp expected actual
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/9] grep: break down an "if" stmt in preparation for next changes

2015-07-08 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index b58c7c6..bd32f66 100644
--- a/grep.c
+++ b/grep.c
@@ -403,9 +403,11 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
 
-   if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+   if (is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
-   else
+   else if (opt->fixed) {
+   p->fixed = 1;
+   } else
p->fixed = 0;
 
if (p->fixed) {
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 6/9] gettext: add is_utf8_locale()

2015-07-08 Thread Nguyễn Thái Ngọc Duy
This function returns true if git is running under a UTF-8
locale. pcre in the next patch will need this.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 gettext.c | 7 ++-
 gettext.h | 5 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/gettext.c b/gettext.c
index 7378ba2..601bc80 100644
--- a/gettext.c
+++ b/gettext.c
@@ -166,12 +166,17 @@ void git_setup_gettext(void)
textdomain("git");
 }
 
+int is_utf8_locale(void)
+{
+   return !strcmp(charset, "UTF-8");
+}
+
 /* return the number of columns of string 's' in current locale */
 int gettext_width(const char *s)
 {
static int is_utf8 = -1;
if (is_utf8 == -1)
-   is_utf8 = !strcmp(charset, "UTF-8");
+   is_utf8 = is_utf8_locale();
 
return is_utf8 ? utf8_strwidth(s) : strlen(s);
 }
diff --git a/gettext.h b/gettext.h
index 33696a4..5e733d4 100644
--- a/gettext.h
+++ b/gettext.h
@@ -31,6 +31,7 @@
 #ifndef NO_GETTEXT
 extern void git_setup_gettext(void);
 extern int gettext_width(const char *s);
+extern int is_utf8_locale(void);
 #else
 static inline void git_setup_gettext(void)
 {
@@ -39,6 +40,10 @@ static inline int gettext_width(const char *s)
 {
return strlen(s);
 }
+static inline int is_utf8_locale(void)
+{
+   return 0;
+}
 #endif
 
 #ifdef GETTEXT_POISON
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 7/9] grep/pcre: support utf-8

2015-07-08 Thread Nguyễn Thái Ngọc Duy
In the previous change in this function, we add locale support for
single-byte encodings only. It looks like pcre only supports utf-* as
multibyte encodings, the others are left in the cold (which is
fine). We need to enable PCRE_UTF8 so pcre can parse the string
correctly before folding case.

Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c  | 2 ++
 t/t7812-grep-icase-non-ascii.sh | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/grep.c b/grep.c
index c79aa70..7c9e437 100644
--- a/grep.c
+++ b/grep.c
@@ -326,6 +326,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
 
if (opt->ignore_case) {
p->pcre_tables = pcre_maketables();
+   if (is_utf8_locale())
+   options |= PCRE_UTF8;
options |= PCRE_CASELESS;
}
 
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index c945589..1306cc0 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -16,6 +16,12 @@ test_expect_success GETTEXT_LOCALE 'grep literal string, no 
-F' '
git grep -i "TILRAUN: HALLÓ HEIMUR!"
 '
 
+test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre string' '
+   git grep --perl-regexp "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
+'
+
 test_expect_success GETTEXT_LOCALE 'grep literal string, with -F' '
git grep --debug -i -F "TILRAUN: Halló Heimur!"  2>&1 >/dev/null |
 grep fixed >debug1 &&
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/9] grep: allow -F -i combination

2015-07-08 Thread Nguyễn Thái Ngọc Duy
-F means "no regex", not "case sensitive" so it should not override -i

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/grep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index d04f440..2d392e9 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -806,7 +806,7 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
 
if (!opt.pattern_list)
die(_("no pattern given."));
-   if (!opt.fixed && opt.ignore_case)
+   if (opt.ignore_case)
opt.regflags |= REG_ICASE;
 
compile_grep_patterns(&opt);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 8/9] diffcore-pickaxe: "share" regex error handling code

2015-07-08 Thread Nguyễn Thái Ngọc Duy
There's another regcomp code block coming in this function. By moving
the error handling code out of this block, we don't have to add the
same error handling code in the new block.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 diffcore-pickaxe.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 185f86b..7a718fc 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o)
int opts = o->pickaxe_opts;
regex_t regex, *regexp = NULL;
kwset_t kws = NULL;
+   int err = 0;
 
if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
-   int err;
int cflags = REG_EXTENDED | REG_NEWLINE;
if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
cflags |= REG_ICASE;
err = regcomp(&regex, needle, cflags);
-   if (err) {
-   /* The POSIX.2 people are surely sick */
-   char errbuf[1024];
-   regerror(err, &regex, errbuf, 1024);
-   regfree(&regex);
-   die("invalid regex: %s", errbuf);
-   }
regexp = &regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
@@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o)
kwsincr(kws, needle, strlen(needle));
kwsprep(kws);
}
+   if (err) {
+   /* The POSIX.2 people are surely sick */
+   char errbuf[1024];
+   regerror(err, &regex, errbuf, 1024);
+   regfree(&regex);
+   die("invalid regex: %s", errbuf);
+   }
 
/* Might want to warn when both S and G are on; I don't care... */
pickaxe(&diff_queued_diff, o, regexp, kws,
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 5/9] grep/pcre: prepare locale-dependent tables for icase matching

2015-07-08 Thread Nguyễn Thái Ngọc Duy
The default tables are usually built with C locale and only suitable
for LANG=C or similar.  This should make case insensitive search work
correctly for all single-byte charsets.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c   |  7 +--
 grep.h   |  1 +
 t/t7813-grep-icase-non-ascii.sh (new +x) | 19 +++
 3 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100755 t/t7813-grep-icase-non-ascii.sh

diff --git a/grep.c b/grep.c
index 8fce54f..c79aa70 100644
--- a/grep.c
+++ b/grep.c
@@ -324,11 +324,13 @@ static void compile_pcre_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
int erroffset;
int options = PCRE_MULTILINE;
 
-   if (opt->ignore_case)
+   if (opt->ignore_case) {
+   p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
+   }
 
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
-   NULL);
+ p->pcre_tables);
if (!p->pcre_regexp)
compile_regexp_failed(p, error);
 
@@ -362,6 +364,7 @@ static void free_pcre_regexp(struct grep_pat *p)
 {
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
+   pcre_free((void *)p->pcre_tables);
 }
 #else /* !USE_LIBPCRE */
 static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
diff --git a/grep.h b/grep.h
index 95f197a..cee4357 100644
--- a/grep.h
+++ b/grep.h
@@ -48,6 +48,7 @@ struct grep_pat {
regex_t regexp;
pcre *pcre_regexp;
pcre_extra *pcre_extra_info;
+   const unsigned char *pcre_tables;
kwset_t kws;
unsigned fixed:1;
unsigned ignore_case:1;
diff --git a/t/t7813-grep-icase-non-ascii.sh b/t/t7813-grep-icase-non-ascii.sh
new file mode 100755
index 000..efef7fb
--- /dev/null
+++ b/t/t7813-grep-icase-non-ascii.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_ISO_LOCALE 'setup' '
+   printf "TILRAUN: Halló Heimur!" >file &&
+   git add file &&
+   LC_ALL="$is_IS_iso_locale" &&
+   export LC_ALL
+'
+
+test_expect_success GETTEXT_ISO_LOCALE,LIBPCRE 'grep pcre string' '
+   git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
+'
+
+test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] pack-objects: rename the field "type" to "real_type"

2015-07-08 Thread Nguyễn Thái Ngọc Duy
This is to avoid the too generic name "type" and harmonize with the
naming in index-pack. There's a subtle difference though: real_type in
index-pack is what the upper level sees, no delta types (after delta
resolution). But real_type in pack-objects is the type to be written in
the pack, delta types are fine (it's actually markers for reused deltas).

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/pack-objects.c | 36 ++--
 pack-bitmap-write.c|  6 +++---
 pack-objects.h |  2 +-
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 80fe8c7..e03bf3e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -244,7 +244,7 @@ static unsigned long write_no_reuse_object(struct sha1file 
*f, struct object_ent
struct git_istream *st = NULL;
 
if (!usable_delta) {
-   if (entry->type == OBJ_BLOB &&
+   if (entry->real_type == OBJ_BLOB &&
entry->size > big_file_threshold &&
(st = open_istream(entry->idx.sha1, &type, &size, NULL)) != 
NULL)
buf = NULL;
@@ -348,7 +348,7 @@ static unsigned long write_reuse_object(struct sha1file *f, 
struct object_entry
struct pack_window *w_curs = NULL;
struct revindex_entry *revidx;
off_t offset;
-   enum object_type type = entry->type;
+   enum object_type type = entry->real_type;
unsigned long datalen;
unsigned char header[10], dheader[10];
unsigned hdrlen;
@@ -452,11 +452,11 @@ static unsigned long write_object(struct sha1file *f,
to_reuse = 0;   /* explicit */
else if (!entry->in_pack)
to_reuse = 0;   /* can't reuse what we don't have */
-   else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA)
+   else if (entry->real_type == OBJ_REF_DELTA || entry->real_type == 
OBJ_OFS_DELTA)
/* check_object() decided it for us ... */
to_reuse = usable_delta;
/* ... but pack split may override that */
-   else if (entry->type != entry->in_pack_type)
+   else if (entry->real_type != entry->in_pack_type)
to_reuse = 0;   /* pack has delta which is unusable */
else if (entry->delta)
to_reuse = 0;   /* we want to pack afresh */
@@ -676,8 +676,8 @@ static struct object_entry **compute_write_order(void)
 * And then all remaining commits and tags.
 */
for (i = last_untagged; i < to_pack.nr_objects; i++) {
-   if (objects[i].type != OBJ_COMMIT &&
-   objects[i].type != OBJ_TAG)
+   if (objects[i].real_type != OBJ_COMMIT &&
+   objects[i].real_type != OBJ_TAG)
continue;
add_to_write_order(wo, &wo_end, &objects[i]);
}
@@ -686,7 +686,7 @@ static struct object_entry **compute_write_order(void)
 * And then all the trees.
 */
for (i = last_untagged; i < to_pack.nr_objects; i++) {
-   if (objects[i].type != OBJ_TREE)
+   if (objects[i].real_type != OBJ_TREE)
continue;
add_to_write_order(wo, &wo_end, &objects[i]);
}
@@ -994,7 +994,7 @@ static void create_object_entry(const unsigned char *sha1,
entry = packlist_alloc(&to_pack, sha1, index_pos);
entry->hash = hash;
if (type)
-   entry->type = type;
+   entry->real_type = type;
if (exclude)
entry->preferred_base = 1;
else
@@ -1355,9 +1355,9 @@ static void check_object(struct object_entry *entry)
switch (entry->in_pack_type) {
default:
/* Not a delta hence we've already got all we need. */
-   entry->type = entry->in_pack_type;
+   entry->real_type = entry->in_pack_type;
entry->in_pack_header_size = used;
-   if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB)
+   if (entry->real_type < OBJ_COMMIT || entry->real_type > 
OBJ_BLOB)
goto give_up;
unuse_pack(&w_curs);
return;
@@ -1411,7 +1411,7 @@ static void check_object(struct object_entry *entry)
 * deltify other objects against, in order to avoid
 * circular deltas.
 */
-   entry->type = entry->in_pack_type;
+   entry->real_type = entry->in_pack_type;
   

[PATCH 1/2] index-pack: rename the field "type" to "in_pack_type"

2015-07-08 Thread Nguyễn Thái Ngọc Duy
We have two types in this code: in-pack and canonical. "in_pack_type"
makes it clearer than plain "type".

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/index-pack.c | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 48fa472..797e571 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -19,7 +19,7 @@ struct object_entry {
struct pack_idx_entry idx;
unsigned long size;
unsigned char hdr_size;
-   signed char type;
+   signed char in_pack_type;
signed char real_type;
 };
 
@@ -493,7 +493,7 @@ static void *unpack_raw_entry(struct object_entry *obj,
p = fill(1);
c = *p;
use(1);
-   obj->type = (c >> 4) & 7;
+   obj->in_pack_type = (c >> 4) & 7;
size = (c & 15);
shift = 4;
while (c & 0x80) {
@@ -505,7 +505,7 @@ static void *unpack_raw_entry(struct object_entry *obj,
}
obj->size = size;
 
-   switch (obj->type) {
+   switch (obj->in_pack_type) {
case OBJ_REF_DELTA:
hashcpy(ref_sha1, fill(20));
use(20);
@@ -534,11 +534,11 @@ static void *unpack_raw_entry(struct object_entry *obj,
case OBJ_TAG:
break;
default:
-   bad_object(obj->idx.offset, _("unknown object type %d"), 
obj->type);
+   bad_object(obj->idx.offset, _("unknown object type %d"), 
obj->in_pack_type);
}
obj->hdr_size = consumed_bytes - obj->idx.offset;
 
-   data = unpack_entry_data(obj->idx.offset, obj->size, obj->type, sha1);
+   data = unpack_entry_data(obj->idx.offset, obj->size, obj->in_pack_type, 
sha1);
obj->idx.crc32 = input_crc32;
return data;
 }
@@ -631,7 +631,7 @@ static int find_ofs_delta(const off_t offset, enum 
object_type type)
int cmp;
 
cmp = compare_ofs_delta_bases(offset, delta->offset,
- type, 
objects[delta->obj_no].type);
+ type, 
objects[delta->obj_no].in_pack_type);
if (!cmp)
return next;
if (cmp < 0) {
@@ -685,7 +685,7 @@ static int find_ref_delta(const unsigned char *sha1, enum 
object_type type)
int cmp;
 
cmp = compare_ref_delta_bases(sha1, delta->sha1,
- type, 
objects[delta->obj_no].type);
+ type, 
objects[delta->obj_no].in_pack_type);
if (!cmp)
return next;
if (cmp < 0) {
@@ -759,7 +759,7 @@ static int check_collison(struct object_entry *entry)
enum object_type type;
unsigned long size;
 
-   if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB)
+   if (entry->size <= big_file_threshold || entry->in_pack_type != 
OBJ_BLOB)
return -1;
 
memset(&data, 0, sizeof(data));
@@ -767,7 +767,7 @@ static int check_collison(struct object_entry *entry)
data.st = open_istream(entry->idx.sha1, &type, &size, NULL);
if (!data.st)
return -1;
-   if (size != entry->size || type != entry->type)
+   if (size != entry->size || type != entry->in_pack_type)
die(_("SHA1 COLLISION FOUND WITH %s !"),
sha1_to_hex(entry->idx.sha1));
unpack_data(entry, compare_objects, &data);
@@ -891,7 +891,7 @@ static void *get_base_data(struct base_data *c)
struct base_data **delta = NULL;
int delta_nr = 0, delta_alloc = 0;
 
-   while (is_delta_type(c->obj->type) && !c->data) {
+   while (is_delta_type(c->obj->in_pack_type) && !c->data) {
ALLOC_GROW(delta, delta_nr + 1, delta_alloc);
delta[delta_nr++] = c;
c = c->base;
@@ -1085,7 +1085,7 @@ static void *threaded_second_pass(void *data)
counter_unlock();
work_lock();
while (nr_dispatched < nr_objects &&
-  is_delta_type(objects[nr_dispatched].type))
+  is_delta_type(objects[nr_dispatched].in_pack_type))
nr_dispatched++;
if (nr_dispatched >= nr_objects) {
work_unlock();
@@ -1121,12 +1121,12 @@ static void parse_pack_objects(unsigned char *sha1)
struct object_entry *obj = &objects[i];
void *data = unpack_raw_entry(obj, &ofs_delta->offset,

[PATCH v3 0/9] icase match on non-ascii

2015-07-14 Thread Nguyễn Thái Ngọc Duy
Compared to v2

 - fix grep/pcre on utf-8 even when the match is case sensitive
 - peek at $LANG and friends anyway for utf-8 detection even
   when gettext support is not built in git
 - s/quote we quote/so we quote/ in 9/9
 - rename t7813, s/non-ascii/iso/

Nguyễn Thái Ngọc Duy (9):
  grep: allow -F -i combination
  grep: break down an "if" stmt in preparation for next changes
  grep/icase: avoid kwsset on literal non-ascii strings
  grep/icase: avoid kwsset when -F is specified
  grep/pcre: prepare locale-dependent tables for icase matching
  gettext: add is_utf8_locale()
  grep/pcre: support utf-8
  diffcore-pickaxe: "share" regex error handling code
  diffcore-pickaxe: support case insensitive match on non-ascii

 builtin/grep.c   |  2 +-
 diffcore-pickaxe.c   | 27 +
 gettext.c| 24 +++-
 gettext.h|  1 +
 grep.c   | 44 +++--
 grep.h   |  1 +
 quote.c  | 37 ++
 quote.h  |  1 +
 t/t7812-grep-icase-non-ascii.sh (new +x) | 67 
 t/t7813-grep-icase-iso.sh (new +x)   | 19 +
 10 files changed, 208 insertions(+), 15 deletions(-)
 create mode 100755 t/t7812-grep-icase-non-ascii.sh
 create mode 100755 t/t7813-grep-icase-iso.sh

-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/9] grep: allow -F -i combination

2015-07-14 Thread Nguyễn Thái Ngọc Duy
-F means "no regex", not "case sensitive" so it should not override -i

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/grep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index d04f440..2d392e9 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -806,7 +806,7 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
 
if (!opt.pattern_list)
die(_("no pattern given."));
-   if (!opt.fixed && opt.ignore_case)
+   if (opt.ignore_case)
opt.regflags |= REG_ICASE;
 
compile_grep_patterns(&opt);
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/9] grep: break down an "if" stmt in preparation for next changes

2015-07-14 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index b58c7c6..bd32f66 100644
--- a/grep.c
+++ b/grep.c
@@ -403,9 +403,11 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
 
-   if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+   if (is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
-   else
+   else if (opt->fixed) {
+   p->fixed = 1;
+   } else
p->fixed = 0;
 
if (p->fixed) {
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 4/9] grep/icase: avoid kwsset when -F is specified

2015-07-14 Thread Nguyễn Thái Ngọc Duy
Similar to the previous commit, we can't use kws on icase search
outside ascii range. But we can't simply pass the pattern to
regcomp/pcre like the previous commit because it may contain regex
special characters, so we need to quote the regex first.

To avoid misquote traps that could lead to undefined behavior, we
always stick to basic regex engine in this case. We don't need fancy
features for grepping a literal string anyway.

basic_regex_quote_buf() assumes that if the pattern is in a multibyte
encoding, ascii chars must be unambiguously encoded as single
bytes. This is true at least for UTF-8. For others, let's wait until
people complain. Chances are nobody uses multibyte, non utf-8 charsets
any more.

Helped-by: René Scharfe 
Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c  | 25 -
 quote.c | 37 +
 quote.h |  1 +
 t/t7812-grep-icase-non-ascii.sh | 26 ++
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index d795b0e..8fce54f 100644
--- a/grep.c
+++ b/grep.c
@@ -5,6 +5,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "commit.h"
+#include "quote.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len)
return 1;
 }
 
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int err;
+
+   basic_regex_quote_buf(&sb, p->pattern);
+   err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED);
+   if (opt->debug)
+   fprintf(stderr, "fixed%s\n", sb.buf);
+   strbuf_release(&sb);
+   if (err) {
+   char errbuf[1024];
+   regerror(err, &p->regexp, errbuf, 1024);
+   regfree(&p->regexp);
+   compile_regexp_failed(p, errbuf);
+   }
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
int icase_non_ascii;
@@ -411,7 +430,11 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else if (opt->fixed) {
-   p->fixed = 1;
+   p->fixed = !icase_non_ascii;
+   if (!p->fixed) {
+   compile_fixed_regexp(p, opt);
+   return;
+   }
} else
p->fixed = 0;
 
diff --git a/quote.c b/quote.c
index 7920e18..43a8057 100644
--- a/quote.c
+++ b/quote.c
@@ -439,3 +439,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src)
}
strbuf_addch(sb, '"');
 }
+
+void basic_regex_quote_buf(struct strbuf *sb, const char *src)
+{
+   char c;
+
+   if (*src == '^') {
+   /* only beginning '^' is special and needs quoting */
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, *src++);
+   }
+   if (*src == '*')
+   /* beginning '*' is not special, no quoting */
+   strbuf_addch(sb, *src++);
+
+   while ((c = *src++)) {
+   switch (c) {
+   case '[':
+   case '.':
+   case '\\':
+   case '*':
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   case '$':
+   /* only the end '$' is special and needs quoting */
+   if (*src == '\0')
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   default:
+   strbuf_addch(sb, c);
+   break;
+   }
+   }
+}
diff --git a/quote.h b/quote.h
index 99e04d3..362d315 100644
--- a/quote.h
+++ b/quote.h
@@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char 
*prefix,
 extern void perl_quote_buf(struct strbuf *sb, const char *src);
 extern void python_quote_buf(struct strbuf *sb, const char *src);
 extern void tcl_quote_buf(struct strbuf *sb, const char *src);
+extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
 
 #endif
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index 63a2630..c945589 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -16,4 +16,30 @@ test_expect_success GETTEXT_LOCALE 'grep litera

[PATCH v3 3/9] grep/icase: avoid kwsset on literal non-ascii strings

2015-07-14 Thread Nguyễn Thái Ngọc Duy
When we detect the pattern is just a literal string, we avoid heavy
regex engine and use fast substring search implemented in kwsset.c.
But kws uses git-ctype which is locale-independent so it does not know
how to fold case properly outside ascii range. Let regcomp or pcre
take care of this case instead. Slower, but accurate.

Helped-by: René Scharfe 
Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c   |  7 ++-
 t/t7812-grep-icase-non-ascii.sh (new +x) | 19 +++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100755 t/t7812-grep-icase-non-ascii.sh

diff --git a/grep.c b/grep.c
index bd32f66..d795b0e 100644
--- a/grep.c
+++ b/grep.c
@@ -4,6 +4,7 @@
 #include "xdiff-interface.h"
 #include "diff.h"
 #include "diffcore.h"
+#include "commit.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -398,12 +399,16 @@ static int is_fixed(const char *s, size_t len)
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
+   int icase_non_ascii;
int err;
 
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
+   icase_non_ascii =
+   (opt->regflags & REG_ICASE || p->ignore_case) &&
+   has_non_ascii(p->pattern);
 
-   if (is_fixed(p->pattern, p->patternlen))
+   if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else if (opt->fixed) {
p->fixed = 1;
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
new file mode 100755
index 000..63a2630
--- /dev/null
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_LOCALE 'setup' '
+   printf "TILRAUN: Halló Heimur!" >file &&
+   git add file &&
+   LC_ALL="$is_IS_locale" &&
+   export LC_ALL
+'
+
+test_expect_success GETTEXT_LOCALE 'grep literal string, no -F' '
+   git grep -i "TILRAUN: Halló Heimur!" &&
+   git grep -i "TILRAUN: HALLÓ HEIMUR!"
+'
+
+test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 5/9] grep/pcre: prepare locale-dependent tables for icase matching

2015-07-14 Thread Nguyễn Thái Ngọc Duy
The default tables are usually built with C locale and only suitable
for LANG=C or similar.  This should make case insensitive search work
correctly for all single-byte charsets.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c |  8 ++--
 grep.h |  1 +
 t/t7813-grep-icase-iso.sh (new +x) | 19 +++
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100755 t/t7813-grep-icase-iso.sh

diff --git a/grep.c b/grep.c
index 8fce54f..f0fbf99 100644
--- a/grep.c
+++ b/grep.c
@@ -324,11 +324,14 @@ static void compile_pcre_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
int erroffset;
int options = PCRE_MULTILINE;
 
-   if (opt->ignore_case)
+   if (opt->ignore_case) {
+   if (has_non_ascii(p->pattern))
+   p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
+   }
 
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
-   NULL);
+ p->pcre_tables);
if (!p->pcre_regexp)
compile_regexp_failed(p, error);
 
@@ -362,6 +365,7 @@ static void free_pcre_regexp(struct grep_pat *p)
 {
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
+   pcre_free((void *)p->pcre_tables);
 }
 #else /* !USE_LIBPCRE */
 static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
diff --git a/grep.h b/grep.h
index 95f197a..cee4357 100644
--- a/grep.h
+++ b/grep.h
@@ -48,6 +48,7 @@ struct grep_pat {
regex_t regexp;
pcre *pcre_regexp;
pcre_extra *pcre_extra_info;
+   const unsigned char *pcre_tables;
kwset_t kws;
unsigned fixed:1;
unsigned ignore_case:1;
diff --git a/t/t7813-grep-icase-iso.sh b/t/t7813-grep-icase-iso.sh
new file mode 100755
index 000..efef7fb
--- /dev/null
+++ b/t/t7813-grep-icase-iso.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_ISO_LOCALE 'setup' '
+   printf "TILRAUN: Halló Heimur!" >file &&
+   git add file &&
+   LC_ALL="$is_IS_iso_locale" &&
+   export LC_ALL
+'
+
+test_expect_success GETTEXT_ISO_LOCALE,LIBPCRE 'grep pcre string' '
+   git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
+'
+
+test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 7/9] grep/pcre: support utf-8

2015-07-14 Thread Nguyễn Thái Ngọc Duy
In the previous change in this function, we add locale support for
single-byte encodings only. It looks like pcre only supports utf-* as
multibyte encodings, the others are left in the cold (which is
fine).

We need to enable PCRE_UTF8 so pcre can find character boundaries
correctly. It's needed for case folding (when --ignore-case is used)
or when '*', '+' or similar syntax is used.

The "has_non_ascii()" check is to be on the conservative side. If
there's no non-ascii in the pattern, the searched content could still be
in utf-8, but we can treat it just like a byte stream and everything
should work. If we force utf-8 based on locale only and pcre validates
utf-8 and the file content is in non-utf8 encoding, things break.

Noticed-by: Plamen Totev 
Helped-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 grep.c  |  2 ++
 t/t7812-grep-icase-non-ascii.sh | 15 +++
 2 files changed, 17 insertions(+)

diff --git a/grep.c b/grep.c
index f0fbf99..07621c1 100644
--- a/grep.c
+++ b/grep.c
@@ -329,6 +329,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
}
+   if (is_utf8_locale() && has_non_ascii(p->pattern))
+   options |= PCRE_UTF8;
 
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
  p->pcre_tables);
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index c945589..e861a15 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -16,6 +16,21 @@ test_expect_success GETTEXT_LOCALE 'grep literal string, no 
-F' '
git grep -i "TILRAUN: HALLÓ HEIMUR!"
 '
 
+test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 icase' '
+   git grep --perl-regexp    "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
+'
+
+test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' '
+   printf "TILRAUN: Hallóó Heimur!" >file2 &&
+   git add file2 &&
+   git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual &&
+   echo file >expected &&
+   echo file2 >>expected &&
+   test_cmp expected actual
+'
+
 test_expect_success GETTEXT_LOCALE 'grep literal string, with -F' '
git grep --debug -i -F "TILRAUN: Halló Heimur!"  2>&1 >/dev/null |
 grep fixed >debug1 &&
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 6/9] gettext: add is_utf8_locale()

2015-07-14 Thread Nguyễn Thái Ngọc Duy
This function returns true if git is running under a UTF-8
locale. pcre in the next patch will need this.

is_encoding_utf8() is used instead of strcmp() to catch both "utf-8"
and "utf8" suffixes.

When built with no gettext support, we peek in several env variables
to detect UTF-8. The pcre library might support utf-8 even if libc or
git is built without locale support. The peeking code is a copy from
compat/regex/regcomp.c.

Helped-by: Torsten Bögershausen 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 gettext.c | 24 ++--
 gettext.h |  1 +
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/gettext.c b/gettext.c
index 7378ba2..aaf1688 100644
--- a/gettext.c
+++ b/gettext.c
@@ -18,6 +18,8 @@
 #  endif
 #endif
 
+static const char *charset;
+
 /*
  * Guess the user's preferred languages from the value in LANGUAGE environment
  * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
@@ -65,7 +67,6 @@ static int test_vsnprintf(const char *fmt, ...)
return ret;
 }
 
-static const char *charset;
 static void init_gettext_charset(const char *domain)
 {
/*
@@ -171,8 +172,27 @@ int gettext_width(const char *s)
 {
static int is_utf8 = -1;
if (is_utf8 == -1)
-   is_utf8 = !strcmp(charset, "UTF-8");
+   is_utf8 = is_utf8_locale();
 
return is_utf8 ? utf8_strwidth(s) : strlen(s);
 }
 #endif
+
+int is_utf8_locale(void)
+{
+#ifdef NO_GETTEXT
+   if (!charset) {
+   const char *env = getenv("LC_ALL");
+   if (!env || !*env)
+   env = getenv("LC_CTYPE");
+   if (!env || !*env)
+   env = getenv("LANG");
+   if (!env)
+   env = "";
+   if (strchr(env, '.'))
+   env = strchr(env, '.') + 1;
+   charset = xstrdup(env);
+   }
+#endif
+   return is_encoding_utf8(charset);
+}
diff --git a/gettext.h b/gettext.h
index 33696a4..7eee64a 100644
--- a/gettext.h
+++ b/gettext.h
@@ -90,5 +90,6 @@ const char *Q_(const char *msgid, const char *plu, unsigned 
long n)
 #endif
 
 const char *get_preferred_languages(void);
+extern int is_utf8_locale(void);
 
 #endif
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 8/9] diffcore-pickaxe: "share" regex error handling code

2015-07-14 Thread Nguyễn Thái Ngọc Duy
There's another regcomp code block coming in this function. By moving
the error handling code out of this block, we don't have to add the
same error handling code in the new block.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 diffcore-pickaxe.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 185f86b..7a718fc 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o)
int opts = o->pickaxe_opts;
regex_t regex, *regexp = NULL;
kwset_t kws = NULL;
+   int err = 0;
 
if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
-   int err;
int cflags = REG_EXTENDED | REG_NEWLINE;
if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
cflags |= REG_ICASE;
err = regcomp(&regex, needle, cflags);
-   if (err) {
-   /* The POSIX.2 people are surely sick */
-   char errbuf[1024];
-   regerror(err, &regex, errbuf, 1024);
-   regfree(&regex);
-   die("invalid regex: %s", errbuf);
-   }
regexp = &regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
@@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o)
kwsincr(kws, needle, strlen(needle));
kwsprep(kws);
}
+   if (err) {
+   /* The POSIX.2 people are surely sick */
+   char errbuf[1024];
+   regerror(err, &regex, errbuf, 1024);
+   regfree(&regex);
+   die("invalid regex: %s", errbuf);
+   }
 
/* Might want to warn when both S and G are on; I don't care... */
pickaxe(&diff_queued_diff, o, regexp, kws,
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 9/9] diffcore-pickaxe: support case insensitive match on non-ascii

2015-07-14 Thread Nguyễn Thái Ngọc Duy
Similar to the "grep -F -i" case, we can't use kws on icase search
outside ascii range, so we quote the string and pass it to regcomp as
a basic regexp and let regex engine deal with case sensitivity.

The new test is put in t7812 instead of t4209-log-pickaxe because
lib-gettext.sh might cause problems elsewhere, probably.

Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 diffcore-pickaxe.c  | 11 +++
 t/t7812-grep-icase-non-ascii.sh |  7 +++
 2 files changed, 18 insertions(+)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 7a718fc..6946c15 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -7,6 +7,8 @@
 #include "diffcore.h"
 #include "xdiff-interface.h"
 #include "kwset.h"
+#include "commit.h"
+#include "quote.h"
 
 typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
  struct diff_options *o,
@@ -212,6 +214,15 @@ void diffcore_pickaxe(struct diff_options *o)
cflags |= REG_ICASE;
err = regcomp(&regex, needle, cflags);
regexp = &regex;
+   } else if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) &&
+  has_non_ascii(needle)) {
+   struct strbuf sb = STRBUF_INIT;
+   int cflags = REG_NEWLINE | REG_ICASE;
+
+   basic_regex_quote_buf(&sb, needle);
+   err = regcomp(&regex, sb.buf, cflags);
+   strbuf_release(&sb);
+   regexp = &regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
   ? tolower_trans_tbl : NULL);
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index e861a15..d07fa20 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -57,4 +57,11 @@ test_expect_success GETTEXT_LOCALE 'grep string with regex, 
with -F' '
test_cmp expect2 debug2
 '
 
+test_expect_success GETTEXT_LOCALE 'pickaxe -i on non-ascii' '
+   git commit -m first &&
+   git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual &&
+   echo first >expected &&
+   test_cmp expected actual
+'
+
 test_done
-- 
2.3.0.rc1.137.g477eb31

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/11] nd/icase updates

2016-06-23 Thread Nguyễn Thái Ngọc Duy
This update drops 1/12, which is an unnecessary change, and changes a
couple of echo/printf to test_write_lines. One of those echos uses
backslashes and causes problems with Debian dash.

The interdiff is therefore not really interesting:

diff --git a/builtin/grep.c b/builtin/grep.c
index 46c5ba1..8c516a9 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -809,7 +809,7 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
 
if (!opt.pattern_list)
die(_("no pattern given."));
-   if (opt.ignore_case)
+   if (!opt.fixed && opt.ignore_case)
opt.regflags |= REG_ICASE;
 
compile_grep_patterns(&opt);
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index 4176625..169fd8d 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -5,7 +5,7 @@ test_description='grep icase on non-English locales'
 . ./lib-gettext.sh
 
 test_expect_success GETTEXT_LOCALE 'setup' '
-   printf "TILRAUN: Halló Heimur!" >file &&
+   test_write_lines "TILRAUN: Halló Heimur!" >file &&
git add file &&
LC_ALL="$is_IS_locale" &&
export LC_ALL
@@ -27,7 +27,7 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 
icase' '
 '
 
 test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' '
-   printf "TILRAUN: Hallóó Heimur!" >file2 &&
+   test_write_lines "TILRAUN: Hallóó Heimur!" >file2 &&
git add file2 &&
git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual &&
echo file >expected &&
@@ -38,26 +38,26 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 
string with "+"' '
 test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
git grep --debug -i -F "TILRAUN: Halló Heimur!"  2>&1 >/dev/null |
 grep fixed >debug1 &&
-   echo "fixed TILRAUN: Halló Heimur!" >expect1 &&
+   test_write_lines "fixed TILRAUN: Halló Heimur!" >expect1 &&
test_cmp expect1 debug1 &&
 
git grep --debug -i -F "TILRAUN: HALLÓ HEIMUR!"  2>&1 >/dev/null |
 grep fixed >debug2 &&
-   echo "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 &&
+   test_write_lines "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 &&
test_cmp expect2 debug2
 '
 
 test_expect_success REGEX_LOCALE 'grep string with regex, with -F' '
-   printf "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file &&
+   test_write_lines "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file &&
 
git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 
>/dev/null |
 grep fixed >debug1 &&
-   echo "fixed \\^*TILR^AUN:\\.\\* Halló \$He\\[]imur!\\\$" >expect1 &&
+   test_write_lines "fixed \\^*TILR^AUN:\\.\\* Halló 
\$He\\[]imur!\\\$" >expect1 &&
test_cmp expect1 debug1 &&
 
git grep --debug -i -F "^*TILR^AUN:.* \\HALLÓ \$HE[]IMUR!\$"  2>&1 
>/dev/null |
 grep fixed >debug2 &&
-   echo "fixed \\^*TILR^AUN:\\.\\* HALLÓ \$HE\\[]IMUR!\\\$" >expect2 &&
+   test_write_lines "fixed \\^*TILR^AUN:\\.\\* HALLÓ 
\$HE\\[]IMUR!\\\$" >expect2 &&
test_cmp expect2 debug2
 '
 

Nguyễn Thái Ngọc Duy (11):
  grep: break down an "if" stmt in preparation for next changes
  test-regex: isolate the bug test code
  test-regex: expose full regcomp() to the command line
  grep/icase: avoid kwsset on literal non-ascii strings
  grep/icase: avoid kwsset when -F is specified
  grep/pcre: prepare locale-dependent tables for icase matching
  gettext: add is_utf8_locale()
  grep/pcre: support utf-8
  diffcore-pickaxe: "share" regex error handling code
  diffcore-pickaxe: support case insensitive match on non-ascii
  grep.c: reuse "icase" variable

 diffcore-pickaxe.c   | 27 
 gettext.c| 24 ++-
 gettext.h|  1 +
 grep.c   | 47 +
 grep.h   |  1 +
 quote.c  | 37 +
 quote.h  |  1 +
 t/t0070-fundamental.sh   |  2 +-
 t/t7812-grep-icase-non-ascii.sh (new +x) | 71 
 t/t7813-grep-icase-iso.sh (new +x)   | 19 +
 test-regex.c | 59 +-
 11 files changed, 269 insertions(+), 20 deletions(-)
 create mode 100755 t/t7812-grep-icase-non-ascii.sh
 create mode 100755 t/t7813-grep-icase-iso.sh

-- 
2.8.2.526.g02eed6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/11] grep: break down an "if" stmt in preparation for next changes

2016-06-23 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Junio C Hamano 
---
 grep.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index 7b2b96a..609f218 100644
--- a/grep.c
+++ b/grep.c
@@ -403,7 +403,9 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
 
-   if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+   if (opt->fixed) {
+   p->fixed = 1;
+   } else if (is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else
p->fixed = 0;
-- 
2.8.2.526.g02eed6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/11] grep/icase: avoid kwsset when -F is specified

2016-06-23 Thread Nguyễn Thái Ngọc Duy
Similar to the previous commit, we can't use kws on icase search
outside ascii range. But we can't simply pass the pattern to
regcomp/pcre like the previous commit because it may contain regex
special characters, so we need to quote the regex first.

To avoid misquote traps that could lead to undefined behavior, we
always stick to basic regex engine in this case. We don't need fancy
features for grepping a literal string anyway.

basic_regex_quote_buf() assumes that if the pattern is in a multibyte
encoding, ascii chars must be unambiguously encoded as single
bytes. This is true at least for UTF-8. For others, let's wait until
people speak up. Chances are nobody uses multibyte, non utf-8 charsets
anymore.

Noticed-by: Plamen Totev 
Helped-by: René Scharfe 
Helped-by: Eric Sunshine 
Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Junio C Hamano 
---
 grep.c  | 25 -
 quote.c | 37 +
 quote.h |  1 +
 t/t7812-grep-icase-non-ascii.sh | 26 ++
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index f192727..6de58f3 100644
--- a/grep.c
+++ b/grep.c
@@ -5,6 +5,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "commit.h"
+#include "quote.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,24 @@ static int is_fixed(const char *s, size_t len)
return 1;
 }
 
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+   struct strbuf sb = STRBUF_INIT;
+   int err;
+
+   basic_regex_quote_buf(&sb, p->pattern);
+   err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED);
+   if (opt->debug)
+   fprintf(stderr, "fixed %s\n", sb.buf);
+   strbuf_release(&sb);
+   if (err) {
+   char errbuf[1024];
+   regerror(err, &p->regexp, errbuf, sizeof(errbuf));
+   regfree(&p->regexp);
+   compile_regexp_failed(p, errbuf);
+   }
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
int icase, ascii_only;
@@ -408,7 +427,11 @@ static void compile_regexp(struct grep_pat *p, struct 
grep_opt *opt)
ascii_only = !has_non_ascii(p->pattern);
 
if (opt->fixed) {
-   p->fixed = 1;
+   p->fixed = !icase || ascii_only;
+   if (!p->fixed) {
+   compile_fixed_regexp(p, opt);
+   return;
+   }
} else if ((!icase || ascii_only) &&
   is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
diff --git a/quote.c b/quote.c
index fe884d2..c67adb7 100644
--- a/quote.c
+++ b/quote.c
@@ -440,3 +440,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src)
}
strbuf_addch(sb, '"');
 }
+
+void basic_regex_quote_buf(struct strbuf *sb, const char *src)
+{
+   char c;
+
+   if (*src == '^') {
+   /* only beginning '^' is special and needs quoting */
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, *src++);
+   }
+   if (*src == '*')
+   /* beginning '*' is not special, no quoting */
+   strbuf_addch(sb, *src++);
+
+   while ((c = *src++)) {
+   switch (c) {
+   case '[':
+   case '.':
+   case '\\':
+   case '*':
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   case '$':
+   /* only the end '$' is special and needs quoting */
+   if (*src == '\0')
+   strbuf_addch(sb, '\\');
+   strbuf_addch(sb, c);
+   break;
+
+   default:
+   strbuf_addch(sb, c);
+   break;
+   }
+   }
+}
diff --git a/quote.h b/quote.h
index 99e04d3..362d315 100644
--- a/quote.h
+++ b/quote.h
@@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char 
*prefix,
 extern void perl_quote_buf(struct strbuf *sb, const char *src);
 extern void python_quote_buf(struct strbuf *sb, const char *src);
 extern void tcl_quote_buf(struct strbuf *sb, const char *src);
+extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
 
 #endif
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index b78a774..1929809 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t781

[PATCH 04/11] grep/icase: avoid kwsset on literal non-ascii strings

2016-06-23 Thread Nguyễn Thái Ngọc Duy
When we detect the pattern is just a literal string, we avoid the heavy
regex engine and use the fast substring search implemented in kwset.c.
But kws uses git-ctype which is locale-independent so it does not know
how to fold case properly outside ascii range. Let regcomp or pcre
take care of this case instead. Slower, but accurate.

Noticed-by: Plamen Totev 
Helped-by: René Scharfe 
Helped-by: Eric Sunshine 
Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Junio C Hamano 
---
 grep.c   |  7 ++-
 t/t7812-grep-icase-non-ascii.sh (new +x) | 23 +++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100755 t/t7812-grep-icase-non-ascii.sh

diff --git a/grep.c b/grep.c
index 609f218..f192727 100644
--- a/grep.c
+++ b/grep.c
@@ -4,6 +4,7 @@
 #include "xdiff-interface.h"
 #include "diff.h"
 #include "diffcore.h"
+#include "commit.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -398,14 +399,18 @@ static int is_fixed(const char *s, size_t len)
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
+   int icase, ascii_only;
int err;
 
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
+   icase  = opt->regflags & REG_ICASE || p->ignore_case;
+   ascii_only = !has_non_ascii(p->pattern);
 
if (opt->fixed) {
p->fixed = 1;
-   } else if (is_fixed(p->pattern, p->patternlen))
+   } else if ((!icase || ascii_only) &&
+  is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
else
p->fixed = 0;
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
new file mode 100755
index 000..b78a774
--- /dev/null
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+test_description='grep icase on non-English locales'
+
+. ./lib-gettext.sh
+
+test_expect_success GETTEXT_LOCALE 'setup' '
+   test_write_lines "TILRAUN: Halló Heimur!" >file &&
+   git add file &&
+   LC_ALL="$is_IS_locale" &&
+   export LC_ALL
+'
+
+test_have_prereq GETTEXT_LOCALE &&
+test-regex "HALLÓ" "Halló" ICASE &&
+test_set_prereq REGEX_LOCALE
+
+test_expect_success REGEX_LOCALE 'grep literal string, no -F' '
+   git grep -i "TILRAUN: Halló Heimur!" &&
+   git grep -i "TILRAUN: HALLÓ HEIMUR!"
+'
+
+test_done
-- 
2.8.2.526.g02eed6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/11] test-regex: isolate the bug test code

2016-06-23 Thread Nguyễn Thái Ngọc Duy
This is in preparation to turn test-regex into some generic regex
testing command.

Helped-by: Eric Sunshine 
Helped-by: Ramsay Jones 
Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Junio C Hamano 
---
 t/t0070-fundamental.sh |  2 +-
 test-regex.c   | 12 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh
index 5ed69a6..991ed2a 100755
--- a/t/t0070-fundamental.sh
+++ b/t/t0070-fundamental.sh
@@ -31,7 +31,7 @@ test_expect_success 'git_mkstemps_mode does not fail if fd 0 
is not open' '
 
 test_expect_success 'check for a bug in the regex routines' '
# if this test fails, re-build git with NO_REGEX=1
-   test-regex
+   test-regex --bug
 '
 
 test_done
diff --git a/test-regex.c b/test-regex.c
index 0dc598e..67a1a65 100644
--- a/test-regex.c
+++ b/test-regex.c
@@ -1,6 +1,6 @@
 #include "git-compat-util.h"
 
-int main(int argc, char **argv)
+static int test_regex_bug(void)
 {
char *pat = "[^={} \t]+";
char *str = "={}\nfred";
@@ -16,5 +16,13 @@ int main(int argc, char **argv)
if (m[0].rm_so == 3) /* matches '\n' when it should not */
die("regex bug confirmed: re-build git with NO_REGEX=1");
 
-   exit(0);
+   return 0;
+}
+
+int main(int argc, char **argv)
+{
+   if (argc == 2 && !strcmp(argv[1], "--bug"))
+   return test_regex_bug();
+   else
+   usage("test-regex --bug");
 }
-- 
2.8.2.526.g02eed6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/11] grep/pcre: support utf-8

2016-06-23 Thread Nguyễn Thái Ngọc Duy
In the previous change in this function, we add locale support for
single-byte encodings only. It looks like pcre only supports utf-* as
multibyte encodings, the others are left in the cold (which is
fine).

We need to enable PCRE_UTF8 so pcre can find character boundaries
correctly. It's needed for case folding (when --ignore-case is used)
or when '*', '+' or similar syntax is used.

The "has_non_ascii()" check is to be on the conservative side. If
there's no non-ascii in the pattern, the searched content could still be
in utf-8, but we can treat it just like a byte stream and everything
should work. If we force utf-8 based on locale only and pcre validates
utf-8 and the file content is in non-utf8 encoding, things break.

Noticed-by: Plamen Totev 
Helped-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Junio C Hamano 
---
 grep.c  |  2 ++
 t/t7812-grep-icase-non-ascii.sh | 15 +++
 2 files changed, 17 insertions(+)

diff --git a/grep.c b/grep.c
index 22f4d99..6e99b01 100644
--- a/grep.c
+++ b/grep.c
@@ -329,6 +329,8 @@ static void compile_pcre_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
}
+   if (is_utf8_locale() && has_non_ascii(p->pattern))
+   options |= PCRE_UTF8;
 
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
  p->pcre_tables);
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index 1929809..08ae4c9 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -20,6 +20,21 @@ test_expect_success REGEX_LOCALE 'grep literal string, no 
-F' '
git grep -i "TILRAUN: HALLÓ HEIMUR!"
 '
 
+test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 icase' '
+   git grep --perl-regexp    "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
+   git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
+'
+
+test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' '
+   test_write_lines "TILRAUN: Hallóó Heimur!" >file2 &&
+   git add file2 &&
+   git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual &&
+   echo file >expected &&
+   echo file2 >>expected &&
+   test_cmp expected actual
+'
+
 test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
git grep --debug -i -F "TILRAUN: Halló Heimur!"  2>&1 >/dev/null |
 grep fixed >debug1 &&
-- 
2.8.2.526.g02eed6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/11] diffcore-pickaxe: "share" regex error handling code

2016-06-23 Thread Nguyễn Thái Ngọc Duy
There's another regcomp code block coming in this function. By moving
the error handling code out of this block, we don't have to add the
same error handling code in the new block.

Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Junio C Hamano 
---
 diffcore-pickaxe.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 7715c13..69c6567 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -204,20 +204,13 @@ void diffcore_pickaxe(struct diff_options *o)
int opts = o->pickaxe_opts;
regex_t regex, *regexp = NULL;
kwset_t kws = NULL;
+   int err = 0;
 
if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
-   int err;
int cflags = REG_EXTENDED | REG_NEWLINE;
if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
cflags |= REG_ICASE;
err = regcomp(&regex, needle, cflags);
-   if (err) {
-   /* The POSIX.2 people are surely sick */
-   char errbuf[1024];
-   regerror(err, &regex, errbuf, 1024);
-   regfree(&regex);
-   die("invalid regex: %s", errbuf);
-   }
regexp = &regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
@@ -225,6 +218,13 @@ void diffcore_pickaxe(struct diff_options *o)
kwsincr(kws, needle, strlen(needle));
kwsprep(kws);
}
+   if (err) {
+   /* The POSIX.2 people are surely sick */
+   char errbuf[1024];
+   regerror(err, &regex, errbuf, 1024);
+   regfree(&regex);
+   die("invalid regex: %s", errbuf);
+   }
 
/* Might want to warn when both S and G are on; I don't care... */
pickaxe(&diff_queued_diff, o, regexp, kws,
-- 
2.8.2.526.g02eed6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/11] test-regex: expose full regcomp() to the command line

2016-06-23 Thread Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Junio C Hamano 
---
 test-regex.c | 51 +--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/test-regex.c b/test-regex.c
index 67a1a65..eff26f5 100644
--- a/test-regex.c
+++ b/test-regex.c
@@ -1,4 +1,21 @@
 #include "git-compat-util.h"
+#include "gettext.h"
+
+struct reg_flag {
+   const char *name;
+   int flag;
+};
+
+static struct reg_flag reg_flags[] = {
+   { "EXTENDED",REG_EXTENDED   },
+   { "NEWLINE", REG_NEWLINE},
+   { "ICASE",   REG_ICASE  },
+   { "NOTBOL",  REG_NOTBOL },
+#ifdef REG_STARTEND
+   { "STARTEND",REG_STARTEND   },
+#endif
+   { NULL, 0 }
+};
 
 static int test_regex_bug(void)
 {
@@ -21,8 +38,38 @@ static int test_regex_bug(void)
 
 int main(int argc, char **argv)
 {
+   const char *pat;
+   const char *str;
+   int flags = 0;
+   regex_t r;
+   regmatch_t m[1];
+
if (argc == 2 && !strcmp(argv[1], "--bug"))
return test_regex_bug();
-   else
-   usage("test-regex --bug");
+   else if (argc < 3)
+   usage("test-regex --bug\n"
+ "test-regex <pattern> <string> [<options>]");
+
+   argv++;
+   pat = *argv++;
+   str = *argv++;
+   while (*argv) {
+   struct reg_flag *rf;
+   for (rf = reg_flags; rf->name; rf++)
+   if (!strcmp(*argv, rf->name)) {
+   flags |= rf->flag;
+   break;
+   }
+   if (!rf->name)
+   die("do not recognize %s", *argv);
+   argv++;
+   }
+   git_setup_gettext();
+
+   if (regcomp(&r, pat, flags))
+   die("failed regcomp() for pattern '%s'", pat);
+   if (regexec(&r, str, 1, m, 0))
+   return 1;
+
+   return 0;
 }
-- 
2.8.2.526.g02eed6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   3   4   5   6   7   8   9   10   >