[PATCH v10 6/6] http: respect protocol.*.allow=user for http-alternates

2016-12-14 Thread Brandon Williams
From: Jeff King 

The http-walker may fetch the http-alternates (or
alternates) file from a remote in order to find more
objects. This should count as a "not from the user" use of
the protocol. But because we implement the redirection
ourselves and feed the new URL to curl, it will use the
CURLOPT_PROTOCOLS rules, not the more restrictive
CURLOPT_REDIR_PROTOCOLS.

The ideal solution would be for each curl request we make to
know whether or not it is directly from the user or part of an
alternates redirect, and then set CURLOPT_PROTOCOLS as
appropriate. However, that would require plumbing that
information through all of the various layers of the http
code.

Instead, let's check the protocol at the source: when we are
parsing the remote http-alternates file. The only downside
is that if there's any mismatch between what protocol we
think it is versus what curl thinks it is, it could violate
the policy.

To address this, we'll make the parsing err on the picky
side, and only allow protocols that it can parse
definitively. So for example, you can't elude the "http"
policy by asking for "HTTP://", even though curl might
handle it; we would reject it as unknown. The only unsafe
case would be if you have a URL that starts with "http://"
but curl interprets it as another protocol. That seems like an
unlikely failure mode (and we are still protected by our
base CURLOPT_PROTOCOLS setting, so the worst you could do is
trigger one of https, ftp, or ftps).

Signed-off-by: Jeff King 
---
 http-walker.c  | 52 --
 t/t5550-http-fetch-dumb.sh | 10 +
 2 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/http-walker.c b/http-walker.c
index c2f81cd..b34b6ac 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -3,6 +3,7 @@
 #include "walker.h"
 #include "http.h"
 #include "list.h"
+#include "transport.h"
 
 struct alt_base {
char *base;
@@ -160,6 +161,32 @@ static void prefetch(struct walker *walker, unsigned char 
*sha1)
 #endif
 }
 
+static int is_alternate_allowed(const char *url)
+{
+   const char *protocols[] = {
+   "http", "https", "ftp", "ftps"
+   };
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(protocols); i++) {
+   const char *end;
+   if (skip_prefix(url, protocols[i], &end) &&
+   starts_with(end, "://"))
+   break;
+   }
+
+   if (i >= ARRAY_SIZE(protocols)) {
+   warning("ignoring alternate with unknown protocol: %s", url);
+   return 0;
+   }
+   if (!is_transport_allowed(protocols[i], 0)) {
+   warning("ignoring alternate with restricted protocol: %s", url);
+   return 0;
+   }
+
+   return 1;
+}
+
 static void process_alternates_response(void *callback_data)
 {
struct alternates_request *alt_req =
@@ -274,17 +301,20 @@ static void process_alternates_response(void 
*callback_data)
struct strbuf target = STRBUF_INIT;
strbuf_add(&target, base, serverlen);
strbuf_add(&target, data + i, posn - i - 7);
-   warning("adding alternate object store: %s",
-   target.buf);
-   newalt = xmalloc(sizeof(*newalt));
-   newalt->next = NULL;
-   newalt->base = strbuf_detach(&target, NULL);
-   newalt->got_indices = 0;
-   newalt->packs = NULL;
-
-   while (tail->next != NULL)
-   tail = tail->next;
-   tail->next = newalt;
+
+   if (is_alternate_allowed(target.buf)) {
+   warning("adding alternate object store: 
%s",
+   target.buf);
+   newalt = xmalloc(sizeof(*newalt));
+   newalt->next = NULL;
+   newalt->base = strbuf_detach(&target, 
NULL);
+   newalt->got_indices = 0;
+   newalt->packs = NULL;
+
+   while (tail->next != NULL)
+   tail = tail->next;
+   tail->next = newalt;
+   }
}
}
i = posn + 1;
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index 22011f0..c0ee29c 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -360,5 +360,15 @@ test_expect_success 'http-alternates cannot point at funny 
protocols' '
clone "$HTTPD_URL/dumb/evil.git" evil-file
 '
 
+test_e

[PATCH v10 0/6] transport protocol policy configuration

2016-12-14 Thread Brandon Williams
v10 of this series fixes the following:
* A few updates to the commit messages in order to better convey the reasoning
  behind a few of the patches.
* Additional test to verify that curl redirects respect configured protocol
  policies.
* Patch added by Jeff King to make http alternates respect configured
  protocol policies.

Brandon Williams (5):
  lib-proto-disable: variable name fix
  http: always warn if libcurl version is too old
  transport: add protocol policy config option
  http: create function to get curl allowed protocols
  transport: add from_user parameter to is_transport_allowed

Jeff King (1):
  http: respect protocol.*.allow=user for http-alternates

 Documentation/config.txt |  46 +
 Documentation/git.txt|  38 ---
 git-submodule.sh |  12 ++--
 http-walker.c|  52 +++---
 http.c   |  36 ++
 t/lib-proto-disable.sh   | 142 ---
 t/t5509-fetch-push-namespaces.sh |   1 +
 t/t5550-http-fetch-dumb.sh   |  10 +++
 t/t5802-connect-helper.sh|   1 +
 t/t5812-proto-disable-http.sh|   7 ++
 transport.c  |  84 ---
 transport.h  |  19 +++---
 12 files changed, 363 insertions(+), 85 deletions(-)

-- 
2.8.0.rc3.226.g39d4020



[PATCH v10 5/6] transport: add from_user parameter to is_transport_allowed

2016-12-14 Thread Brandon Williams
Add a from_user parameter to is_transport_allowed() to allow http to be
able to distinguish between protocol restrictions for redirects versus
initial requests.  CURLOPT_REDIR_PROTOCOLS can now be set differently
from CURLOPT_PROTOCOLS to disallow use of protocols with the "user"
policy in redirects.

This change allows callers to query if a transport protocol is allowed,
given that the caller knows that the protocol is coming from the user
(1) or not from the user (0) such as redirects in libcurl.  If unknown a
-1 should be provided which falls back to reading
`GIT_PROTOCOL_FROM_USER` to determine if the protocol came from the
user.

Signed-off-by: Brandon Williams 
---
 http.c| 14 +++---
 t/t5812-proto-disable-http.sh |  7 +++
 transport.c   |  8 +---
 transport.h   | 13 ++---
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/http.c b/http.c
index f7c488a..2208269 100644
--- a/http.c
+++ b/http.c
@@ -489,17 +489,17 @@ static void set_curl_keepalive(CURL *c)
 }
 #endif
 
-static long get_curl_allowed_protocols(void)
+static long get_curl_allowed_protocols(int from_user)
 {
long allowed_protocols = 0;
 
-   if (is_transport_allowed("http"))
+   if (is_transport_allowed("http", from_user))
allowed_protocols |= CURLPROTO_HTTP;
-   if (is_transport_allowed("https"))
+   if (is_transport_allowed("https", from_user))
allowed_protocols |= CURLPROTO_HTTPS;
-   if (is_transport_allowed("ftp"))
+   if (is_transport_allowed("ftp", from_user))
allowed_protocols |= CURLPROTO_FTP;
-   if (is_transport_allowed("ftps"))
+   if (is_transport_allowed("ftps", from_user))
allowed_protocols |= CURLPROTO_FTPS;
 
return allowed_protocols;
@@ -588,9 +588,9 @@ static CURL *get_curl_handle(void)
 #endif
 #if LIBCURL_VERSION_NUM >= 0x071304
curl_easy_setopt(result, CURLOPT_REDIR_PROTOCOLS,
-get_curl_allowed_protocols());
+get_curl_allowed_protocols(0));
curl_easy_setopt(result, CURLOPT_PROTOCOLS,
-get_curl_allowed_protocols());
+get_curl_allowed_protocols(-1));
 #else
warning("protocol restrictions not applied to curl redirects because\n"
"your curl version is too old (>= 7.19.4)");
diff --git a/t/t5812-proto-disable-http.sh b/t/t5812-proto-disable-http.sh
index 044cc15..d911afd 100755
--- a/t/t5812-proto-disable-http.sh
+++ b/t/t5812-proto-disable-http.sh
@@ -30,5 +30,12 @@ test_expect_success 'curl limits redirects' '
test_must_fail git clone "$HTTPD_URL/loop-redir/smart/repo.git"
 '
 
+test_expect_success 'http can be limited to from-user' '
+   git -c protocol.http.allow=user \
+   clone "$HTTPD_URL/smart/repo.git" plain.git &&
+   test_must_fail git -c protocol.http.allow=user \
+   clone "$HTTPD_URL/smart-redir-perm/repo.git" redir.git
+'
+
 stop_httpd
 test_done
diff --git a/transport.c b/transport.c
index fbd799d..f50c31a 100644
--- a/transport.c
+++ b/transport.c
@@ -676,7 +676,7 @@ static enum protocol_allow_config get_protocol_config(const 
char *type)
return PROTOCOL_ALLOW_USER_ONLY;
 }
 
-int is_transport_allowed(const char *type)
+int is_transport_allowed(const char *type, int from_user)
 {
const struct string_list *whitelist = protocol_whitelist();
if (whitelist)
@@ -688,7 +688,9 @@ int is_transport_allowed(const char *type)
case PROTOCOL_ALLOW_NEVER:
return 0;
case PROTOCOL_ALLOW_USER_ONLY:
-   return git_env_bool("GIT_PROTOCOL_FROM_USER", 1);
+   if (from_user < 0)
+   from_user = git_env_bool("GIT_PROTOCOL_FROM_USER", 1);
+   return from_user;
}
 
die("BUG: invalid protocol_allow_config type");
@@ -696,7 +698,7 @@ int is_transport_allowed(const char *type)
 
 void transport_check_allowed(const char *type)
 {
-   if (!is_transport_allowed(type))
+   if (!is_transport_allowed(type, -1))
die("transport '%s' not allowed", type);
 }
 
diff --git a/transport.h b/transport.h
index 3396e1d..4f1c801 100644
--- a/transport.h
+++ b/transport.h
@@ -142,10 +142,17 @@ struct transport {
 struct transport *transport_get(struct remote *, const char *);
 
 /*
- * Check whether a transport is allowed by the environment. Type should
- * generally be the URL scheme, as described in Documentation/git.txt
+ * Check whether a transport is allowed by the environment.
+ *
+ * Type should generally be the URL scheme, as described in
+ * Documentation/git.txt
+ *
+ * from

[PATCH v10 1/6] lib-proto-disable: variable name fix

2016-12-14 Thread Brandon Williams
The test_proto function assigns the positional parameters to named
variables, but then still refers to "$desc" as "$1". Using $desc is
more readable and less error-prone.

Signed-off-by: Brandon Williams 
---
 t/lib-proto-disable.sh | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/t/lib-proto-disable.sh b/t/lib-proto-disable.sh
index b0917d9..be88e9a 100644
--- a/t/lib-proto-disable.sh
+++ b/t/lib-proto-disable.sh
@@ -9,7 +9,7 @@ test_proto () {
proto=$2
url=$3
 
-   test_expect_success "clone $1 (enabled)" '
+   test_expect_success "clone $desc (enabled)" '
rm -rf tmp.git &&
(
GIT_ALLOW_PROTOCOL=$proto &&
@@ -18,7 +18,7 @@ test_proto () {
)
'
 
-   test_expect_success "fetch $1 (enabled)" '
+   test_expect_success "fetch $desc (enabled)" '
(
cd tmp.git &&
GIT_ALLOW_PROTOCOL=$proto &&
@@ -27,7 +27,7 @@ test_proto () {
)
'
 
-   test_expect_success "push $1 (enabled)" '
+   test_expect_success "push $desc (enabled)" '
(
cd tmp.git &&
GIT_ALLOW_PROTOCOL=$proto &&
@@ -36,7 +36,7 @@ test_proto () {
)
'
 
-   test_expect_success "push $1 (disabled)" '
+   test_expect_success "push $desc (disabled)" '
(
cd tmp.git &&
GIT_ALLOW_PROTOCOL=none &&
@@ -45,7 +45,7 @@ test_proto () {
)
'
 
-   test_expect_success "fetch $1 (disabled)" '
+   test_expect_success "fetch $desc (disabled)" '
(
cd tmp.git &&
GIT_ALLOW_PROTOCOL=none &&
@@ -54,7 +54,7 @@ test_proto () {
)
'
 
-   test_expect_success "clone $1 (disabled)" '
+   test_expect_success "clone $desc (disabled)" '
rm -rf tmp.git &&
(
GIT_ALLOW_PROTOCOL=none &&
-- 
2.8.0.rc3.226.g39d4020



Re: [PATCHv3 3/4] submodule: add flags to ok_to_remove_submodule

2016-12-14 Thread Brandon Williams
On 12/14, Stefan Beller wrote:
> In different contexts the question whether deleting a submodule is ok
> to remove may be answered differently.

This sentence is oddly worded.  Maybe this:

  In different contexts the question "Is it ok to delete a submodule?"
  may be answered differently.

-- 
Brandon Williams


[PATCH v7 2/7] submodules: add helper to determine if a submodule is initialized

2016-12-16 Thread Brandon Williams
Add the `is_submodule_initialized()` helper function to submodule.c.
`is_submodule_initialized()` performs a check to determine if the
submodule at the given path has been initialized.

Signed-off-by: Brandon Williams 
---
 submodule.c | 23 +++
 submodule.h |  1 +
 2 files changed, 24 insertions(+)

diff --git a/submodule.c b/submodule.c
index ee3198d..edffaa1 100644
--- a/submodule.c
+++ b/submodule.c
@@ -199,6 +199,29 @@ void gitmodules_config(void)
 }
 
 /*
+ * Determine if a submodule has been initialized at a given 'path'
+ */
+int is_submodule_initialized(const char *path)
+{
+   int ret = 0;
+   const struct submodule *module = NULL;
+
+   module = submodule_from_path(null_sha1, path);
+
+   if (module) {
+   char *key = xstrfmt("submodule.%s.url", module->name);
+   char *value = NULL;
+
+   ret = !git_config_get_string(key, &value);
+
+   free(value);
+   free(key);
+   }
+
+   return ret;
+}
+
+/*
  * Determine if a submodule has been populated at a given 'path'
  */
 int is_submodule_populated(const char *path)
diff --git a/submodule.h b/submodule.h
index c4af505..6ec5f2f 100644
--- a/submodule.h
+++ b/submodule.h
@@ -37,6 +37,7 @@ void set_diffopt_flags_from_submodule_config(struct 
diff_options *diffopt,
const char *path);
 int submodule_config(const char *var, const char *value, void *cb);
 void gitmodules_config(void);
+extern int is_submodule_initialized(const char *path);
 extern int is_submodule_populated(const char *path);
 int parse_submodule_update_strategy(const char *value,
struct submodule_update_strategy *dst);
-- 
2.8.0.rc3.226.g39d4020



[PATCH v7 0/7] recursively grep across submodules

2016-12-16 Thread Brandon Williams
Changes in v7:
* Rebased on 'origin/bw/realpath-wo-chdir' in order to fix the race condition
  that occurs when verifying a submodule's gitdir.
* Reverted is_submodule_populated() to use resolve_gitdir() now that there is
  no race condition.
* Added !MINGW to a test in t7814 so that it won't run on windows.  This is due
  to testing if colons in filenames are still handled correctly, yet windows
  doesn't allow colons in filenames.

Brandon Williams (7):
  submodules: add helper to determine if a submodule is populated
  submodules: add helper to determine if a submodule is initialized
  submodules: load gitmodules file from commit sha1
  grep: add submodules as a grep source type
  grep: optionally recurse into submodules
  grep: enable recurse-submodules to work on <tree> objects
  grep: search history of moved submodules

 Documentation/git-grep.txt |  14 ++
 builtin/grep.c | 386 ++---
 cache.h|   2 +
 config.c   |   8 +-
 git.c  |   2 +-
 grep.c |  16 +-
 grep.h |   1 +
 submodule-config.c |   6 +-
 submodule-config.h |   3 +
 submodule.c|  50 +
 submodule.h|   3 +
 t/t7814-grep-recurse-submodules.sh | 241 +++
 tree-walk.c|  28 +++
 13 files changed, 729 insertions(+), 31 deletions(-)
 create mode 100755 t/t7814-grep-recurse-submodules.sh

-- 
2.8.0.rc3.226.g39d4020



[PATCH v7 3/7] submodules: load gitmodules file from commit sha1

2016-12-16 Thread Brandon Williams
Teach submodules to load a '.gitmodules' file from a commit sha1.  This
enables the population of the submodule_cache to be based on the state
of the '.gitmodules' file from a particular commit.

Signed-off-by: Brandon Williams 
---
 cache.h|  2 ++
 config.c   |  8 
 submodule-config.c |  6 +++---
 submodule-config.h |  3 +++
 submodule.c| 12 
 submodule.h|  1 +
 6 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/cache.h b/cache.h
index e12a5d9..de237ca 100644
--- a/cache.h
+++ b/cache.h
@@ -1693,6 +1693,8 @@ extern int git_default_config(const char *, const char *, 
void *);
 extern int git_config_from_file(config_fn_t fn, const char *, void *);
 extern int git_config_from_mem(config_fn_t fn, const enum config_origin_type,
const char *name, const char *buf, 
size_t len, void *data);
+extern int git_config_from_blob_sha1(config_fn_t fn, const char *name,
+const unsigned char *sha1, void *data);
 extern void git_config_push_parameter(const char *text);
 extern int git_config_from_parameters(config_fn_t fn, void *data);
 extern void git_config(config_fn_t fn, void *);
diff --git a/config.c b/config.c
index 83fdecb..4d78e72 100644
--- a/config.c
+++ b/config.c
@@ -1214,10 +1214,10 @@ int git_config_from_mem(config_fn_t fn, const enum 
config_origin_type origin_typ
return do_config_from(&top, fn, data);
 }
 
-static int git_config_from_blob_sha1(config_fn_t fn,
-const char *name,
-const unsigned char *sha1,
-void *data)
+int git_config_from_blob_sha1(config_fn_t fn,
+ const char *name,
+ const unsigned char *sha1,
+ void *data)
 {
enum object_type type;
char *buf;
diff --git a/submodule-config.c b/submodule-config.c
index 098085b..8b9a2ef 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -379,9 +379,9 @@ static int parse_config(const char *var, const char *value, 
void *data)
return ret;
 }
 
-static int gitmodule_sha1_from_commit(const unsigned char *commit_sha1,
- unsigned char *gitmodules_sha1,
- struct strbuf *rev)
+int gitmodule_sha1_from_commit(const unsigned char *commit_sha1,
+  unsigned char *gitmodules_sha1,
+  struct strbuf *rev)
 {
int ret = 0;
 
diff --git a/submodule-config.h b/submodule-config.h
index d05c542..78584ba 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -29,6 +29,9 @@ const struct submodule *submodule_from_name(const unsigned 
char *commit_sha1,
const char *name);
 const struct submodule *submodule_from_path(const unsigned char *commit_sha1,
const char *path);
+extern int gitmodule_sha1_from_commit(const unsigned char *commit_sha1,
+ unsigned char *gitmodules_sha1,
+ struct strbuf *rev);
 void submodule_free(void);
 
 #endif /* SUBMODULE_CONFIG_H */
diff --git a/submodule.c b/submodule.c
index edffaa1..2600908 100644
--- a/submodule.c
+++ b/submodule.c
@@ -198,6 +198,18 @@ void gitmodules_config(void)
}
 }
 
+void gitmodules_config_sha1(const unsigned char *commit_sha1)
+{
+   struct strbuf rev = STRBUF_INIT;
+   unsigned char sha1[20];
+
+   if (gitmodule_sha1_from_commit(commit_sha1, sha1, &rev)) {
+   git_config_from_blob_sha1(submodule_config, rev.buf,
+ sha1, NULL);
+   }
+   strbuf_release(&rev);
+}
+
 /*
  * Determine if a submodule has been initialized at a given 'path'
  */
diff --git a/submodule.h b/submodule.h
index 6ec5f2f..9203d89 100644
--- a/submodule.h
+++ b/submodule.h
@@ -37,6 +37,7 @@ void set_diffopt_flags_from_submodule_config(struct 
diff_options *diffopt,
const char *path);
 int submodule_config(const char *var, const char *value, void *cb);
 void gitmodules_config(void);
+extern void gitmodules_config_sha1(const unsigned char *commit_sha1);
 extern int is_submodule_initialized(const char *path);
 extern int is_submodule_populated(const char *path);
 int parse_submodule_update_strategy(const char *value,
-- 
2.8.0.rc3.226.g39d4020



[PATCH v7 5/7] grep: optionally recurse into submodules

2016-12-16 Thread Brandon Williams
Allow grep to recognize submodules and recursively search for patterns in
each submodule.  This is done by forking off a process to recursively
call grep on each submodule.  The top level --super-prefix option is
used to pass a path to the submodule which can in turn be used to
prepend to output or in pathspec matching logic.

Recursion only occurs for submodules which have been initialized and
checked out by the parent project.  If a submodule hasn't been
initialized and checked out it is simply skipped.

In order to support the existing multi-threading infrastructure in grep,
output from each child process is captured in a strbuf so that it can be
later printed to the console in an ordered fashion.

To limit the number of threads that are created, each child process has
half the number of threads as its parents (minimum of 1), otherwise we
potentially have a fork-bomb.

Signed-off-by: Brandon Williams 
---
 Documentation/git-grep.txt |   5 +
 builtin/grep.c | 300 ++---
 git.c  |   2 +-
 t/t7814-grep-recurse-submodules.sh |  99 
 4 files changed, 386 insertions(+), 20 deletions(-)
 create mode 100755 t/t7814-grep-recurse-submodules.sh

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 0ecea6e..17aa1ba 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -26,6 +26,7 @@ SYNOPSIS
   [--threads ]
   [-f ] [-e] 
   [--and|--or|--not|(|)|-e ...]
+  [--recurse-submodules]
   [ [--[no-]exclude-standard] [--cached | --no-index | --untracked] | 
...]
   [--] [...]
 
@@ -88,6 +89,10 @@ OPTIONS
mechanism.  Only useful when searching files in the current directory
with `--no-index`.
 
+--recurse-submodules::
+   Recursively search in each submodule that has been initialized and
+   checked out in the repository.
+
 -a::
 --text::
Process binary files as if they were text.
diff --git a/builtin/grep.c b/builtin/grep.c
index 8887b6a..dca0be6 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -18,12 +18,20 @@
 #include "quote.h"
 #include "dir.h"
 #include "pathspec.h"
+#include "submodule.h"
 
 static char const * const grep_usage[] = {
N_("git grep [] [-e]  [...] [[--] ...]"),
NULL
 };
 
+static const char *super_prefix;
+static int recurse_submodules;
+static struct argv_array submodule_options = ARGV_ARRAY_INIT;
+
+static int grep_submodule_launch(struct grep_opt *opt,
+const struct grep_source *gs);
+
 #define GREP_NUM_THREADS_DEFAULT 8
 static int num_threads;
 
@@ -174,7 +182,10 @@ static void *run(void *arg)
break;
 
opt->output_priv = w;
-   hit |= grep_source(opt, &w->source);
+   if (w->source.type == GREP_SOURCE_SUBMODULE)
+   hit |= grep_submodule_launch(opt, &w->source);
+   else
+   hit |= grep_source(opt, &w->source);
grep_source_clear_data(&w->source);
work_done(w);
}
@@ -300,6 +311,10 @@ static int grep_sha1(struct grep_opt *opt, const unsigned 
char *sha1,
if (opt->relative && opt->prefix_length) {
quote_path_relative(filename + tree_name_len, opt->prefix, 
&pathbuf);
strbuf_insert(&pathbuf, 0, filename, tree_name_len);
+   } else if (super_prefix) {
+   strbuf_add(&pathbuf, filename, tree_name_len);
+   strbuf_addstr(&pathbuf, super_prefix);
+   strbuf_addstr(&pathbuf, filename + tree_name_len);
} else {
strbuf_addstr(&pathbuf, filename);
}
@@ -328,10 +343,13 @@ static int grep_file(struct grep_opt *opt, const char 
*filename)
 {
struct strbuf buf = STRBUF_INIT;
 
-   if (opt->relative && opt->prefix_length)
+   if (opt->relative && opt->prefix_length) {
quote_path_relative(filename, opt->prefix, &buf);
-   else
+   } else {
+   if (super_prefix)
+   strbuf_addstr(&buf, super_prefix);
strbuf_addstr(&buf, filename);
+   }
 
 #ifndef NO_PTHREADS
if (num_threads) {
@@ -378,31 +396,260 @@ static void run_pager(struct grep_opt *opt, const char 
*prefix)
exit(status);
 }
 
-static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, 
int cached)
+static void compile_submodule_options(const struct grep_opt *opt,
+ const struct pathspec *pathspec,
+ int cached, int untracked,
+ int opt_exclude, int use_index,
+ int patt

[PATCH v7 4/7] grep: add submodules as a grep source type

2016-12-16 Thread Brandon Williams
Add `GREP_SOURCE_SUBMODULE` as a grep_source type and cases for this new
type in the various switch statements in grep.c.

When initializing a grep_source with type `GREP_SOURCE_SUBMODULE` the
identifier can either be NULL (to indicate that the working tree will be
used) or a SHA1 (the REV of the submodule to be grep'd).  If the
identifier is a SHA1 then we want to fall through to the
`GREP_SOURCE_SHA1` case to handle the copying of the SHA1.

Signed-off-by: Brandon Williams 
---
 grep.c | 16 +++-
 grep.h |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index 1194d35..0dbdc1d 100644
--- a/grep.c
+++ b/grep.c
@@ -1735,12 +1735,23 @@ void grep_source_init(struct grep_source *gs, enum 
grep_source_type type,
case GREP_SOURCE_FILE:
gs->identifier = xstrdup(identifier);
break;
+   case GREP_SOURCE_SUBMODULE:
+   if (!identifier) {
+   gs->identifier = NULL;
+   break;
+   }
+   /*
+* FALL THROUGH
+* If the identifier is non-NULL (in the submodule case) it
+* will be a SHA1 that needs to be copied.
+*/
case GREP_SOURCE_SHA1:
gs->identifier = xmalloc(20);
hashcpy(gs->identifier, identifier);
break;
case GREP_SOURCE_BUF:
gs->identifier = NULL;
+   break;
}
 }
 
@@ -1760,6 +1771,7 @@ void grep_source_clear_data(struct grep_source *gs)
switch (gs->type) {
case GREP_SOURCE_FILE:
case GREP_SOURCE_SHA1:
+   case GREP_SOURCE_SUBMODULE:
free(gs->buf);
gs->buf = NULL;
gs->size = 0;
@@ -1831,8 +1843,10 @@ static int grep_source_load(struct grep_source *gs)
return grep_source_load_sha1(gs);
case GREP_SOURCE_BUF:
return gs->buf ? 0 : -1;
+   case GREP_SOURCE_SUBMODULE:
+   break;
}
-   die("BUG: invalid grep_source type");
+   die("BUG: invalid grep_source type to load");
 }
 
 void grep_source_load_driver(struct grep_source *gs)
diff --git a/grep.h b/grep.h
index 5856a23..267534c 100644
--- a/grep.h
+++ b/grep.h
@@ -161,6 +161,7 @@ struct grep_source {
GREP_SOURCE_SHA1,
GREP_SOURCE_FILE,
GREP_SOURCE_BUF,
+   GREP_SOURCE_SUBMODULE,
} type;
void *identifier;
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH v7 6/7] grep: enable recurse-submodules to work on <tree> objects

2016-12-16 Thread Brandon Williams
Teach grep to recursively search in submodules when provided with a
<tree> object. This allows grep to search a submodule based on the state
of the submodule that is present in a commit of the super project.

When grep is provided with a <tree> object, the name of the object is
prefixed to all output.  In order to provide uniformity of output
between the parent and child processes the option `--parent-basename`
has been added so that the child can preface all of its output with the
name of the parent's object instead of the name of the commit SHA1 of
the submodule. This changes output from the command
`git grep -e. -l --recurse-submodules HEAD`

from:
  HEAD:file
  <commit sha1 of submodule>:sub/file

to:
  HEAD:file
  HEAD:sub/file

Signed-off-by: Brandon Williams 
---
 Documentation/git-grep.txt |  13 -
 builtin/grep.c |  76 ---
 t/t7814-grep-recurse-submodules.sh | 103 -
 tree-walk.c|  28 ++
 4 files changed, 211 insertions(+), 9 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 17aa1ba..71f32f3 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -26,7 +26,7 @@ SYNOPSIS
   [--threads ]
   [-f ] [-e] 
   [--and|--or|--not|(|)|-e ...]
-  [--recurse-submodules]
+  [--recurse-submodules] [--parent-basename ]
   [ [--[no-]exclude-standard] [--cached | --no-index | --untracked] | 
...]
   [--] [...]
 
@@ -91,7 +91,16 @@ OPTIONS
 
 --recurse-submodules::
Recursively search in each submodule that has been initialized and
-   checked out in the repository.
+   checked out in the repository.  When used in combination with the
+option the prefix of all submodule output will be the name of
+   the parent project's  object.
+
+--parent-basename ::
+   For internal use only.  In order to produce uniform output with the
+   --recurse-submodules option, this option can be used to provide the
+   basename of a parent's  object to a submodule so the submodule
+   can prefix its output with the parent's name rather than the SHA1 of
+   the submodule.
 
 -a::
 --text::
diff --git a/builtin/grep.c b/builtin/grep.c
index dca0be6..5918a26 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -19,6 +19,7 @@
 #include "dir.h"
 #include "pathspec.h"
 #include "submodule.h"
+#include "submodule-config.h"
 
 static char const * const grep_usage[] = {
N_("git grep [] [-e]  [...] [[--] ...]"),
@@ -28,6 +29,7 @@ static char const * const grep_usage[] = {
 static const char *super_prefix;
 static int recurse_submodules;
 static struct argv_array submodule_options = ARGV_ARRAY_INIT;
+static const char *parent_basename;
 
 static int grep_submodule_launch(struct grep_opt *opt,
 const struct grep_source *gs);
@@ -534,19 +536,53 @@ static int grep_submodule_launch(struct grep_opt *opt,
 {
struct child_process cp = CHILD_PROCESS_INIT;
int status, i;
+   const char *end_of_base;
+   const char *name;
struct work_item *w = opt->output_priv;
 
+   end_of_base = strchr(gs->name, ':');
+   if (gs->identifier && end_of_base)
+   name = end_of_base + 1;
+   else
+   name = gs->name;
+
prepare_submodule_repo_env(&cp.env_array);
 
/* Add super prefix */
argv_array_pushf(&cp.args, "--super-prefix=%s%s/",
 super_prefix ? super_prefix : "",
-gs->name);
+name);
argv_array_push(&cp.args, "grep");
 
+   /*
+* Add basename of parent project
+* When performing grep on a tree object the filename is prefixed
+* with the object's name: 'tree-name:filename'.  In order to
+* provide uniformity of output we want to pass the name of the
+* parent project's object name to the submodule so the submodule can
+* prefix its output with the parent's name and not its own SHA1.
+*/
+   if (gs->identifier && end_of_base)
+   argv_array_pushf(&cp.args, "--parent-basename=%.*s",
+(int) (end_of_base - gs->name),
+gs->name);
+
/* Add options */
-   for (i = 0; i < submodule_options.argc; i++)
+   for (i = 0; i < submodule_options.argc; i++) {
+   /*
+* If there is a tree identifier for the submodule, add the
+* rev after adding the submodule options but before the
+* pathspecs.  To do this we listen for the '--' and insert the
+* sha1 before pushing the '--' onto 

[PATCH v7 7/7] grep: search history of moved submodules

2016-12-16 Thread Brandon Williams
If a submodule was renamed at any point since its inception then if you
were to try and grep on a commit prior to the submodule being moved, you
wouldn't be able to find a working directory for the submodule since the
path in the past is different from the current path.

This patch teaches grep to find the .git directory for a submodule in
the parent's .git/modules/ directory in the event the path to the
submodule in the commit that is being searched differs from the state of
the currently checked out commit.  If found, the child process that is
spawned to grep the submodule will chdir into its gitdir instead of a
working directory.

In order to override the explicit setting of submodule child process's
gitdir environment variable (which was introduced in '10f5c526')
`GIT_DIR_ENVIRONMENT` needs to be pushed onto child process's env_array.
This allows the searching of history from a submodule's gitdir, rather
than from a working directory.

Signed-off-by: Brandon Williams 
---
 builtin/grep.c | 20 +--
 t/t7814-grep-recurse-submodules.sh | 41 ++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 5918a26..2c727ef 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -547,6 +547,7 @@ static int grep_submodule_launch(struct grep_opt *opt,
name = gs->name;
 
prepare_submodule_repo_env(&cp.env_array);
+   argv_array_push(&cp.env_array, GIT_DIR_ENVIRONMENT);
 
/* Add super prefix */
argv_array_pushf(&cp.args, "--super-prefix=%s%s/",
@@ -615,8 +616,23 @@ static int grep_submodule(struct grep_opt *opt, const 
unsigned char *sha1,
 {
if (!is_submodule_initialized(path))
return 0;
-   if (!is_submodule_populated(path))
-   return 0;
+   if (!is_submodule_populated(path)) {
+   /*
+* If searching history, check for the presense of the
+* submodule's gitdir before skipping the submodule.
+*/
+   if (sha1) {
+   const struct submodule *sub =
+   submodule_from_path(null_sha1, path);
+   if (sub)
+   path = git_path("modules/%s", sub->name);
+
+   if (!(is_directory(path) && is_git_directory(path)))
+   return 0;
+   } else {
+   return 0;
+   }
+   }
 
 #ifndef NO_PTHREADS
if (num_threads) {
diff --git a/t/t7814-grep-recurse-submodules.sh 
b/t/t7814-grep-recurse-submodules.sh
index d5fc316..67247a0 100755
--- a/t/t7814-grep-recurse-submodules.sh
+++ b/t/t7814-grep-recurse-submodules.sh
@@ -186,6 +186,47 @@ test_expect_success !MINGW 'grep recurse submodule colon 
in name' '
test_cmp expect actual
 '
 
+test_expect_success 'grep history with moved submoules' '
+   git init parent &&
+   test_when_finished "rm -rf parent" &&
+   echo "foobar" >parent/file &&
+   git -C parent add file &&
+   git -C parent commit -m "add file" &&
+
+   git init sub &&
+   test_when_finished "rm -rf sub" &&
+   echo "foobar" >sub/file &&
+   git -C sub add file &&
+   git -C sub commit -m "add file" &&
+
+   git -C parent submodule add ../sub dir/sub &&
+   git -C parent commit -m "add submodule" &&
+
+   cat >expect <<-\EOF &&
+   dir/sub/file:foobar
+   file:foobar
+   EOF
+   git -C parent grep -e "foobar" --recurse-submodules >actual &&
+   test_cmp expect actual &&
+
+   git -C parent mv dir/sub sub-moved &&
+   git -C parent commit -m "moved submodule" &&
+
+   cat >expect <<-\EOF &&
+   file:foobar
+   sub-moved/file:foobar
+   EOF
+   git -C parent grep -e "foobar" --recurse-submodules >actual &&
+   test_cmp expect actual &&
+
+   cat >expect <<-\EOF &&
+   HEAD^:dir/sub/file:foobar
+   HEAD^:file:foobar
+   EOF
+   git -C parent grep -e "foobar" --recurse-submodules HEAD^ >actual &&
+   test_cmp expect actual
+'
+
 test_incompatible_with_recurse_submodules ()
 {
test_expect_success "--recurse-submodules and $1 are incompatible" "
-- 
2.8.0.rc3.226.g39d4020



[PATCH v7 1/7] submodules: add helper to determine if a submodule is populated

2016-12-16 Thread Brandon Williams
Add the `is_submodule_populated()` helper function to submodules.c.
`is_submodule_populated()` performs a check to see if a submodule has
been checked out (and has a valid .git directory/file) at the given path.

Signed-off-by: Brandon Williams 
---
 submodule.c | 15 +++
 submodule.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/submodule.c b/submodule.c
index c85ba50..ee3198d 100644
--- a/submodule.c
+++ b/submodule.c
@@ -198,6 +198,21 @@ void gitmodules_config(void)
}
 }
 
+/*
+ * Determine if a submodule has been populated at a given 'path'
+ */
+int is_submodule_populated(const char *path)
+{
+   int ret = 0;
+   char *gitdir = xstrfmt("%s/.git", path);
+
+   if (resolve_gitdir(gitdir))
+   ret = 1;
+
+   free(gitdir);
+   return ret;
+}
+
 int parse_submodule_update_strategy(const char *value,
struct submodule_update_strategy *dst)
 {
diff --git a/submodule.h b/submodule.h
index d9e197a..c4af505 100644
--- a/submodule.h
+++ b/submodule.h
@@ -37,6 +37,7 @@ void set_diffopt_flags_from_submodule_config(struct 
diff_options *diffopt,
const char *path);
 int submodule_config(const char *var, const char *value, void *cb);
 void gitmodules_config(void);
+extern int is_submodule_populated(const char *path);
 int parse_submodule_update_strategy(const char *value,
struct submodule_update_strategy *dst);
 const char *submodule_strategy_to_string(const struct 
submodule_update_strategy *s);
-- 
2.8.0.rc3.226.g39d4020



[PATCH 0/3] push only submodules

2016-12-19 Thread Brandon Williams
This series teaches 'git push' to be able to only push submodules while leaving
a superproject unpushed.

This is a desirable feature in a scenario where updates to the
superproject are handled automatically by some other means, perhaps a
code review tool.  In this scenario a developer could make a change
which spans multiple submodules and then push their commits for code
review.  Upon completion of the code review, their commits can be
accepted and applied to their respective submodules while the code
review tool can then automatically update the superproject to the most
recent SHA1 of each submodule.  This would eliminate the merge conflicts
in the superproject that could occur if multiple people are contributing
to the same submodule.

Brandon Williams (3):
  transport: refactor flag #defines to be more readable
  submodules: add RECURSE_SUBMODULES_ONLY value
  push: add option to push only submodules

 builtin/push.c |  2 ++
 submodule-config.c |  2 ++
 submodule.h|  1 +
 t/t5531-deep-submodule-push.sh | 21 +
 transport.c| 15 +++
 transport.h| 31 ---
 6 files changed, 53 insertions(+), 19 deletions(-)

-- 
2.8.0.rc3.226.g39d4020



[PATCH 1/3] transport: refactor flag #defines to be more readable

2016-12-19 Thread Brandon Williams
All of the #defines for the TRANSPORT_* flags are hardcoded to be powers
of two.  This can be error prone when adding a new flag and is difficult
to read.  This patch refactors these defines to instead use a shift
operation to generate the flags.  This makes adding an additional flag
simpler and makes the defines easier to read.

Signed-off-by: Brandon Williams 
---
 transport.h | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/transport.h b/transport.h
index b8e4ee8..1b65458 100644
--- a/transport.h
+++ b/transport.h
@@ -131,21 +131,21 @@ struct transport {
enum transport_family family;
 };
 
-#define TRANSPORT_PUSH_ALL 1
-#define TRANSPORT_PUSH_FORCE 2
-#define TRANSPORT_PUSH_DRY_RUN 4
-#define TRANSPORT_PUSH_MIRROR 8
-#define TRANSPORT_PUSH_PORCELAIN 16
-#define TRANSPORT_PUSH_SET_UPSTREAM 32
-#define TRANSPORT_RECURSE_SUBMODULES_CHECK 64
-#define TRANSPORT_PUSH_PRUNE 128
-#define TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND 256
-#define TRANSPORT_PUSH_NO_HOOK 512
-#define TRANSPORT_PUSH_FOLLOW_TAGS 1024
-#define TRANSPORT_PUSH_CERT_ALWAYS 2048
-#define TRANSPORT_PUSH_CERT_IF_ASKED 4096
-#define TRANSPORT_PUSH_ATOMIC 8192
-#define TRANSPORT_PUSH_OPTIONS 16384
+#define TRANSPORT_PUSH_ALL (1<<0)
+#define TRANSPORT_PUSH_FORCE   (1<<1)
+#define TRANSPORT_PUSH_DRY_RUN (1<<2)
+#define TRANSPORT_PUSH_MIRROR  (1<<3)
+#define TRANSPORT_PUSH_PORCELAIN   (1<<4)
+#define TRANSPORT_PUSH_SET_UPSTREAM(1<<5)
+#define TRANSPORT_RECURSE_SUBMODULES_CHECK (1<<6)
+#define TRANSPORT_PUSH_PRUNE   (1<<7)
+#define TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND (1<<8)
+#define TRANSPORT_PUSH_NO_HOOK (1<<9)
+#define TRANSPORT_PUSH_FOLLOW_TAGS (1<<10)
+#define TRANSPORT_PUSH_CERT_ALWAYS (1<<11)
+#define TRANSPORT_PUSH_CERT_IF_ASKED   (1<<12)
+#define TRANSPORT_PUSH_ATOMIC  (1<<13)
+#define TRANSPORT_PUSH_OPTIONS (1<<14)
 
 extern int transport_summary_width(const struct ref *refs);
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH 2/3] submodules: add RECURSE_SUBMODULES_ONLY value

2016-12-19 Thread Brandon Williams
Add the `RECURSE_SUBMODULES_ONLY` enum value to submodule.h.  This enum
value will be used in a later patch to push to indicate that only
submodules should be pushed, while the superproject should remain
unpushed.

Signed-off-by: Brandon Williams 
---
 submodule-config.c | 2 ++
 submodule.h| 1 +
 2 files changed, 3 insertions(+)

diff --git a/submodule-config.c b/submodule-config.c
index 098085b..33eb62d 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -251,6 +251,8 @@ static int parse_push_recurse(const char *opt, const char 
*arg,
return RECURSE_SUBMODULES_ON_DEMAND;
else if (!strcmp(arg, "check"))
return RECURSE_SUBMODULES_CHECK;
+   else if (!strcmp(arg, "only"))
+   return RECURSE_SUBMODULES_ONLY;
else if (die_on_error)
die("bad %s argument: %s", opt, arg);
else
diff --git a/submodule.h b/submodule.h
index 23d7668..4a83a4c 100644
--- a/submodule.h
+++ b/submodule.h
@@ -6,6 +6,7 @@ struct argv_array;
 struct sha1_array;
 
 enum {
+   RECURSE_SUBMODULES_ONLY = -5,
RECURSE_SUBMODULES_CHECK = -4,
RECURSE_SUBMODULES_ERROR = -3,
RECURSE_SUBMODULES_NONE = -2,
-- 
2.8.0.rc3.226.g39d4020



[PATCH 3/3] push: add option to push only submodules

2016-12-19 Thread Brandon Williams
Teach push the --recurse-submodules=only option.  This enables push to
recursively push all unpushed submodules while leaving the superproject
unpushed.

This is a desirable feature in a scenario where updates to the
superproject are handled automatically by some other means, perhaps a
code review tool.  In this scenario a developer could make a change
which spans multiple submodules and then push their commits for code
review.  Upon completion of the code review, their commits can be
accepted and applied to their respective submodules while the code
review tool can then automatically update the superproject to the most
recent SHA1 of each submodule.  This would eliminate the merge conflicts
in the superproject that could occur if multiple people are contributing
to the same submodule.

Signed-off-by: Brandon Williams 
---
 builtin/push.c |  2 ++
 t/t5531-deep-submodule-push.sh | 21 +
 transport.c| 15 +++
 transport.h|  1 +
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/builtin/push.c b/builtin/push.c
index 3bb9d6b..9433797 100644
--- a/builtin/push.c
+++ b/builtin/push.c
@@ -565,6 +565,8 @@ int cmd_push(int argc, const char **argv, const char 
*prefix)
flags |= TRANSPORT_RECURSE_SUBMODULES_CHECK;
else if (recurse_submodules == RECURSE_SUBMODULES_ON_DEMAND)
flags |= TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND;
+   else if (recurse_submodules == RECURSE_SUBMODULES_ONLY)
+   flags |= TRANSPORT_RECURSE_SUBMODULES_ONLY;
 
if (tags)
add_refspec("refs/tags/*");
diff --git a/t/t5531-deep-submodule-push.sh b/t/t5531-deep-submodule-push.sh
index 1524ff5..676d686 100755
--- a/t/t5531-deep-submodule-push.sh
+++ b/t/t5531-deep-submodule-push.sh
@@ -454,4 +454,25 @@ test_expect_success 'push --dry-run does not recursively 
update submodules' '
test_cmp expected_submodule actual_submodule
 '
 
+test_expect_success 'push --dry-run does not recursively update submodules' '
+   git -C work push --dry-run --recurse-submodules=only ../pub.git master 
&&
+
+   git -C submodule.git rev-parse master >actual_submodule &&
+   git -C pub.git rev-parse master >actual_pub &&
+   test_cmp expected_pub actual_pub &&
+   test_cmp expected_submodule actual_submodule
+'
+
+test_expect_success 'push only unpushed submodules recursively' '
+   git -C pub.git rev-parse master >expected_pub &&
+   git -C work/gar/bage rev-parse master >expected_submodule &&
+
+   git -C work push --recurse-submodules=only ../pub.git master &&
+
+   git -C submodule.git rev-parse master >actual_submodule &&
+   git -C pub.git rev-parse master >actual_pub &&
+   test_cmp expected_submodule actual_submodule &&
+   test_cmp expected_pub actual_pub
+'
+
 test_done
diff --git a/transport.c b/transport.c
index 04e5d66..20ebee8 100644
--- a/transport.c
+++ b/transport.c
@@ -947,7 +947,9 @@ int transport_push(struct transport *transport,
if (run_pre_push_hook(transport, remote_refs))
return -1;
 
-   if ((flags & TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND) && 
!is_bare_repository()) {
+   if ((flags & (TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND |
+ TRANSPORT_RECURSE_SUBMODULES_ONLY)) &&
+   !is_bare_repository()) {
struct ref *ref = remote_refs;
struct sha1_array commits = SHA1_ARRAY_INIT;
 
@@ -965,7 +967,8 @@ int transport_push(struct transport *transport,
}
 
if (((flags & TRANSPORT_RECURSE_SUBMODULES_CHECK) ||
-((flags & TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND) &&
+((flags & (TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND |
+   TRANSPORT_RECURSE_SUBMODULES_ONLY)) &&
  !pretend)) && !is_bare_repository()) {
struct ref *ref = remote_refs;
struct string_list needs_pushing = STRING_LIST_INIT_DUP;
@@ -984,7 +987,10 @@ int transport_push(struct transport *transport,
sha1_array_clear(&commits);
}
 
-   push_ret = transport->push_refs(transport, remote_refs, flags);
+   if (!(flags & TRANSPORT_RECURSE_SUBMODULES_ONLY))
+   push_ret = transport->push_refs(transport, remote_refs, 
flags);
+   else
+   push_ret = 0;
err = push_had_errors(remote_refs);
ret = push_ret | err;
 
@@ -996,7 +1002,8 @@ int transport_push(st

Re: [PATCH 2/2] dir.c: add retry logic to relocate_gitdir

2016-12-19 Thread Brandon Williams
r.
> + * Return 0 on success and -1 on a minor issue. Die in case the repository is
> + * fatally messed up.
>   */
> -void relocate_gitdir(const char *path, const char *old_git_dir, const char 
> *new_git_dir)
> +int relocate_gitdir(const char *path, const char *old_git_dir, const char 
> *new_git_dir)
>  {
> + char *git_dir = xstrdup(real_path(new_git_dir));
> + char *work_tree = xstrdup(real_path(path));
> + int c;

I guess in a later patch we could clean up these calls to real_path to
use real_pathdup from bw/realpath-wo-chdir.

> +
>   if (rename(old_git_dir, new_git_dir) < 0)
> - die_errno(_("could not migrate git directory from '%s' to 
> '%s'"),
> + die_errno(_("could not rename git directory from '%s' to '%s'"),
>   old_git_dir, new_git_dir);
>  
> - connect_work_tree_and_git_dir(path, new_git_dir);
> + c = point_gitlink_file_to(work_tree, git_dir);
> + if (c < 0) {
> + warning(_("tried to move the git directory from '%s' to '%s'"),
> + old_git_dir, new_git_dir);
> + warning(_("problems with creating a .git file in '%s' to point 
> to '%s'"),
> + work_tree, new_git_dir);
> + if (c == -1) {
> + warning(_("try to undo the move"));
> + if (rename(new_git_dir, old_git_dir) < 0)
> + die_errno(_("could not rename git directory 
> from '%s' to '%s'"),
> + new_git_dir, old_git_dir);
> + return -1;
> + } else if (c == -2) {
> + warning(_("The .git file is still there, "
> + "where the undo operation would move the git "
> + "directory"));
> + die(_("failed to undo the git directory relocation"));
> + }
> + };
> +
> + if (set_core_work_tree_to_connect(work_tree, git_dir) < 0) {
> + /*
> +  * At this point the git dir was moved and the gitlink file
> +  * was updated correctly, this leaves the repository in a
> +  * state that is not totally broken.  Setting the core.worktree
> +  * correctly would have been the last step to perform a
> +  * complete git directory relocation, but this is good enough
> +  * to not die.
> +  */
> + warning(_("could not set core.worktree in '%s' to point at 
> '%s'"),
> + git_dir, work_tree);
> + return -1;
> + }
> +
> + free(work_tree);
> + free(git_dir);
> + return 0;
>  }
> diff --git a/dir.h b/dir.h
> index bf23a470af..86f1cf790f 100644
> --- a/dir.h
> +++ b/dir.h
> @@ -336,7 +336,7 @@ void write_untracked_extension(struct strbuf *out, struct 
> untracked_cache *untra
>  void add_untracked_cache(struct index_state *istate);
>  void remove_untracked_cache(struct index_state *istate);
>  extern void connect_work_tree_and_git_dir(const char *work_tree, const char 
> *git_dir);
> -extern void relocate_gitdir(const char *path,
> - const char *old_git_dir,
> - const char *new_git_dir);
> +extern int relocate_gitdir(const char *path,
> +const char *old_git_dir,
> +const char *new_git_dir);
>  #endif
> diff --git a/submodule.c b/submodule.c
> index 45ccfb7ab4..fa1f44bb5a 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -1277,7 +1277,8 @@ static void 
> relocate_single_git_dir_into_superproject(const char *prefix,
>   prefix ? prefix : "", path,
>   real_old_git_dir, real_new_git_dir);
>  
> - relocate_gitdir(path, real_old_git_dir, real_new_git_dir);
> + if (relocate_gitdir(path, real_old_git_dir, real_new_git_dir))
> + die(_("could not relocate git directory of '%s'"), path);
>  
>   free(old_git_dir);
>   free(real_old_git_dir);
> -- 
> 2.11.0.rc2.53.gb7b3fba.dirty
> 

-- 
Brandon Williams


Re: [PATCHv4 3/5] run-command: add {run,start,finish}_command_or_die

2016-12-19 Thread Brandon Williams
On 12/19, Stefan Beller wrote:
> In a later patch we want to report the exact command that we run in the
> error message. Add a convenient way to the run command API that runs the
> given command or reports the exact command as failure.
> 
> Signed-off-by: Stefan Beller 
> ---
>  run-command.c | 28 
>  run-command.h |  4 
>  2 files changed, 32 insertions(+)
> 
> diff --git a/run-command.c b/run-command.c
> index ca905a9e80..a0587db334 100644
> --- a/run-command.c
> +++ b/run-command.c
> @@ -279,6 +279,17 @@ static int wait_or_whine(pid_t pid, const char *argv0, 
> int in_signal)
>   return code;
>  }
>  
> +static void report_and_die(struct child_process *cmd, const char *action)
> +{
> + int i;
> + struct strbuf err = STRBUF_INIT;
> + if (cmd->git_cmd)
> + strbuf_addstr(&err, "git ");
> + for (i = 0; cmd->argv[i]; )

Missing the increment of i here.

> + strbuf_addf(&err, "'%s'", cmd->argv[i]);
> + die(_("could not %s %s"), action, err.buf);
> +}
> +
>  int start_command(struct child_process *cmd)
>  {
>   int need_in, need_out, need_err;
> @@ -546,6 +557,12 @@ int start_command(struct child_process *cmd)
>   return 0;
>  }
>  
> +void start_command_or_die(struct child_process *cmd)
> +{
> + if (start_command(cmd))
> + report_and_die(cmd, "start");
> +}
> +
>  int finish_command(struct child_process *cmd)
>  {
>   int ret = wait_or_whine(cmd->pid, cmd->argv[0], 0);
> @@ -558,6 +575,11 @@ int finish_command_in_signal(struct child_process *cmd)
>   return wait_or_whine(cmd->pid, cmd->argv[0], 1);
>  }
>  
> +void finish_command_or_die(struct child_process *cmd)
> +{
> + if (finish_command(cmd))
> + report_and_die(cmd, "finish");
> +}
>  
>  int run_command(struct child_process *cmd)
>  {
> @@ -592,6 +614,12 @@ int run_command_v_opt_cd_env(const char **argv, int opt, 
> const char *dir, const
>   return run_command(&cmd);
>  }
>  
> +void run_command_or_die(struct child_process *cmd)
> +{
> + if (finish_command(cmd))
> + report_and_die(cmd, "run");
> +}
> +
>  #ifndef NO_PTHREADS
>  static pthread_t main_thread;
>  static int main_thread_set;
> diff --git a/run-command.h b/run-command.h
> index dd1c78c28d..e4585885c5 100644
> --- a/run-command.h
> +++ b/run-command.h
> @@ -56,6 +56,10 @@ int finish_command(struct child_process *);
>  int finish_command_in_signal(struct child_process *);
>  int run_command(struct child_process *);
>  
> +void start_command_or_die(struct child_process *);
> +void finish_command_or_die(struct child_process *);
> +void run_command_or_die(struct child_process *);
> +
>  /*
>   * Returns the path to the hook file, or NULL if the hook is missing
>   * or disabled. Note that this points to static storage that will be
> -- 
> 2.11.0.rc2.53.gb7b3fba.dirty
> 

-- 
Brandon Williams


Re: [PATCH bw/realpath-wo-chdir] real_path: canonicalize directory separators in root parts

2016-12-21 Thread Brandon Williams
On 12/21, Johannes Sixt wrote:
> When an absolute path is resolved, resolution begins at the first path
> component after the root part. The root part is just copied verbatim,
> because it must not be inspected for symbolic links. For POSIX paths,
> this is just the initial slash, but on Windows, the root part has the
> forms c:\ or \\server\share. We do want to canonicalize the back-slashes
> in the root part because these parts are compared to the result of
> getcwd(), which does return a fully canonicalized path.
> 
> Factor out a helper that splits off the root part, and have it
> canonicalize the copied part.
> 
> This change was prompted because t1504-ceiling-dirs.sh caught a breakage
> in GIT_CEILING_DIRECTORIES handling on Windows.
> 
> Signed-off-by: Johannes Sixt 
> ---
>  This introduces the second #ifdef GIT_WINDOWS_NATIVE in this file.
>  It could be avoided if convert_slashes were defined as a do-nothing
>  on POSIX, but that would not help the other occurrence. Therefore,
>  I suggest to leave it at this.
> 
>  abspath.c | 29 +
>  1 file changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/abspath.c b/abspath.c
> index 79ee310867..1d56f5ed9f 100644
> --- a/abspath.c
> +++ b/abspath.c
> @@ -48,6 +48,19 @@ static void get_next_component(struct strbuf *next, struct 
> strbuf *remaining)
>   strbuf_remove(remaining, 0, end - remaining->buf);
>  }
>  
> +/* copies root part from remaining to resolved, canonicalizing it on the way 
> */
> +static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
> +{
> + int offset = offset_1st_component(remaining->buf);
> +
> + strbuf_reset(resolved);
> + strbuf_add(resolved, remaining->buf, offset);
> +#ifdef GIT_WINDOWS_NATIVE
> + convert_slashes(resolved->buf);
> +#endif

So then the only extra canonicalization that is happening here is
converting '\\server\share' to '//server/share'? (or 'c:\' to 'c:/')

-- 
Brandon Williams


Re: [PATCH bw/realpath-wo-chdir] real_path: canonicalize directory separators in root parts

2016-12-22 Thread Brandon Williams
On 12/22, Johannes Sixt wrote:
> Am 21.12.2016 um 23:33 schrieb Brandon Williams:
> >On 12/21, Johannes Sixt wrote:
> >>+/* copies root part from remaining to resolved, canonicalizing it on the 
> >>way */
> >>+static void get_root_part(struct strbuf *resolved, struct strbuf 
> >>*remaining)
> >>+{
> >>+   int offset = offset_1st_component(remaining->buf);
> >>+
> >>+   strbuf_reset(resolved);
> >>+   strbuf_add(resolved, remaining->buf, offset);
> >>+#ifdef GIT_WINDOWS_NATIVE
> >>+   convert_slashes(resolved->buf);
> >>+#endif
> >
> >So then the only extra canonicalization that is happening here is
> >converting '\\server\share' to '//server/share'? (or 'c:\' to 'c:/')
> 
> Correct. All other directory separators are canonicalized by the
> primary function, strbuf_realpath.

Sounds good. Logically everything looks good to me.  And I like that
setting 'resolved' to the root of an abs path is pulled out into a
helper function.  It took me a couple extra seconds to realize that
offset_1st_component returns 0 with a relative path, which causes
the call to get_root_part to essentially be a noop (ie nothing is
resolved).

Thanks for helping get this to work on windows!

-- 
Brandon Williams


Re: [PATCH] pathspec: give better message for submodule related pathspec error

2016-12-28 Thread Brandon Williams
On 12/27, Stefan Beller wrote:
> Every once in a while someone complains to the mailing list to have
> run into this weird assertion[1].
> 
> The usual response from the mailing list is link to old discussions[2],
> and acknowledging the problem stating it is known.
> 
> For now just improve the user visible error message.
> 
> [1] https://www.google.com/search?q=item-%3Enowildcard_len
> [2] 
> http://git.661346.n2.nabble.com/assert-failed-in-submodule-edge-case-td7628687.html
> https://www.spinics.net/lists/git/msg249473.html
> 
> Signed-off-by: Stefan Beller 
> ---
> 
> If you were following the mailing list closely today, you may sense
> that I am cleaning up stalled branches. :)
> 
> I think such a hot fix is warranted given how often we had reports
> on the mailing list.
> 
> Thanks,
> Stefan
> 
>  pathspec.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/pathspec.c b/pathspec.c
> index 22ca74a126..d522f43331 100644
> --- a/pathspec.c
> +++ b/pathspec.c
> @@ -313,8 +313,11 @@ static unsigned prefix_pathspec(struct pathspec_item 
> *item,
>   }
>  
>   /* sanity checks, pathspec matchers assume these are sane */
> - assert(item->nowildcard_len <= item->len &&
> -item->prefix <= item->len);
> + if (item->nowildcard_len <= item->len &&
> + item->prefix <= item->len)
> + die (_("Path leads inside submodule '%s', but the submodule "
> +"was not recognized, i.e. not initialized or deleted"),
> +        ce->name);
>   return magic;

I haven't been following everything on the list these past couple days,
but are we sure this is caused by submodules?  Also variable 'ce'
shouldn't be in scope here.

-- 
Brandon Williams


Re: [PATCHv2] pathspec: give better message for submodule related pathspec error

2016-12-28 Thread Brandon Williams
On 12/28, Stefan Beller wrote:
> Every once in a while someone complains to the mailing list to have
> run into this weird assertion[1].
> 
> The usual response from the mailing list is link to old discussions[2],
> and acknowledging the problem stating it is known.
> 
> For now just improve the user visible error message.
> 
> [1] https://www.google.com/search?q=item-%3Enowildcard_len
> [2] 
> http://git.661346.n2.nabble.com/assert-failed-in-submodule-edge-case-td7628687.html
> https://www.spinics.net/lists/git/msg249473.html
> 
> Signed-off-by: Stefan Beller 
> ---
> 
> Peff wrote:
> > Don't you need to flip the logic here? An assert() triggers when the
> > condition is not true, but an "if" does the opposite. So "assert(X)"
> > should always become "if (!X) die(...)".
> 
> Duh! and it should compile as well. 
> 
> Thanks,
> Stefan
> 
>  pathspec.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/pathspec.c b/pathspec.c
> index 22ca74a126..4724d522f2 100644
> --- a/pathspec.c
> +++ b/pathspec.c
> @@ -313,8 +313,11 @@ static unsigned prefix_pathspec(struct pathspec_item 
> *item,
>   }
>  
>   /* sanity checks, pathspec matchers assume these are sane */
> - assert(item->nowildcard_len <= item->len &&
> -item->prefix <= item->len);
> + if (item->nowildcard_len > item->len ||
> + item->prefix > item->len)
> + die (_("Path leads inside submodule '%s', but the submodule "
> +"was not recognized, i.e. not initialized or deleted"),
> +item->original);
>   return magic;
>  }

Turns out I should comment on the most recent version of the patch :P
This looks better to me. (It resolves the issue with using a variable
not in scope).

-- 
Brandon Williams


Re: What's cooking in git.git (Dec 2016, #08; Tue, 27)

2016-12-28 Thread Brandon Williams
On 12/27, Junio C Hamano wrote:
> * bw/pathspec-cleanup (2016-12-14) 16 commits
>  - pathspec: rename prefix_pathspec to init_pathspec_item
>  - pathspec: small readability changes
>  - pathspec: create strip submodule slash helpers
>  - pathspec: create parse_element_magic helper
>  - pathspec: create parse_long_magic function
>  - pathspec: create parse_short_magic function
>  - pathspec: factor global magic into its own function
>  - pathspec: simpler logic to prefix original pathspec elements
>  - pathspec: always show mnemonic and name in unsupported_magic
>  - pathspec: remove unused variable from unsupported_magic
>  - pathspec: copy and free owned memory
>  - pathspec: remove the deprecated get_pathspec function
>  - ls-tree: convert show_recursive to use the pathspec struct interface
>  - dir: convert fill_directory to use the pathspec struct interface
>  - dir: remove struct path_simplify
>  - mv: remove use of deprecated 'get_pathspec()'
> 
>  Code clean-up in the pathspec API.
> 
>  Waiting for the (hopefully) final round of review before 'next'.

What more needs to be reviewed for this series?

-- 
Brandon Williams


Re: What's cooking in git.git (Dec 2016, #08; Tue, 27)

2017-01-03 Thread Brandon Williams
On 01/03, Duy Nguyen wrote:
> On Thu, Dec 29, 2016 at 5:06 PM, Duy Nguyen  wrote:
> > On Thu, Dec 29, 2016 at 1:18 AM, Brandon Williams  wrote:
> >> On 12/27, Junio C Hamano wrote:
> >>> * bw/pathspec-cleanup (2016-12-14) 16 commits
> >>>  - pathspec: rename prefix_pathspec to init_pathspec_item
> >>>  - pathspec: small readability changes
> >>>  - pathspec: create strip submodule slash helpers
> >>>  - pathspec: create parse_element_magic helper
> >>>  - pathspec: create parse_long_magic function
> >>>  - pathspec: create parse_short_magic function
> >>>  - pathspec: factor global magic into its own function
> >>>  - pathspec: simpler logic to prefix original pathspec elements
> >>>  - pathspec: always show mnemonic and name in unsupported_magic
> >>>  - pathspec: remove unused variable from unsupported_magic
> >>>  - pathspec: copy and free owned memory
> >>>  - pathspec: remove the deprecated get_pathspec function
> >>>  - ls-tree: convert show_recursive to use the pathspec struct interface
> >>>  - dir: convert fill_directory to use the pathspec struct interface
> >>>  - dir: remove struct path_simplify
> >>>  - mv: remove use of deprecated 'get_pathspec()'
> >>>
> >>>  Code clean-up in the pathspec API.
> >>>
> >>>  Waiting for the (hopefully) final round of review before 'next'.
> >>
> >> What more needs to be reviewed for this series?
> >
> > I wanted to have a look at it but I successfully managed to put if off
> > so far. Will do soonish.
> 
> I have just sent a couple of minor comments. The rest looks good!

Thanks!  I'll go take a look.

-- 
Brandon Williams


Re: [PATCH v3 08/16] pathspec: always show mnemonic and name in unsupported_magic

2017-01-03 Thread Brandon Williams
On 01/03, Duy Nguyen wrote:
> On Wed, Dec 14, 2016 at 6:14 AM, Brandon Williams  wrote:
> > @@ -340,8 +336,9 @@ static void NORETURN unsupported_magic(const char 
> > *pattern,
> > continue;
> > if (sb.len)
> > strbuf_addch(&sb, ' ');
> > -   if (short_magic & m->bit)
> > -   strbuf_addf(&sb, "'%c'", m->mnemonic);
> > +
> > +   if (m->mnemonic)
> > +   strbuf_addf(&sb, "'(%c)%s'", m->mnemonic, m->name);
> > else
> > strbuf_addf(&sb, "'%s'", m->name);
> > }
> 
> The die() call is out of diff context, but it'll print
> 
> pathspec magic not supported by this command: (!)top
> 
> which looks too much like :() pathspec syntax to me
> and threw me off a bit. And it's a bit cryptic, isn't it? Since this
> is meant for human, maybe we can just write
> 
> pathspec magic not supported by this command: top (mnemonic: '!')

I was trying to keep it short and sweet, turns out that ends up being
more difficult to understand.  I like your suggestion, it definitely
makes things much clearer.

-- 
Brandon Williams


[PATCH v4 03/16] dir: convert fill_directory to use the pathspec struct interface

2017-01-03 Thread Brandon Williams
Convert 'fill_directory()' to use the pathspec struct interface from
using the '_raw' entry in the pathspec struct.

Signed-off-by: Brandon Williams 
---
 dir.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/dir.c b/dir.c
index 1df61f10f..e8ddd7f8a 100644
--- a/dir.c
+++ b/dir.c
@@ -174,17 +174,21 @@ char *common_prefix(const struct pathspec *pathspec)
 
 int fill_directory(struct dir_struct *dir, const struct pathspec *pathspec)
 {
-   size_t len;
+   char *prefix;
+   size_t prefix_len;
 
/*
 * Calculate common prefix for the pathspec, and
 * use that to optimize the directory walk
 */
-   len = common_prefix_len(pathspec);
+   prefix = common_prefix(pathspec);
+   prefix_len = prefix ? strlen(prefix) : 0;
 
/* Read the directory and prune it */
-   read_directory(dir, pathspec->nr ? pathspec->_raw[0] : "", len, 
pathspec);
-   return len;
+   read_directory(dir, prefix, prefix_len, pathspec);
+
+   free(prefix);
+   return prefix_len;
 }
 
 int within_depth(const char *name, int namelen,
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 10/16] pathspec: factor global magic into its own function

2017-01-03 Thread Brandon Williams
Create helper functions to read the global magic environment variables
in addition to factoring out the global magic gathering logic into its
own function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 127 +
 1 file changed, 78 insertions(+), 49 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 032436bc1..f760f44f9 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -87,6 +87,75 @@ static void prefix_magic(struct strbuf *sb, int prefixlen, 
unsigned magic)
strbuf_addf(sb, ",prefix:%d)", prefixlen);
 }
 
+static inline int get_literal_global(void)
+{
+   static int literal = -1;
+
+   if (literal < 0)
+   literal = git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
+
+   return literal;
+}
+
+static inline int get_glob_global(void)
+{
+   static int glob = -1;
+
+   if (glob < 0)
+   glob = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0);
+
+   return glob;
+}
+
+static inline int get_noglob_global(void)
+{
+   static int noglob = -1;
+
+   if (noglob < 0)
+   noglob = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 0);
+
+   return noglob;
+}
+
+static inline int get_icase_global(void)
+{
+   static int icase = -1;
+
+   if (icase < 0)
+   icase = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
+
+   return icase;
+}
+
+static int get_global_magic(int element_magic)
+{
+   int global_magic = 0;
+
+   if (get_literal_global())
+   global_magic |= PATHSPEC_LITERAL;
+
+   /* --glob-pathspec is overridden by :(literal) */
+   if (get_glob_global() && !(element_magic & PATHSPEC_LITERAL))
+   global_magic |= PATHSPEC_GLOB;
+
+   if (get_glob_global() && get_noglob_global())
+   die(_("global 'glob' and 'noglob' pathspec settings are 
incompatible"));
+
+   if (get_icase_global())
+   global_magic |= PATHSPEC_ICASE;
+
+   if ((global_magic & PATHSPEC_LITERAL) &&
+   (global_magic & ~PATHSPEC_LITERAL))
+   die(_("global 'literal' pathspec setting is incompatible "
+ "with all other global pathspec settings"));
+
+   /* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
+   if (get_noglob_global() && !(element_magic & PATHSPEC_GLOB))
+   global_magic |= PATHSPEC_LITERAL;
+
+   return global_magic;
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -104,46 +173,12 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
const char *prefix, int prefixlen,
const char *elt)
 {
-   static int literal_global = -1;
-   static int glob_global = -1;
-   static int noglob_global = -1;
-   static int icase_global = -1;
-   unsigned magic = 0, element_magic = 0, global_magic = 0;
+   unsigned magic = 0, element_magic = 0;
const char *copyfrom = elt;
char *match;
int i, pathspec_prefix = -1;
 
-   if (literal_global < 0)
-   literal_global = 
git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
-   if (literal_global)
-   global_magic |= PATHSPEC_LITERAL;
-
-   if (glob_global < 0)
-   glob_global = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0);
-   if (glob_global)
-   global_magic |= PATHSPEC_GLOB;
-
-   if (noglob_global < 0)
-   noglob_global = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 
0);
-
-   if (glob_global && noglob_global)
-   die(_("global 'glob' and 'noglob' pathspec settings are 
incompatible"));
-
-
-   if (icase_global < 0)
-   icase_global = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
-   if (icase_global)
-   global_magic |= PATHSPEC_ICASE;
-
-   if ((global_magic & PATHSPEC_LITERAL) &&
-   (global_magic & ~PATHSPEC_LITERAL))
-   die(_("global 'literal' pathspec setting is incompatible "
- "with all other global pathspec settings"));
-
-   if (flags & PATHSPEC_LITERAL_PATH)
-   global_magic = 0;
-
-   if (elt[0] != ':' || literal_global ||
+   if (elt[0] != ':' || get_literal_global() ||
(flags & PATHSPEC_LITERAL_PATH)) {
; /* nothing to do */
} else if (elt[1] == '(') {
@@ -207,15 +242,11 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
 
magic |= element_magic;
 
-   /* --noglob-pathspec adds :(literal) _unle

[PATCH v4 04/16] ls-tree: convert show_recursive to use the pathspec struct interface

2017-01-03 Thread Brandon Williams
Convert 'show_recursive()' to use the pathspec struct interface instead
of the '_raw' entry in the pathspec struct.

Signed-off-by: Brandon Williams 
---
 builtin/ls-tree.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 0e30d8623..d7ebeb4ce 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -31,21 +31,18 @@ static const  char * const ls_tree_usage[] = {
 
 static int show_recursive(const char *base, int baselen, const char *pathname)
 {
-   const char **s;
+   int i;
 
if (ls_options & LS_RECURSIVE)
return 1;
 
-   s = pathspec._raw;
-   if (!s)
+   if (!pathspec.nr)
return 0;
 
-   for (;;) {
-   const char *spec = *s++;
+   for (i = 0; i < pathspec.nr; i++) {
+   const char *spec = pathspec.items[i].match;
int len, speclen;
 
-   if (!spec)
-   return 0;
if (strncmp(base, spec, baselen))
continue;
len = strlen(pathname);
@@ -59,6 +56,7 @@ static int show_recursive(const char *base, int baselen, 
const char *pathname)
continue;
return 1;
}
+   return 0;
 }
 
 static int show_tree(const unsigned char *sha1, struct strbuf *base,
@@ -175,8 +173,8 @@ int cmd_ls_tree(int argc, const char **argv, const char 
*prefix)
 * cannot be lifted until it is converted to use
 * match_pathspec() or tree_entry_interesting()
 */
-   parse_pathspec(&pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE |
- PATHSPEC_EXCLUDE,
+   parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC &
+ ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL),
   PATHSPEC_PREFER_CWD,
   prefix, argv + 1);
for (i = 0; i < pathspec.nr; i++)
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 01/16] mv: remove use of deprecated 'get_pathspec()'

2017-01-03 Thread Brandon Williams
Convert the 'internal_copy_pathspec()' function to 'prefix_path()'
instead of using the deprecated 'get_pathspec()' interface.  Also,
rename 'internal_copy_pathspec()' to 'internal_prefix_pathspec()' to be
more descriptive of what the function is actually doing.

In addition to this, fix a memory leak caused by only duplicating some
of the pathspec elements.  Instead always duplicate all of the
pathspec elements as an intermediate step (with modifications based on
the passed in flags).  This way the intermediate strings can then be
freed after getting the result from 'prefix_path()'.

Signed-off-by: Brandon Williams 
---
 builtin/mv.c | 50 +++---
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 2f43877bc..4e86dc523 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2006 Johannes Schindelin
  */
 #include "builtin.h"
+#include "pathspec.h"
 #include "lockfile.h"
 #include "dir.h"
 #include "cache-tree.h"
@@ -19,31 +20,42 @@ static const char * const builtin_mv_usage[] = {
 #define DUP_BASENAME 1
 #define KEEP_TRAILING_SLASH 2
 
-static const char **internal_copy_pathspec(const char *prefix,
-  const char **pathspec,
-  int count, unsigned flags)
+static const char **internal_prefix_pathspec(const char *prefix,
+const char **pathspec,
+int count, unsigned flags)
 {
int i;
const char **result;
+   int prefixlen = prefix ? strlen(prefix) : 0;
ALLOC_ARRAY(result, count + 1);
-   COPY_ARRAY(result, pathspec, count);
-   result[count] = NULL;
+
+   /* Create an intermediate copy of the pathspec based on the flags */
for (i = 0; i < count; i++) {
-   int length = strlen(result[i]);
+   int length = strlen(pathspec[i]);
int to_copy = length;
+   char *it;
while (!(flags & KEEP_TRAILING_SLASH) &&
-  to_copy > 0 && is_dir_sep(result[i][to_copy - 1]))
+  to_copy > 0 && is_dir_sep(pathspec[i][to_copy - 1]))
to_copy--;
-   if (to_copy != length || flags & DUP_BASENAME) {
-   char *it = xmemdupz(result[i], to_copy);
-   if (flags & DUP_BASENAME) {
-   result[i] = xstrdup(basename(it));
-   free(it);
-   } else
-   result[i] = it;
+
+   it = xmemdupz(pathspec[i], to_copy);
+   if (flags & DUP_BASENAME) {
+   result[i] = xstrdup(basename(it));
+   free(it);
+   } else {
+   result[i] = it;
}
}
-   return get_pathspec(prefix, result);
+   result[count] = NULL;
+
+   /* Prefix the pathspec and free the old intermediate strings */
+   for (i = 0; i < count; i++) {
+   const char *match = prefix_path(prefix, prefixlen, result[i]);
+   free((char *) result[i]);
+   result[i] = match;
+   }
+
+   return result;
 }
 
 static const char *add_slash(const char *path)
@@ -130,7 +142,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
if (read_cache() < 0)
die(_("index file corrupt"));
 
-   source = internal_copy_pathspec(prefix, argv, argc, 0);
+   source = internal_prefix_pathspec(prefix, argv, argc, 0);
modes = xcalloc(argc, sizeof(enum update_mode));
/*
 * Keep trailing slash, needed to let
@@ -140,16 +152,16 @@ int cmd_mv(int argc, const char **argv, const char 
*prefix)
flags = KEEP_TRAILING_SLASH;
if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1]))
flags = 0;
-   dest_path = internal_copy_pathspec(prefix, argv + argc, 1, flags);
+   dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags);
submodule_gitfile = xcalloc(argc, sizeof(char *));
 
if (dest_path[0][0] == '\0')
/* special case: "." was normalized to "" */
-   destination = internal_copy_pathspec(dest_path[0], argv, argc, 
DUP_BASENAME);
+   destination = internal_prefix_pathspec(dest_path[0], argv, 
argc, DUP_BASENAME);
else if (!lstat(dest_path[0], &st) &&
S_ISDIR(st.st_mode)) {
dest_path[0] = add_slash(dest_path[0]);
-   destination = internal_copy_pathspec(dest_path[0], argv, argc, 
DUP_BASEN

[PATCH v4 09/16] pathspec: simpler logic to prefix original pathspec elements

2017-01-03 Thread Brandon Williams
The logic used to prefix an original pathspec element with 'prefix'
magic is more general purpose and can be used for more than just short
magic.  Remove the extra code paths and rename 'prefix_short_magic' to
'prefix_magic' to better indicate that it can be used in more general
situations.

Also, slightly change the logic which decides when to prefix the
original element in order to prevent a pathspec of "." from getting
converted to "" (empty string).

Signed-off-by: Brandon Williams 
---
 pathspec.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index ee87494c7..032436bc1 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -74,13 +74,12 @@ static struct pathspec_magic {
{ PATHSPEC_EXCLUDE, '!', "exclude" },
 };
 
-static void prefix_short_magic(struct strbuf *sb, int prefixlen,
-  unsigned short_magic)
+static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
 {
int i;
strbuf_addstr(sb, ":(");
for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
-   if (short_magic & pathspec_magic[i].bit) {
+   if (magic & pathspec_magic[i].bit) {
if (sb->buf[sb->len - 1] != '(')
strbuf_addch(sb, ',');
strbuf_addstr(sb, pathspec_magic[i].name);
@@ -109,8 +108,8 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
static int glob_global = -1;
static int noglob_global = -1;
static int icase_global = -1;
-   unsigned magic = 0, short_magic = 0, global_magic = 0;
-   const char *copyfrom = elt, *long_magic_end = NULL;
+   unsigned magic = 0, element_magic = 0, global_magic = 0;
+   const char *copyfrom = elt;
char *match;
int i, pathspec_prefix = -1;
 
@@ -164,7 +163,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
if (strlen(pathspec_magic[i].name) == len &&
!strncmp(pathspec_magic[i].name, copyfrom, 
len)) {
-   magic |= pathspec_magic[i].bit;
+   element_magic |= pathspec_magic[i].bit;
break;
}
if (starts_with(copyfrom, "prefix:")) {
@@ -183,7 +182,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
}
if (*copyfrom != ')')
die(_("Missing ')' at the end of pathspec magic in 
'%s'"), elt);
-   long_magic_end = copyfrom;
copyfrom++;
} else {
/* shorthand */
@@ -196,7 +194,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
break;
for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
if (pathspec_magic[i].mnemonic == ch) {
-   short_magic |= pathspec_magic[i].bit;
+   element_magic |= pathspec_magic[i].bit;
break;
}
if (ARRAY_SIZE(pathspec_magic) <= i)
@@ -207,7 +205,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
copyfrom++;
}
 
-   magic |= short_magic;
+   magic |= element_magic;
 
/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
if (noglob_global && !(magic & PATHSPEC_GLOB))
@@ -242,18 +240,13 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
 * Prefix the pathspec (keep all magic) and assign to
 * original. Useful for passing to another command.
 */
-   if (flags & PATHSPEC_PREFIX_ORIGIN) {
+   if ((flags & PATHSPEC_PREFIX_ORIGIN) &&
+   prefixlen && !literal_global) {
struct strbuf sb = STRBUF_INIT;
-   if (prefixlen && !literal_global) {
-   /* Preserve the actual prefix length of each pattern */
-   if (short_magic)
-   prefix_short_magic(&sb, prefixlen, short_magic);
-   else if (long_magic_end) {
-   strbuf_add(&sb, elt, long_magic_end - elt);
-   strbuf_addf(&sb, ",prefix:%d)", prefixlen);
-   } else
-  

[PATCH v4 07/16] pathspec: remove unused variable from unsupported_magic

2017-01-03 Thread Brandon Williams
Remove the unused variable 'n' from the 'unsupported_magic()' function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index b8faa8f46..b9a3819d6 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -333,8 +333,8 @@ static void NORETURN unsupported_magic(const char *pattern,
   unsigned short_magic)
 {
struct strbuf sb = STRBUF_INIT;
-   int i, n;
-   for (n = i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+   int i;
+   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
const struct pathspec_magic *m = pathspec_magic + i;
if (!(magic & m->bit))
continue;
@@ -344,7 +344,6 @@ static void NORETURN unsupported_magic(const char *pattern,
strbuf_addf(&sb, "'%c'", m->mnemonic);
else
strbuf_addf(&sb, "'%s'", m->name);
-   n++;
}
/*
 * We may want to substitute "this command" with a command
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 14/16] pathspec: create strip submodule slash helpers

2017-01-03 Thread Brandon Williams
Factor out the logic responsible for stripping the trailing slash on
pathspecs referencing submodules into its own function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 68 ++
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index fe811a0a4..4a1f8ea44 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -258,6 +258,44 @@ static const char *parse_element_magic(unsigned *magic, 
int *prefix_len,
return parse_short_magic(magic, elem);
 }
 
+static void strip_submodule_slash_cheap(struct pathspec_item *item)
+{
+   if (item->len >= 1 && item->match[item->len - 1] == '/') {
+   int i = cache_name_pos(item->match, item->len - 1);
+
+   if (i >= 0 && S_ISGITLINK(active_cache[i]->ce_mode)) {
+   item->len--;
+   item->match[item->len] = '\0';
+   }
+   }
+}
+
+static void strip_submodule_slash_expensive(struct pathspec_item *item)
+{
+   int i;
+
+   for (i = 0; i < active_nr; i++) {
+   struct cache_entry *ce = active_cache[i];
+   int ce_len = ce_namelen(ce);
+
+   if (!S_ISGITLINK(ce->ce_mode))
+   continue;
+
+   if (item->len <= ce_len || item->match[ce_len] != '/' ||
+   memcmp(ce->name, item->match, ce_len))
+   continue;
+
+   if (item->len == ce_len + 1) {
+   /* strip trailing slash */
+   item->len--;
+   item->match[item->len] = '\0';
+   } else {
+   die(_("Pathspec '%s' is in submodule '%.*s'"),
+   item->original, ce_len, ce->name);
+   }
+   }
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -278,7 +316,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
unsigned magic = 0, element_magic = 0;
const char *copyfrom = elt;
char *match;
-   int i, pathspec_prefix = -1;
+   int pathspec_prefix = -1;
 
/* PATHSPEC_LITERAL_PATH ignores magic */
if (flags & PATHSPEC_LITERAL_PATH) {
@@ -329,33 +367,11 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
item->len = strlen(item->match);
item->prefix = prefixlen;
 
-   if ((flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP) &&
-   (item->len >= 1 && item->match[item->len - 1] == '/') &&
-   (i = cache_name_pos(item->match, item->len - 1)) >= 0 &&
-   S_ISGITLINK(active_cache[i]->ce_mode)) {
-   item->len--;
-   match[item->len] = '\0';
-   }
+   if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP)
+   strip_submodule_slash_cheap(item);
 
if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
-   for (i = 0; i < active_nr; i++) {
-   struct cache_entry *ce = active_cache[i];
-   int ce_len = ce_namelen(ce);
-
-   if (!S_ISGITLINK(ce->ce_mode))
-   continue;
-
-   if (item->len <= ce_len || match[ce_len] != '/' ||
-   memcmp(ce->name, match, ce_len))
-   continue;
-   if (item->len == ce_len + 1) {
-   /* strip trailing slash */
-   item->len--;
-   match[item->len] = '\0';
-   } else
-   die (_("Pathspec '%s' is in submodule '%.*s'"),
-elt, ce_len, ce->name);
-   }
+   strip_submodule_slash_expensive(item);
 
if (magic & PATHSPEC_LITERAL)
item->nowildcard_len = item->len;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 08/16] pathspec: always show mnemonic and name in unsupported_magic

2017-01-03 Thread Brandon Williams
For better clarity, always show the mnemonic and name of the unsupported
magic being used.  This lets users have a more clear understanding of
what magic feature isn't supported.  And if they supplied a mnemonic,
the user will be told what its corresponding name is which will allow
them to more easily search the man pages for that magic type.

This also avoids passing an extra parameter around the pathspec
initialization code.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index b9a3819d6..ee87494c7 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -101,9 +101,7 @@ static void prefix_short_magic(struct strbuf *sb, int 
prefixlen,
  * the prefix part must always match literally, and a single stupid
  * string cannot express such a case.
  */
-static unsigned prefix_pathspec(struct pathspec_item *item,
-   unsigned *p_short_magic,
-   unsigned flags,
+static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
const char *prefix, int prefixlen,
const char *elt)
 {
@@ -210,7 +208,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
}
 
magic |= short_magic;
-   *p_short_magic = short_magic;
 
/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
if (noglob_global && !(magic & PATHSPEC_GLOB))
@@ -329,8 +326,7 @@ static int pathspec_item_cmp(const void *a_, const void *b_)
 }
 
 static void NORETURN unsupported_magic(const char *pattern,
-  unsigned magic,
-  unsigned short_magic)
+  unsigned magic)
 {
struct strbuf sb = STRBUF_INIT;
int i;
@@ -339,9 +335,11 @@ static void NORETURN unsupported_magic(const char *pattern,
if (!(magic & m->bit))
continue;
if (sb.len)
-   strbuf_addch(&sb, ' ');
-   if (short_magic & m->bit)
-   strbuf_addf(&sb, "'%c'", m->mnemonic);
+   strbuf_addstr(&sb, ", ");
+
+   if (m->mnemonic)
+   strbuf_addf(&sb, "'%s' (mnemonic: '%c')",
+   m->name, m->mnemonic);
else
strbuf_addf(&sb, "'%s'", m->name);
}
@@ -413,11 +411,9 @@ void parse_pathspec(struct pathspec *pathspec,
prefixlen = prefix ? strlen(prefix) : 0;
 
for (i = 0; i < n; i++) {
-   unsigned short_magic;
entry = argv[i];
 
-   item[i].magic = prefix_pathspec(item + i, &short_magic,
-   flags,
+   item[i].magic = prefix_pathspec(item + i, flags,
prefix, prefixlen, entry);
if ((flags & PATHSPEC_LITERAL_PATH) &&
!(magic_mask & PATHSPEC_LITERAL))
@@ -425,9 +421,7 @@ void parse_pathspec(struct pathspec *pathspec,
if (item[i].magic & PATHSPEC_EXCLUDE)
nr_exclude++;
if (item[i].magic & magic_mask)
-   unsupported_magic(entry,
- item[i].magic & magic_mask,
- short_magic);
+   unsupported_magic(entry, item[i].magic & magic_mask);
 
if ((flags & PATHSPEC_SYMLINK_LEADING_PATH) &&
has_symlink_leading_path(item[i].match, item[i].len)) {
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 05/16] pathspec: remove the deprecated get_pathspec function

2017-01-03 Thread Brandon Williams
Now that all callers of the old 'get_pathspec' interface have been
migrated to use the new pathspec struct interface it can be removed
from the codebase.

Since there are no more users of the '_raw' field in the pathspec struct
it can also be removed.  This patch also removes the old functionality
of modifying the const char **argv array that was passed into
parse_pathspec.  Instead the constructed 'match' string (which is a
pathspec element with the prefix prepended) is only stored in its
corresponding pathspec_item entry.

Signed-off-by: Brandon Williams 
---
 Documentation/technical/api-setup.txt |  2 --
 cache.h   |  1 -
 pathspec.c| 42 +++
 pathspec.h|  1 -
 4 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/Documentation/technical/api-setup.txt 
b/Documentation/technical/api-setup.txt
index 540e45568..eb1fa9853 100644
--- a/Documentation/technical/api-setup.txt
+++ b/Documentation/technical/api-setup.txt
@@ -27,8 +27,6 @@ parse_pathspec(). This function takes several arguments:
 
 - prefix and args come from cmd_* functions
 
-get_pathspec() is obsolete and should never be used in new code.
-
 parse_pathspec() helps catch unsupported features and reject them
 politely. At a lower level, different pathspec-related functions may
 not support the same set of features. Such pathspec-sensitive
diff --git a/cache.h b/cache.h
index a50a61a19..0f80e01bd 100644
--- a/cache.h
+++ b/cache.h
@@ -514,7 +514,6 @@ extern void set_git_work_tree(const char *tree);
 
 #define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
 
-extern const char **get_pathspec(const char *prefix, const char **pathspec);
 extern void setup_work_tree(void);
 extern const char *setup_git_directory_gently(int *);
 extern const char *setup_git_directory(void);
diff --git a/pathspec.c b/pathspec.c
index 22ca74a12..1f918cbae 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -103,7 +103,7 @@ static void prefix_short_magic(struct strbuf *sb, int 
prefixlen,
  */
 static unsigned prefix_pathspec(struct pathspec_item *item,
unsigned *p_short_magic,
-   const char **raw, unsigned flags,
+   unsigned flags,
const char *prefix, int prefixlen,
const char *elt)
 {
@@ -240,7 +240,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
if (!match)
die(_("%s: '%s' is outside repository"), elt, copyfrom);
}
-   *raw = item->match = match;
+   item->match = match;
/*
 * Prefix the pathspec (keep all magic) and assign to
 * original. Useful for passing to another command.
@@ -381,8 +381,6 @@ void parse_pathspec(struct pathspec *pathspec,
 
/* No arguments with prefix -> prefix pathspec */
if (!entry) {
-   static const char *raw[2];
-
if (flags & PATHSPEC_PREFER_FULL)
return;
 
@@ -394,10 +392,7 @@ void parse_pathspec(struct pathspec *pathspec,
item->original = prefix;
item->nowildcard_len = item->len = strlen(prefix);
item->prefix = item->len;
-   raw[0] = prefix;
-   raw[1] = NULL;
pathspec->nr = 1;
-   pathspec->_raw = raw;
return;
}
 
@@ -415,7 +410,6 @@ void parse_pathspec(struct pathspec *pathspec,
pathspec->nr = n;
ALLOC_ARRAY(pathspec->items, n);
item = pathspec->items;
-   pathspec->_raw = argv;
prefixlen = prefix ? strlen(prefix) : 0;
 
for (i = 0; i < n; i++) {
@@ -423,7 +417,7 @@ void parse_pathspec(struct pathspec *pathspec,
entry = argv[i];
 
item[i].magic = prefix_pathspec(item + i, &short_magic,
-   argv + i, flags,
+   flags,
prefix, prefixlen, entry);
if ((flags & PATHSPEC_LITERAL_PATH) &&
!(magic_mask & PATHSPEC_LITERAL))
@@ -457,36 +451,6 @@ void parse_pathspec(struct pathspec *pathspec,
}
 }
 
-/*
- * N.B. get_pathspec() is deprecated in favor of the "struct pathspec"
- * based interface - see pathspec.c:parse_pathspec().
- *
- * Arguments:
- *  - prefix - a path relative to the root of the working tree
- *  - pathspec - a list of paths underneath the prefix path
- *
- * Iterates over pathspec, prepending each path with prefix,
- * and return the resulting list.
- *
- * If pathspec is empty, return a singleton list containing prefix.
- *
- * If pathspec and prefix are both empty, return an em

[PATCH v4 06/16] pathspec: copy and free owned memory

2017-01-03 Thread Brandon Williams
The 'original' string entry in a pathspec_item is only duplicated some
of the time.  Instead, always make a copy of the original and take
ownership of the memory.

Since both 'match' and 'original' string entries in a pathspec_item are
owned by the pathspec struct, they need to be freed when clearing the
pathspec struct (in 'clear_pathspec()') and duplicated when copying the
pathspec struct (in 'copy_pathspec()').

Also change the type of 'match' and 'original' to 'char *' in order to
more explicitly show the ownership of the memory.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 23 +++
 pathspec.h |  4 ++--
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 1f918cbae..b8faa8f46 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -259,8 +259,9 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
}
strbuf_addstr(&sb, match);
item->original = strbuf_detach(&sb, NULL);
-   } else
-   item->original = elt;
+   } else {
+   item->original = xstrdup(elt);
+   }
item->len = strlen(item->match);
item->prefix = prefixlen;
 
@@ -388,8 +389,8 @@ void parse_pathspec(struct pathspec *pathspec,
die("BUG: PATHSPEC_PREFER_CWD requires arguments");
 
pathspec->items = item = xcalloc(1, sizeof(*item));
-   item->match = prefix;
-   item->original = prefix;
+   item->match = xstrdup(prefix);
+   item->original = xstrdup(prefix);
item->nowildcard_len = item->len = strlen(prefix);
item->prefix = item->len;
pathspec->nr = 1;
@@ -453,13 +454,27 @@ void parse_pathspec(struct pathspec *pathspec,
 
 void copy_pathspec(struct pathspec *dst, const struct pathspec *src)
 {
+   int i;
+
*dst = *src;
ALLOC_ARRAY(dst->items, dst->nr);
COPY_ARRAY(dst->items, src->items, dst->nr);
+
+   for (i = 0; i < dst->nr; i++) {
+   dst->items[i].match = xstrdup(src->items[i].match);
+   dst->items[i].original = xstrdup(src->items[i].original);
+   }
 }
 
 void clear_pathspec(struct pathspec *pathspec)
 {
+   int i;
+
+   for (i = 0; i < pathspec->nr; i++) {
+   free(pathspec->items[i].match);
+   free(pathspec->items[i].original);
+   }
free(pathspec->items);
pathspec->items = NULL;
+   pathspec->nr = 0;
 }
diff --git a/pathspec.h b/pathspec.h
index 70a592e91..49fd823dd 100644
--- a/pathspec.h
+++ b/pathspec.h
@@ -25,8 +25,8 @@ struct pathspec {
unsigned magic;
int max_depth;
struct pathspec_item {
-   const char *match;
-   const char *original;
+   char *match;
+   char *original;
unsigned magic;
int len, prefix;
int nowildcard_len;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 13/16] pathspec: create parse_element_magic helper

2017-01-03 Thread Brandon Williams
Factor out the logic responsible for the magic in a pathspec element
into its own function.

Also avoid calling into the parsing functions when
`PATHSPEC_LITERAL_PATH` is specified since it causes magic to be
ignored and all paths to be treated as literals.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 37 -
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index f1439f6f9..fe811a0a4 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -245,6 +245,19 @@ static const char *parse_short_magic(unsigned *magic, 
const char *elem)
return pos;
 }
 
+static const char *parse_element_magic(unsigned *magic, int *prefix_len,
+  const char *elem)
+{
+   if (elem[0] != ':' || get_literal_global())
+   return elem; /* nothing to do */
+   else if (elem[1] == '(')
+   /* longhand */
+   return parse_long_magic(magic, prefix_len, elem);
+   else
+   /* shorthand */
+   return parse_short_magic(magic, elem);
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -267,26 +280,16 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
char *match;
int i, pathspec_prefix = -1;
 
-   if (elt[0] != ':' || get_literal_global() ||
-   (flags & PATHSPEC_LITERAL_PATH)) {
-   ; /* nothing to do */
-   } else if (elt[1] == '(') {
-   /* longhand */
-   copyfrom = parse_long_magic(&element_magic,
-   &pathspec_prefix,
-   elt);
-   } else {
-   /* shorthand */
-   copyfrom = parse_short_magic(&element_magic, elt);
-   }
-
-   magic |= element_magic;
-
/* PATHSPEC_LITERAL_PATH ignores magic */
-   if (flags & PATHSPEC_LITERAL_PATH)
+   if (flags & PATHSPEC_LITERAL_PATH) {
magic = PATHSPEC_LITERAL;
-   else
+   } else {
+   copyfrom = parse_element_magic(&element_magic,
+  &pathspec_prefix,
+  elt);
+   magic |= element_magic;
magic |= get_global_magic(element_magic);
+   }
 
if (pathspec_prefix >= 0 &&
(prefixlen || (prefix && *prefix)))
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 11/16] pathspec: create parse_short_magic function

2017-01-03 Thread Brandon Williams
Factor out the logic responsible for parsing short magic into its own
function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 54 --
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index f760f44f9..8574010d5 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -157,6 +157,41 @@ static int get_global_magic(int element_magic)
 }
 
 /*
+ * Parse the pathspec element looking for short magic
+ *
+ * saves all magic in 'magic'
+ * returns the position in 'elem' after all magic has been parsed
+ */
+static const char *parse_short_magic(unsigned *magic, const char *elem)
+{
+   const char *pos;
+
+   for (pos = elem + 1; *pos && *pos != ':'; pos++) {
+   char ch = *pos;
+   int i;
+
+   if (!is_pathspec_magic(ch))
+   break;
+
+   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+   if (pathspec_magic[i].mnemonic == ch) {
+   *magic |= pathspec_magic[i].bit;
+   break;
+   }
+   }
+
+   if (ARRAY_SIZE(pathspec_magic) <= i)
+   die(_("Unimplemented pathspec magic '%c' in '%s'"),
+   ch, elem);
+   }
+
+   if (*pos == ':')
+   pos++;
+
+   return pos;
+}
+
+/*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
  *
@@ -220,24 +255,7 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
copyfrom++;
} else {
/* shorthand */
-   for (copyfrom = elt + 1;
-*copyfrom && *copyfrom != ':';
-copyfrom++) {
-   char ch = *copyfrom;
-
-   if (!is_pathspec_magic(ch))
-   break;
-   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
-   if (pathspec_magic[i].mnemonic == ch) {
-   element_magic |= pathspec_magic[i].bit;
-   break;
-   }
-   if (ARRAY_SIZE(pathspec_magic) <= i)
-   die(_("Unimplemented pathspec magic '%c' in 
'%s'"),
-   ch, elt);
-   }
-   if (*copyfrom == ':')
-   copyfrom++;
+   copyfrom = parse_short_magic(&element_magic, elt);
}
 
magic |= element_magic;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 15/16] pathspec: small readability changes

2017-01-03 Thread Brandon Williams
A few small changes to improve readability.  This is done by grouping related
assignments, adding blank lines, ensuring lines are <80 characters, and
adding additional comments.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 4a1f8ea44..ae9e1401f 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -67,11 +67,11 @@ static struct pathspec_magic {
char mnemonic; /* this cannot be ':'! */
const char *name;
 } pathspec_magic[] = {
-   { PATHSPEC_FROMTOP, '/', "top" },
-   { PATHSPEC_LITERAL,   0, "literal" },
-   { PATHSPEC_GLOB,   '\0', "glob" },
-   { PATHSPEC_ICASE,  '\0', "icase" },
-   { PATHSPEC_EXCLUDE, '!', "exclude" },
+   { PATHSPEC_FROMTOP,  '/', "top" },
+   { PATHSPEC_LITERAL, '\0', "literal" },
+   { PATHSPEC_GLOB,'\0', "glob" },
+   { PATHSPEC_ICASE,   '\0', "icase" },
+   { PATHSPEC_EXCLUDE,  '!', "exclude" },
 };
 
 static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
@@ -336,6 +336,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
if ((magic & PATHSPEC_LITERAL) && (magic & PATHSPEC_GLOB))
die(_("%s: 'literal' and 'glob' are incompatible"), elt);
 
+   /* Create match string which will be used for pathspec matching */
if (pathspec_prefix >= 0) {
match = xstrdup(copyfrom);
prefixlen = pathspec_prefix;
@@ -343,11 +344,16 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
match = xstrdup(copyfrom);
prefixlen = 0;
} else {
-   match = prefix_path_gently(prefix, prefixlen, &prefixlen, 
copyfrom);
+   match = prefix_path_gently(prefix, prefixlen,
+  &prefixlen, copyfrom);
if (!match)
die(_("%s: '%s' is outside repository"), elt, copyfrom);
}
+
item->match = match;
+   item->len = strlen(item->match);
+   item->prefix = prefixlen;
+
/*
 * Prefix the pathspec (keep all magic) and assign to
 * original. Useful for passing to another command.
@@ -364,8 +370,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
} else {
item->original = xstrdup(elt);
}
-   item->len = strlen(item->match);
-   item->prefix = prefixlen;
 
if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP)
strip_submodule_slash_cheap(item);
@@ -373,13 +377,14 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
strip_submodule_slash_expensive(item);
 
-   if (magic & PATHSPEC_LITERAL)
+   if (magic & PATHSPEC_LITERAL) {
item->nowildcard_len = item->len;
-   else {
+   } else {
item->nowildcard_len = simple_length(item->match);
if (item->nowildcard_len < prefixlen)
item->nowildcard_len = prefixlen;
}
+
item->flags = 0;
if (magic & PATHSPEC_GLOB) {
/*
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 00/16] pathspec cleanup

2017-01-03 Thread Brandon Williams
v4 addresses a few comments from Duy.
* [2/16] push the guard pathspec macro into simplify_away() and
  exclude_matches_pathspec().
* [6/16] when freeing a pathspec struct, set pathspec->nr = 0.
* [8/16] tweak the die message when using unsupported magic to be more human
  readable.

Brandon Williams (16):
  mv: remove use of deprecated 'get_pathspec()'
  dir: remove struct path_simplify
  dir: convert fill_directory to use the pathspec struct interface
  ls-tree: convert show_recursive to use the pathspec struct interface
  pathspec: remove the deprecated get_pathspec function
  pathspec: copy and free owned memory
  pathspec: remove unused variable from unsupported_magic
  pathspec: always show mnemonic and name in unsupported_magic
  pathspec: simpler logic to prefix original pathspec elements
  pathspec: factor global magic into its own function
  pathspec: create parse_short_magic function
  pathspec: create parse_long_magic function
  pathspec: create parse_element_magic helper
  pathspec: create strip submodule slash helpers
  pathspec: small readability changes
  pathspec: rename prefix_pathspec to init_pathspec_item

 Documentation/technical/api-setup.txt |   2 -
 builtin/ls-tree.c |  16 +-
 builtin/mv.c  |  50 ++--
 cache.h   |   1 -
 dir.c | 193 ++
 pathspec.c| 480 +++---
 pathspec.h|   5 +-
 7 files changed, 390 insertions(+), 357 deletions(-)

--- interdiff between v3 and v4

diff --git a/dir.c b/dir.c
index 15f7c9993..e8ddd7f8a 100644
--- a/dir.c
+++ b/dir.c
@@ -1353,6 +1353,15 @@ static int simplify_away(const char *path, int pathlen,
 {
int i;
 
+   if (pathspec)
+   guard_pathspec(pathspec,
+  pathspec_fromtop |
+  pathspec_maxdepth |
+  pathspec_literal |
+  pathspec_glob |
+  pathspec_icase |
+  pathspec_exclude);
+
if (!pathspec || !pathspec->nr)
return 0;
 
@@ -1385,6 +1394,15 @@ static int exclude_matches_pathspec(const char *path, 
int pathlen,
 {
int i;
 
+   if (pathspec)
+   guard_pathspec(pathspec,
+  pathspec_fromtop |
+  pathspec_maxdepth |
+  pathspec_literal |
+  pathspec_glob |
+  pathspec_icase |
+  pathspec_exclude);
+
if (!pathspec || !pathspec->nr)
return 0;
 
@@ -1996,15 +2014,6 @@ int read_directory(struct dir_struct *dir, const char 
*path,
 {
struct untracked_cache_dir *untracked;
 
-   if (pathspec)
-   guard_pathspec(pathspec,
-  pathspec_fromtop |
-  pathspec_maxdepth |
-  pathspec_literal |
-  pathspec_glob |
-  pathspec_icase |
-  pathspec_exclude);
-
if (has_symlink_leading_path(path, len))
return dir->nr;

diff --git a/pathspec.c b/pathspec.c
index d4efcf666..bcf3ba039 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -414,10 +414,11 @@ static void NORETURN unsupported_magic(const char 
*pattern,
if (!(magic & m->bit))
continue;
if (sb.len)
-   strbuf_addch(&sb, ' ');
+   strbuf_addstr(&sb, ", ");
 
if (m->mnemonic)
-   strbuf_addf(&sb, "'(%c)%s'", m->mnemonic, m->name);
+   strbuf_addf(&sb, "'%s' (mnemonic: '%c')",
+   m->name, m->mnemonic);
else
strbuf_addf(&sb, "'%s'", m->name);
}
@@ -544,4 +545,5 @@ void clear_pathspec(struct pathspec *pathspec)
}
free(pathspec->items);
pathspec->items = NULL;
+   pathspec->nr = 0;
 }

-- 
2.11.0.390.gc69c2f50cf-goog



Re: What's cooking in git.git (Dec 2016, #08; Tue, 27)

2017-01-03 Thread Brandon Williams
On 12/29, Lars Schneider wrote:
> 
> > On 28 Dec 2016, at 00:11, Junio C Hamano  wrote:
> > 
> > 
> > * bw/realpath-wo-chdir (2016-12-22) 5 commits
> >  (merged to 'next' on 2016-12-22 at fea8fa870f)
> > + real_path: canonicalize directory separators in root parts
> > + real_path: have callers use real_pathdup and strbuf_realpath
> > + real_path: create real_pathdup
> > + real_path: convert real_path_internal to strbuf_realpath
> > + real_path: resolve symlinks by hand
> > (this branch is used by bw/grep-recurse-submodules.)
> > 
> > The implementation of "real_path()" was to go there with chdir(2)
> > and call getcwd(3), but this obviously wouldn't be usable in a
> > threaded environment.  Rewrite it to manually resolve relative
> > paths including symbolic links in path components.
> 
> "real_path: resolve symlinks by hand" (05b458c) introduces
> "MAXSYMLINKS" which is already defined on macOS in
> 
> /usr/include/sys/param.h:197:9:
> 
>  * .., MAXSYMLINKS defines the
>  * maximum number of symbolic links that may be expanded in a path name.
>  * It should be set high enough to allow all legitimate uses, but halt
>  * infinite loops reasonably quickly.
>  */
> 
> 
> Log with JS: https://travis-ci.org/git/git/jobs/187092215
> Log without JS: 
> https://s3.amazonaws.com/archive.travis-ci.org/jobs/187092215/log.txt
> 
> - Lars
> 

Simple fix would be to just revert MAXSYMLINKS to be MAXDEPTH (which is
what the #define was before this series).  I'll resend the series with
that fix.

-- 
Brandon Williams


[PATCH v4 0/5] road to reentrant real_path

2017-01-03 Thread Brandon Williams
Only change with v4 is in [1/5] renaming the #define MAXSYMLINKS back to
MAXDEPTH due to a naming conflict brought up by Lars Schneider.

Brandon Williams (4):
  real_path: resolve symlinks by hand
  real_path: convert real_path_internal to strbuf_realpath
  real_path: create real_pathdup
  real_path: have callers use real_pathdup and strbuf_realpath

Johannes Sixt (1):
  real_path: canonicalize directory separators in root parts

 abspath.c | 225 +-
 builtin/init-db.c |   6 +-
 cache.h   |   3 +
 environment.c |   2 +-
 setup.c   |  13 ++--
 sha1_file.c   |   2 +-
 submodule.c   |   2 +-
 transport.c   |   2 +-
 worktree.c|   2 +-
 9 files changed, 173 insertions(+), 84 deletions(-)

--- interdiff with v3

diff --git a/abspath.c b/abspath.c
index 1d56f5ed9..3562d17bf 100644
--- a/abspath.c
+++ b/abspath.c
@@ -62,7 +62,7 @@ static void get_root_part(struct strbuf *resolved, struct 
strbuf *remaining)
 }
 
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXSYMLINKS 5
+#define MAXDEPTH 5
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -138,10 +138,10 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
ssize_t len;
strbuf_reset(&symlink);
 
-   if (num_symlinks++ > MAXSYMLINKS) {
+   if (num_symlinks++ > MAXDEPTH) {
if (die_on_error)
die("More than %d nested symlinks "
-   "on path '%s'", MAXSYMLINKS, path);
+   "on path '%s'", MAXDEPTH, path);
else
goto error_out;
}

-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 1/5] real_path: resolve symlinks by hand

2017-01-03 Thread Brandon Williams
The current implementation of real_path uses chdir() in order to resolve
symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
process as a whole and not just an individual thread.  Instead perform
the symlink resolution by hand so that the calls to chdir() can be
removed, making real_path one step closer to being reentrant.

Signed-off-by: Brandon Williams 
---
 abspath.c | 188 ++
 1 file changed, 128 insertions(+), 60 deletions(-)

diff --git a/abspath.c b/abspath.c
index 2825de859..0f34636a8 100644
--- a/abspath.c
+++ b/abspath.c
@@ -11,6 +11,43 @@ int is_directory(const char *path)
return (!stat(path, &st) && S_ISDIR(st.st_mode));
 }
 
+/* removes the last path component from 'path' except if 'path' is root */
+static void strip_last_component(struct strbuf *path)
+{
+   size_t offset = offset_1st_component(path->buf);
+   size_t len = path->len;
+
+   /* Find start of the last component */
+   while (offset < len && !is_dir_sep(path->buf[len - 1]))
+   len--;
+   /* Skip sequences of multiple path-separators */
+   while (offset < len && is_dir_sep(path->buf[len - 1]))
+   len--;
+
+   strbuf_setlen(path, len);
+}
+
+/* get (and remove) the next component in 'remaining' and place it in 'next' */
+static void get_next_component(struct strbuf *next, struct strbuf *remaining)
+{
+   char *start = NULL;
+   char *end = NULL;
+
+   strbuf_reset(next);
+
+   /* look for the next component */
+   /* Skip sequences of multiple path-separators */
+   for (start = remaining->buf; is_dir_sep(*start); start++)
+   ; /* nothing */
+   /* Find end of the path component */
+   for (end = start; *end && !is_dir_sep(*end); end++)
+   ; /* nothing */
+
+   strbuf_add(next, start, end - start);
+   /* remove the component from 'remaining' */
+   strbuf_remove(remaining, 0, end - remaining->buf);
+}
+
 /* We allow "recursive" symbolic links. Only within reason, though. */
 #define MAXDEPTH 5
 
@@ -21,7 +58,6 @@ int is_directory(const char *path)
  * absolute_path().)  The return value is a pointer to a static
  * buffer.
  *
- * The input and all intermediate paths must be shorter than MAX_PATH.
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
@@ -33,22 +69,16 @@ int is_directory(const char *path)
  */
 static const char *real_path_internal(const char *path, int die_on_error)
 {
-   static struct strbuf sb = STRBUF_INIT;
+   static struct strbuf resolved = STRBUF_INIT;
+   struct strbuf remaining = STRBUF_INIT;
+   struct strbuf next = STRBUF_INIT;
+   struct strbuf symlink = STRBUF_INIT;
char *retval = NULL;
-
-   /*
-* If we have to temporarily chdir(), store the original CWD
-* here so that we can chdir() back to it at the end of the
-* function:
-*/
-   struct strbuf cwd = STRBUF_INIT;
-
-   int depth = MAXDEPTH;
-   char *last_elem = NULL;
+   int num_symlinks = 0;
struct stat st;
 
/* We've already done it */
-   if (path == sb.buf)
+   if (path == resolved.buf)
return path;
 
if (!*path) {
@@ -58,74 +88,112 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
goto error_out;
}
 
-   strbuf_reset(&sb);
-   strbuf_addstr(&sb, path);
-
-   while (depth--) {
-   if (!is_directory(sb.buf)) {
-   char *last_slash = find_last_dir_sep(sb.buf);
-   if (last_slash) {
-   last_elem = xstrdup(last_slash + 1);
-   strbuf_setlen(&sb, last_slash - sb.buf + 1);
-   } else {
-   last_elem = xmemdupz(sb.buf, sb.len);
-   strbuf_reset(&sb);
-   }
+   strbuf_reset(&resolved);
+
+   if (is_absolute_path(path)) {
+   /* absolute path; start with only root as being resolved */
+   int offset = offset_1st_component(path);
+   strbuf_add(&resolved, path, offset);
+   strbuf_addstr(&remaining, path + offset);
+   } else {
+   /* relative path; can use CWD as the initial resolved path */
+   if (strbuf_getcwd(&resolved)) {
+   if (die_on_error)
+   die_errno("unable to get current working 
directory");
+   else
+   goto error_out;
}

[PATCH v4 02/16] dir: remove struct path_simplify

2017-01-03 Thread Brandon Williams
Teach simplify_away() and exclude_matches_pathspec() to handle struct
pathspec directly, eliminating the need for the struct path_simplify.

Also renamed the len parameter to pathlen in exclude_matches_pathspec()
to match the parameter names used in simplify_away().

Signed-off-by: Brandon Williams 
---
 dir.c | 181 +-
 1 file changed, 78 insertions(+), 103 deletions(-)

diff --git a/dir.c b/dir.c
index bfa8c8a9a..1df61f10f 100644
--- a/dir.c
+++ b/dir.c
@@ -16,11 +16,6 @@
 #include "varint.h"
 #include "ewah/ewok.h"
 
-struct path_simplify {
-   int len;
-   const char *path;
-};
-
 /*
  * Tells read_directory_recursive how a file or directory should be treated.
  * Values are ordered by significance, e.g. if a directory contains both
@@ -50,7 +45,7 @@ struct cached_dir {
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
const char *path, int len, struct untracked_cache_dir *untracked,
-   int check_only, const struct path_simplify *simplify);
+   int check_only, const struct pathspec *pathspec);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
 int fspathcmp(const char *a, const char *b)
@@ -1312,7 +1307,7 @@ static enum exist_status directory_exists_in_index(const 
char *dirname, int len)
 static enum path_treatment treat_directory(struct dir_struct *dir,
struct untracked_cache_dir *untracked,
const char *dirname, int len, int baselen, int exclude,
-   const struct path_simplify *simplify)
+   const struct pathspec *pathspec)
 {
/* The "len-1" is to strip the final '/' */
switch (directory_exists_in_index(dirname, len-1)) {
@@ -1341,7 +1336,7 @@ static enum path_treatment treat_directory(struct 
dir_struct *dir,
untracked = lookup_untracked(dir->untracked, untracked,
 dirname + baselen, len - baselen);
return read_directory_recursive(dir, dirname, len,
-   untracked, 1, simplify);
+   untracked, 1, pathspec);
 }
 
 /*
@@ -1349,24 +1344,34 @@ static enum path_treatment treat_directory(struct 
dir_struct *dir,
  * reading - if the path cannot possibly be in the pathspec,
  * return true, and we'll skip it early.
  */
-static int simplify_away(const char *path, int pathlen, const struct 
path_simplify *simplify)
+static int simplify_away(const char *path, int pathlen,
+const struct pathspec *pathspec)
 {
-   if (simplify) {
-   for (;;) {
-   const char *match = simplify->path;
-   int len = simplify->len;
+   int i;
 
-   if (!match)
-   break;
-   if (len > pathlen)
-   len = pathlen;
-   if (!memcmp(path, match, len))
-   return 0;
-   simplify++;
-   }
-   return 1;
+   if (pathspec)
+   GUARD_PATHSPEC(pathspec,
+  PATHSPEC_FROMTOP |
+  PATHSPEC_MAXDEPTH |
+  PATHSPEC_LITERAL |
+  PATHSPEC_GLOB |
+  PATHSPEC_ICASE |
+  PATHSPEC_EXCLUDE);
+
+   if (!pathspec || !pathspec->nr)
+   return 0;
+
+   for (i = 0; i < pathspec->nr; i++) {
+   const struct pathspec_item *item = &pathspec->items[i];
+   int len = item->nowildcard_len;
+
+   if (len > pathlen)
+   len = pathlen;
+   if (!ps_strncmp(item, item->match, path, len))
+   return 0;
}
-   return 0;
+
+   return 1;
 }
 
 /*
@@ -1380,19 +1385,34 @@ static int simplify_away(const char *path, int pathlen, 
const struct path_simpli
  *   2. the path is a directory prefix of some element in the
  *  pathspec
  */
-static int exclude_matches_pathspec(const char *path, int len,
-   const struct path_simplify *simplify)
-{
-   if (simplify) {
-   for (; simplify->path; simplify++) {
-   if (len == simplify->len
-   && !memcmp(path, simplify->path, len))
-   return 1;
-   if (len < simplify->len
-   && simplify->path[len] == '/'
-   && !memcmp(path, simplify->path, len))
-   return 1;
-   }
+static int exclude_matches_pathspec(const char *path, int pathlen,
+   const struct pathspec *pat

[PATCH v4 2/5] real_path: convert real_path_internal to strbuf_realpath

2017-01-03 Thread Brandon Williams
Change the name of real_path_internal to strbuf_realpath.  In addition
push the static strbuf up to its callers and instead take as a
parameter a pointer to a strbuf to use for the final result.

This change makes strbuf_realpath reentrant.

Signed-off-by: Brandon Williams 
---
 abspath.c | 53 +
 cache.h   |  2 ++
 2 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/abspath.c b/abspath.c
index 0f34636a8..c3a6acd4d 100644
--- a/abspath.c
+++ b/abspath.c
@@ -55,21 +55,17 @@ static void get_next_component(struct strbuf *next, struct 
strbuf *remaining)
  * Return the real path (i.e., absolute path, with symlinks resolved
  * and extra slashes removed) equivalent to the specified path.  (If
  * you want an absolute path but don't mind links, use
- * absolute_path().)  The return value is a pointer to a static
- * buffer.
+ * absolute_path().)  Places the resolved realpath in the provided strbuf.
  *
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
  * informative error message if there is a problem.  Otherwise, return
  * NULL on errors (without generating any output).
- *
- * If path is our buffer, then return path, as it's already what the
- * user wants.
  */
-static const char *real_path_internal(const char *path, int die_on_error)
+char *strbuf_realpath(struct strbuf *resolved, const char *path,
+ int die_on_error)
 {
-   static struct strbuf resolved = STRBUF_INIT;
struct strbuf remaining = STRBUF_INIT;
struct strbuf next = STRBUF_INIT;
struct strbuf symlink = STRBUF_INIT;
@@ -77,10 +73,6 @@ static const char *real_path_internal(const char *path, int 
die_on_error)
int num_symlinks = 0;
struct stat st;
 
-   /* We've already done it */
-   if (path == resolved.buf)
-   return path;
-
if (!*path) {
if (die_on_error)
die("The empty string is not a valid path");
@@ -88,16 +80,16 @@ static const char *real_path_internal(const char *path, int 
die_on_error)
goto error_out;
}
 
-   strbuf_reset(&resolved);
+   strbuf_reset(resolved);
 
if (is_absolute_path(path)) {
/* absolute path; start with only root as being resolved */
int offset = offset_1st_component(path);
-   strbuf_add(&resolved, path, offset);
+   strbuf_add(resolved, path, offset);
strbuf_addstr(&remaining, path + offset);
} else {
/* relative path; can use CWD as the initial resolved path */
-   if (strbuf_getcwd(&resolved)) {
+   if (strbuf_getcwd(resolved)) {
if (die_on_error)
die_errno("unable to get current working 
directory");
else
@@ -116,21 +108,21 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
continue; /* '.' component */
} else if (next.len == 2 && !strcmp(next.buf, "..")) {
/* '..' component; strip the last path component */
-   strip_last_component(&resolved);
+   strip_last_component(resolved);
continue;
}
 
/* append the next component and resolve resultant path */
-   if (!is_dir_sep(resolved.buf[resolved.len - 1]))
-   strbuf_addch(&resolved, '/');
-   strbuf_addbuf(&resolved, &next);
+   if (!is_dir_sep(resolved->buf[resolved->len - 1]))
+   strbuf_addch(resolved, '/');
+   strbuf_addbuf(resolved, &next);
 
-   if (lstat(resolved.buf, &st)) {
+   if (lstat(resolved->buf, &st)) {
/* error out unless this was the last component */
if (errno != ENOENT || remaining.len) {
if (die_on_error)
die_errno("Invalid path '%s'",
- resolved.buf);
+ resolved->buf);
else
goto error_out;
}
@@ -146,12 +138,12 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
goto error_out;
}
 
-   len = strbuf_readlink(&symlink, resolved.buf,
+   len = strbuf_readlin

[PATCH v4 5/5] real_path: canonicalize directory separators in root parts

2017-01-03 Thread Brandon Williams
From: Johannes Sixt 

When an absolute path is resolved, resolution begins at the first path
component after the root part. The root part is just copied verbatim,
because it must not be inspected for symbolic links. For POSIX paths,
this is just the initial slash, but on Windows, the root part has the
forms c:\ or \\server\share. We do want to canonicalize the back-slashes
in the root part because these parts are compared to the result of
getcwd(), which does return a fully canonicalized path.

Factor out a helper that splits off the root part, and have it
canonicalize the copied part.

This change was prompted because t1504-ceiling-dirs.sh caught a breakage
in GIT_CEILING_DIRECTORIES handling on Windows.

Signed-off-by: Johannes Sixt 
---
 abspath.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/abspath.c b/abspath.c
index f4283f465..3562d17bf 100644
--- a/abspath.c
+++ b/abspath.c
@@ -48,6 +48,19 @@ static void get_next_component(struct strbuf *next, struct 
strbuf *remaining)
strbuf_remove(remaining, 0, end - remaining->buf);
 }
 
+/* copies root part from remaining to resolved, canonicalizing it on the way */
+static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
+{
+   int offset = offset_1st_component(remaining->buf);
+
+   strbuf_reset(resolved);
+   strbuf_add(resolved, remaining->buf, offset);
+#ifdef GIT_WINDOWS_NATIVE
+   convert_slashes(resolved->buf);
+#endif
+   strbuf_remove(remaining, 0, offset);
+}
+
 /* We allow "recursive" symbolic links. Only within reason, though. */
 #define MAXDEPTH 5
 
@@ -80,14 +93,10 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
goto error_out;
}
 
-   strbuf_reset(resolved);
+   strbuf_addstr(&remaining, path);
+   get_root_part(resolved, &remaining);
 
-   if (is_absolute_path(path)) {
-   /* absolute path; start with only root as being resolved */
-   int offset = offset_1st_component(path);
-   strbuf_add(resolved, path, offset);
-   strbuf_addstr(&remaining, path + offset);
-   } else {
+   if (!resolved->len) {
/* relative path; can use CWD as the initial resolved path */
if (strbuf_getcwd(resolved)) {
if (die_on_error)
@@ -95,7 +104,6 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
else
goto error_out;
}
-   strbuf_addstr(&remaining, path);
}
 
/* Iterate over the remaining path components */
@@ -150,10 +158,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
 
if (is_absolute_path(symlink.buf)) {
/* absolute symlink; set resolved to root */
-   int offset = offset_1st_component(symlink.buf);
-   strbuf_reset(resolved);
-   strbuf_add(resolved, symlink.buf, offset);
-   strbuf_remove(&symlink, 0, offset);
+   get_root_part(resolved, &symlink);
} else {
/*
 * relative symlink
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 3/5] real_path: create real_pathdup

2017-01-03 Thread Brandon Williams
Create real_pathdup, which returns a caller-owned string of the resolved
realpath based on the provided path.

Signed-off-by: Brandon Williams 
---
 abspath.c | 13 +
 cache.h   |  1 +
 2 files changed, 14 insertions(+)

diff --git a/abspath.c b/abspath.c
index c3a6acd4d..f4283f465 100644
--- a/abspath.c
+++ b/abspath.c
@@ -205,6 +205,19 @@ const char *real_path_if_valid(const char *path)
return strbuf_realpath(&realpath, path, 0);
 }
 
+char *real_pathdup(const char *path)
+{
+   struct strbuf realpath = STRBUF_INIT;
+   char *retval = NULL;
+
+   if (strbuf_realpath(&realpath, path, 0))
+   retval = strbuf_detach(&realpath, NULL);
+
+   strbuf_release(&realpath);
+
+   return retval;
+}
+
 /*
  * Use this to get an absolute path from a relative one. If you want
  * to resolve links, you should use real_path.
diff --git a/cache.h b/cache.h
index 7a8129403..e12a5d912 100644
--- a/cache.h
+++ b/cache.h
@@ -1068,6 +1068,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
  int die_on_error);
 const char *real_path(const char *path);
 const char *real_path_if_valid(const char *path);
+char *real_pathdup(const char *path);
 const char *absolute_path(const char *path);
 const char *remove_leading_path(const char *in, const char *prefix);
 const char *relative_path(const char *in, const char *prefix, struct strbuf 
*sb);
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v4 4/5] real_path: have callers use real_pathdup and strbuf_realpath

2017-01-03 Thread Brandon Williams
Migrate callers of real_path() who duplicate the return value to use
real_pathdup or strbuf_realpath.

Signed-off-by: Brandon Williams 
---
 builtin/init-db.c |  6 +++---
 environment.c |  2 +-
 setup.c   | 13 -
 sha1_file.c   |  2 +-
 submodule.c   |  2 +-
 transport.c   |  2 +-
 worktree.c|  2 +-
 7 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/builtin/init-db.c b/builtin/init-db.c
index 2399b97d9..76d68fad0 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -338,7 +338,7 @@ int init_db(const char *git_dir, const char *real_git_dir,
 {
int reinit;
int exist_ok = flags & INIT_DB_EXIST_OK;
-   char *original_git_dir = xstrdup(real_path(git_dir));
+   char *original_git_dir = real_pathdup(git_dir);
 
if (real_git_dir) {
struct stat st;
@@ -489,7 +489,7 @@ int cmd_init_db(int argc, const char **argv, const char 
*prefix)
argc = parse_options(argc, argv, prefix, init_db_options, 
init_db_usage, 0);
 
if (real_git_dir && !is_absolute_path(real_git_dir))
-   real_git_dir = xstrdup(real_path(real_git_dir));
+   real_git_dir = real_pathdup(real_git_dir);
 
if (argc == 1) {
int mkdir_tried = 0;
@@ -560,7 +560,7 @@ int cmd_init_db(int argc, const char **argv, const char 
*prefix)
const char *git_dir_parent = strrchr(git_dir, '/');
if (git_dir_parent) {
char *rel = xstrndup(git_dir, git_dir_parent - git_dir);
-   git_work_tree_cfg = xstrdup(real_path(rel));
+   git_work_tree_cfg = real_pathdup(rel);
free(rel);
}
if (!git_work_tree_cfg)
diff --git a/environment.c b/environment.c
index 0935ec696..9b943d2d5 100644
--- a/environment.c
+++ b/environment.c
@@ -259,7 +259,7 @@ void set_git_work_tree(const char *new_work_tree)
return;
}
git_work_tree_initialized = 1;
-   work_tree = xstrdup(real_path(new_work_tree));
+   work_tree = real_pathdup(new_work_tree);
 }
 
 const char *get_git_work_tree(void)
diff --git a/setup.c b/setup.c
index fe572b82c..1b534a750 100644
--- a/setup.c
+++ b/setup.c
@@ -256,8 +256,10 @@ int get_common_dir_noenv(struct strbuf *sb, const char 
*gitdir)
strbuf_addbuf(&path, &data);
strbuf_addstr(sb, real_path(path.buf));
ret = 1;
-   } else
+   } else {
strbuf_addstr(sb, gitdir);
+   }
+
strbuf_release(&data);
strbuf_release(&path);
return ret;
@@ -692,7 +694,7 @@ static const char *setup_discovered_git_dir(const char 
*gitdir,
/* --work-tree is set without --git-dir; use discovered one */
if (getenv(GIT_WORK_TREE_ENVIRONMENT) || git_work_tree_cfg) {
if (offset != cwd->len && !is_absolute_path(gitdir))
-   gitdir = xstrdup(real_path(gitdir));
+   gitdir = real_pathdup(gitdir);
if (chdir(cwd->buf))
die_errno("Could not come back to cwd");
return setup_explicit_git_dir(gitdir, cwd, nongit_ok);
@@ -800,11 +802,12 @@ static int canonicalize_ceiling_entry(struct 
string_list_item *item,
/* Keep entry but do not canonicalize it */
return 1;
} else {
-   const char *real_path = real_path_if_valid(ceil);
-   if (!real_path)
+   char *real_path = real_pathdup(ceil);
+   if (!real_path) {
return 0;
+   }
free(item->string);
-   item->string = xstrdup(real_path);
+   item->string = real_path;
return 1;
}
 }
diff --git a/sha1_file.c b/sha1_file.c
index 9c86d1924..6a03cc393 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -291,7 +291,7 @@ static int link_alt_odb_entry(const char *entry, const char 
*relative_base,
struct strbuf pathbuf = STRBUF_INIT;
 
if (!is_absolute_path(entry) && relative_base) {
-   strbuf_addstr(&pathbuf, real_path(relative_base));
+   strbuf_realpath(&pathbuf, relative_base, 1);
strbuf_addch(&pathbuf, '/');
}
strbuf_addstr(&pathbuf, entry);
diff --git a/submodule.c b/submodule.c
index ece17315d..55819ba9c 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1301,7 +1301,7 @@ void connect_work_tree_and_git_dir(const char *work_tree, 
const char *git_dir)
 {
struct strbuf file_name = STRBUF_INIT;
struct strbuf rel_path = STRBUF_INIT;
-   const char *real_work_tree = xstrdup(real_path(work_tree));
+   const char *real_work_tree = real_pathdup(work_tree);
 
/* Update gitfile */
strbuf_addf(&file_name, &quo

[PATCH v4 12/16] pathspec: create parse_long_magic function

2017-01-03 Thread Brandon Williams
Factor out the logic responsible for parsing long magic into its own
function.  Also hoist the prefix check logic outside of the inner
loop, as there isn't anything that needs to be done after matching
"prefix:".

Signed-off-by: Brandon Williams 
---
 pathspec.c | 92 ++
 1 file changed, 57 insertions(+), 35 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 8574010d5..f1439f6f9 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -157,6 +157,60 @@ static int get_global_magic(int element_magic)
 }
 
 /*
+ * Parse the pathspec element looking for long magic
+ *
+ * saves all magic in 'magic'
+ * if prefix magic is used, save the prefix length in 'prefix_len'
+ * returns the position in 'elem' after all magic has been parsed
+ */
+static const char *parse_long_magic(unsigned *magic, int *prefix_len,
+   const char *elem)
+{
+   const char *pos;
+   const char *nextat;
+
+   for (pos = elem + 2; *pos && *pos != ')'; pos = nextat) {
+   size_t len = strcspn(pos, ",)");
+   int i;
+
+   if (pos[len] == ',')
+   nextat = pos + len + 1; /* handle ',' */
+   else
+   nextat = pos + len; /* handle ')' and '\0' */
+
+   if (!len)
+   continue;
+
+   if (starts_with(pos, "prefix:")) {
+   char *endptr;
+   *prefix_len = strtol(pos + 7, &endptr, 10);
+   if (endptr - pos != len)
+   die(_("invalid parameter for pathspec magic 
'prefix'"));
+   continue;
+   }
+
+   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+   if (strlen(pathspec_magic[i].name) == len &&
+   !strncmp(pathspec_magic[i].name, pos, len)) {
+   *magic |= pathspec_magic[i].bit;
+   break;
+   }
+   }
+
+   if (ARRAY_SIZE(pathspec_magic) <= i)
+   die(_("Invalid pathspec magic '%.*s' in '%s'"),
+   (int) len, pos, elem);
+   }
+
+   if (*pos != ')')
+   die(_("Missing ')' at the end of pathspec magic in '%s'"),
+   elem);
+   pos++;
+
+   return pos;
+}
+
+/*
  * Parse the pathspec element looking for short magic
  *
  * saves all magic in 'magic'
@@ -218,41 +272,9 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
; /* nothing to do */
} else if (elt[1] == '(') {
/* longhand */
-   const char *nextat;
-   for (copyfrom = elt + 2;
-*copyfrom && *copyfrom != ')';
-copyfrom = nextat) {
-   size_t len = strcspn(copyfrom, ",)");
-   if (copyfrom[len] == ',')
-   nextat = copyfrom + len + 1;
-   else
-   /* handle ')' and '\0' */
-   nextat = copyfrom + len;
-   if (!len)
-   continue;
-   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
-   if (strlen(pathspec_magic[i].name) == len &&
-   !strncmp(pathspec_magic[i].name, copyfrom, 
len)) {
-   element_magic |= pathspec_magic[i].bit;
-   break;
-   }
-   if (starts_with(copyfrom, "prefix:")) {
-   char *endptr;
-   pathspec_prefix = strtol(copyfrom + 7,
-&endptr, 10);
-   if (endptr - copyfrom != len)
-   die(_("invalid parameter for 
pathspec magic 'prefix'"));
-   /* "i" would be wrong, but it does not 
matter */
-   break;
-   }
-   }
-   if (ARRAY_SIZE(pathspec_magic) <= i)
-   die(_("Invalid pathspec magic '%.*s' in '%s'"),
-   (int) len, copyfrom, elt);
-   }
-   

[PATCH v4 16/16] pathspec: rename prefix_pathspec to init_pathspec_item

2017-01-03 Thread Brandon Williams
Give a more relevant name to the prefix_pathspec function as it does
more than just prefix a pathspec element.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index ae9e1401f..bcf3ba039 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -297,21 +297,11 @@ static void strip_submodule_slash_expensive(struct 
pathspec_item *item)
 }
 
 /*
- * Take an element of a pathspec and check for magic signatures.
- * Append the result to the prefix. Return the magic bitmap.
- *
- * For now, we only parse the syntax and throw out anything other than
- * "top" magic.
- *
- * NEEDSWORK: This needs to be rewritten when we start migrating
- * get_pathspec() users to use the "struct pathspec" interface.  For
- * example, a pathspec element may be marked as case-insensitive, but
- * the prefix part must always match literally, and a single stupid
- * string cannot express such a case.
+ * Perform the initialization of a pathspec_item based on a pathspec element.
  */
-static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
-   const char *prefix, int prefixlen,
-   const char *elt)
+static void init_pathspec_item(struct pathspec_item *item, unsigned flags,
+  const char *prefix, int prefixlen,
+  const char *elt)
 {
unsigned magic = 0, element_magic = 0;
const char *copyfrom = elt;
@@ -329,6 +319,8 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
magic |= get_global_magic(element_magic);
}
 
+   item->magic = magic;
+
if (pathspec_prefix >= 0 &&
(prefixlen || (prefix && *prefix)))
die("BUG: 'prefix' magic is supposed to be used at worktree's 
root");
@@ -401,7 +393,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
/* sanity checks, pathspec matchers assume these are sane */
assert(item->nowildcard_len <= item->len &&
   item->prefix <= item->len);
-   return magic;
 }
 
 static int pathspec_item_cmp(const void *a_, const void *b_)
@@ -501,8 +492,7 @@ void parse_pathspec(struct pathspec *pathspec,
for (i = 0; i < n; i++) {
entry = argv[i];
 
-   item[i].magic = prefix_pathspec(item + i, flags,
-   prefix, prefixlen, entry);
+   init_pathspec_item(item + i, flags, prefix, prefixlen, entry);
 
if (item[i].magic & PATHSPEC_EXCLUDE)
nr_exclude++;
-- 
2.11.0.390.gc69c2f50cf-goog



Re: [PATCH v4 00/16] pathspec cleanup

2017-01-04 Thread Brandon Williams
On 01/04, Duy Nguyen wrote:
> On Wed, Jan 4, 2017 at 1:42 AM, Brandon Williams  wrote:
> > diff --git a/dir.c b/dir.c
> > index 15f7c9993..e8ddd7f8a 100644
> > --- a/dir.c
> > +++ b/dir.c
> > @@ -1353,6 +1353,15 @@ static int simplify_away(const char *path, int 
> > pathlen,
> >  {
> > int i;
> >
> > +   if (pathspec)
> > +   guard_pathspec(pathspec,
> > +  pathspec_fromtop |
> > +  pathspec_maxdepth |
> > +  pathspec_literal |
> > +  pathspec_glob |
> > +  pathspec_icase |
> > +  pathspec_exclude);
> 
> You have done some magic (or your MTA/editor did) to lower case
> GUARD_PATHSPEC and all the flags. The real patch looks good though, so
> no problem.

That's really odd, I was sure I just did a cut and paste.  I'll fix that
:)

> 
> > +
> > if (!pathspec || !pathspec->nr)
> > return 0;
> 
> 
> Super tiny nit, if GUARD_PATHSPEC is placed after this line, then we
> don't have to check if pathspec is non-NULL. Probably not worth a
> re-roll unless somebody else finds something else.

I saw this after I sent out the series again, and I agree.  I'll move
the guard to be after this check.

> 
> > if (m->mnemonic)
> > -   strbuf_addf(&sb, "'(%c)%s'", m->mnemonic, m->name);
> > +   strbuf_addf(&sb, "'%s' (mnemonic: '%c')",
> > +   m->name, m->mnemonic);
> 
> .. and that somebody might be me :) we need to mark "mnemonic" for
> translation. Putting _() around the string would do.
> 
> Ideally I would give the translators the whole sentence so they can
> have good context (and good translation as a result). But since we
> potentially concatenate multiple unsupported magic in the string,
> there's no way to provide one (or a few) fixed string(s) at compile
> time. So let's just _() it and leave it at that.

Will do!

-- 
Brandon Williams


Re: [PATCH v4 00/16] pathspec cleanup

2017-01-04 Thread Brandon Williams
On 01/04, Brandon Williams wrote:
> On 01/04, Duy Nguyen wrote:
> > On Wed, Jan 4, 2017 at 1:42 AM, Brandon Williams  wrote:
> > > diff --git a/dir.c b/dir.c
> > > index 15f7c9993..e8ddd7f8a 100644
> > > --- a/dir.c
> > > +++ b/dir.c
> > > @@ -1353,6 +1353,15 @@ static int simplify_away(const char *path, int 
> > > pathlen,
> > >  {
> > > int i;
> > >
> > > +   if (pathspec)
> > > +   guard_pathspec(pathspec,
> > > +  pathspec_fromtop |
> > > +  pathspec_maxdepth |
> > > +  pathspec_literal |
> > > +  pathspec_glob |
> > > +  pathspec_icase |
> > > +  pathspec_exclude);
> > 
> > You have done some magic (or your MTA/editor did) to lower case
> > GUARD_PATHSPEC and all the flags. The real patch looks good though, so
> > no problem.
> 
> That's really odd, I was sure I just did a cut and paste.  I'll fix that
> :)

So it looks like whatever I did to insert this into the cover letter
made everything lowercase.  The actual patch itself looks fine.

-- 
Brandon Williams


[PATCH v5 15/16] pathspec: small readability changes

2017-01-04 Thread Brandon Williams
A few small changes to improve readability.  This is done by grouping related
assignments, adding blank lines, ensuring lines are <80 characters, and
adding additional comments.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index f3a7a1d33..e53530e7a 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -67,11 +67,11 @@ static struct pathspec_magic {
char mnemonic; /* this cannot be ':'! */
const char *name;
 } pathspec_magic[] = {
-   { PATHSPEC_FROMTOP, '/', "top" },
-   { PATHSPEC_LITERAL,   0, "literal" },
-   { PATHSPEC_GLOB,   '\0', "glob" },
-   { PATHSPEC_ICASE,  '\0', "icase" },
-   { PATHSPEC_EXCLUDE, '!', "exclude" },
+   { PATHSPEC_FROMTOP,  '/', "top" },
+   { PATHSPEC_LITERAL, '\0', "literal" },
+   { PATHSPEC_GLOB,'\0', "glob" },
+   { PATHSPEC_ICASE,   '\0', "icase" },
+   { PATHSPEC_EXCLUDE,  '!', "exclude" },
 };
 
 static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
@@ -336,6 +336,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
if ((magic & PATHSPEC_LITERAL) && (magic & PATHSPEC_GLOB))
die(_("%s: 'literal' and 'glob' are incompatible"), elt);
 
+   /* Create match string which will be used for pathspec matching */
if (pathspec_prefix >= 0) {
match = xstrdup(copyfrom);
prefixlen = pathspec_prefix;
@@ -343,11 +344,16 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
match = xstrdup(copyfrom);
prefixlen = 0;
} else {
-   match = prefix_path_gently(prefix, prefixlen, &prefixlen, 
copyfrom);
+   match = prefix_path_gently(prefix, prefixlen,
+  &prefixlen, copyfrom);
if (!match)
die(_("%s: '%s' is outside repository"), elt, copyfrom);
}
+
item->match = match;
+   item->len = strlen(item->match);
+   item->prefix = prefixlen;
+
/*
 * Prefix the pathspec (keep all magic) and assign to
 * original. Useful for passing to another command.
@@ -364,8 +370,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
} else {
item->original = xstrdup(elt);
}
-   item->len = strlen(item->match);
-   item->prefix = prefixlen;
 
if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP)
strip_submodule_slash_cheap(item);
@@ -373,13 +377,14 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
strip_submodule_slash_expensive(item);
 
-   if (magic & PATHSPEC_LITERAL)
+   if (magic & PATHSPEC_LITERAL) {
item->nowildcard_len = item->len;
-   else {
+   } else {
item->nowildcard_len = simple_length(item->match);
if (item->nowildcard_len < prefixlen)
item->nowildcard_len = prefixlen;
}
+
item->flags = 0;
if (magic & PATHSPEC_GLOB) {
/*
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 13/16] pathspec: create parse_element_magic helper

2017-01-04 Thread Brandon Williams
Factor out the logic responsible for the magic in a pathspec element
into its own function.

Also avoid calling into the parsing functions when
`PATHSPEC_LITERAL_PATH` is specified since it causes magic to be
ignored and all paths to be treated as literals.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 37 -
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index f6356bde1..00fcae4e1 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -245,6 +245,19 @@ static const char *parse_short_magic(unsigned *magic, 
const char *elem)
return pos;
 }
 
+static const char *parse_element_magic(unsigned *magic, int *prefix_len,
+  const char *elem)
+{
+   if (elem[0] != ':' || get_literal_global())
+   return elem; /* nothing to do */
+   else if (elem[1] == '(')
+   /* longhand */
+   return parse_long_magic(magic, prefix_len, elem);
+   else
+   /* shorthand */
+   return parse_short_magic(magic, elem);
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -267,26 +280,16 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
char *match;
int i, pathspec_prefix = -1;
 
-   if (elt[0] != ':' || get_literal_global() ||
-   (flags & PATHSPEC_LITERAL_PATH)) {
-   ; /* nothing to do */
-   } else if (elt[1] == '(') {
-   /* longhand */
-   copyfrom = parse_long_magic(&element_magic,
-   &pathspec_prefix,
-   elt);
-   } else {
-   /* shorthand */
-   copyfrom = parse_short_magic(&element_magic, elt);
-   }
-
-   magic |= element_magic;
-
/* PATHSPEC_LITERAL_PATH ignores magic */
-   if (flags & PATHSPEC_LITERAL_PATH)
+   if (flags & PATHSPEC_LITERAL_PATH) {
magic = PATHSPEC_LITERAL;
-   else
+   } else {
+   copyfrom = parse_element_magic(&element_magic,
+  &pathspec_prefix,
+  elt);
+   magic |= element_magic;
magic |= get_global_magic(element_magic);
+   }
 
if (pathspec_prefix >= 0 &&
(prefixlen || (prefix && *prefix)))
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 08/16] pathspec: always show mnemonic and name in unsupported_magic

2017-01-04 Thread Brandon Williams
For better clarity, always show the mnemonic and name of the unsupported
magic being used.  This gives users a clearer understanding of which
magic feature isn't supported.  If they supplied a mnemonic, they will
also be told its corresponding name, which allows them to more easily
search the man pages for that magic type.

This also avoids passing an extra parameter around the pathspec
initialization code.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index b9a3819d6..5df364bc6 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -101,9 +101,7 @@ static void prefix_short_magic(struct strbuf *sb, int 
prefixlen,
  * the prefix part must always match literally, and a single stupid
  * string cannot express such a case.
  */
-static unsigned prefix_pathspec(struct pathspec_item *item,
-   unsigned *p_short_magic,
-   unsigned flags,
+static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
const char *prefix, int prefixlen,
const char *elt)
 {
@@ -210,7 +208,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
}
 
magic |= short_magic;
-   *p_short_magic = short_magic;
 
/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
if (noglob_global && !(magic & PATHSPEC_GLOB))
@@ -329,8 +326,7 @@ static int pathspec_item_cmp(const void *a_, const void *b_)
 }
 
 static void NORETURN unsupported_magic(const char *pattern,
-  unsigned magic,
-  unsigned short_magic)
+  unsigned magic)
 {
struct strbuf sb = STRBUF_INIT;
int i;
@@ -339,9 +335,11 @@ static void NORETURN unsupported_magic(const char *pattern,
if (!(magic & m->bit))
continue;
if (sb.len)
-   strbuf_addch(&sb, ' ');
-   if (short_magic & m->bit)
-   strbuf_addf(&sb, "'%c'", m->mnemonic);
+   strbuf_addstr(&sb, ", ");
+
+   if (m->mnemonic)
+   strbuf_addf(&sb, _("'%s' (mnemonic: '%c')"),
+   m->name, m->mnemonic);
else
strbuf_addf(&sb, "'%s'", m->name);
}
@@ -413,11 +411,9 @@ void parse_pathspec(struct pathspec *pathspec,
prefixlen = prefix ? strlen(prefix) : 0;
 
for (i = 0; i < n; i++) {
-   unsigned short_magic;
entry = argv[i];
 
-   item[i].magic = prefix_pathspec(item + i, &short_magic,
-   flags,
+   item[i].magic = prefix_pathspec(item + i, flags,
prefix, prefixlen, entry);
if ((flags & PATHSPEC_LITERAL_PATH) &&
!(magic_mask & PATHSPEC_LITERAL))
@@ -425,9 +421,7 @@ void parse_pathspec(struct pathspec *pathspec,
if (item[i].magic & PATHSPEC_EXCLUDE)
nr_exclude++;
if (item[i].magic & magic_mask)
-   unsupported_magic(entry,
- item[i].magic & magic_mask,
- short_magic);
+   unsupported_magic(entry, item[i].magic & magic_mask);
 
if ((flags & PATHSPEC_SYMLINK_LEADING_PATH) &&
has_symlink_leading_path(item[i].match, item[i].len)) {
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 11/16] pathspec: create parse_short_magic function

2017-01-04 Thread Brandon Williams
Factor out the logic responsible for parsing short magic into its own
function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 54 --
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 77df55da6..1b0901848 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -157,6 +157,41 @@ static int get_global_magic(int element_magic)
 }
 
 /*
+ * Parse the pathspec element looking for short magic
+ *
+ * saves all magic in 'magic'
+ * returns the position in 'elem' after all magic has been parsed
+ */
+static const char *parse_short_magic(unsigned *magic, const char *elem)
+{
+   const char *pos;
+
+   for (pos = elem + 1; *pos && *pos != ':'; pos++) {
+   char ch = *pos;
+   int i;
+
+   if (!is_pathspec_magic(ch))
+   break;
+
+   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+   if (pathspec_magic[i].mnemonic == ch) {
+   *magic |= pathspec_magic[i].bit;
+   break;
+   }
+   }
+
+   if (ARRAY_SIZE(pathspec_magic) <= i)
+   die(_("Unimplemented pathspec magic '%c' in '%s'"),
+   ch, elem);
+   }
+
+   if (*pos == ':')
+   pos++;
+
+   return pos;
+}
+
+/*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
  *
@@ -220,24 +255,7 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
copyfrom++;
} else {
/* shorthand */
-   for (copyfrom = elt + 1;
-*copyfrom && *copyfrom != ':';
-copyfrom++) {
-   char ch = *copyfrom;
-
-   if (!is_pathspec_magic(ch))
-   break;
-   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
-   if (pathspec_magic[i].mnemonic == ch) {
-   element_magic |= pathspec_magic[i].bit;
-   break;
-   }
-   if (ARRAY_SIZE(pathspec_magic) <= i)
-   die(_("Unimplemented pathspec magic '%c' in 
'%s'"),
-   ch, elt);
-   }
-   if (*copyfrom == ':')
-   copyfrom++;
+   copyfrom = parse_short_magic(&element_magic, elt);
}
 
magic |= element_magic;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 09/16] pathspec: simpler logic to prefix original pathspec elements

2017-01-04 Thread Brandon Williams
The logic used to prefix an original pathspec element with 'prefix'
magic is more general purpose and can be used for more than just short
magic.  Remove the extra code paths and rename 'prefix_short_magic' to
'prefix_magic' to better indicate that it can be used in more general
situations.

Also, slightly change the logic which decides when to prefix the
original element in order to prevent a pathspec of "." from getting
converted to "" (empty string).

Signed-off-by: Brandon Williams 
---
 pathspec.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 5df364bc6..af7f2d01d 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -74,13 +74,12 @@ static struct pathspec_magic {
{ PATHSPEC_EXCLUDE, '!', "exclude" },
 };
 
-static void prefix_short_magic(struct strbuf *sb, int prefixlen,
-  unsigned short_magic)
+static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
 {
int i;
strbuf_addstr(sb, ":(");
for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
-   if (short_magic & pathspec_magic[i].bit) {
+   if (magic & pathspec_magic[i].bit) {
if (sb->buf[sb->len - 1] != '(')
strbuf_addch(sb, ',');
strbuf_addstr(sb, pathspec_magic[i].name);
@@ -109,8 +108,8 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
static int glob_global = -1;
static int noglob_global = -1;
static int icase_global = -1;
-   unsigned magic = 0, short_magic = 0, global_magic = 0;
-   const char *copyfrom = elt, *long_magic_end = NULL;
+   unsigned magic = 0, element_magic = 0, global_magic = 0;
+   const char *copyfrom = elt;
char *match;
int i, pathspec_prefix = -1;
 
@@ -164,7 +163,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
if (strlen(pathspec_magic[i].name) == len &&
!strncmp(pathspec_magic[i].name, copyfrom, 
len)) {
-   magic |= pathspec_magic[i].bit;
+   element_magic |= pathspec_magic[i].bit;
break;
}
if (starts_with(copyfrom, "prefix:")) {
@@ -183,7 +182,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
}
if (*copyfrom != ')')
die(_("Missing ')' at the end of pathspec magic in 
'%s'"), elt);
-   long_magic_end = copyfrom;
copyfrom++;
} else {
/* shorthand */
@@ -196,7 +194,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
break;
for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
if (pathspec_magic[i].mnemonic == ch) {
-   short_magic |= pathspec_magic[i].bit;
+   element_magic |= pathspec_magic[i].bit;
break;
}
if (ARRAY_SIZE(pathspec_magic) <= i)
@@ -207,7 +205,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
copyfrom++;
}
 
-   magic |= short_magic;
+   magic |= element_magic;
 
/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
if (noglob_global && !(magic & PATHSPEC_GLOB))
@@ -242,18 +240,13 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
 * Prefix the pathspec (keep all magic) and assign to
 * original. Useful for passing to another command.
 */
-   if (flags & PATHSPEC_PREFIX_ORIGIN) {
+   if ((flags & PATHSPEC_PREFIX_ORIGIN) &&
+   prefixlen && !literal_global) {
struct strbuf sb = STRBUF_INIT;
-   if (prefixlen && !literal_global) {
-   /* Preserve the actual prefix length of each pattern */
-   if (short_magic)
-   prefix_short_magic(&sb, prefixlen, short_magic);
-   else if (long_magic_end) {
-   strbuf_add(&sb, elt, long_magic_end - elt);
-   strbuf_addf(&sb, ",prefix:%d)", prefixlen);
-   } else
-  

[PATCH v5 12/16] pathspec: create parse_long_magic function

2017-01-04 Thread Brandon Williams
Factor out the logic responsible for parsing long magic into its own
function.  Also hoist the prefix check logic outside of the inner
loop, as there isn't anything that needs to be done after matching
"prefix:".

Signed-off-by: Brandon Williams 
---
 pathspec.c | 92 ++
 1 file changed, 57 insertions(+), 35 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 1b0901848..f6356bde1 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -157,6 +157,60 @@ static int get_global_magic(int element_magic)
 }
 
 /*
+ * Parse the pathspec element looking for long magic
+ *
+ * saves all magic in 'magic'
+ * if prefix magic is used, save the prefix length in 'prefix_len'
+ * returns the position in 'elem' after all magic has been parsed
+ */
+static const char *parse_long_magic(unsigned *magic, int *prefix_len,
+   const char *elem)
+{
+   const char *pos;
+   const char *nextat;
+
+   for (pos = elem + 2; *pos && *pos != ')'; pos = nextat) {
+   size_t len = strcspn(pos, ",)");
+   int i;
+
+   if (pos[len] == ',')
+   nextat = pos + len + 1; /* handle ',' */
+   else
+   nextat = pos + len; /* handle ')' and '\0' */
+
+   if (!len)
+   continue;
+
+   if (starts_with(pos, "prefix:")) {
+   char *endptr;
+   *prefix_len = strtol(pos + 7, &endptr, 10);
+   if (endptr - pos != len)
+   die(_("invalid parameter for pathspec magic 
'prefix'"));
+   continue;
+   }
+
+   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+   if (strlen(pathspec_magic[i].name) == len &&
+   !strncmp(pathspec_magic[i].name, pos, len)) {
+   *magic |= pathspec_magic[i].bit;
+   break;
+   }
+   }
+
+   if (ARRAY_SIZE(pathspec_magic) <= i)
+   die(_("Invalid pathspec magic '%.*s' in '%s'"),
+   (int) len, pos, elem);
+   }
+
+   if (*pos != ')')
+   die(_("Missing ')' at the end of pathspec magic in '%s'"),
+   elem);
+   pos++;
+
+   return pos;
+}
+
+/*
  * Parse the pathspec element looking for short magic
  *
  * saves all magic in 'magic'
@@ -218,41 +272,9 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
; /* nothing to do */
} else if (elt[1] == '(') {
/* longhand */
-   const char *nextat;
-   for (copyfrom = elt + 2;
-*copyfrom && *copyfrom != ')';
-copyfrom = nextat) {
-   size_t len = strcspn(copyfrom, ",)");
-   if (copyfrom[len] == ',')
-   nextat = copyfrom + len + 1;
-   else
-   /* handle ')' and '\0' */
-   nextat = copyfrom + len;
-   if (!len)
-   continue;
-   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
-   if (strlen(pathspec_magic[i].name) == len &&
-   !strncmp(pathspec_magic[i].name, copyfrom, 
len)) {
-   element_magic |= pathspec_magic[i].bit;
-   break;
-   }
-   if (starts_with(copyfrom, "prefix:")) {
-   char *endptr;
-   pathspec_prefix = strtol(copyfrom + 7,
-&endptr, 10);
-   if (endptr - copyfrom != len)
-   die(_("invalid parameter for 
pathspec magic 'prefix'"));
-   /* "i" would be wrong, but it does not 
matter */
-   break;
-   }
-   }
-   if (ARRAY_SIZE(pathspec_magic) <= i)
-   die(_("Invalid pathspec magic '%.*s' in '%s'"),
-   (int) len, copyfrom, elt);
-   }
-   

[PATCH v5 14/16] pathspec: create strip submodule slash helpers

2017-01-04 Thread Brandon Williams
Factor out the logic responsible for stripping the trailing slash on
pathspecs referencing submodules into its own function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 68 ++
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 00fcae4e1..f3a7a1d33 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -258,6 +258,44 @@ static const char *parse_element_magic(unsigned *magic, 
int *prefix_len,
return parse_short_magic(magic, elem);
 }
 
+static void strip_submodule_slash_cheap(struct pathspec_item *item)
+{
+   if (item->len >= 1 && item->match[item->len - 1] == '/') {
+   int i = cache_name_pos(item->match, item->len - 1);
+
+   if (i >= 0 && S_ISGITLINK(active_cache[i]->ce_mode)) {
+   item->len--;
+   item->match[item->len] = '\0';
+   }
+   }
+}
+
+static void strip_submodule_slash_expensive(struct pathspec_item *item)
+{
+   int i;
+
+   for (i = 0; i < active_nr; i++) {
+   struct cache_entry *ce = active_cache[i];
+   int ce_len = ce_namelen(ce);
+
+   if (!S_ISGITLINK(ce->ce_mode))
+   continue;
+
+   if (item->len <= ce_len || item->match[ce_len] != '/' ||
+   memcmp(ce->name, item->match, ce_len))
+   continue;
+
+   if (item->len == ce_len + 1) {
+   /* strip trailing slash */
+   item->len--;
+   item->match[item->len] = '\0';
+   } else {
+   die(_("Pathspec '%s' is in submodule '%.*s'"),
+   item->original, ce_len, ce->name);
+   }
+   }
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -278,7 +316,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
unsigned magic = 0, element_magic = 0;
const char *copyfrom = elt;
char *match;
-   int i, pathspec_prefix = -1;
+   int pathspec_prefix = -1;
 
/* PATHSPEC_LITERAL_PATH ignores magic */
if (flags & PATHSPEC_LITERAL_PATH) {
@@ -329,33 +367,11 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
item->len = strlen(item->match);
item->prefix = prefixlen;
 
-   if ((flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP) &&
-   (item->len >= 1 && item->match[item->len - 1] == '/') &&
-   (i = cache_name_pos(item->match, item->len - 1)) >= 0 &&
-   S_ISGITLINK(active_cache[i]->ce_mode)) {
-   item->len--;
-   match[item->len] = '\0';
-   }
+   if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP)
+   strip_submodule_slash_cheap(item);
 
if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
-   for (i = 0; i < active_nr; i++) {
-   struct cache_entry *ce = active_cache[i];
-   int ce_len = ce_namelen(ce);
-
-   if (!S_ISGITLINK(ce->ce_mode))
-   continue;
-
-   if (item->len <= ce_len || match[ce_len] != '/' ||
-   memcmp(ce->name, match, ce_len))
-   continue;
-   if (item->len == ce_len + 1) {
-   /* strip trailing slash */
-   item->len--;
-   match[item->len] = '\0';
-   } else
-   die (_("Pathspec '%s' is in submodule '%.*s'"),
-elt, ce_len, ce->name);
-   }
+   strip_submodule_slash_expensive(item);
 
if (magic & PATHSPEC_LITERAL)
item->nowildcard_len = item->len;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 10/16] pathspec: factor global magic into its own function

2017-01-04 Thread Brandon Williams
Create helper functions to read the global magic environment variables
in addition to factoring out the global magic gathering logic into its
own function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 127 +
 1 file changed, 78 insertions(+), 49 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index af7f2d01d..77df55da6 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -87,6 +87,75 @@ static void prefix_magic(struct strbuf *sb, int prefixlen, 
unsigned magic)
strbuf_addf(sb, ",prefix:%d)", prefixlen);
 }
 
+static inline int get_literal_global(void)
+{
+   static int literal = -1;
+
+   if (literal < 0)
+   literal = git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
+
+   return literal;
+}
+
+static inline int get_glob_global(void)
+{
+   static int glob = -1;
+
+   if (glob < 0)
+   glob = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0);
+
+   return glob;
+}
+
+static inline int get_noglob_global(void)
+{
+   static int noglob = -1;
+
+   if (noglob < 0)
+   noglob = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 0);
+
+   return noglob;
+}
+
+static inline int get_icase_global(void)
+{
+   static int icase = -1;
+
+   if (icase < 0)
+   icase = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
+
+   return icase;
+}
+
+static int get_global_magic(int element_magic)
+{
+   int global_magic = 0;
+
+   if (get_literal_global())
+   global_magic |= PATHSPEC_LITERAL;
+
+   /* --glob-pathspec is overridden by :(literal) */
+   if (get_glob_global() && !(element_magic & PATHSPEC_LITERAL))
+   global_magic |= PATHSPEC_GLOB;
+
+   if (get_glob_global() && get_noglob_global())
+   die(_("global 'glob' and 'noglob' pathspec settings are 
incompatible"));
+
+   if (get_icase_global())
+   global_magic |= PATHSPEC_ICASE;
+
+   if ((global_magic & PATHSPEC_LITERAL) &&
+   (global_magic & ~PATHSPEC_LITERAL))
+   die(_("global 'literal' pathspec setting is incompatible "
+ "with all other global pathspec settings"));
+
+   /* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
+   if (get_noglob_global() && !(element_magic & PATHSPEC_GLOB))
+   global_magic |= PATHSPEC_LITERAL;
+
+   return global_magic;
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -104,46 +173,12 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
const char *prefix, int prefixlen,
const char *elt)
 {
-   static int literal_global = -1;
-   static int glob_global = -1;
-   static int noglob_global = -1;
-   static int icase_global = -1;
-   unsigned magic = 0, element_magic = 0, global_magic = 0;
+   unsigned magic = 0, element_magic = 0;
const char *copyfrom = elt;
char *match;
int i, pathspec_prefix = -1;
 
-   if (literal_global < 0)
-   literal_global = 
git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
-   if (literal_global)
-   global_magic |= PATHSPEC_LITERAL;
-
-   if (glob_global < 0)
-   glob_global = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0);
-   if (glob_global)
-   global_magic |= PATHSPEC_GLOB;
-
-   if (noglob_global < 0)
-   noglob_global = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 
0);
-
-   if (glob_global && noglob_global)
-   die(_("global 'glob' and 'noglob' pathspec settings are 
incompatible"));
-
-
-   if (icase_global < 0)
-   icase_global = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
-   if (icase_global)
-   global_magic |= PATHSPEC_ICASE;
-
-   if ((global_magic & PATHSPEC_LITERAL) &&
-   (global_magic & ~PATHSPEC_LITERAL))
-   die(_("global 'literal' pathspec setting is incompatible "
- "with all other global pathspec settings"));
-
-   if (flags & PATHSPEC_LITERAL_PATH)
-   global_magic = 0;
-
-   if (elt[0] != ':' || literal_global ||
+   if (elt[0] != ':' || get_literal_global() ||
(flags & PATHSPEC_LITERAL_PATH)) {
; /* nothing to do */
} else if (elt[1] == '(') {
@@ -207,15 +242,11 @@ static unsigned prefix_pathspec(struct pathspec_item 
*item, unsigned flags,
 
magic |= element_magic;
 
-   /* --noglob-pathspec adds :(literal) _unle

[PATCH v5 16/16] pathspec: rename prefix_pathspec to init_pathspec_item

2017-01-04 Thread Brandon Williams
Give a more relevant name to the prefix_pathspec function as it does
more than just prefix a pathspec element.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index e53530e7a..ff2509ddd 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -297,21 +297,11 @@ static void strip_submodule_slash_expensive(struct 
pathspec_item *item)
 }
 
 /*
- * Take an element of a pathspec and check for magic signatures.
- * Append the result to the prefix. Return the magic bitmap.
- *
- * For now, we only parse the syntax and throw out anything other than
- * "top" magic.
- *
- * NEEDSWORK: This needs to be rewritten when we start migrating
- * get_pathspec() users to use the "struct pathspec" interface.  For
- * example, a pathspec element may be marked as case-insensitive, but
- * the prefix part must always match literally, and a single stupid
- * string cannot express such a case.
+ * Perform the initialization of a pathspec_item based on a pathspec element.
  */
-static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
-   const char *prefix, int prefixlen,
-   const char *elt)
+static void init_pathspec_item(struct pathspec_item *item, unsigned flags,
+  const char *prefix, int prefixlen,
+  const char *elt)
 {
unsigned magic = 0, element_magic = 0;
const char *copyfrom = elt;
@@ -329,6 +319,8 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
magic |= get_global_magic(element_magic);
}
 
+   item->magic = magic;
+
if (pathspec_prefix >= 0 &&
(prefixlen || (prefix && *prefix)))
die("BUG: 'prefix' magic is supposed to be used at worktree's 
root");
@@ -401,7 +393,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, 
unsigned flags,
/* sanity checks, pathspec matchers assume these are sane */
assert(item->nowildcard_len <= item->len &&
   item->prefix <= item->len);
-   return magic;
 }
 
 static int pathspec_item_cmp(const void *a_, const void *b_)
@@ -501,8 +492,7 @@ void parse_pathspec(struct pathspec *pathspec,
for (i = 0; i < n; i++) {
entry = argv[i];
 
-   item[i].magic = prefix_pathspec(item + i, flags,
-   prefix, prefixlen, entry);
+   init_pathspec_item(item + i, flags, prefix, prefixlen, entry);
 
if (item[i].magic & PATHSPEC_EXCLUDE)
nr_exclude++;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 00/16] pathspec cleanup

2017-01-04 Thread Brandon Williams
Changes in v5:
* Move GUARD_PATHSPEC to prevent checking if pathspec is null twice.
* Mark a string containing 'mnemonic' for translation.

Brandon Williams (16):
  mv: remove use of deprecated 'get_pathspec()'
  dir: remove struct path_simplify
  dir: convert fill_directory to use the pathspec struct interface
  ls-tree: convert show_recursive to use the pathspec struct interface
  pathspec: remove the deprecated get_pathspec function
  pathspec: copy and free owned memory
  pathspec: remove unused variable from unsupported_magic
  pathspec: always show mnemonic and name in unsupported_magic
  pathspec: simpler logic to prefix original pathspec elements
  pathspec: factor global magic into its own function
  pathspec: create parse_short_magic function
  pathspec: create parse_long_magic function
  pathspec: create parse_element_magic helper
  pathspec: create strip submodule slash helpers
  pathspec: small readability changes
  pathspec: rename prefix_pathspec to init_pathspec_item

 Documentation/technical/api-setup.txt |   2 -
 builtin/ls-tree.c |  16 +-
 builtin/mv.c  |  50 ++--
 cache.h   |   1 -
 dir.c | 191 ++
 pathspec.c| 480 +++---
 pathspec.h|   5 +-
 7 files changed, 388 insertions(+), 357 deletions(-)

--- interdiff between v4 and v5

diff --git a/dir.c b/dir.c
index e8ddd7f8a..bc5ff7216 100644
--- a/dir.c
+++ b/dir.c
@@ -1353,18 +1353,17 @@ static int simplify_away(const char *path, int pathlen,
 {
int i;
 
-   if (pathspec)
-   GUARD_PATHSPEC(pathspec,
-  PATHSPEC_FROMTOP |
-  PATHSPEC_MAXDEPTH |
-  PATHSPEC_LITERAL |
-  PATHSPEC_GLOB |
-  PATHSPEC_ICASE |
-  PATHSPEC_EXCLUDE);
-
if (!pathspec || !pathspec->nr)
return 0;
 
+   GUARD_PATHSPEC(pathspec,
+  PATHSPEC_FROMTOP |
+  PATHSPEC_MAXDEPTH |
+  PATHSPEC_LITERAL |
+  PATHSPEC_GLOB |
+  PATHSPEC_ICASE |
+  PATHSPEC_EXCLUDE);
+
for (i = 0; i < pathspec->nr; i++) {
const struct pathspec_item *item = &pathspec->items[i];
int len = item->nowildcard_len;
@@ -1394,18 +1393,17 @@ static int exclude_matches_pathspec(const char *path, 
int pathlen,
 {
int i;
 
-   if (pathspec)
-   GUARD_PATHSPEC(pathspec,
-  PATHSPEC_FROMTOP |
-  PATHSPEC_MAXDEPTH |
-  PATHSPEC_LITERAL |
-  PATHSPEC_GLOB |
-  PATHSPEC_ICASE |
-  PATHSPEC_EXCLUDE);
-
if (!pathspec || !pathspec->nr)
return 0;
 
+   GUARD_PATHSPEC(pathspec,
+  PATHSPEC_FROMTOP |
+  PATHSPEC_MAXDEPTH |
+  PATHSPEC_LITERAL |
+  PATHSPEC_GLOB |
+  PATHSPEC_ICASE |
+  PATHSPEC_EXCLUDE);
+
for (i = 0; i < pathspec->nr; i++) {
const struct pathspec_item *item = &pathspec->items[i];
int len = item->nowildcard_len;
diff --git a/pathspec.c b/pathspec.c
index bcf3ba039..ff2509ddd 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -417,7 +417,7 @@ static void NORETURN unsupported_magic(const char *pattern,
strbuf_addstr(&sb, ", ");
 
if (m->mnemonic)
-   strbuf_addf(&sb, "'%s' (mnemonic: '%c')",
+   strbuf_addf(&sb, _("'%s' (mnemonic: '%c')"),
m->name, m->mnemonic);
else
strbuf_addf(&sb, "'%s'", m->name);

-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 04/16] ls-tree: convert show_recursive to use the pathspec struct interface

2017-01-04 Thread Brandon Williams
Convert 'show_recursive()' to use the pathspec struct interface from
using the '_raw' entry in the pathspec struct.

Signed-off-by: Brandon Williams 
---
 builtin/ls-tree.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 0e30d8623..d7ebeb4ce 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -31,21 +31,18 @@ static const  char * const ls_tree_usage[] = {
 
 static int show_recursive(const char *base, int baselen, const char *pathname)
 {
-   const char **s;
+   int i;
 
if (ls_options & LS_RECURSIVE)
return 1;
 
-   s = pathspec._raw;
-   if (!s)
+   if (!pathspec.nr)
return 0;
 
-   for (;;) {
-   const char *spec = *s++;
+   for (i = 0; i < pathspec.nr; i++) {
+   const char *spec = pathspec.items[i].match;
int len, speclen;
 
-   if (!spec)
-   return 0;
if (strncmp(base, spec, baselen))
continue;
len = strlen(pathname);
@@ -59,6 +56,7 @@ static int show_recursive(const char *base, int baselen, 
const char *pathname)
continue;
return 1;
}
+   return 0;
 }
 
 static int show_tree(const unsigned char *sha1, struct strbuf *base,
@@ -175,8 +173,8 @@ int cmd_ls_tree(int argc, const char **argv, const char 
*prefix)
 * cannot be lifted until it is converted to use
 * match_pathspec() or tree_entry_interesting()
 */
-   parse_pathspec(&pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE |
- PATHSPEC_EXCLUDE,
+   parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC &
+ ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL),
   PATHSPEC_PREFER_CWD,
   prefix, argv + 1);
for (i = 0; i < pathspec.nr; i++)
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 05/16] pathspec: remove the deprecated get_pathspec function

2017-01-04 Thread Brandon Williams
Now that all callers of the old 'get_pathspec' interface have been
migrated to use the new pathspec struct interface it can be removed
from the codebase.

Since there are no more users of the '_raw' field in the pathspec struct
it can also be removed.  This patch also removes the old functionality
of modifying the const char **argv array that was passed into
parse_pathspec.  Instead the constructed 'match' string (which is a
pathspec element with the prefix prepended) is only stored in its
corresponding pathspec_item entry.

Signed-off-by: Brandon Williams 
---
 Documentation/technical/api-setup.txt |  2 --
 cache.h   |  1 -
 pathspec.c| 42 +++
 pathspec.h|  1 -
 4 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/Documentation/technical/api-setup.txt 
b/Documentation/technical/api-setup.txt
index 540e45568..eb1fa9853 100644
--- a/Documentation/technical/api-setup.txt
+++ b/Documentation/technical/api-setup.txt
@@ -27,8 +27,6 @@ parse_pathspec(). This function takes several arguments:
 
 - prefix and args come from cmd_* functions
 
-get_pathspec() is obsolete and should never be used in new code.
-
 parse_pathspec() helps catch unsupported features and reject them
 politely. At a lower level, different pathspec-related functions may
 not support the same set of features. Such pathspec-sensitive
diff --git a/cache.h b/cache.h
index a50a61a19..0f80e01bd 100644
--- a/cache.h
+++ b/cache.h
@@ -514,7 +514,6 @@ extern void set_git_work_tree(const char *tree);
 
 #define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
 
-extern const char **get_pathspec(const char *prefix, const char **pathspec);
 extern void setup_work_tree(void);
 extern const char *setup_git_directory_gently(int *);
 extern const char *setup_git_directory(void);
diff --git a/pathspec.c b/pathspec.c
index 22ca74a12..1f918cbae 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -103,7 +103,7 @@ static void prefix_short_magic(struct strbuf *sb, int 
prefixlen,
  */
 static unsigned prefix_pathspec(struct pathspec_item *item,
unsigned *p_short_magic,
-   const char **raw, unsigned flags,
+   unsigned flags,
const char *prefix, int prefixlen,
const char *elt)
 {
@@ -240,7 +240,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
if (!match)
die(_("%s: '%s' is outside repository"), elt, copyfrom);
}
-   *raw = item->match = match;
+   item->match = match;
/*
 * Prefix the pathspec (keep all magic) and assign to
 * original. Useful for passing to another command.
@@ -381,8 +381,6 @@ void parse_pathspec(struct pathspec *pathspec,
 
/* No arguments with prefix -> prefix pathspec */
if (!entry) {
-   static const char *raw[2];
-
if (flags & PATHSPEC_PREFER_FULL)
return;
 
@@ -394,10 +392,7 @@ void parse_pathspec(struct pathspec *pathspec,
item->original = prefix;
item->nowildcard_len = item->len = strlen(prefix);
item->prefix = item->len;
-   raw[0] = prefix;
-   raw[1] = NULL;
pathspec->nr = 1;
-   pathspec->_raw = raw;
return;
}
 
@@ -415,7 +410,6 @@ void parse_pathspec(struct pathspec *pathspec,
pathspec->nr = n;
ALLOC_ARRAY(pathspec->items, n);
item = pathspec->items;
-   pathspec->_raw = argv;
prefixlen = prefix ? strlen(prefix) : 0;
 
for (i = 0; i < n; i++) {
@@ -423,7 +417,7 @@ void parse_pathspec(struct pathspec *pathspec,
entry = argv[i];
 
item[i].magic = prefix_pathspec(item + i, &short_magic,
-   argv + i, flags,
+   flags,
prefix, prefixlen, entry);
if ((flags & PATHSPEC_LITERAL_PATH) &&
!(magic_mask & PATHSPEC_LITERAL))
@@ -457,36 +451,6 @@ void parse_pathspec(struct pathspec *pathspec,
}
 }
 
-/*
- * N.B. get_pathspec() is deprecated in favor of the "struct pathspec"
- * based interface - see pathspec.c:parse_pathspec().
- *
- * Arguments:
- *  - prefix - a path relative to the root of the working tree
- *  - pathspec - a list of paths underneath the prefix path
- *
- * Iterates over pathspec, prepending each path with prefix,
- * and return the resulting list.
- *
- * If pathspec is empty, return a singleton list containing prefix.
- *
- * If pathspec and prefix are both empty, return an em

[PATCH v5 01/16] mv: remove use of deprecated 'get_pathspec()'

2017-01-04 Thread Brandon Williams
Convert the 'internal_copy_pathspec()' function to 'prefix_path()'
instead of using the deprecated 'get_pathspec()' interface.  Also,
rename 'internal_copy_pathspec()' to 'internal_prefix_pathspec()' to be
more descriptive of what the function is actually doing.

In addition to this, fix a memory leak caused by only duplicating some
of the pathspec elements.  Instead always duplicate all of the
pathspec elements as an intermediate step (with modifications based on
the passed in flags).  This way the intermediate strings can then be
freed after getting the result from 'prefix_path()'.

Signed-off-by: Brandon Williams 
---
 builtin/mv.c | 50 +++---
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 2f43877bc..4e86dc523 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2006 Johannes Schindelin
  */
 #include "builtin.h"
+#include "pathspec.h"
 #include "lockfile.h"
 #include "dir.h"
 #include "cache-tree.h"
@@ -19,31 +20,42 @@ static const char * const builtin_mv_usage[] = {
 #define DUP_BASENAME 1
 #define KEEP_TRAILING_SLASH 2
 
-static const char **internal_copy_pathspec(const char *prefix,
-  const char **pathspec,
-  int count, unsigned flags)
+static const char **internal_prefix_pathspec(const char *prefix,
+const char **pathspec,
+int count, unsigned flags)
 {
int i;
const char **result;
+   int prefixlen = prefix ? strlen(prefix) : 0;
ALLOC_ARRAY(result, count + 1);
-   COPY_ARRAY(result, pathspec, count);
-   result[count] = NULL;
+
+   /* Create an intermediate copy of the pathspec based on the flags */
for (i = 0; i < count; i++) {
-   int length = strlen(result[i]);
+   int length = strlen(pathspec[i]);
int to_copy = length;
+   char *it;
while (!(flags & KEEP_TRAILING_SLASH) &&
-  to_copy > 0 && is_dir_sep(result[i][to_copy - 1]))
+  to_copy > 0 && is_dir_sep(pathspec[i][to_copy - 1]))
to_copy--;
-   if (to_copy != length || flags & DUP_BASENAME) {
-   char *it = xmemdupz(result[i], to_copy);
-   if (flags & DUP_BASENAME) {
-   result[i] = xstrdup(basename(it));
-   free(it);
-   } else
-   result[i] = it;
+
+   it = xmemdupz(pathspec[i], to_copy);
+   if (flags & DUP_BASENAME) {
+   result[i] = xstrdup(basename(it));
+   free(it);
+   } else {
+   result[i] = it;
}
}
-   return get_pathspec(prefix, result);
+   result[count] = NULL;
+
+   /* Prefix the pathspec and free the old intermediate strings */
+   for (i = 0; i < count; i++) {
+   const char *match = prefix_path(prefix, prefixlen, result[i]);
+   free((char *) result[i]);
+   result[i] = match;
+   }
+
+   return result;
 }
 
 static const char *add_slash(const char *path)
@@ -130,7 +142,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
if (read_cache() < 0)
die(_("index file corrupt"));
 
-   source = internal_copy_pathspec(prefix, argv, argc, 0);
+   source = internal_prefix_pathspec(prefix, argv, argc, 0);
modes = xcalloc(argc, sizeof(enum update_mode));
/*
 * Keep trailing slash, needed to let
@@ -140,16 +152,16 @@ int cmd_mv(int argc, const char **argv, const char 
*prefix)
flags = KEEP_TRAILING_SLASH;
if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1]))
flags = 0;
-   dest_path = internal_copy_pathspec(prefix, argv + argc, 1, flags);
+   dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags);
submodule_gitfile = xcalloc(argc, sizeof(char *));
 
if (dest_path[0][0] == '\0')
/* special case: "." was normalized to "" */
-   destination = internal_copy_pathspec(dest_path[0], argv, argc, 
DUP_BASENAME);
+   destination = internal_prefix_pathspec(dest_path[0], argv, 
argc, DUP_BASENAME);
else if (!lstat(dest_path[0], &st) &&
S_ISDIR(st.st_mode)) {
dest_path[0] = add_slash(dest_path[0]);
-   destination = internal_copy_pathspec(dest_path[0], argv, argc, 
DUP_BASEN

[PATCH v5 06/16] pathspec: copy and free owned memory

2017-01-04 Thread Brandon Williams
The 'original' string entry in a pathspec_item is only duplicated some
of the time, instead always make a copy of the original and take
ownership of the memory.

Since both 'match' and 'original' string entries in a pathspec_item are
owned by the pathspec struct, they need to be freed when clearing the
pathspec struct (in 'clear_pathspec()') and duplicated when copying the
pathspec struct (in 'copy_pathspec()').

Also change the type of 'match' and 'original' to 'char *' in order to
more explicitly show the ownership of the memory.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 23 +++
 pathspec.h |  4 ++--
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 1f918cbae..b8faa8f46 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -259,8 +259,9 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
}
strbuf_addstr(&sb, match);
item->original = strbuf_detach(&sb, NULL);
-   } else
-   item->original = elt;
+   } else {
+   item->original = xstrdup(elt);
+   }
item->len = strlen(item->match);
item->prefix = prefixlen;
 
@@ -388,8 +389,8 @@ void parse_pathspec(struct pathspec *pathspec,
die("BUG: PATHSPEC_PREFER_CWD requires arguments");
 
pathspec->items = item = xcalloc(1, sizeof(*item));
-   item->match = prefix;
-   item->original = prefix;
+   item->match = xstrdup(prefix);
+   item->original = xstrdup(prefix);
item->nowildcard_len = item->len = strlen(prefix);
item->prefix = item->len;
pathspec->nr = 1;
@@ -453,13 +454,27 @@ void parse_pathspec(struct pathspec *pathspec,
 
 void copy_pathspec(struct pathspec *dst, const struct pathspec *src)
 {
+   int i;
+
*dst = *src;
ALLOC_ARRAY(dst->items, dst->nr);
COPY_ARRAY(dst->items, src->items, dst->nr);
+
+   for (i = 0; i < dst->nr; i++) {
+   dst->items[i].match = xstrdup(src->items[i].match);
+   dst->items[i].original = xstrdup(src->items[i].original);
+   }
 }
 
 void clear_pathspec(struct pathspec *pathspec)
 {
+   int i;
+
+   for (i = 0; i < pathspec->nr; i++) {
+   free(pathspec->items[i].match);
+   free(pathspec->items[i].original);
+   }
free(pathspec->items);
pathspec->items = NULL;
+   pathspec->nr = 0;
 }
diff --git a/pathspec.h b/pathspec.h
index 70a592e91..49fd823dd 100644
--- a/pathspec.h
+++ b/pathspec.h
@@ -25,8 +25,8 @@ struct pathspec {
unsigned magic;
int max_depth;
struct pathspec_item {
-   const char *match;
-   const char *original;
+   char *match;
+   char *original;
unsigned magic;
int len, prefix;
int nowildcard_len;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 07/16] pathspec: remove unused variable from unsupported_magic

2017-01-04 Thread Brandon Williams
Removed unused variable 'n' from the 'unsupported_magic()' function.

Signed-off-by: Brandon Williams 
---
 pathspec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index b8faa8f46..b9a3819d6 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -333,8 +333,8 @@ static void NORETURN unsupported_magic(const char *pattern,
   unsigned short_magic)
 {
struct strbuf sb = STRBUF_INIT;
-   int i, n;
-   for (n = i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+   int i;
+   for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
const struct pathspec_magic *m = pathspec_magic + i;
if (!(magic & m->bit))
continue;
@@ -344,7 +344,6 @@ static void NORETURN unsupported_magic(const char *pattern,
strbuf_addf(&sb, "'%c'", m->mnemonic);
else
strbuf_addf(&sb, "'%s'", m->name);
-   n++;
}
/*
 * We may want to substitute "this command" with a command
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 03/16] dir: convert fill_directory to use the pathspec struct interface

2017-01-04 Thread Brandon Williams
Convert 'fill_directory()' to use the pathspec struct interface from
using the '_raw' entry in the pathspec struct.

Signed-off-by: Brandon Williams 
---
 dir.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/dir.c b/dir.c
index 9ae454dde..bc5ff7216 100644
--- a/dir.c
+++ b/dir.c
@@ -174,17 +174,21 @@ char *common_prefix(const struct pathspec *pathspec)
 
 int fill_directory(struct dir_struct *dir, const struct pathspec *pathspec)
 {
-   size_t len;
+   char *prefix;
+   size_t prefix_len;
 
/*
 * Calculate common prefix for the pathspec, and
 * use that to optimize the directory walk
 */
-   len = common_prefix_len(pathspec);
+   prefix = common_prefix(pathspec);
+   prefix_len = prefix ? strlen(prefix) : 0;
 
/* Read the directory and prune it */
-   read_directory(dir, pathspec->nr ? pathspec->_raw[0] : "", len, 
pathspec);
-   return len;
+   read_directory(dir, prefix, prefix_len, pathspec);
+
+   free(prefix);
+   return prefix_len;
 }
 
 int within_depth(const char *name, int namelen,
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 02/16] dir: remove struct path_simplify

2017-01-04 Thread Brandon Williams
Teach simplify_away() and exclude_matches_pathspec() to handle struct
pathspec directly, eliminating the need for the struct path_simplify.

Also renamed the len parameter to pathlen in exclude_matches_pathspec()
to match the parameter names used in simplify_away().

Signed-off-by: Brandon Williams 
---
 dir.c | 179 --
 1 file changed, 76 insertions(+), 103 deletions(-)

diff --git a/dir.c b/dir.c
index bfa8c8a9a..9ae454dde 100644
--- a/dir.c
+++ b/dir.c
@@ -16,11 +16,6 @@
 #include "varint.h"
 #include "ewah/ewok.h"
 
-struct path_simplify {
-   int len;
-   const char *path;
-};
-
 /*
  * Tells read_directory_recursive how a file or directory should be treated.
  * Values are ordered by significance, e.g. if a directory contains both
@@ -50,7 +45,7 @@ struct cached_dir {
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
const char *path, int len, struct untracked_cache_dir *untracked,
-   int check_only, const struct path_simplify *simplify);
+   int check_only, const struct pathspec *pathspec);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
 int fspathcmp(const char *a, const char *b)
@@ -1312,7 +1307,7 @@ static enum exist_status directory_exists_in_index(const 
char *dirname, int len)
 static enum path_treatment treat_directory(struct dir_struct *dir,
struct untracked_cache_dir *untracked,
const char *dirname, int len, int baselen, int exclude,
-   const struct path_simplify *simplify)
+   const struct pathspec *pathspec)
 {
/* The "len-1" is to strip the final '/' */
switch (directory_exists_in_index(dirname, len-1)) {
@@ -1341,7 +1336,7 @@ static enum path_treatment treat_directory(struct 
dir_struct *dir,
untracked = lookup_untracked(dir->untracked, untracked,
 dirname + baselen, len - baselen);
return read_directory_recursive(dir, dirname, len,
-   untracked, 1, simplify);
+   untracked, 1, pathspec);
 }
 
 /*
@@ -1349,24 +1344,33 @@ static enum path_treatment treat_directory(struct 
dir_struct *dir,
  * reading - if the path cannot possibly be in the pathspec,
  * return true, and we'll skip it early.
  */
-static int simplify_away(const char *path, int pathlen, const struct 
path_simplify *simplify)
+static int simplify_away(const char *path, int pathlen,
+const struct pathspec *pathspec)
 {
-   if (simplify) {
-   for (;;) {
-   const char *match = simplify->path;
-   int len = simplify->len;
+   int i;
 
-   if (!match)
-   break;
-   if (len > pathlen)
-   len = pathlen;
-   if (!memcmp(path, match, len))
-   return 0;
-   simplify++;
-   }
-   return 1;
+   if (!pathspec || !pathspec->nr)
+   return 0;
+
+   GUARD_PATHSPEC(pathspec,
+  PATHSPEC_FROMTOP |
+  PATHSPEC_MAXDEPTH |
+  PATHSPEC_LITERAL |
+  PATHSPEC_GLOB |
+  PATHSPEC_ICASE |
+  PATHSPEC_EXCLUDE);
+
+   for (i = 0; i < pathspec->nr; i++) {
+   const struct pathspec_item *item = &pathspec->items[i];
+   int len = item->nowildcard_len;
+
+   if (len > pathlen)
+   len = pathlen;
+   if (!ps_strncmp(item, item->match, path, len))
+   return 0;
}
-   return 0;
+
+   return 1;
 }
 
 /*
@@ -1380,19 +1384,33 @@ static int simplify_away(const char *path, int pathlen, 
const struct path_simpli
  *   2. the path is a directory prefix of some element in the
  *  pathspec
  */
-static int exclude_matches_pathspec(const char *path, int len,
-   const struct path_simplify *simplify)
-{
-   if (simplify) {
-   for (; simplify->path; simplify++) {
-   if (len == simplify->len
-   && !memcmp(path, simplify->path, len))
-   return 1;
-   if (len < simplify->len
-   && simplify->path[len] == '/'
-   && !memcmp(path, simplify->path, len))
-   return 1;
-   }
+static int exclude_matches_pathspec(const char *path, int pathlen,
+   const struct pathspec *pathspec)
+{
+   int i;
+
+   if (!pathspec || !pathspec->nr)
+  

Re: [PATCH v4 0/5] road to reentrant real_path

2017-01-04 Thread Brandon Williams
On 01/04, Jeff King wrote:
> On Wed, Jan 04, 2017 at 07:56:02AM +0100, Torsten Bögershausen wrote:
> 
> > On 04.01.17 01:48, Jeff King wrote:
> > > On Tue, Jan 03, 2017 at 11:09:18AM -0800, Brandon Williams wrote:
> > > 
> > >> Only change with v4 is in [1/5] renaming the #define MAXSYMLINKS back to
> > >> MAXDEPTH due to a naming conflict brought up by Lars Schneider.
> > > 
> > > Hmm. Isn't MAXSYMLINKS basically what you want here, though? It what's
> > > what all other similar functions will be using.
> > > 
> > > The only problem was that we were redefining the macro. So maybe:
> > > 
> > >   #ifndef MAXSYMLINKS
> > >   #define MAXSYMLINKS 5
> > >   #endif
> > > 
> > > would be a good solution?
> > Why 5  ? (looking at the  20..30 below)
> > And why 5 on one system and e.g. on my Mac OS
> > #define MAXSYMLINKS 32  
> 
> I mentioned "5" because that is the current value of MAXDEPTH. I do
> think it would be reasonable to bump it to something higher.
> 
> > Would the same value value for all Git installations on all platforms make 
> > sense?
> > #define GITMAXSYMLINKS 20
> 
> I think it's probably more important to match the rest of the OS, so
> that open("foo") and realpath("foo") behave similarly on the same
> system. Though I think even that isn't always possible, as the limit is
> dynamic on some systems.
> 
> I think the idea of the 20-30 range is that it's small enough to catch
> an infinite loop quickly, and large enough that nobody will ever hit it
> in practice. :)

I agree that we should have similar guarantees as the OS provides,
especially if the OS already has MAXSYMLINKS defined.  What then, should
the fall back value be if the OS doesn't have this defined?  5 like we
have done historically, or something around the 20-30 range as Torsten
suggests?

-- 
Brandon Williams


Re: [PATCH v4 4/5] real_path: have callers use real_pathdup and strbuf_realpath

2017-01-04 Thread Brandon Williams
On 01/03, Jacob Keller wrote:
> On Tue, Jan 3, 2017 at 11:09 AM, Brandon Williams  wrote:
> > Migrate callers of real_path() who duplicate the retern value to use
> > real_pathdup or strbuf_realpath.
> 
> Nit: s/retern/return

Thanks for catching that, I'll fix that in the next reroll.

-- 
Brandon Williams


Re: [PATCH 2/2] pathspec: give better message for submodule related pathspec error

2017-01-04 Thread Brandon Williams
On 01/04, Stefan Beller wrote:
> On Tue, Jan 3, 2017 at 11:55 PM, Jeff King  wrote:
> > But as this commit message needs to stand on its own, rather than as part 
> > of a
> > larger discussion thread, it might be worth expanding "one of the cases"
> > here. And talking about what's happening to the other cases.
> >
> > Like:
> >
> >   This assertion triggered for cases where there wasn't a programming
> >   bug, but just bogus input. In particular, if the user asks for a
> >   pathspec that is inside a submodule, we shouldn't assert() or
> >   die("BUG"); we should tell the user their request is bogus.
> >
> >   We'll retain the assertion for non-submodule cases, though. We don't
> >   know of any cases that would trigger this, but it _would_ be
> >   indicative of a programming error, and we should catch it here.
> 
> makes sense.
> 
> >
> > or something. Writing the first paragraph made me wonder if a better
> > solution, though, would be to catch and complain about this case
> > earlier. IOW, this _is_ a programming bug, because we're violating some
> > assumption of the pathspec code. And whatever is putting that item into
> > the pathspec list is what should be fixed.
> >
> > I haven't looked closely enough to have a real opinion on that, though.
> 
> Well I think you get different behavior with different flags enabled, i.e.
> the test provided is a cornercase (as "git add ." in the submodule should
> not yell at us IF PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE
> were set, in my understanding of the code, so maybe the test rather adds
> a ./file/with/characters inside the submodule directory)
> 
> I think a valid long term vision would be to have
> 
> $ git -C submodule add file
> $ echo $?
> 0
> 
> to behave the same as
> 
> $ git add submodule/file
> advice/hint: adding file inside of a submodule
> $ echo $?
> 0
> $ git -c submodule.iKnowWhatIDo add submodule/anotherfile
> $ echo $?
> 0
> 
> Brandon, who is refactoring the pathspec stuff currently may have
> an opinion if we could catch it earlier and still have beautiful code.
> 
> Thanks,
> Stefan
> 
> > Given the discussion, this comment seems funny now. Who cares about
> > "historically"? It should probably be something like:
> >
> >   /*
> >* This case can be triggered by the user pointing us to a pathspec
> >* inside a submodule, which is an input error. Detect that here
> >* and complain, but fallback in the non-submodule case to a BUG,
> >* as we have no idea what would trigger that.
> >*/
> 
> Makes sense.
> 
> >
> > -Peff

So there are really two different things going on in the pathspec code
with regards to submodules.

The case that this series is trying to solve is not because the user
provided a pathspec into a submodule, but rather they are executing in
the context of the submodule with bogus state.  Typically this bogus
state has something to do with the submodule's .gitdir being blown away
(like in the last test (3/3) added in this patch).  Because the
submodule doesn't have a .gitdir, it searches backward in the directory
hierarchy for a .gitdir and it happens to find the superproject's gitdir
and uses that as its own .gitdir.  When this happens test 3/3 catches
that assert with the prefix being "sub/" and match being "sub" (since
the submodule slash was removed).  The condition doesn't trigger when
you supply a pathspec of "./b/a" assuming you have a file 'a' in
directory 'b' inside the submodule, since the prefix would still be
"sub/" while the match string would be "sub/b/a".  Coincidentally the
check that PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE does, does in fact
catch it (if using say the 'git add' command).

This leads me into the second case.  If
PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE is set, then any pathspec which
descends into a submodule will indeed be caught and cause an error (as
happens in test 2/3 in this patch).

So in my opinion, the assert at the end of constructing a
pathspec object probably isn't the best place for determining if the
submodule's gitdir has been destroyed and instead it has fallen back to
its parent's gitdir.  A check for something like this should happen much
sooner.

There are cases where it is advantageous to be able to supply a pathspec
into a submodule without it erroring out (git grep --recurse-submodules
is one example).  So right now the current method for not allowing a
pathspec into a submodule is to pass the STRIP_SUBMODULE_SLASH_EXPENSIVE
flag when creating the pathspec object.

-- 
Brandon Williams


[PATCH v5 1/5] real_path: resolve symlinks by hand

2017-01-04 Thread Brandon Williams
The current implementation of real_path uses chdir() in order to resolve
symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
process as a whole and not just an individual thread.  Instead perform
the symlink resolution by hand so that the calls to chdir() can be
removed, making real_path one step closer to being reentrant.

Signed-off-by: Brandon Williams 
---
 abspath.c | 194 ++
 1 file changed, 133 insertions(+), 61 deletions(-)

diff --git a/abspath.c b/abspath.c
index 2825de859..629201e48 100644
--- a/abspath.c
+++ b/abspath.c
@@ -11,8 +11,47 @@ int is_directory(const char *path)
return (!stat(path, &st) && S_ISDIR(st.st_mode));
 }
 
+/* removes the last path component from 'path' except if 'path' is root */
+static void strip_last_component(struct strbuf *path)
+{
+   size_t offset = offset_1st_component(path->buf);
+   size_t len = path->len;
+
+   /* Find start of the last component */
+   while (offset < len && !is_dir_sep(path->buf[len - 1]))
+   len--;
+   /* Skip sequences of multiple path-separators */
+   while (offset < len && is_dir_sep(path->buf[len - 1]))
+   len--;
+
+   strbuf_setlen(path, len);
+}
+
+/* get (and remove) the next component in 'remaining' and place it in 'next' */
+static void get_next_component(struct strbuf *next, struct strbuf *remaining)
+{
+   char *start = NULL;
+   char *end = NULL;
+
+   strbuf_reset(next);
+
+   /* look for the next component */
+   /* Skip sequences of multiple path-separators */
+   for (start = remaining->buf; is_dir_sep(*start); start++)
+   ; /* nothing */
+   /* Find end of the path component */
+   for (end = start; *end && !is_dir_sep(*end); end++)
+   ; /* nothing */
+
+   strbuf_add(next, start, end - start);
+   /* remove the component from 'remaining' */
+   strbuf_remove(remaining, 0, end - remaining->buf);
+}
+
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
+#ifndef MAXSYMLINKS
+#define MAXSYMLINKS 32
+#endif
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -21,7 +60,6 @@ int is_directory(const char *path)
  * absolute_path().)  The return value is a pointer to a static
  * buffer.
  *
- * The input and all intermediate paths must be shorter than MAX_PATH.
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
@@ -33,22 +71,16 @@ int is_directory(const char *path)
  */
 static const char *real_path_internal(const char *path, int die_on_error)
 {
-   static struct strbuf sb = STRBUF_INIT;
+   static struct strbuf resolved = STRBUF_INIT;
+   struct strbuf remaining = STRBUF_INIT;
+   struct strbuf next = STRBUF_INIT;
+   struct strbuf symlink = STRBUF_INIT;
char *retval = NULL;
-
-   /*
-* If we have to temporarily chdir(), store the original CWD
-* here so that we can chdir() back to it at the end of the
-* function:
-*/
-   struct strbuf cwd = STRBUF_INIT;
-
-   int depth = MAXDEPTH;
-   char *last_elem = NULL;
+   int num_symlinks = 0;
struct stat st;
 
/* We've already done it */
-   if (path == sb.buf)
+   if (path == resolved.buf)
return path;
 
if (!*path) {
@@ -58,74 +90,114 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
goto error_out;
}
 
-   strbuf_reset(&sb);
-   strbuf_addstr(&sb, path);
-
-   while (depth--) {
-   if (!is_directory(sb.buf)) {
-   char *last_slash = find_last_dir_sep(sb.buf);
-   if (last_slash) {
-   last_elem = xstrdup(last_slash + 1);
-   strbuf_setlen(&sb, last_slash - sb.buf + 1);
-   } else {
-   last_elem = xmemdupz(sb.buf, sb.len);
-   strbuf_reset(&sb);
-   }
+   strbuf_reset(&resolved);
+
+   if (is_absolute_path(path)) {
+   /* absolute path; start with only root as being resolved */
+   int offset = offset_1st_component(path);
+   strbuf_add(&resolved, path, offset);
+   strbuf_addstr(&remaining, path + offset);
+   } else {
+   /* relative path; can use CWD as the initial resolved path */
+   if (strbuf_getcwd(&resolved)) {
+   if (die_on_error)
+   die_errno("unable to get curr

[PATCH v5 4/5] real_path: have callers use real_pathdup and strbuf_realpath

2017-01-04 Thread Brandon Williams
Migrate callers of real_path() who duplicate the return value to use
real_pathdup or strbuf_realpath.

Signed-off-by: Brandon Williams 
---
 builtin/init-db.c |  6 +++---
 environment.c |  2 +-
 setup.c   | 13 -
 sha1_file.c   |  2 +-
 submodule.c   |  2 +-
 transport.c   |  2 +-
 worktree.c|  2 +-
 7 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/builtin/init-db.c b/builtin/init-db.c
index 2399b97d9..76d68fad0 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -338,7 +338,7 @@ int init_db(const char *git_dir, const char *real_git_dir,
 {
int reinit;
int exist_ok = flags & INIT_DB_EXIST_OK;
-   char *original_git_dir = xstrdup(real_path(git_dir));
+   char *original_git_dir = real_pathdup(git_dir);
 
if (real_git_dir) {
struct stat st;
@@ -489,7 +489,7 @@ int cmd_init_db(int argc, const char **argv, const char 
*prefix)
argc = parse_options(argc, argv, prefix, init_db_options, 
init_db_usage, 0);
 
if (real_git_dir && !is_absolute_path(real_git_dir))
-   real_git_dir = xstrdup(real_path(real_git_dir));
+   real_git_dir = real_pathdup(real_git_dir);
 
if (argc == 1) {
int mkdir_tried = 0;
@@ -560,7 +560,7 @@ int cmd_init_db(int argc, const char **argv, const char 
*prefix)
const char *git_dir_parent = strrchr(git_dir, '/');
if (git_dir_parent) {
char *rel = xstrndup(git_dir, git_dir_parent - git_dir);
-   git_work_tree_cfg = xstrdup(real_path(rel));
+   git_work_tree_cfg = real_pathdup(rel);
free(rel);
}
if (!git_work_tree_cfg)
diff --git a/environment.c b/environment.c
index 0935ec696..9b943d2d5 100644
--- a/environment.c
+++ b/environment.c
@@ -259,7 +259,7 @@ void set_git_work_tree(const char *new_work_tree)
return;
}
git_work_tree_initialized = 1;
-   work_tree = xstrdup(real_path(new_work_tree));
+   work_tree = real_pathdup(new_work_tree);
 }
 
 const char *get_git_work_tree(void)
diff --git a/setup.c b/setup.c
index fe572b82c..1b534a750 100644
--- a/setup.c
+++ b/setup.c
@@ -256,8 +256,10 @@ int get_common_dir_noenv(struct strbuf *sb, const char 
*gitdir)
strbuf_addbuf(&path, &data);
strbuf_addstr(sb, real_path(path.buf));
ret = 1;
-   } else
+   } else {
strbuf_addstr(sb, gitdir);
+   }
+
strbuf_release(&data);
strbuf_release(&path);
return ret;
@@ -692,7 +694,7 @@ static const char *setup_discovered_git_dir(const char 
*gitdir,
/* --work-tree is set without --git-dir; use discovered one */
if (getenv(GIT_WORK_TREE_ENVIRONMENT) || git_work_tree_cfg) {
if (offset != cwd->len && !is_absolute_path(gitdir))
-   gitdir = xstrdup(real_path(gitdir));
+   gitdir = real_pathdup(gitdir);
if (chdir(cwd->buf))
die_errno("Could not come back to cwd");
return setup_explicit_git_dir(gitdir, cwd, nongit_ok);
@@ -800,11 +802,12 @@ static int canonicalize_ceiling_entry(struct 
string_list_item *item,
/* Keep entry but do not canonicalize it */
return 1;
} else {
-   const char *real_path = real_path_if_valid(ceil);
-   if (!real_path)
+   char *real_path = real_pathdup(ceil);
+   if (!real_path) {
return 0;
+   }
free(item->string);
-   item->string = xstrdup(real_path);
+   item->string = real_path;
return 1;
}
 }
diff --git a/sha1_file.c b/sha1_file.c
index 9c86d1924..6a03cc393 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -291,7 +291,7 @@ static int link_alt_odb_entry(const char *entry, const char 
*relative_base,
struct strbuf pathbuf = STRBUF_INIT;
 
if (!is_absolute_path(entry) && relative_base) {
-   strbuf_addstr(&pathbuf, real_path(relative_base));
+   strbuf_realpath(&pathbuf, relative_base, 1);
strbuf_addch(&pathbuf, '/');
}
strbuf_addstr(&pathbuf, entry);
diff --git a/submodule.c b/submodule.c
index ece17315d..55819ba9c 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1301,7 +1301,7 @@ void connect_work_tree_and_git_dir(const char *work_tree, 
const char *git_dir)
 {
struct strbuf file_name = STRBUF_INIT;
struct strbuf rel_path = STRBUF_INIT;
-   const char *real_work_tree = xstrdup(real_path(work_tree));
+   const char *real_work_tree = real_pathdup(work_tree);
 
/* Update gitfile */
strbuf_addf(&file_name, &quo

[PATCH v5 3/5] real_path: create real_pathdup

2017-01-04 Thread Brandon Williams
Create real_pathdup, which returns a caller-owned string containing the
resolved realpath of the provided path.

Signed-off-by: Brandon Williams 
---
 abspath.c | 13 +
 cache.h   |  1 +
 2 files changed, 14 insertions(+)

diff --git a/abspath.c b/abspath.c
index a200d4220..72f716f80 100644
--- a/abspath.c
+++ b/abspath.c
@@ -209,6 +209,19 @@ const char *real_path_if_valid(const char *path)
return strbuf_realpath(&realpath, path, 0);
 }
 
+char *real_pathdup(const char *path)
+{
+   struct strbuf realpath = STRBUF_INIT;
+   char *retval = NULL;
+
+   if (strbuf_realpath(&realpath, path, 0))
+   retval = strbuf_detach(&realpath, NULL);
+
+   strbuf_release(&realpath);
+
+   return retval;
+}
+
 /*
  * Use this to get an absolute path from a relative one. If you want
  * to resolve links, you should use real_path.
diff --git a/cache.h b/cache.h
index 7a8129403..e12a5d912 100644
--- a/cache.h
+++ b/cache.h
@@ -1068,6 +1068,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
  int die_on_error);
 const char *real_path(const char *path);
 const char *real_path_if_valid(const char *path);
+char *real_pathdup(const char *path);
 const char *absolute_path(const char *path);
 const char *remove_leading_path(const char *in, const char *prefix);
 const char *relative_path(const char *in, const char *prefix, struct strbuf 
*sb);
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 0/5] road to reentrant real_path

2017-01-04 Thread Brandon Williams
changes in v5:
* set errno to ELOOP when MAXSYMLINKS is exceeded.
* revert to use MAXSYMLINKS instead of MAXDEPTH.
* If the OS hasn't defined MAXSYMLINKS, use a fallback value of 32.

Brandon Williams (4):
  real_path: resolve symlinks by hand
  real_path: convert real_path_internal to strbuf_realpath
  real_path: create real_pathdup
  real_path: have callers use real_pathdup and strbuf_realpath

Johannes Sixt (1):
  real_path: canonicalize directory separators in root parts

 abspath.c | 231 +-
 builtin/init-db.c |   6 +-
 cache.h   |   3 +
 environment.c |   2 +-
 setup.c   |  13 +--
 sha1_file.c   |   2 +-
 submodule.c   |   2 +-
 transport.c   |   2 +-
 worktree.c|   2 +-
 9 files changed, 178 insertions(+), 85 deletions(-)

--- interdiff with v4

diff --git a/abspath.c b/abspath.c
index 3562d17bf..fce40fddc 100644
--- a/abspath.c
+++ b/abspath.c
@@ -62,7 +62,9 @@ static void get_root_part(struct strbuf *resolved, struct 
strbuf *remaining)
 }
 
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
+#ifndef MAXSYMLINKS
+#define MAXSYMLINKS 32
+#endif
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -138,10 +140,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
ssize_t len;
strbuf_reset(&symlink);
 
-   if (num_symlinks++ > MAXDEPTH) {
+   if (num_symlinks++ > MAXSYMLINKS) {
+   errno = ELOOP;
+
if (die_on_error)
die("More than %d nested symlinks "
-   "on path '%s'", MAXDEPTH, path);
+   "on path '%s'", MAXSYMLINKS, path);
else
goto error_out;
}

-- 
2.11.0.390.gc69c2f50cf-goog



Re: [PATCH v4 0/5] road to reentrant real_path

2017-01-04 Thread Brandon Williams
On 01/04, Jacob Keller wrote:
> On Wed, Jan 4, 2017 at 10:22 AM, Stefan Beller  wrote:
> > On Wed, Jan 4, 2017 at 10:13 AM, Brandon Williams  wrote:
> >> On 01/04, Jeff King wrote:
> >>> On Wed, Jan 04, 2017 at 07:56:02AM +0100, Torsten Bögershausen wrote:
> >>>
> >>> > On 04.01.17 01:48, Jeff King wrote:
> >>> > > On Tue, Jan 03, 2017 at 11:09:18AM -0800, Brandon Williams wrote:
> >>> > >
> >>> > >> Only change with v4 is in [1/5] renaming the #define MAXSYMLINKS 
> >>> > >> back to
> >>> > >> MAXDEPTH due to a naming conflict brought up by Lars Schneider.
> >>> > >
> >>> > > Hmm. Isn't MAXSYMLINKS basically what you want here, though? It's
> >>> > > what all other similar functions will be using.
> >>> > >
> >>> > > The only problem was that we were redefining the macro. So maybe:
> >>> > >
> >>> > >   #ifndef MAXSYMLINKS
> >>> > >   #define MAXSYMLINKS 5
> >>> > >   #endif
> >>> > >
> >>> > > would be a good solution?
> >>> > Why 5  ? (looking at the  20..30 below)
> >>> > And why 5 on one system and e.g. on my Mac OS
> >>> > #define MAXSYMLINKS 32
> >>>
> >>> I mentioned "5" because that is the current value of MAXDEPTH. I do
> >>> think it would be reasonable to bump it to something higher.
> >>>
> >>> > Would the same value for all Git installations on all platforms
> >>> > make sense?
> >>> > #define GITMAXSYMLINKS 20
> >>>
> >>> I think it's probably more important to match the rest of the OS, so
> >>> that open("foo") and realpath("foo") behave similarly on the same
> >>> system. Though I think even that isn't always possible, as the limit is
> >>> dynamic on some systems.
> >>>
> >>> I think the idea of the 20-30 range is that it's small enough to catch
> >>> an infinite loop quickly, and large enough that nobody will ever hit it
> >>> in practice. :)
> >>
> >> I agree that we should have similar guarantees as the OS provides,
> >> especially if the OS already has MAXSYMLINKS defined.  What then, should
> >> the fall back value be if the OS doesn't have this defined?  5 like we
> >> have done historically, or something around the 20-30 range as Torsten
> >> suggests?
> >
> > As a fallback I'd rather go for a larger number than too small.
> > The reason for the existence is just to break an infinite loop
> > (and report an error? which the current code doesn't quite do,
> > but your series actually does).
> >
> > If the number is too large, then it takes a bit longer to generate the error
> > message, but the error path is no big deal w.r.t. performance, so it's fine
> > for it taking a bit longer.
> >
> > If the number is too low, then we may hinder people from getting actual
> > work done, (i.e. they have to figure out what the problem is and recompile
> > git), so I'd think a larger number is not harmful. So 32?
> >
> 
> I think I agree as well.
> 
> Thanks,
> Jake
> 
> >>
> >> --
> >> Brandon Williams

That's two people for 32.  I'll use that as the fallback and resend the
series.

-- 
Brandon Williams


[PATCH v5 5/5] real_path: canonicalize directory separators in root parts

2017-01-04 Thread Brandon Williams
From: Johannes Sixt 

When an absolute path is resolved, resolution begins at the first path
component after the root part. The root part is just copied verbatim,
because it must not be inspected for symbolic links. For POSIX paths,
this is just the initial slash, but on Windows, the root part has the
forms c:\ or \\server\share. We do want to canonicalize the back-slashes
in the root part because these parts are compared to the result of
getcwd(), which does return a fully canonicalized path.

Factor out a helper that splits off the root part, and have it
canonicalize the copied part.

This change was prompted because t1504-ceiling-dirs.sh caught a breakage
in GIT_CEILING_DIRECTORIES handling on Windows.

Signed-off-by: Johannes Sixt 
---
 abspath.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/abspath.c b/abspath.c
index 72f716f80..fce40fddc 100644
--- a/abspath.c
+++ b/abspath.c
@@ -48,6 +48,19 @@ static void get_next_component(struct strbuf *next, struct 
strbuf *remaining)
strbuf_remove(remaining, 0, end - remaining->buf);
 }
 
+/* copies root part from remaining to resolved, canonicalizing it on the way */
+static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
+{
+   int offset = offset_1st_component(remaining->buf);
+
+   strbuf_reset(resolved);
+   strbuf_add(resolved, remaining->buf, offset);
+#ifdef GIT_WINDOWS_NATIVE
+   convert_slashes(resolved->buf);
+#endif
+   strbuf_remove(remaining, 0, offset);
+}
+
 /* We allow "recursive" symbolic links. Only within reason, though. */
 #ifndef MAXSYMLINKS
 #define MAXSYMLINKS 32
@@ -82,14 +95,10 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
goto error_out;
}
 
-   strbuf_reset(resolved);
+   strbuf_addstr(&remaining, path);
+   get_root_part(resolved, &remaining);
 
-   if (is_absolute_path(path)) {
-   /* absolute path; start with only root as being resolved */
-   int offset = offset_1st_component(path);
-   strbuf_add(resolved, path, offset);
-   strbuf_addstr(&remaining, path + offset);
-   } else {
+   if (!resolved->len) {
/* relative path; can use CWD as the initial resolved path */
if (strbuf_getcwd(resolved)) {
if (die_on_error)
@@ -97,7 +106,6 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
else
goto error_out;
}
-   strbuf_addstr(&remaining, path);
}
 
/* Iterate over the remaining path components */
@@ -154,10 +162,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
 
if (is_absolute_path(symlink.buf)) {
/* absolute symlink; set resolved to root */
-   int offset = offset_1st_component(symlink.buf);
-   strbuf_reset(resolved);
-   strbuf_add(resolved, symlink.buf, offset);
-   strbuf_remove(&symlink, 0, offset);
+   get_root_part(resolved, &symlink);
} else {
/*
 * relative symlink
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH v5 2/5] real_path: convert real_path_internal to strbuf_realpath

2017-01-04 Thread Brandon Williams
Change the name of real_path_internal to strbuf_realpath.  In addition
push the static strbuf up to its callers and instead take as a
parameter a pointer to a strbuf to use for the final result.

This change makes strbuf_realpath reentrant.

Signed-off-by: Brandon Williams 
---
 abspath.c | 53 +
 cache.h   |  2 ++
 2 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/abspath.c b/abspath.c
index 629201e48..a200d4220 100644
--- a/abspath.c
+++ b/abspath.c
@@ -57,21 +57,17 @@ static void get_next_component(struct strbuf *next, struct 
strbuf *remaining)
  * Return the real path (i.e., absolute path, with symlinks resolved
  * and extra slashes removed) equivalent to the specified path.  (If
  * you want an absolute path but don't mind links, use
- * absolute_path().)  The return value is a pointer to a static
- * buffer.
+ * absolute_path().)  Places the resolved realpath in the provided strbuf.
  *
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
  * informative error message if there is a problem.  Otherwise, return
  * NULL on errors (without generating any output).
- *
- * If path is our buffer, then return path, as it's already what the
- * user wants.
  */
-static const char *real_path_internal(const char *path, int die_on_error)
+char *strbuf_realpath(struct strbuf *resolved, const char *path,
+ int die_on_error)
 {
-   static struct strbuf resolved = STRBUF_INIT;
struct strbuf remaining = STRBUF_INIT;
struct strbuf next = STRBUF_INIT;
struct strbuf symlink = STRBUF_INIT;
@@ -79,10 +75,6 @@ static const char *real_path_internal(const char *path, int 
die_on_error)
int num_symlinks = 0;
struct stat st;
 
-   /* We've already done it */
-   if (path == resolved.buf)
-   return path;
-
if (!*path) {
if (die_on_error)
die("The empty string is not a valid path");
@@ -90,16 +82,16 @@ static const char *real_path_internal(const char *path, int 
die_on_error)
goto error_out;
}
 
-   strbuf_reset(&resolved);
+   strbuf_reset(resolved);
 
if (is_absolute_path(path)) {
/* absolute path; start with only root as being resolved */
int offset = offset_1st_component(path);
-   strbuf_add(&resolved, path, offset);
+   strbuf_add(resolved, path, offset);
strbuf_addstr(&remaining, path + offset);
} else {
/* relative path; can use CWD as the initial resolved path */
-   if (strbuf_getcwd(&resolved)) {
+   if (strbuf_getcwd(resolved)) {
if (die_on_error)
die_errno("unable to get current working 
directory");
else
@@ -118,21 +110,21 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
continue; /* '.' component */
} else if (next.len == 2 && !strcmp(next.buf, "..")) {
/* '..' component; strip the last path component */
-   strip_last_component(&resolved);
+   strip_last_component(resolved);
continue;
}
 
/* append the next component and resolve resultant path */
-   if (!is_dir_sep(resolved.buf[resolved.len - 1]))
-   strbuf_addch(&resolved, '/');
-   strbuf_addbuf(&resolved, &next);
+   if (!is_dir_sep(resolved->buf[resolved->len - 1]))
+   strbuf_addch(resolved, '/');
+   strbuf_addbuf(resolved, &next);
 
-   if (lstat(resolved.buf, &st)) {
+   if (lstat(resolved->buf, &st)) {
/* error out unless this was the last component */
if (errno != ENOENT || remaining.len) {
if (die_on_error)
die_errno("Invalid path '%s'",
- resolved.buf);
+ resolved->buf);
else
goto error_out;
}
@@ -150,12 +142,12 @@ static const char *real_path_internal(const char *path, 
int die_on_error)
goto error_out;
}
 
-   len = strbuf_readlink(&symlink, resolved.buf,
+   len = strbuf_readlin

Re: [PATCHv5] pathspec: give better message for submodule related pathspec error

2017-01-04 Thread Brandon Williams
7 @@ static unsigned prefix_pathspec(struct pathspec_item 
> *item,
>   }
>  
>   if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
> - for (i = 0; i < active_nr; i++) {
> - struct cache_entry *ce = active_cache[i];
> - int ce_len = ce_namelen(ce);
> -
> - if (!S_ISGITLINK(ce->ce_mode))
> - continue;
> -
> - if (item->len <= ce_len || match[ce_len] != '/' ||
> - memcmp(ce->name, match, ce_len))
> - continue;
> - if (item->len == ce_len + 1) {
> - /* strip trailing slash */
> - item->len--;
> - match[item->len] = '\0';
> - } else
> - die (_("Pathspec '%s' is in submodule '%.*s'"),
> -  elt, ce_len, ce->name);
> - }
> + check_inside_submodule_expensive(item, match, elt, 0);
>  
>   if (magic & PATHSPEC_LITERAL)
>   item->nowildcard_len = item->len;
> @@ -313,8 +324,20 @@ static unsigned prefix_pathspec(struct pathspec_item 
> *item,
>   }
>  
>   /* sanity checks, pathspec matchers assume these are sane */
> - assert(item->nowildcard_len <= item->len &&
> -item->prefix <= item->len);
> + if (item->nowildcard_len > item->len ||
> + item->prefix > item->len) {
> + /*
> +  * We know something is fishy and we're going to die
> +  * anyway, so we don't care about following operation
> +  * to be expensive, despite the caller not asking for
> +  * an expensive submodule check. The potential expensive
> +  * operation here reduces the users head scratching
> +  * greatly, though.
> +  */
> + check_inside_submodule_expensive(item, match, elt, 1);
> + die ("BUG: item->nowildcard_len > item->len || item->prefix > 
> item->len)");
> + }
> +
>   return magic;
>  }
>  
> diff --git a/t/t6134-pathspec-in-submodule.sh 
> b/t/t6134-pathspec-in-submodule.sh
> new file mode 100755
> index 00..2900d8d06e
> --- /dev/null
> +++ b/t/t6134-pathspec-in-submodule.sh
> @@ -0,0 +1,33 @@
> +#!/bin/sh
> +
> +test_description='test case exclude pathspec'
> +
> +TEST_CREATE_SUBMODULE=yes
> +. ./test-lib.sh
> +
> +test_expect_success 'setup a submodule' '
> + git submodule add ./pretzel.bare sub &&
> + git commit -a -m "add submodule" &&
> +     git submodule deinit --all
> +'
> +
> +cat <expect
> +fatal: Pathspec 'sub/a' is in submodule 'sub'
> +EOF
> +
> +test_expect_success 'error message for path inside submodule' '
> + echo a >sub/a &&
> + test_must_fail git add sub/a 2>actual &&
> + test_cmp expect actual
> +'
> +
> +cat <expect
> +fatal: Pathspec '.' is in submodule 'sub'
> +EOF
> +
> +test_expect_success 'error message for path inside submodule from within 
> submodule' '
> + test_must_fail git -C sub add . 2>actual &&
> + test_cmp expect actual
> +'
> +
> +test_done

I haven't taken a thorough look at this patch but I think you may want to
base it off of the 'origin/bw/pathspec-cleanup' series as the changes made in
this patch now conflict with that series.

Also I still don't really think this solves the problem of telling the
user what is wrong, which is that the submodule's gitdir is gone.

-- 
Brandon Williams


Re: [PATCH v5 00/16] pathspec cleanup

2017-01-09 Thread Brandon Williams
On 01/07, Duy Nguyen wrote:
> On Thu, Jan 5, 2017 at 1:03 AM, Brandon Williams  wrote:
> > Changes in v5:
> > * Move GUARD_PATHSPEC to prevent checking if pathspec is null twice.
> > * Mark a string containing 'mnemonic' for translation.
> 
> Argh.. I've run out of things to complain about! Ack!

haha! Well thanks again for your help in reviewing this series!

-- 
Brandon Williams


Re: [PATCH v5 0/5] road to reentrant real_path

2017-01-09 Thread Brandon Williams
On 01/07, Junio C Hamano wrote:
> Brandon Williams  writes:
> 
> > changes in v5:
> > * set errno to ELOOP when MAXSYMLINKS is exceeded.
> > * revert to use MAXSYMLINKS instead of MAXDEPTH.
> > * If the OS hasn't defined MAXSYMLINKS, use a fallback value of 32.
> >
> > Brandon Williams (4):
> >   real_path: resolve symlinks by hand
> >   real_path: convert real_path_internal to strbuf_realpath
> >   real_path: create real_pathdup
> >   real_path: have callers use real_pathdup and strbuf_realpath
> >
> > Johannes Sixt (1):
> >   real_path: canonicalize directory separators in root parts
> >
> 
> How does this relate to the 5-patch real_path: series that has been
> on 'next' since last year?

The only difference should be in the first patch of the series which
handles the #define a bit differently due to the discussion that
happened last week.

Here is the interdiff between v5 and 'origin/bw/realpath-wo-chdir':

diff --git a/abspath.c b/abspath.c
index 1d56f5ed9..fce40fddc 100644
--- a/abspath.c
+++ b/abspath.c
@@ -62,7 +62,9 @@ static void get_root_part(struct strbuf *resolved, struct 
strbuf *remaining)
 }
 
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXSYMLINKS 5
+#ifndef MAXSYMLINKS
+#define MAXSYMLINKS 32
+#endif
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -139,6 +141,8 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
strbuf_reset(&symlink);
 
if (num_symlinks++ > MAXSYMLINKS) {
+   errno = ELOOP;
+
if (die_on_error)
    die("More than %d nested symlinks "
"on path '%s'", MAXSYMLINKS, path);

-- 
Brandon Williams


Re: [PATCH v5 0/5] road to reentrant real_path

2017-01-09 Thread Brandon Williams
On 01/09, Junio C Hamano wrote:
> Brandon Williams  writes:
> 
> >> How does this relate to the 5-patch real_path: series that has been
> >> on 'next' since last year?
> >
> > The only difference should be in the first patch of the series which
> > handles the #define a bit differently due to the discussion that
> > happened last week.
> >
> > Here is the interdiff between v5 and 'origin/bw/realpath-wo-chdir':
> 
> Then can you make that an incremental patch (or two) with its own
> log message instead?  It (or they) would look and smell like a
> bugfix patch that follows up a change that has already landed.  As
> you know, we won't eject and replace patches that are already in
> 'next' during a development cycle.
> 
> Thanks.

Yes I'll get right on that.

-- 
Brandon Williams


[PATCH 1/2] real_path: prevent redefinition of MAXSYMLINKS

2017-01-09 Thread Brandon Williams
The macro 'MAXSYMLINKS' is already defined on macOS and Linux in
<sys/param.h>.  If 'MAXSYMLINKS' has already been defined, use the value
defined by the OS; otherwise default to a value of 32, which is more
in line with what is allowed by many systems.

Signed-off-by: Brandon Williams 
---
 abspath.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/abspath.c b/abspath.c
index 1d56f5ed9..0393213e5 100644
--- a/abspath.c
+++ b/abspath.c
@@ -62,7 +62,9 @@ static void get_root_part(struct strbuf *resolved, struct 
strbuf *remaining)
 }
 
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXSYMLINKS 5
+#ifndef MAXSYMLINKS
+#define MAXSYMLINKS 32
+#endif
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH 2/2] real_path: set errno when max number of symlinks is exceeded

2017-01-09 Thread Brandon Williams
Set errno to ELOOP when the maximum number of symlinks is exceeded, as
would be done by other symlink-resolving functions.

Signed-off-by: Brandon Williams 
---
 abspath.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/abspath.c b/abspath.c
index 0393213e5..fce40fddc 100644
--- a/abspath.c
+++ b/abspath.c
@@ -141,6 +141,8 @@ char *strbuf_realpath(struct strbuf *resolved, const char 
*path,
strbuf_reset(&symlink);
 
if (num_symlinks++ > MAXSYMLINKS) {
+   errno = ELOOP;
+
if (die_on_error)
die("More than %d nested symlinks "
"on path '%s'", MAXSYMLINKS, path);
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH] index: improve constness for reading blob data

2017-01-10 Thread Brandon Williams
Improve constness of the index_state parameter to the
'read_blob_data_from_index' function.

Signed-off-by: Brandon Williams 
---
 cache.h  | 2 +-
 read-cache.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index a50a61a19..363953c1a 100644
--- a/cache.h
+++ b/cache.h
@@ -599,7 +599,7 @@ extern int chmod_index_entry(struct index_state *, struct 
cache_entry *ce, char
 extern int ce_same_name(const struct cache_entry *a, const struct cache_entry 
*b);
 extern void set_object_name_for_intent_to_add_entry(struct cache_entry *ce);
 extern int index_name_is_other(const struct index_state *, const char *, int);
-extern void *read_blob_data_from_index(struct index_state *, const char *, 
unsigned long *);
+extern void *read_blob_data_from_index(const struct index_state *, const char 
*, unsigned long *);
 
 /* do stat comparison even if CE_VALID is true */
 #define CE_MATCH_IGNORE_VALID  01
diff --git a/read-cache.c b/read-cache.c
index f92a912dc..6ee02 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2292,7 +2292,8 @@ int index_name_is_other(const struct index_state *istate, 
const char *name,
return 1;
 }
 
-void *read_blob_data_from_index(struct index_state *istate, const char *path, 
unsigned long *size)
+void *read_blob_data_from_index(const struct index_state *istate,
+   const char *path, unsigned long *size)
 {
int pos, len;
unsigned long sz;
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH 23/27] attr: remove maybe-real, maybe-macro from git_attr

2017-01-12 Thread Brandon Williams
Whether or not a git attribute is real or a macro isn't a property of
the attribute but rather it depends on the attribute stack (which
.gitattributes files were read).

This patch removes the 'maybe_real' and 'maybe_macro' fields in a
git_attr and instead adds the 'macro' field to an attr_check_item.  The
'macro' field indicates (if non-NULL) that a particular attribute is a macro
for the given attribute stack.  It's populated, through a quick scan of
the attribute stack, with the match_attr that corresponds to the macro's
definition.  This way the attribute stack only needs to be scanned a
single time prior to attribute collection instead of each time a macro
needs to be expanded.

Signed-off-by: Brandon Williams 
---
 attr.c | 69 ++
 attr.h |  6 ++
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/attr.c b/attr.c
index 38b0d4347..633a12cc3 100644
--- a/attr.c
+++ b/attr.c
@@ -30,20 +30,9 @@ static const char git_attr__unknown[] = "(builtin)unknown";
 
 struct git_attr {
int attr_nr; /* unique attribute number */
-   int maybe_macro;
-   int maybe_real;
char name[FLEX_ARRAY]; /* attribute name */
 };
 
-/*
- * NEEDSWORK: maybe-real, maybe-macro are not property of
- * an attribute, as it depends on what .gitattributes are
- * read.  Once we introduce per git_attr_check attr_stack
- * and check_all_attr, the optimization based on them will
- * become unnecessary and can go away.  So is this variable.
- */
-static int cannot_trust_maybe_real;
-
 const char *git_attr_name(const struct git_attr *attr)
 {
return attr->name;
@@ -182,6 +171,7 @@ static void all_attrs_init(struct attr_hashmap *map, struct 
attr_check *check)
 */
for (i = 0; i < check->all_attrs_nr; i++) {
check->all_attrs[i].value = ATTR__UNKNOWN;
+   check->all_attrs[i].macro = NULL;
}
 }
 
@@ -233,8 +223,6 @@ static struct git_attr *git_attr_internal(const char *name, 
int namelen)
if (!a) {
FLEX_ALLOC_MEM(a, name, name, namelen);
a->attr_nr = g_attr_hashmap.map.size;
-   a->maybe_real = 0;
-   a->maybe_macro = 0;
 
attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
@@ -397,7 +385,6 @@ static struct match_attr *parse_attr_line(const char *line, 
const char *src,
  (is_macro ? 0 : namelen + 1));
if (is_macro) {
res->u.attr = git_attr_internal(name, namelen);
-   res->u.attr->maybe_macro = 1;
} else {
char *p = (char *)&(res->state[num_attr]);
memcpy(p, name, namelen);
@@ -418,10 +405,6 @@ static struct match_attr *parse_attr_line(const char 
*line, const char *src,
/* Second pass to fill the attr_states */
for (cp = states, i = 0; *cp; i++) {
cp = parse_attr(src, lineno, cp, &(res->state[i]));
-   if (!is_macro)
-   res->state[i].attr->maybe_real = 1;
-   if (res->state[i].attr->maybe_macro)
-   cannot_trust_maybe_real = 1;
}
 
strbuf_release(&pattern);
@@ -826,7 +809,7 @@ static int path_matches(const char *pathname, int pathlen,
 static int macroexpand_one(struct attr_check_item *all_attrs, int nr, int rem);
 
 static int fill_one(const char *what, struct attr_check_item *all_attrs,
-   struct match_attr *a, int rem)
+   const struct match_attr *a, int rem)
 {
int i;
 
@@ -867,24 +850,34 @@ static int fill(const char *path, int pathlen, int 
basename_offset,
 
 static int macroexpand_one(struct attr_check_item *all_attrs, int nr, int rem)
 {
-   struct attr_stack *stk;
-   int i;
+   const struct attr_check_item *item = &all_attrs[nr];
 
-   if (all_attrs[nr].value != ATTR__TRUE ||
-   !all_attrs[nr].attr->maybe_macro)
+   if (item->macro && item->value == ATTR__TRUE)
+   return fill_one("expand", all_attrs, item->macro, rem);
+   else
return rem;
+}
 
-   for (stk = attr_stack; stk; stk = stk->prev) {
-   for (i = stk->num_matches - 1; 0 <= i; i--) {
-   struct match_attr *ma = stk->attrs[i];
-   if (!ma->is_macro)
-   continue;
-   if (ma->u.attr->attr_nr == nr)
-   return fill_one("expand", all_attrs, ma, rem);
+/*
+ * Marks the attributes which are macros based on the attribute stack.
+ * This prevents having to search through the attribute stack each time
+ * a macro needs to be expa

[PATCH 18/27] attr: retire git_check_attrs() API

2017-01-12 Thread Brandon Williams
From: Junio C Hamano 

Since nobody uses the old API, make it file-scope static, and update
the documentation to describe the new API.

Signed-off-by: Junio C Hamano 
Signed-off-by: Stefan Beller 
Signed-off-by: Brandon Williams 
---
 Documentation/technical/api-gitattributes.txt | 86 +--
 attr.c|  3 +-
 attr.h|  1 -
 3 files changed, 58 insertions(+), 32 deletions(-)

diff --git a/Documentation/technical/api-gitattributes.txt 
b/Documentation/technical/api-gitattributes.txt
index 260266867..82f5130e7 100644
--- a/Documentation/technical/api-gitattributes.txt
+++ b/Documentation/technical/api-gitattributes.txt
@@ -16,10 +16,15 @@ Data Structure
of no interest to the calling programs.  The name of the
attribute can be retrieved by calling `git_attr_name()`.
 
-`struct git_attr_check`::
+`struct attr_check_item`::
 
-   This structure represents a set of attributes to check in a call
-   to `git_check_attr()` function, and receives the results.
+   This structure represents one attribute and its value.
+
+`struct attr_check`::
+
+   This structure represents a collection of `attr_check_item`.
+   It is passed to `git_check_attr()` function, specifying the
+   attributes to check, and receives their values.
 
 
 Attribute Values
@@ -27,7 +32,7 @@ Attribute Values
 
 An attribute for a path can be in one of four states: Set, Unset,
 Unspecified or set to a string, and `.value` member of `struct
-git_attr_check` records it.  There are three macros to check these:
+attr_check_item` records it.  There are three macros to check these:
 
 `ATTR_TRUE()`::
 
@@ -48,49 +53,51 @@ value of the attribute for the path.
 Querying Specific Attributes
 
 
-* Prepare an array of `struct git_attr_check` to define the list of
-  attributes you would want to check.  To populate this array, you would
-  need to define necessary attributes by calling `git_attr()` function.
+* Prepare `struct attr_check` using attr_check_initl()
+  function, enumerating the names of attributes whose values you are
+  interested in, terminated with a NULL pointer.  Alternatively, an
+  empty `struct attr_check` can be prepared by calling
+  `attr_check_alloc()` function and then attributes you want to
+  ask about can be added to it with `attr_check_append()`
+  function.
 
 * Call `git_check_attr()` to check the attributes for the path.
 
-* Inspect `git_attr_check` structure to see how each of the attribute in
-  the array is defined for the path.
+* Inspect the `attr_check` structure to see how each of the
+  attributes in the array is defined for the path.
 
 
 Example
 ---
 
-To see how attributes "crlf" and "indent" are set for different paths.
+To see how attributes "crlf" and "ident" are set for different paths.
 
-. Prepare an array of `struct git_attr_check` with two elements (because
-  we are checking two attributes).  Initialize their `attr` member with
-  pointers to `struct git_attr` obtained by calling `git_attr()`:
+. Prepare a `struct attr_check` with two elements (because
+  we are checking two attributes):
 
 
-static struct git_attr_check check[2];
+static struct attr_check *check;
 static void setup_check(void)
 {
-   if (check[0].attr)
+   if (check)
return; /* already done */
-   check[0].attr = git_attr("crlf");
-   check[1].attr = git_attr("ident");
+   check = attr_check_initl("crlf", "ident", NULL);
 }
 
 
-. Call `git_check_attr()` with the prepared array of `struct git_attr_check`:
+. Call `git_check_attr()` with the prepared `struct attr_check`:
 
 
const char *path;
 
setup_check();
-   git_check_attr(path, ARRAY_SIZE(check), check);
+   git_check_attr(path, check);
 
 
-. Act on `.value` member of the result, left in `check[]`:
+. Act on `.value` member of the result, left in `check->check[]`:
 
 
-   const char *value = check[0].value;
+   const char *value = check->check[0].value;
 
if (ATTR_TRUE(value)) {
The attribute is Set, by listing only the name of the
@@ -109,20 +116,39 @@ static void setup_check(void)
}
 
 
+To see how attributes in argv[] are set for different paths, only
+the first step in the above would be different.
+
+
+static struct attr_check *check;
+static void setup_check(const char **argv)
+{
+   check = attr_check_alloc();
+   while (*argv) {
+   struct git_attr *attr = git_attr(*argv);
+   attr_check_append(check, attr);
+   argv++;
+   }
+}
+
+
 
 Querying All Attributes
 ---
 
 To get the values of all attributes associated with a file:
 
-* Call `git_all_attrs()`, whi

[PATCH 19/27] attr: pass struct attr_check to collect_some_attrs

2017-01-12 Thread Brandon Williams
The old callchain used to take an array of attr_check_item items.
Instead pass the 'attr_check' container object to 'collect_some_attrs()'
and access the fields in the data structure directly.

Signed-off-by: Brandon Williams 
---
 attr.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/attr.c b/attr.c
index da727e3fd..e58fa340c 100644
--- a/attr.c
+++ b/attr.c
@@ -777,9 +777,7 @@ static int macroexpand_one(int nr, int rem)
  * check_all_attr. If num is non-zero, only attributes in check[] are
  * collected. Otherwise all attributes are collected.
  */
-static void collect_some_attrs(const char *path, int num,
-  struct attr_check_item *check)
-
+static void collect_some_attrs(const char *path, struct attr_check *check)
 {
struct attr_stack *stk;
int i, pathlen, rem, dirlen;
@@ -802,17 +800,18 @@ static void collect_some_attrs(const char *path, int num,
prepare_attr_stack(path, dirlen);
for (i = 0; i < attr_nr; i++)
check_all_attr[i].value = ATTR__UNKNOWN;
-   if (num && !cannot_trust_maybe_real) {
+   if (check->check_nr && !cannot_trust_maybe_real) {
rem = 0;
-   for (i = 0; i < num; i++) {
-   if (!check[i].attr->maybe_real) {
+   for (i = 0; i < check->check_nr; i++) {
+   const struct git_attr *a = check->check[i].attr;
+   if (!a->maybe_real) {
struct attr_check_item *c;
-   c = check_all_attr + check[i].attr->attr_nr;
+   c = check_all_attr + a->attr_nr;
c->value = ATTR__UNSET;
rem++;
}
}
-   if (rem == num)
+   if (rem == check->check_nr)
return;
}
 
@@ -821,18 +820,17 @@ static void collect_some_attrs(const char *path, int num,
rem = fill(path, pathlen, basename_offset, stk, rem);
 }
 
-static int git_check_attrs(const char *path, int num,
-  struct attr_check_item *check)
+int git_check_attr(const char *path, struct attr_check *check)
 {
int i;
 
-   collect_some_attrs(path, num, check);
+   collect_some_attrs(path, check);
 
-   for (i = 0; i < num; i++) {
-   const char *value = 
check_all_attr[check[i].attr->attr_nr].value;
+   for (i = 0; i < check->check_nr; i++) {
+   const char *value = 
check_all_attr[check->check[i].attr->attr_nr].value;
if (value == ATTR__UNKNOWN)
value = ATTR__UNSET;
-   check[i].value = value;
+   check->check[i].value = value;
}
 
return 0;
@@ -843,7 +841,7 @@ void git_all_attrs(const char *path, struct attr_check 
*check)
int i;
 
attr_check_reset(check);
-   collect_some_attrs(path, check->check_nr, check->check);
+   collect_some_attrs(path, check);
 
for (i = 0; i < attr_nr; i++) {
const char *name = check_all_attr[i].attr->name;
@@ -856,11 +854,6 @@ void git_all_attrs(const char *path, struct attr_check 
*check)
}
 }
 
-int git_check_attr(const char *path, struct attr_check *check)
-{
-   return git_check_attrs(path, check->check_nr, check->check);
-}
-
 struct attr_check *attr_check_alloc(void)
 {
return xcalloc(1, sizeof(struct attr_check));
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH 17/27] attr: convert git_check_attrs() callers to use the new API

2017-01-12 Thread Brandon Williams
From: Junio C Hamano 

The remaining callers are all simple "I have N attributes I am
interested in.  I'll ask about them with various paths one by one".

After this step, no caller to git_check_attrs() remains.  After
removing it, we can extend "struct attr_check" struct with data
that can be used in optimizing the query for the specific N
attributes it contains.

Signed-off-by: Junio C Hamano 
Signed-off-by: Stefan Beller 
Signed-off-by: Brandon Williams 
---
 archive.c  | 24 ++--
 builtin/pack-objects.c | 19 +--
 convert.c  | 17 ++---
 ll-merge.c | 33 ++---
 userdiff.c | 19 ---
 ws.c   | 19 ++-
 6 files changed, 45 insertions(+), 86 deletions(-)

diff --git a/archive.c b/archive.c
index b76bd4691..3591f7d55 100644
--- a/archive.c
+++ b/archive.c
@@ -87,19 +87,6 @@ void *sha1_file_to_archive(const struct archiver_args *args,
return buffer;
 }
 
-static void setup_archive_check(struct attr_check_item *check)
-{
-   static struct git_attr *attr_export_ignore;
-   static struct git_attr *attr_export_subst;
-
-   if (!attr_export_ignore) {
-   attr_export_ignore = git_attr("export-ignore");
-   attr_export_subst = git_attr("export-subst");
-   }
-   check[0].attr = attr_export_ignore;
-   check[1].attr = attr_export_subst;
-}
-
 struct directory {
struct directory *up;
struct object_id oid;
@@ -120,10 +107,10 @@ static int write_archive_entry(const unsigned char *sha1, 
const char *base,
void *context)
 {
static struct strbuf path = STRBUF_INIT;
+   static struct attr_check *check;
struct archiver_context *c = context;
struct archiver_args *args = c->args;
write_archive_entry_fn_t write_entry = c->write_entry;
-   struct attr_check_item check[2];
const char *path_without_prefix;
int err;
 
@@ -137,11 +124,12 @@ static int write_archive_entry(const unsigned char *sha1, 
const char *base,
strbuf_addch(&path, '/');
path_without_prefix = path.buf + args->baselen;
 
-   setup_archive_check(check);
-   if (!git_check_attrs(path_without_prefix, ARRAY_SIZE(check), check)) {
-   if (ATTR_TRUE(check[0].value))
+   if (!check)
+   check = attr_check_initl("export-ignore", "export-subst", NULL);
+   if (!git_check_attr(path_without_prefix, check)) {
+   if (ATTR_TRUE(check->check[0].value))
return 0;
-   args->convert = ATTR_TRUE(check[1].value);
+   args->convert = ATTR_TRUE(check->check[1].value);
}
 
if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 8b8fbd814..ff8b3c12d 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -894,24 +894,15 @@ static void write_pack_file(void)
written, nr_result);
 }
 
-static void setup_delta_attr_check(struct attr_check_item *check)
-{
-   static struct git_attr *attr_delta;
-
-   if (!attr_delta)
-   attr_delta = git_attr("delta");
-
-   check[0].attr = attr_delta;
-}
-
 static int no_try_delta(const char *path)
 {
-   struct attr_check_item check[1];
+   static struct attr_check *check;
 
-   setup_delta_attr_check(check);
-   if (git_check_attrs(path, ARRAY_SIZE(check), check))
+   if (!check)
+   check = attr_check_initl("delta", NULL);
+   if (git_check_attr(path, check))
return 0;
-   if (ATTR_FALSE(check->value))
+   if (ATTR_FALSE(check->check[0].value))
return 1;
return 0;
 }
diff --git a/convert.c b/convert.c
index 1b9829279..affd8ce9b 100644
--- a/convert.c
+++ b/convert.c
@@ -1085,24 +1085,19 @@ struct conv_attrs {
int ident;
 };
 
-static const char *conv_attr_name[] = {
-   "crlf", "ident", "filter", "eol", "text",
-};
-#define NUM_CONV_ATTRS ARRAY_SIZE(conv_attr_name)
-
 static void convert_attrs(struct conv_attrs *ca, const char *path)
 {
-   int i;
-   static struct attr_check_item ccheck[NUM_CONV_ATTRS];
+   static struct attr_check *check;
 
-   if (!ccheck[0].attr) {
-   for (i = 0; i < NUM_CONV_ATTRS; i++)
-   ccheck[i].attr = git_attr(conv_attr_name[i]);
+   if (!check) {
+   check = attr_check_initl("crlf", "ident", "filter",
+"eol", "text", NULL);
user_convert_tail = &user_convert;
git_config(read_convert_config, NULL);
}
 

[PATCH 15/27] attr: (re)introduce git_check_attr() and struct attr_check

2017-01-12 Thread Brandon Williams
From: Junio C Hamano 

A common pattern to check N attributes for many paths is to

 (1) prepare an array A of N attr_check_item items;
 (2) call git_attr() to intern the N attribute names and fill A;
 (3) repeatedly call git_check_attrs() for path with N and A;

A look-up for these N attributes for a single path P scans the
entire attr_stack, starting from the .git/info/attributes file and
then .gitattributes file in the directory the path P is in, going
upwards to find .gitattributes file found in parent directories.

An earlier commit 06a604e6 (attr: avoid heavy work when we know the
specified attr is not defined, 2014-12-28) tried to optimize out
this scanning for one trivial special case: when the attribute being
sought is known not to exist, we do not have to scan for it.  While
this may be a cheap and effective heuristic, it would not work well
when N is (much) more than 1.

What we would want is a more customized way to skip irrelevant
entries in the attribute stack, and the definition of irrelevance
is tied to the set of attributes passed to git_check_attrs() call,
i.e. the set of attributes being sought.  The data necessary for
this optimization needs to live alongside the set of attributes, but
a simple array of attr_check_item items simply does not have any place
for that.

Introduce "struct attr_check" that contains N, the number of
attributes being sought, and A, the array that holds N
attr_check_item items, and a function git_check_attr() that
takes a path P and this structure as its parameters.  This structure
can later be extended to hold extra data necessary for optimization.

Also, to make it easier to write the first two steps in common
cases, introduce the attr_check_initl() helper function, which takes
a NULL-terminated list of attribute names and initializes this
structure.

Signed-off-by: Junio C Hamano 
Signed-off-by: Stefan Beller 
Signed-off-by: Brandon Williams 
---
 attr.c | 74 ++
 attr.h | 17 +++
 2 files changed, 91 insertions(+)

diff --git a/attr.c b/attr.c
index 2f180d609..be9e398e9 100644
--- a/attr.c
+++ b/attr.c
@@ -865,6 +865,80 @@ int git_all_attrs(const char *path, int *num, struct 
attr_check_item **check)
return 0;
 }
 
+struct attr_check *attr_check_alloc(void)
+{
+   return xcalloc(1, sizeof(struct attr_check));
+}
+
+int git_check_attr(const char *path, struct attr_check *check)
+{
+   return git_check_attrs(path, check->check_nr, check->check);
+}
+
+struct attr_check *attr_check_initl(const char *one, ...)
+{
+   struct attr_check *check;
+   int cnt;
+   va_list params;
+   const char *param;
+
+   va_start(params, one);
+   for (cnt = 1; (param = va_arg(params, const char *)) != NULL; cnt++)
+   ;
+   va_end(params);
+
+   check = attr_check_alloc();
+   check->check_nr = cnt;
+   check->check_alloc = cnt;
+   check->check = xcalloc(cnt, sizeof(struct attr_check_item));
+
+   check->check[0].attr = git_attr(one);
+   va_start(params, one);
+   for (cnt = 1; cnt < check->check_nr; cnt++) {
+   struct git_attr *attr;
+   param = va_arg(params, const char *);
+   if (!param)
+   die("BUG: counted %d != ended at %d",
+   check->check_nr, cnt);
+   attr = git_attr(param);
+   if (!attr)
+   die("BUG: %s: not a valid attribute name", param);
+   check->check[cnt].attr = attr;
+   }
+   va_end(params);
+   return check;
+}
+
+struct attr_check_item *attr_check_append(struct attr_check *check,
+ const struct git_attr *attr)
+{
+   struct attr_check_item *item;
+
+   ALLOC_GROW(check->check, check->check_nr + 1, check->check_alloc);
+   item = &check->check[check->check_nr++];
+   item->attr = attr;
+   return item;
+}
+
+void attr_check_reset(struct attr_check *check)
+{
+   check->check_nr = 0;
+}
+
+void attr_check_clear(struct attr_check *check)
+{
+   free(check->check);
+   check->check = NULL;
+   check->check_alloc = 0;
+   check->check_nr = 0;
+}
+
+void attr_check_free(struct attr_check *check)
+{
+   attr_check_clear(check);
+   free(check);
+}
+
 void git_attr_set_direction(enum git_attr_direction new, struct index_state 
*istate)
 {
enum git_attr_direction old = direction;
diff --git a/attr.h b/attr.h
index efc7bb3b3..459347f4b 100644
--- a/attr.h
+++ b/attr.h
@@ -29,6 +29,22 @@ struct attr_check_item {
const char *value;
 };
 
+struct attr_check {
+   int check_nr;
+   int check_alloc;
+   struct attr_check_item *check;
+};
+
+extern struct attr_check *attr_check_alloc(void);
+extern struct attr_check *attr_check_initl

[PATCH 20/27] attr: change validity check for attribute names to use positive logic

2017-01-12 Thread Brandon Williams
From: Junio C Hamano 

Convert 'invalid_attr_name()' to 'attr_name_valid()' and use positive
logic for the return value.  In addition create a helper function that
prints out an error message when an invalid attribute name is used.

We could later update the message to exactly spell out what the
rules for a good attribute name are, etc.

Signed-off-by: Junio C Hamano 
Signed-off-by: Stefan Beller 
Signed-off-by: Brandon Williams 
---
 attr.c | 34 --
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/attr.c b/attr.c
index e58fa340c..5399e1cb3 100644
--- a/attr.c
+++ b/attr.c
@@ -74,23 +74,33 @@ static unsigned hash_name(const char *name, int namelen)
return val;
 }
 
-static int invalid_attr_name(const char *name, int namelen)
+static int attr_name_valid(const char *name, size_t namelen)
 {
/*
 * Attribute name cannot begin with '-' and must consist of
 * characters from [-A-Za-z0-9_.].
 */
if (namelen <= 0 || *name == '-')
-   return -1;
+   return 0;
while (namelen--) {
char ch = *name++;
if (! (ch == '-' || ch == '.' || ch == '_' ||
   ('0' <= ch && ch <= '9') ||
   ('a' <= ch && ch <= 'z') ||
   ('A' <= ch && ch <= 'Z')) )
-   return -1;
+   return 0;
}
-   return 0;
+   return 1;
+}
+
+static void report_invalid_attr(const char *name, size_t len,
+   const char *src, int lineno)
+{
+   struct strbuf err = STRBUF_INIT;
+   strbuf_addf(&err, _("%.*s is not a valid attribute name"),
+   (int) len, name);
+   fprintf(stderr, "%s: %s:%d\n", err.buf, src, lineno);
+   strbuf_release(&err);
 }
 
 static struct git_attr *git_attr_internal(const char *name, int len)
@@ -105,7 +115,7 @@ static struct git_attr *git_attr_internal(const char *name, 
int len)
return a;
}
 
-   if (invalid_attr_name(name, len))
+   if (!attr_name_valid(name, len))
return NULL;
 
FLEX_ALLOC_MEM(a, name, name, len);
@@ -196,17 +206,15 @@ static const char *parse_attr(const char *src, int 
lineno, const char *cp,
cp++;
len--;
}
-   if (invalid_attr_name(cp, len)) {
-   fprintf(stderr,
-   "%.*s is not a valid attribute name: %s:%d\n",
-   len, cp, src, lineno);
+   if (!attr_name_valid(cp, len)) {
+   report_invalid_attr(cp, len, src, lineno);
return NULL;
}
} else {
/*
 * As this function is always called twice, once with
 * e == NULL in the first pass and then e != NULL in
-* the second pass, no need for invalid_attr_name()
+* the second pass, no need for attr_name_valid()
 * check here.
 */
if (*cp == '-' || *cp == '!') {
@@ -258,10 +266,8 @@ static struct match_attr *parse_attr_line(const char 
*line, const char *src,
name += strlen(ATTRIBUTE_MACRO_PREFIX);
name += strspn(name, blank);
namelen = strcspn(name, blank);
-   if (invalid_attr_name(name, namelen)) {
-   fprintf(stderr,
-   "%.*s is not a valid attribute name: %s:%d\n",
-   namelen, name, src, lineno);
+   if (!attr_name_valid(name, namelen)) {
+   report_invalid_attr(name, namelen, src, lineno);
goto fail_return;
}
}
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH 16/27] attr: convert git_all_attrs() to use "struct attr_check"

2017-01-12 Thread Brandon Williams
From: Junio C Hamano 

This updates the other two ways the attribute check is done via an
array of "struct attr_check_item" elements.  These two niches
appear only in "git check-attr".

 * The caller does not know offhand what attributes it wants to ask
   about and cannot use attr_check_initl() to prepare the
   attr_check structure.

 * The caller may not know what attributes it wants to ask at all,
   and instead wants to learn everything that the given path has.

Such a caller can call attr_check_alloc() to allocate an empty
attr_check, and then call attr_check_append() to add attribute names
one by one.

Signed-off-by: Junio C Hamano 
Signed-off-by: Stefan Beller 
Signed-off-by: Brandon Williams 
---
 attr.c   | 38 -
 attr.h   |  9 +++-
 builtin/check-attr.c | 60 ++--
 3 files changed, 47 insertions(+), 60 deletions(-)

diff --git a/attr.c b/attr.c
index be9e398e9..d2eaa0410 100644
--- a/attr.c
+++ b/attr.c
@@ -837,42 +837,32 @@ int git_check_attrs(const char *path, int num, struct 
attr_check_item *check)
return 0;
 }
 
-int git_all_attrs(const char *path, int *num, struct attr_check_item **check)
+void git_all_attrs(const char *path, struct attr_check *check)
 {
-   int i, count, j;
+   int i;
 
-   collect_some_attrs(path, 0, NULL);
+   attr_check_reset(check);
+   collect_some_attrs(path, check->check_nr, check->check);
 
-   /* Count the number of attributes that are set. */
-   count = 0;
-   for (i = 0; i < attr_nr; i++) {
-   const char *value = check_all_attr[i].value;
-   if (value != ATTR__UNSET && value != ATTR__UNKNOWN)
-   ++count;
-   }
-   *num = count;
-   ALLOC_ARRAY(*check, count);
-   j = 0;
for (i = 0; i < attr_nr; i++) {
+   const char *name = check_all_attr[i].attr->name;
const char *value = check_all_attr[i].value;
-   if (value != ATTR__UNSET && value != ATTR__UNKNOWN) {
-   (*check)[j].attr = check_all_attr[i].attr;
-   (*check)[j].value = value;
-   ++j;
-   }
+   struct attr_check_item *item;
+   if (value == ATTR__UNSET || value == ATTR__UNKNOWN)
+   continue;
+   item = attr_check_append(check, git_attr(name));
+   item->value = value;
}
-
-   return 0;
 }
 
-struct attr_check *attr_check_alloc(void)
+int git_check_attr(const char *path, struct attr_check *check)
 {
-   return xcalloc(1, sizeof(struct attr_check));
+   return git_check_attrs(path, check->check_nr, check->check);
 }
 
-int git_check_attr(const char *path, struct attr_check *check)
+struct attr_check *attr_check_alloc(void)
 {
-   return git_check_attrs(path, check->check_nr, check->check);
+   return xcalloc(1, sizeof(struct attr_check));
 }
 
 struct attr_check *attr_check_initl(const char *one, ...)
diff --git a/attr.h b/attr.h
index 459347f4b..971bb9a38 100644
--- a/attr.h
+++ b/attr.h
@@ -56,13 +56,10 @@ int git_check_attrs(const char *path, int, struct 
attr_check_item *);
 extern int git_check_attr(const char *path, struct attr_check *check);
 
 /*
- * Retrieve all attributes that apply to the specified path.  *num
- * will be set to the number of attributes on the path; **check will
- * be set to point at a newly-allocated array of git_attr_check
- * objects describing the attributes and their values.  *check must be
- * free()ed by the caller.
+ * Retrieve all attributes that apply to the specified path.
+ * check holds the attributes and their values.
  */
-int git_all_attrs(const char *path, int *num, struct attr_check_item **check);
+void git_all_attrs(const char *path, struct attr_check *check);
 
 enum git_attr_direction {
GIT_ATTR_CHECKIN,
diff --git a/builtin/check-attr.c b/builtin/check-attr.c
index 889264a5b..3d4704be5 100644
--- a/builtin/check-attr.c
+++ b/builtin/check-attr.c
@@ -24,12 +24,13 @@ static const struct option check_attr_options[] = {
OPT_END()
 };
 
-static void output_attr(int cnt, struct attr_check_item *check,
-   const char *file)
+static void output_attr(struct attr_check *check, const char *file)
 {
int j;
+   int cnt = check->check_nr;
+
for (j = 0; j < cnt; j++) {
-   const char *value = check[j].value;
+   const char *value = check->check[j].value;
 
if (ATTR_TRUE(value))
value = "set";
@@ -42,36 +43,38 @@ static void output_attr(int cnt, struct attr_check_item 
*check,
printf("%s%c" /* path */
   "%s%c" /* attrname */
   &

[PATCH 24/27] attr: tighten const correctness with git_attr and match_attr

2017-01-12 Thread Brandon Williams
Signed-off-by: Brandon Williams 
---
 attr.c   | 14 +++---
 attr.h   |  2 +-
 builtin/check-attr.c |  3 ++-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/attr.c b/attr.c
index 633a12cc3..90f576044 100644
--- a/attr.c
+++ b/attr.c
@@ -209,7 +209,7 @@ static void report_invalid_attr(const char *name, size_t 
len,
  * dictionary.  If no entry is found, create a new attribute and store it in
  * the dictionary.
  */
-static struct git_attr *git_attr_internal(const char *name, int namelen)
+static const struct git_attr *git_attr_internal(const char *name, int namelen)
 {
struct git_attr *a;
 
@@ -233,14 +233,14 @@ static struct git_attr *git_attr_internal(const char 
*name, int namelen)
return a;
 }
 
-struct git_attr *git_attr(const char *name)
+const struct git_attr *git_attr(const char *name)
 {
return git_attr_internal(name, strlen(name));
 }
 
 /* What does a matched pattern decide? */
 struct attr_state {
-   struct git_attr *attr;
+   const struct git_attr *attr;
const char *setto;
 };
 
@@ -267,7 +267,7 @@ struct pattern {
 struct match_attr {
union {
struct pattern pat;
-   struct git_attr *attr;
+   const struct git_attr *attr;
} u;
char is_macro;
unsigned num_attr;
@@ -814,7 +814,7 @@ static int fill_one(const char *what, struct 
attr_check_item *all_attrs,
int i;
 
for (i = a->num_attr - 1; rem > 0 && i >= 0; i--) {
-   struct git_attr *attr = a->state[i].attr;
+   const struct git_attr *attr = a->state[i].attr;
const char **n = &(all_attrs[attr->attr_nr].value);
const char *v = a->state[i].setto;
 
@@ -838,7 +838,7 @@ static int fill(const char *path, int pathlen, int 
basename_offset,
const char *base = stk->origin ? stk->origin : "";
 
for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
-   struct match_attr *a = stk->attrs[i];
+   const struct match_attr *a = stk->attrs[i];
if (a->is_macro)
continue;
if (path_matches(path, pathlen, basename_offset,
@@ -988,7 +988,7 @@ struct attr_check *attr_check_initl(const char *one, ...)
check->check[0].attr = git_attr(one);
va_start(params, one);
for (cnt = 1; cnt < check->check_nr; cnt++) {
-   struct git_attr *attr;
+   const struct git_attr *attr;
param = va_arg(params, const char *);
if (!param)
die("BUG: counted %d != ended at %d",
diff --git a/attr.h b/attr.h
index f40524875..9b4dc07d8 100644
--- a/attr.h
+++ b/attr.h
@@ -8,7 +8,7 @@ struct git_attr;
  * Given a string, return the gitattribute object that
  * corresponds to it.
  */
-struct git_attr *git_attr(const char *);
+const struct git_attr *git_attr(const char *);
 
 /* Internal use */
 extern const char git_attr__true[];
diff --git a/builtin/check-attr.c b/builtin/check-attr.c
index 3d4704be5..cc6caf7ac 100644
--- a/builtin/check-attr.c
+++ b/builtin/check-attr.c
@@ -166,7 +166,8 @@ int cmd_check_attr(int argc, const char **argv, const char 
*prefix)
check = attr_check_alloc();
if (!all_attrs) {
for (i = 0; i < cnt; i++) {
-   struct git_attr *a = git_attr(argv[i]);
+   const struct git_attr *a = git_attr(argv[i]);
+
if (!a)
return error("%s: not a valid attribute name",
 argv[i]);
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH 27/27] attr: reformat git_attr_set_direction() function

2017-01-12 Thread Brandon Williams
Move 'git_attr_set_direction()' up to be closer to the variables
that it modifies, and make a small formatting change by renaming the
variable 'new' to 'new_direction' so that it is more descriptive.

Update the comment about how 'direction' is used to read the state of
the world.  It should be noted that callers of
'git_attr_set_direction()' should ensure that other threads are not
making calls into the attribute system until after the call to
'git_attr_set_direction()' completes.  This function essentially acts as a
reset button for the attribute system and should be handled with care.

Signed-off-by: Brandon Williams 
---
 attr.c | 49 -
 attr.h |  3 ++-
 2 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/attr.c b/attr.c
index cbb07d25d..f5cc68b67 100644
--- a/attr.c
+++ b/attr.c
@@ -521,26 +521,30 @@ static struct attr_stack *read_attr_from_array(const char 
**list)
 }
 
 /*
- * NEEDSWORK: these two are tricky.  The callers assume there is a
- * single, system-wide global state "where we read attributes from?"
- * and when the state is flipped by calling git_attr_set_direction(),
- * attr_stack is discarded so that subsequent attr_check will lazily
- * read from the right place.  And they do not know or care who called
- * by them uses the attribute subsystem, hence have no knowledge of
- * existing git_attr_check instances or future ones that will be
- * created).
- *
- * Probably we need a thread_local that holds these two variables,
- * and a list of git_attr_check instances (which need to be maintained
- * by hooking into git_attr_check_alloc(), git_attr_check_initl(), and
- * git_attr_check_clear().  Then git_attr_set_direction() updates the
- * fields in that thread_local for these two variables, iterate over
- * all the active git_attr_check instances and discard the attr_stack
- * they hold.  Yuck, but it sounds doable.
+ * Callers into the attribute system assume there is a single, system-wide
+ * global state where attributes are read from and when the state is flipped by
+ * calling git_attr_set_direction(), the stack frames that have been
+ * constructed need to be discarded so that subsequent calls into the
+ * attribute system will lazily read from the right place.  Since changing
+ * direction causes a global paradigm shift, it should not ever be called while
+ * another thread could potentially be calling into the attribute system.
  */
 static enum git_attr_direction direction;
 static struct index_state *use_index;
 
+void git_attr_set_direction(enum git_attr_direction new_direction,
+   struct index_state *istate)
+{
+   if (is_bare_repository() && new_direction != GIT_ATTR_INDEX)
+   die("BUG: non-INDEX attr direction in a bare repo");
+
+   if (new_direction != direction)
+   drop_attr_stack();
+
+   direction = new_direction;
+   use_index = istate;
+}
+
 static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
 {
FILE *fp = fopen(path, "r");
@@ -1130,19 +1134,6 @@ void attr_check_free(struct attr_check *check)
free(check);
 }
 
-void git_attr_set_direction(enum git_attr_direction new, struct index_state 
*istate)
-{
-   enum git_attr_direction old = direction;
-
-   if (is_bare_repository() && new != GIT_ATTR_INDEX)
-   die("BUG: non-INDEX attr direction in a bare repo");
-
-   direction = new;
-   if (new != old)
-   drop_attr_stack();
-   use_index = istate;
-}
-
 void attr_start(void)
 {
pthread_mutex_init(&g_attr_hashmap.mutex, NULL);
diff --git a/attr.h b/attr.h
index 9b4dc07d8..b8be37c91 100644
--- a/attr.h
+++ b/attr.h
@@ -73,7 +73,8 @@ enum git_attr_direction {
GIT_ATTR_CHECKOUT,
GIT_ATTR_INDEX
 };
-void git_attr_set_direction(enum git_attr_direction, struct index_state *);
+void git_attr_set_direction(enum git_attr_direction new_direction,
+   struct index_state *istate);
 
 extern void attr_start(void);
 
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH 25/27] attr: store attribute stacks in hashmap

2017-01-12 Thread Brandon Williams
The last big hurdle towards a thread-safe API for the attribute system
is the reliance on a global attribute stack that is modified during each
call into the attribute system.

This patch removes this global stack and instead a stack is retrieved or
constructed locally.  Since each of these stacks is only used as a
read-only structure once constructed, they can be stored in a hashmap
and shared between threads.  The key into the hashmap of attribute
stacks is, in the general case, the directory that corresponds to the
attribute stack frame.  For the core stack frames (builtin, system,
home, and info) a key of ".git/-attr" is used to prevent potential
collisions since a directory or file named ".git" is disallowed.

One caveat with storing and sharing the stack frames like this is that
the info stack needs to be treated separately from the rest of the
attribute stack.  This is because each stack frame holds a pointer to
the stack that comes before it and if it was placed on top of the rest
of the attribute stack then this pointer would be different for each
attribute stack and wouldn't be able to be shared between threads.  In
order to allow for sharing the info stack frame it needs to be its own
isolated frame and can simply be processed first to have the same effect
of being at the top of the stack.

Signed-off-by: Brandon Williams 
---
 attr.c | 375 +
 1 file changed, 235 insertions(+), 140 deletions(-)

diff --git a/attr.c b/attr.c
index 90f576044..78562592b 100644
--- a/attr.c
+++ b/attr.c
@@ -434,17 +434,19 @@ static struct match_attr *parse_attr_line(const char 
*line, const char *src,
  * .gitignore file and info/excludes file as a fallback.
  */
 
-/* NEEDSWORK: This will become per git_attr_check */
-static struct attr_stack {
-   struct attr_stack *prev;
+struct attr_stack {
+   const struct attr_stack *prev;
char *origin;
size_t originlen;
unsigned num_matches;
unsigned alloc;
struct match_attr **attrs;
-} *attr_stack;
+};
+
+/* Dictionary of stack frames; access should be surrounded by mutex */
+static struct attr_hashmap g_stack_hashmap;
 
-static void free_attr_elem(struct attr_stack *e)
+static void attr_stack_free(struct attr_stack *e)
 {
int i;
free(e->origin);
@@ -467,6 +469,25 @@ static void free_attr_elem(struct attr_stack *e)
free(e);
 }
 
+static void drop_attr_stack(void)
+{
+   struct hashmap_iter iter;
+   struct attr_hash_entry *e;
+
+   hashmap_lock(&g_stack_hashmap);
+
+   hashmap_iter_init(&g_stack_hashmap.map, &iter);
+   while ((e = hashmap_iter_next(&iter))) {
+   struct attr_stack *stack = e->value;
+   attr_stack_free(stack);
+   free(e);
+   }
+
+   hashmap_free(&g_stack_hashmap.map, 0);
+
+   hashmap_unlock(&g_stack_hashmap);
+}
+
 static const char *builtin_attr[] = {
"[attr]binary -diff -merge -text",
NULL,
@@ -621,15 +642,6 @@ static void debug_set(const char *what, const char *match, struct git_attr *attr
 #define debug_set(a,b,c,d) do { ; } while (0)
 #endif /* DEBUG_ATTR */
 
-static void drop_attr_stack(void)
-{
-   while (attr_stack) {
-   struct attr_stack *elem = attr_stack;
-   attr_stack = elem->prev;
-   free_attr_elem(elem);
-   }
-}
-
 static const char *git_etc_gitattributes(void)
 {
static const char *system_wide;
@@ -638,6 +650,14 @@ static const char *git_etc_gitattributes(void)
return system_wide;
 }
 
+static const char *get_home_gitattributes(void)
+{
+   if (!git_attributes_file)
+   git_attributes_file = xdg_config_home("attributes");
+
+   return git_attributes_file;
+}
+
 static int git_attr_system(void)
 {
return !git_env_bool("GIT_ATTR_NOSYSTEM", 0);
@@ -645,142 +665,208 @@ static int git_attr_system(void)
 
 static GIT_PATH_FUNC(git_path_info_attributes, INFOATTRIBUTES_FILE)
 
-static void push_stack(struct attr_stack **attr_stack_p,
-  struct attr_stack *elem, char *origin, size_t originlen)
+/*
+ * This function should only be called from 'get_attr_stack()' or
+ * 'get_info_stack()', which already needs to acquire the lock to the stack
+ * hashmap, so there is no need to also acquire the lock in this function.
+ */
+static void push_stack(const struct attr_stack **attr_stack_p,
+  struct attr_stack *elem,
+  const char *origin, size_t originlen)
 {
if (elem) {
-   elem->origin = origin;
-   if (origin)
-   elem->originlen = originlen;
+   elem->origin = xmemdupz(origin, originlen);
+   elem->originlen = originlen;
elem->prev = *attr_stack_p;
*

[PATCH 21/27] attr: use hashmap for attribute dictionary

2017-01-12 Thread Brandon Williams
The current implementation of the attribute dictionary uses a custom
hashtable.  This modernizes the dictionary by converting it to the builtin
'hashmap' structure.

Also, in order to enable a threaded API in the future, add an
accompanying mutex which must be acquired prior to accessing the
dictionary of interned attributes.

Signed-off-by: Brandon Williams 
---
 attr.c| 171 ++
 attr.h|   2 +
 common-main.c |   3 ++
 3 files changed, 131 insertions(+), 45 deletions(-)

diff --git a/attr.c b/attr.c
index 5399e1cb3..8cf2ea901 100644
--- a/attr.c
+++ b/attr.c
@@ -14,6 +14,7 @@
 #include "dir.h"
 #include "utf8.h"
 #include "quote.h"
+#include "thread-utils.h"
 
 const char git_attr__true[] = "(builtin)true";
 const char git_attr__false[] = "\0(builtin)false";
@@ -23,28 +24,17 @@ static const char git_attr__unknown[] = "(builtin)unknown";
 #define ATTR__UNSET NULL
 #define ATTR__UNKNOWN git_attr__unknown
 
-/* This is a randomly chosen prime. */
-#define HASHSIZE 257
-
 #ifndef DEBUG_ATTR
 #define DEBUG_ATTR 0
 #endif
 
-/*
- * NEEDSWORK: the global dictionary of the interned attributes
- * must stay a singleton even after we become thread-ready.
- * Access to these must be surrounded with mutex when it happens.
- */
 struct git_attr {
-   struct git_attr *next;
-   unsigned h;
-   int attr_nr;
+   int attr_nr; /* unique attribute number */
int maybe_macro;
int maybe_real;
-   char name[FLEX_ARRAY];
+   char name[FLEX_ARRAY]; /* attribute name */
 };
 static int attr_nr;
-static struct git_attr *(git_attr_hash[HASHSIZE]);
 
 /*
  * NEEDSWORK: maybe-real, maybe-macro are not property of
@@ -63,15 +53,94 @@ const char *git_attr_name(const struct git_attr *attr)
return attr->name;
 }
 
-static unsigned hash_name(const char *name, int namelen)
+struct attr_hashmap {
+   struct hashmap map;
+#ifndef NO_PTHREADS
+   pthread_mutex_t mutex;
+#endif
+};
+
+static inline void hashmap_lock(struct attr_hashmap *map)
 {
-   unsigned val = 0, c;
+#ifndef NO_PTHREADS
+   pthread_mutex_lock(&map->mutex);
+#endif
+}
 
-   while (namelen--) {
-   c = *name++;
-   val = ((val << 7) | (val >> 22)) ^ c;
-   }
-   return val;
+static inline void hashmap_unlock(struct attr_hashmap *map)
+{
+#ifndef NO_PTHREADS
+   pthread_mutex_unlock(&map->mutex);
+#endif
+}
+
+/*
+ * The global dictionary of all interned attributes.  This
+ * is a singleton object which is shared between threads.
+ * Access to this dictionary must be surrounded with a mutex.
+ */
+static struct attr_hashmap g_attr_hashmap;
+
+/* The container for objects stored in "struct attr_hashmap" */
+struct attr_hash_entry {
+   struct hashmap_entry ent; /* must be the first member! */
+   const char *key; /* the key; memory should be owned by value */
+   size_t keylen; /* length of the key */
+   void *value; /* the stored value */
+};
+
+/* attr_hashmap comparison function */
+static int attr_hash_entry_cmp(const struct attr_hash_entry *a,
+  const struct attr_hash_entry *b,
+  void *unused)
+{
+   return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
+}
+
+/* Initialize an 'attr_hashmap' object */
+void attr_hashmap_init(struct attr_hashmap *map)
+{
+   hashmap_init(&map->map, (hashmap_cmp_fn) attr_hash_entry_cmp, 0);
+}
+
+/*
+ * Retrieve the 'value' stored in a hashmap given the provided 'key'.
+ * If there is no matching entry, return NULL.
+ */
+static void *attr_hashmap_get(struct attr_hashmap *map,
+ const char *key, size_t keylen)
+{
+   struct attr_hash_entry k;
+   struct attr_hash_entry *e;
+
+   if (!map->map.tablesize)
+   attr_hashmap_init(map);
+
+   hashmap_entry_init(&k, memhash(key, keylen));
+   k.key = key;
+   k.keylen = keylen;
+   e = hashmap_get(&map->map, &k, NULL);
+
+   return e ? e->value : NULL;
+}
+
+/* Add 'value' to a hashmap based on the provided 'key'. */
+static void attr_hashmap_add(struct attr_hashmap *map,
+const char *key, size_t keylen,
+void *value)
+{
+   struct attr_hash_entry *e;
+
+   if (!map->map.tablesize)
+   attr_hashmap_init(map);
+
+   e = xmalloc(sizeof(struct attr_hash_entry));
+   hashmap_entry_init(e, memhash(key, keylen));
+   e->key = key;
+   e->keylen = keylen;
+   e->value = value;
+
+   hashmap_add(&map->map, e);
 }
 
 static int attr_name_valid(const char *name, size_t namelen)
@@ -103,37 +172,44 @@ static void report_invalid_attr(const char 

[PATCH 22/27] attr: eliminate global check_all_attr array

2017-01-12 Thread Brandon Williams
Currently there is a reliance on 'check_all_attr' which is a global
array of 'attr_check_item' items which is used to store the value of
each attribute during the collection process.

This patch eliminates this global and instead creates an array per
'attr_check' instance which is then used in the attribute collection
process.  This brings the attribute system one step closer to being
thread-safe.

Signed-off-by: Brandon Williams 
---
 attr.c | 114 +++--
 attr.h |   2 ++
 2 files changed, 78 insertions(+), 38 deletions(-)

diff --git a/attr.c b/attr.c
index 8cf2ea901..38b0d4347 100644
--- a/attr.c
+++ b/attr.c
@@ -34,7 +34,6 @@ struct git_attr {
int maybe_real;
char name[FLEX_ARRAY]; /* attribute name */
 };
-static int attr_nr;
 
 /*
  * NEEDSWORK: maybe-real, maybe-macro are not property of
@@ -45,9 +44,6 @@ static int attr_nr;
  */
 static int cannot_trust_maybe_real;
 
-/* NEEDSWORK: This will become per git_attr_check */
-static struct attr_check_item *check_all_attr;
-
 const char *git_attr_name(const struct git_attr *attr)
 {
return attr->name;
@@ -143,6 +139,52 @@ static void attr_hashmap_add(struct attr_hashmap *map,
hashmap_add(&map->map, e);
 }
 
+/*
+ * Reallocate and reinitialize the array of all attributes (which is used in
+ * the attribute collection process) in 'check' based on the global dictionary
+ * of attributes.
+ */
+static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
+{
+   int i;
+
+   hashmap_lock(map);
+
+   if (map->map.size < check->all_attrs_nr)
+   die("BUG: interned attributes shouldn't be deleted");
+
+   /*
+* If the number of attributes in the global dictionary has increased
+* (or this attr_check instance doesn't have an initialized all_attrs
+* field), reallocate the provided attr_check instance's all_attrs
+* field and fill each entry with its corresponding git_attr.
+*/
+   if (map->map.size != check->all_attrs_nr) {
+   struct attr_hash_entry *e;
+   struct hashmap_iter iter;
+   hashmap_iter_init(&map->map, &iter);
+
+   REALLOC_ARRAY(check->all_attrs, map->map.size);
+   check->all_attrs_nr = map->map.size;
+
+   while ((e = hashmap_iter_next(&iter))) {
+   const struct git_attr *a = e->value;
+   check->all_attrs[a->attr_nr].attr = a;
+   }
+   }
+
+   hashmap_unlock(map);
+
+   /*
+* Re-initialize every entry in check->all_attrs.
+* This re-initialization can live outside of the locked region since
+* the attribute dictionary is no longer being accessed.
+*/
+   for (i = 0; i < check->all_attrs_nr; i++) {
+   check->all_attrs[i].value = ATTR__UNKNOWN;
+   }
+}
+
 static int attr_name_valid(const char *name, size_t namelen)
 {
/*
@@ -196,16 +238,6 @@ static struct git_attr *git_attr_internal(const char *name, int namelen)
 
attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
-
-   /*
-* NEEDSWORK: per git_attr_check check_all_attr
-* will be initialized a lot more lazily, not
-* like this, and not here.
-*/
-   REALLOC_ARRAY(check_all_attr, ++attr_nr);
-   check_all_attr[a->attr_nr].attr = a;
-   check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
-   assert(a->attr_nr == (attr_nr - 1));
}
 
hashmap_unlock(&g_attr_hashmap);
@@ -791,16 +823,16 @@ static int path_matches(const char *pathname, int pathlen,
  pattern, prefix, pat->patternlen, pat->flags);
 }
 
-static int macroexpand_one(int attr_nr, int rem);
+static int macroexpand_one(struct attr_check_item *all_attrs, int nr, int rem);
 
-static int fill_one(const char *what, struct match_attr *a, int rem)
+static int fill_one(const char *what, struct attr_check_item *all_attrs,
+   struct match_attr *a, int rem)
 {
-   struct attr_check_item *check = check_all_attr;
int i;
 
-   for (i = a->num_attr - 1; 0 < rem && 0 <= i; i--) {
+   for (i = a->num_attr - 1; rem > 0 && i >= 0; i--) {
struct git_attr *attr = a->state[i].attr;
-   const char **n = &(check[attr->attr_nr].value);
+   const char **n = &(all_attrs[attr->attr_nr].value);
const char *v = a->state[i].setto;
 
if (*n == ATTR__UNKNOWN) {
@@ -809,14 +841,15 @@ static int fill_one(

[PATCH 26/27] attr: push the bare repo check into read_attr()

2017-01-12 Thread Brandon Williams
Push the bare repository check into the 'read_attr()' function.  This
avoids needing to have extra logic which creates an empty stack frame
when inside a bare repo as a similar bit of logic already exists in the
'read_attr()' function.

Signed-off-by: Brandon Williams 
---
 attr.c | 40 
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/attr.c b/attr.c
index 78562592b..cbb07d25d 100644
--- a/attr.c
+++ b/attr.c
@@ -591,25 +591,29 @@ static struct attr_stack *read_attr_from_index(const char *path, int macro_ok)
 
 static struct attr_stack *read_attr(const char *path, int macro_ok)
 {
-   struct attr_stack *res;
+   struct attr_stack *res = NULL;
 
-   if (direction == GIT_ATTR_CHECKOUT) {
+   if (direction == GIT_ATTR_INDEX) {
res = read_attr_from_index(path, macro_ok);
-   if (!res)
-   res = read_attr_from_file(path, macro_ok);
-   }
-   else if (direction == GIT_ATTR_CHECKIN) {
-   res = read_attr_from_file(path, macro_ok);
-   if (!res)
-   /*
-* There is no checked out .gitattributes file there, but
-* we might have it in the index.  We allow operation in a
-* sparsely checked out work tree, so read from it.
-*/
+   } else if (!is_bare_repository()) {
+   if (direction == GIT_ATTR_CHECKOUT) {
res = read_attr_from_index(path, macro_ok);
+   if (!res)
+   res = read_attr_from_file(path, macro_ok);
+   }
+   else if (direction == GIT_ATTR_CHECKIN) {
+   res = read_attr_from_file(path, macro_ok);
+   if (!res)
+   /*
+* There is no checked out .gitattributes file
+* there, but we might have it in the index.
+* We allow operation in a sparsely checked out
+* work tree, so read from it.
+*/
+   res = read_attr_from_index(path, macro_ok);
+   }
}
-   else
-   res = read_attr_from_index(path, macro_ok);
+
if (!res)
res = xcalloc(1, sizeof(*res));
return res;
@@ -796,11 +800,7 @@ static const struct attr_stack *core_attr_stack(void)
}
 
/* root directory */
-   if (!is_bare_repository() || direction == GIT_ATTR_INDEX) {
-   e = read_attr(GITATTRIBUTES_FILE, 1);
-   } else {
-   e = xcalloc(1, sizeof(struct attr_stack));
-   }
+   e = read_attr(GITATTRIBUTES_FILE, 1);
key = "";
push_stack(&core, e, key, strlen(key));
}
-- 
2.11.0.390.gc69c2f50cf-goog



[PATCH 08/27] attr.c: tighten constness around "git_attr" structure

2017-01-12 Thread Brandon Williams
From: Junio C Hamano 

It holds an interned string, and git_attr_name() is a way to peek
into it.  Make sure the involved pointer types are pointer-to-const.

Signed-off-by: Junio C Hamano 
Signed-off-by: Stefan Beller 
Signed-off-by: Brandon Williams 
---
 attr.c | 2 +-
 attr.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/attr.c b/attr.c
index e42f931b3..f7cf7ae30 100644
--- a/attr.c
+++ b/attr.c
@@ -43,7 +43,7 @@ static int cannot_trust_maybe_real;
 static struct git_attr_check *check_all_attr;
 static struct git_attr *(git_attr_hash[HASHSIZE]);
 
-char *git_attr_name(struct git_attr *attr)
+const char *git_attr_name(const struct git_attr *attr)
 {
return attr->name;
 }
diff --git a/attr.h b/attr.h
index 8b08d33af..00d7a662c 100644
--- a/attr.h
+++ b/attr.h
@@ -25,7 +25,7 @@ extern const char git_attr__false[];
  * Unset one is returned as NULL.
  */
 struct git_attr_check {
-   struct git_attr *attr;
+   const struct git_attr *attr;
const char *value;
 };
 
@@ -34,7 +34,7 @@ struct git_attr_check {
  * return value is a pointer to a null-delimited string that is part
  * of the internal data structure; it should not be modified or freed.
  */
-char *git_attr_name(struct git_attr *);
+extern const char *git_attr_name(const struct git_attr *);
 
 int git_check_attr(const char *path, int, struct git_attr_check *);
 
-- 
2.11.0.390.gc69c2f50cf-goog



  1   2   3   4   5   6   7   8   9   10   >