On Thu, Dec 28 2017, suzuki toshiya jotted:

Thanks for working on this Just a quick review from skimming this,
haven't actually applied + run this:

> git-archive: accepts --owner --group aslike GNU tar.

Make this:

    git-archive: accept --owner and --group like GNU tar

I.e. imperative voice, s/aslike/like/, no full stop at the end. See
Documentation/SubmittingPatches.

> Current tar output by git-archive has always root:root.
> To generate tar output with non-root owner/group,
> the options like GNU tar are added.

Grammar/wording fixes (IMHO):

    The ownership of files craeted by git-archive is always
    root:root. Add --owner and --group options which work like the GNU
    tar equivalent to allow overriding these defaults.

> * archive.h: add members 'uid', 'gid', 'uname', 'gname'
>   to struct archiver_args.
>
> * archive.c: add functions to reflect the operands of
>   '--owner' and '--group' to archiver_args.
>
> * archive-tar.c: copy 'uid', 'gid', 'uname', 'gname'
>   from archiver_args to the entry headers in tar archive.
>
> * t/parse-tar-file.py: a script to dump uid, gid, uname,
>   gname fields from a tar archive.
>
> * t/t5005-archive-uid-gid.sh: a test script comparing
>   uid, gid, uname, gname between the options and
>   generated tar file.

I'd skip this whole bit, no need to itemize changes per-file, we see
that from the patch, especially stuff like "add xyz struct field".

> ---
>  archive-tar.c              |  8 +++---
>  archive.c                  | 66 ++++++++++++++++++++++++++++++++++++++++++
>  archive.h                  |  4 +++
>  t/parse-tar-file.py        | 56 ++++++++++++++++++++++++++++++++++++
>  t/t5005-archive-uid-gid.sh | 71 
> ++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 201 insertions(+), 4 deletions(-)
>  create mode 100755 t/parse-tar-file.py
>  create mode 100755 t/t5005-archive-uid-gid.sh
>
> diff --git a/archive-tar.c b/archive-tar.c
> index c6ed96ee7..8546a6229 100644
> --- a/archive-tar.c
> +++ b/archive-tar.c
> @@ -204,10 +204,10 @@ static void prepare_header(struct archiver_args *args,
>       xsnprintf(header->size, sizeof(header->size), "%011lo", S_ISREG(mode) ? 
> size : 0);
>       xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", (unsigned 
> long) args->time);
>
> -     xsnprintf(header->uid, sizeof(header->uid), "%07o", 0);
> -     xsnprintf(header->gid, sizeof(header->gid), "%07o", 0);
> -     strlcpy(header->uname, "root", sizeof(header->uname));
> -     strlcpy(header->gname, "root", sizeof(header->gname));
> +     xsnprintf(header->uid, sizeof(header->uid), "%07o", args->uid);
> +     xsnprintf(header->gid, sizeof(header->gid), "%07o", args->gid);
> +     strlcpy(header->uname, args->uname ? args->uname : "root", 
> sizeof(header->uname));
> +     strlcpy(header->gname, args->gname ? args->gname : "root", 
> sizeof(header->gname));
>       xsnprintf(header->devmajor, sizeof(header->devmajor), "%07o", 0);
>       xsnprintf(header->devminor, sizeof(header->devminor), "%07o", 0);
>
> diff --git a/archive.c b/archive.c
> index 0b7b62af0..db69041f1 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -8,6 +8,7 @@
>  #include "parse-options.h"
>  #include "unpack-trees.h"
>  #include "dir.h"
> +#include "git-compat-util.h"
>
>  static char const * const archive_usage[] = {
>       N_("git archive [<options>] <tree-ish> [<path>...]"),
> @@ -417,6 +418,57 @@ static void parse_treeish_arg(const char **argv,
>       { OPTION_SET_INT, (s), NULL, (v), NULL, "", \
>         PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_HIDDEN, NULL, (p) }
>
> +static void set_args_uname_uid(struct archiver_args *args,
> +             const char* tar_owner, int set_gid_too)
> +{
> +     if (!args || !tar_owner)
> +             return;
> +
> +     const char* col_pos = strchr(tar_owner, ':');
> +     struct passwd* pw = NULL;
> +
> +     if (col_pos) {
> +             args->uname = xstrndup(tar_owner, col_pos - tar_owner);
> +             args->uid = atoi(col_pos + 1);
> +             return;
> +     }
> +
> +     args->uname = xstrndup(tar_owner, strlen(tar_owner));
> +     pw = getpwnam(tar_owner);
> +     if (!pw)
> +             return;

Both here and in set_args_gname_gid you just silently ignore failures
when you can't find the group, is this really the desired behavior?

Also, it seems there's no way to specify a numeric id / group id, e.g. a
reasonable use-case is hardcoding a group that you don't have on the
computer you make the tarball on. Doesn't GNU tar support that as well?

> +     args->uid = pw->pw_uid;
> +     if (set_gid_too)
> +             args->gid = pw->pw_gid;
> +
> +     return;
> +}
> +
> +static void set_args_gname_gid(struct archiver_args *args,
> +             const char* tar_group)
> +{
> +     if (!args || !tar_group)
> +             return;
> +
> +     const char* col_pos = strchr(tar_group, ':');
> +     struct group* gr = NULL;
> +
> +     if (col_pos) {
> +             args->gname = xstrndup(tar_group, col_pos - tar_group);
> +             args->gid = atoi(col_pos + 1);
> +             return;
> +     }
> +
> +     args->gname = xstrndup(tar_group, strlen(tar_group));
> +     gr = getgrnam(tar_group);
> +     if (!gr)
> +             return;
> +
> +     args->gid = gr->gr_gid;
> +     return;
> +}
> +
>  static int parse_archive_args(int argc, const char **argv,
>               const struct archiver **ar, struct archiver_args *args,
>               const char *name_hint, int is_remote)
> @@ -431,6 +483,8 @@ static int parse_archive_args(int argc, const char **argv,
>       int i;
>       int list = 0;
>       int worktree_attributes = 0;
> +     char *tar_owner = NULL;
> +     char *tar_group = NULL;
>       struct option opts[] = {
>               OPT_GROUP(""),
>               OPT_STRING(0, "format", &format, N_("fmt"), N_("archive 
> format")),
> @@ -459,6 +513,8 @@ static int parse_archive_args(int argc, const char **argv,
>                       N_("retrieve the archive from remote repository 
> <repo>")),
>               OPT_STRING(0, "exec", &exec, N_("command"),
>                       N_("path to the remote git-upload-archive command")),
> +             OPT_STRING(0, "owner", &tar_owner, N_("owner"), 
> N_("<name[:uid]> in tar")),
> +             OPT_STRING(0, "group", &tar_group, N_("group"), 
> N_("<name[:gid]> in tar")),
>               OPT_END()
>       };
>
> @@ -507,6 +563,16 @@ static int parse_archive_args(int argc, const char 
> **argv,
>       args->baselen = strlen(base);
>       args->worktree_attributes = worktree_attributes;
>
> +     args->uname = NULL;
> +     args->gname = NULL;
> +     args->uid = 0;
> +     args->gid = 0;
> +     set_args_uname_uid(args,
> +             tar_owner,
> +             1 /* init args->gid by pw, if resolved */);
> +     set_args_gname_gid(args,
> +             tar_group);
> +

Some overzelous line wrapping here. This could all just be on one line.

>       return argc;
>  }
>
> diff --git a/archive.h b/archive.h
> index 62d1d82c1..fad4a5d3e 100644
> --- a/archive.h
> +++ b/archive.h
> @@ -15,6 +15,10 @@ struct archiver_args {
>       unsigned int worktree_attributes : 1;
>       unsigned int convert : 1;
>       int compression_level;
> +     int uid;
> +     int gid;
> +     const char *uname;
> +     const char *gname;
>  };
>
>  #define ARCHIVER_WANT_COMPRESSION_LEVELS 1
> diff --git a/t/parse-tar-file.py b/t/parse-tar-file.py

If you add a helper script please put it in a folder in t/, see "ls
t/*/*.pl" for examples.

> new file mode 100755
> index 000000000..c240a8676
> --- /dev/null
> +++ b/t/parse-tar-file.py
> @@ -0,0 +1,56 @@
> +#!/usr/bin/env python
> +
> +import sys
> +import getopt
> +import tarfile
> +
> +optlist, args = getopt.getopt( sys.argv[1:], "", [
> +                             "print=", "show=",
> +                             "uniq",
> +                             "fail-if-multi",
> +             ] )
> +
> +infos_to_print = []
> +uniq = False
> +fail_if_multi = False
> +
> +for opt in optlist:
> +  if opt[0] == "--print":
> +    infos_to_print.append(opt[1])
> +  elif opt[0] == "--show":
> +    infos_to_print.append(opt[1])
> +  elif opt[0] == "--uniq":
> +    uniq = True
> +  elif opt[0] == "--fail-if-multi":
> +    uniq = True
> +    fail_if_multi = True
> +

Also the Git project uses 4-space indent for Python elsewhere AFACT.

> +if len(infos_to_print) == 0:
> +  infos_to_print = ["uid", "gid", "uname", "gname", "name"]
> +
> +tar = tarfile.open( args[0] )
> +out_lines = []
> +for tarinfo in tar:
> +  infos = []
> +  for info_tag in infos_to_print:
> +    if info_tag == "uid":
> +      infos.append( str(tarinfo.uid) )
> +    elif info_tag == "gid":
> +      infos.append( str(tarinfo.gid) )
> +    elif info_tag == "uname" or info_tag == "owner":
> +      infos.append( tarinfo.uname )
> +    elif info_tag == "gname" or info_tag == "group":
> +      infos.append( tarinfo.gname )
> +    elif info_tag == "name" or info_tag == "pathname":
> +      infos.append( tarinfo.name )
> +  out_lines.append( "\t".join(infos) )
> +tar.close()
> +
> +if uniq:
> +  out_lines = list(set(out_lines))
> +  if fail_if_multi and (len(out_lines) > 1):
> +    sys.stderr.write("*** not unique value, " + str(len(out_lines)) + " 
> values found\n")
> +    sys.exit(len(out_lines))
> +
> +for line in out_lines:
> +  print line
> diff --git a/t/t5005-archive-uid-gid.sh b/t/t5005-archive-uid-gid.sh
> new file mode 100755
> index 000000000..2b2a694d8
> --- /dev/null
> +++ b/t/t5005-archive-uid-gid.sh
> @@ -0,0 +1,71 @@
> +#!/bin/sh
> +
> +test_description='test --owner --group options for git-archive'
> +. ./test-lib.sh
> +
> +check_uid_gid_uname_gname_in_tar() {
> +     # $1 tar pathname
> +     # $2 uid (digit in string)
> +     # $3 gid (digit in string)
> +     # $4 uname (string)
> +     # $5 gname (string)
> +     uid=`./parse-tar-file.py --print=uid --fail-if-multi $1`
> +     if test $? != 0 -o x"${uid}" != "x"$2
> +     then
> +       echo "(some) uid differs from the specified value"
> +       return $?
> +        fi

Very odd indenting going on here. Should be tabs.

> +
> +     gid=`./parse-tar-file.py --print=gid --fail-if-multi $1`
> +     if test $? != 0 -o x"${gid}" != "x"$3
> +     then
> +       echo "(some) gid differs from the specified value"
> +       return $?
> +       exit $?
> +        fi
> +
> +     uname=`./parse-tar-file.py --print=uname --fail-if-multi $1`
> +     if test $? != 0 -o x"${uname}" != "x"$4
> +     then
> +       echo "(some) uname differs from the specified value"
> +       return $?
> +        fi
> +
> +     gname=`./parse-tar-file.py --print=gname --fail-if-multi $1`
> +     if test $? != 0 -o x"${gname}" != "x"$5
> +     then
> +       echo "(some) gname differs from the specified value"
> +       return $?
> +        fi
> +
> +     return 0
> +}
> +
> +git init . 1>/dev/null 2>/dev/null
> +touch uid-gid-test.001
> +mkdir uid-gid-test.002
> +mkdir uid-gid-test.002/uid-gid-test.003
> +git add uid-gid-test.001
> +git add uid-gid-test.002
> +git add uid-gid-test.002/uid-gid-test.003
> +git commit -m "uid-gid-test" 2>/dev/null 1>/dev/null

All of this setup stuff should be in a test, see "git grep
test_expect.*setup" for examples. Also see the helpers we have in
t/README, i.e. "test_commit" etc.

> +test_expect_success 'test a case with explicitly specified name/id, 
> owner=nobody:1234 group=nogroup:5678' '
> +     git archive --format=tar --owner nobody:1234 --group nogroup:5678 HEAD 
> > uid-gid-test1.tar &&
> +     check_uid_gid_uname_gname_in_tar uid-gid-test1.tar 1234 5678 nobody 
> nogroup &&
> +     test_result=$? &&
> +     return ${test_result}
> +'

There's no need to assign $? to a variable and then return it, just make
check_uid_gid_uname_gname_in_tar be the last thing you do in the test.

> +
> +test_expect_success 'test a case with only name is specified, owner=(current 
> my name) group=(current my group)' '
> +     my_uid=`id -u` &&
> +     my_gid=`id -g` &&
> +     my_uname=`id -u -n` &&
> +     my_gname=`id -g -n` &&
> +     git archive --format=tar --owner ${my_uname} --group ${my_gname} HEAD > 
> uid-gid-test2.tar &&
> +     check_uid_gid_uname_gname_in_tar uid-gid-test2.tar ${my_uid} ${my_gid} 
> ${my_uname} ${my_gname} &&
> +     test_result=$? &&
> +     return ${test_result}
> +'
> +
> +test_done

I'd probably notice other stuff if I did more than skim this, but I'll
stop for now...

Reply via email to