From: Jeff Hostetler <g...@jeffhostetler.com> Added a command to list the missing blobs for a commit. This can be used after a partial clone or fetch to list the omitted blobs that the client would need to checkout the given commit/branch. Optionally respecting or ignoring the current sparse-checkout definition.
This command prints a simple list of blob SHAs. It is expected that this would be piped into another command with knowledge of the transport and/or blob store. Signed-off-by: Jeff Hostetler <jeffh...@microsoft.com> --- Makefile | 2 + builtin.h | 1 + builtin/ls-partial.c | 110 ++++++++++++++++++++ git.c | 1 + partial-utils.c | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++ partial-utils.h | 93 +++++++++++++++++ 6 files changed, 486 insertions(+) create mode 100644 builtin/ls-partial.c create mode 100644 partial-utils.c create mode 100644 partial-utils.h diff --git a/Makefile b/Makefile index 9ec6065..96e9e1e 100644 --- a/Makefile +++ b/Makefile @@ -791,6 +791,7 @@ LIB_OBJS += pack-write.o LIB_OBJS += pager.o LIB_OBJS += parse-options.o LIB_OBJS += parse-options-cb.o +LIB_OBJS += partial-utils.o LIB_OBJS += patch-delta.o LIB_OBJS += patch-ids.o LIB_OBJS += path.o @@ -908,6 +909,7 @@ BUILTIN_OBJS += builtin/init-db.o BUILTIN_OBJS += builtin/interpret-trailers.o BUILTIN_OBJS += builtin/log.o BUILTIN_OBJS += builtin/ls-files.o +BUILTIN_OBJS += builtin/ls-partial.o BUILTIN_OBJS += builtin/ls-remote.o BUILTIN_OBJS += builtin/ls-tree.o BUILTIN_OBJS += builtin/mailinfo.o diff --git a/builtin.h b/builtin.h index 9e4a898..df00c4b 100644 --- a/builtin.h +++ b/builtin.h @@ -79,6 +79,7 @@ extern int cmd_interpret_trailers(int argc, const char **argv, const char *prefi extern int cmd_log(int argc, const char **argv, const char *prefix); extern int cmd_log_reflog(int argc, const char **argv, const char *prefix); extern int cmd_ls_files(int argc, const char **argv, const char *prefix); +extern int cmd_ls_partial(int argc, const char **argv, const char *prefix); extern int cmd_ls_tree(int argc, const char **argv, const char *prefix); extern int cmd_ls_remote(int argc, const char **argv, const char *prefix); extern int cmd_mailinfo(int argc, const char **argv, const char *prefix); diff --git a/builtin/ls-partial.c b/builtin/ls-partial.c new file mode 100644 index 0000000..8ebf045 --- /dev/null +++ b/builtin/ls-partial.c @@ -0,0 +1,110 @@ +#include "cache.h" +#include "blob.h" +#include "tree.h" +#include "commit.h" +#include "quote.h" +#include "builtin.h" +#include "parse-options.h" +#include "pathspec.h" +#include "dir.h" +#include "partial-utils.h" + +static struct trace_key trace_partial = TRACE_KEY_INIT(PARTIAL); + +static int verbose; +static int ignore_sparse; +struct exclude_list el; + +static const char * const ls_partial_usage[] = { + N_("git ls-partial [<options>] <tree-ish>"), + NULL +}; + +/* + * map <tree-ish> arg into SHA1 and get the root treenode. + */ +static struct tree *lookup_tree_from_treeish(const char *arg) +{ + unsigned char sha1[20]; + struct tree *tree; + + if (get_sha1(arg, sha1)) + die("not a valid object name '%s'", arg); + + trace_printf_key( + &trace_partial, + "ls-partial: treeish '%s' '%s'\n", + arg, sha1_to_hex(sha1)); + + if (verbose) { + printf("commit\t%s\n", sha1_to_hex(sha1)); + printf("branch\t%s\n", arg); + } + + tree = parse_tree_indirect(sha1); + if (!tree) + die("not a tree object '%s'", arg); + + return tree; +} + +static void print_results(const struct pu_vec *vec) +{ + int k; + + for (k = 0; k < vec->data_nr; k++) + printf("%s\n", oid_to_hex(&vec->data[k]->oid)); +} + +static void print_results_verbose(const struct pu_vec *vec) +{ + int k; + + /* TODO Consider -z version */ + + for (k = 0; k < vec->data_nr; k++) + printf("%s\t%s\n", oid_to_hex(&vec->data[k]->oid), vec->data[k]->fullpath.buf); +} + +int cmd_ls_partial(int argc, const char **argv, const char *prefix) +{ + struct exclude_list el; + struct tree *tree; + struct pu_vec *vec; + struct pu_vec *vec_all = NULL; + struct pu_vec *vec_sparse = NULL; + struct pu_vec *vec_missing = NULL; + + const struct option ls_partial_options[] = { + OPT__VERBOSE(&verbose, N_("show verbose blob details")), + OPT_BOOL(0, "ignore-sparse", &ignore_sparse, + N_("ignore sparse-checkout settings (scan whole tree)")), + OPT_END() + }; + + git_config(git_default_config, NULL); + argc = parse_options(argc, argv, prefix, + ls_partial_options, ls_partial_usage, 0); + if (argc < 1) + usage_with_options(ls_partial_usage, ls_partial_options); + + tree = lookup_tree_from_treeish(argv[0]); + + vec_all = pu_vec_ls_tree(tree, prefix, argv + 1); + if (ignore_sparse || pu_load_sparse_definitions("info/sparse-checkout", &el) < 0) + vec = vec_all; + else { + vec_sparse = pu_vec_filter_sparse(vec_all, &el); + vec = vec_sparse; + } + + vec_missing = pu_vec_filter_missing(vec); + vec = vec_missing; + + if (verbose) + print_results_verbose(vec); + else + print_results(vec); + + return 0; +} diff --git a/git.c b/git.c index 33f52ac..ef1e019 100644 --- a/git.c +++ b/git.c @@ -444,6 +444,7 @@ static struct cmd_struct commands[] = { { "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY }, { "log", cmd_log, RUN_SETUP }, { "ls-files", cmd_ls_files, RUN_SETUP | SUPPORT_SUPER_PREFIX }, + { "ls-partial", cmd_ls_partial, RUN_SETUP }, { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, { "ls-tree", cmd_ls_tree, RUN_SETUP }, { "mailinfo", cmd_mailinfo, RUN_SETUP_GENTLY }, diff --git a/partial-utils.c b/partial-utils.c new file mode 100644 index 0000000..b75e91e --- /dev/null +++ b/partial-utils.c @@ -0,0 +1,279 @@ +#include "cache.h" +#include "blob.h" +#include "tree.h" +#include "commit.h" +#include "quote.h" +#include "builtin.h" +#include "parse-options.h" +#include "pathspec.h" +#include "dir.h" +#include "partial-utils.h" + +static struct trace_key trace_partial_utils = TRACE_KEY_INIT(PARTIAL_UTILS); + +void pu_row_trace( + const struct pu_row *row, + const char *label) +{ + trace_printf_key( + &trace_partial_utils, + "%s: %06o %s %.*s\n", + label, + row->mode, + oid_to_hex(&row->oid), + (int)row->fullpath.len, + row->fullpath.buf); +} + +struct pu_row *pu_row_alloc( + const unsigned char *sha1, + const struct strbuf *base, + const char *entryname, + unsigned mode) +{ + struct pu_row *row = xcalloc(1, sizeof(struct pu_row)); + + hashcpy(row->oid.hash, sha1); + strbuf_init(&row->fullpath, base->len + strlen(entryname) + 1); + if (base->len) + strbuf_addbuf(&row->fullpath, base); + strbuf_addstr(&row->fullpath, entryname); + row->mode = mode; + row->entryname_offset = base->len; + + pu_row_trace(row, "alloc"); + + return row; +} + +struct pu_vec *pu_vec_alloc( + unsigned int nr_pre_alloc) +{ + struct pu_vec *vec = xcalloc(1, sizeof(struct pu_vec)); + + vec->data = xcalloc(nr_pre_alloc, sizeof(struct pu_row *)); + vec->data_alloc = nr_pre_alloc; + + return vec; +} + +void pu_vec_append( + struct pu_vec *vec, + struct pu_row *row) +{ + ALLOC_GROW(vec->data, vec->data_nr + 1, vec->data_alloc); + vec->data[vec->data_nr++] = row; +} + +static int ls_tree_cb( + const unsigned char *sha1, + struct strbuf *base, + const char *pathname, + unsigned mode, + int stage, + void *context) +{ + struct pu_vec *vec = (struct pu_vec *)context; + + /* omit submodules */ + if (S_ISGITLINK(mode)) + return 0; + + pu_vec_append(vec, pu_row_alloc(sha1, base, pathname, mode)); + + if (S_ISDIR(mode)) + return READ_TREE_RECURSIVE; + + return 0; +} + +struct pu_vec *pu_vec_ls_tree( + struct tree *tree, + const char *prefix, + const char **argv) +{ + struct pu_vec *vec; + struct pathspec pathspec; + int k; + + vec = pu_vec_alloc(PU_VEC_DEFAULT_SIZE); + + parse_pathspec( + &pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE | PATHSPEC_EXCLUDE, + PATHSPEC_PREFER_CWD, prefix, argv); + for (k = 0; k < pathspec.nr; k++) + pathspec.items[k].nowildcard_len = pathspec.items[k].len; + pathspec.has_wildcard = 0; + + if (read_tree_recursive(tree, "", 0, 0, &pathspec, ls_tree_cb, vec) != 0) + die("Could not read tree"); + + return vec; +} + +int pu_load_sparse_definitions( + const char *path, + struct exclude_list *pel) +{ + int result; + char *sparse = git_pathdup("info/sparse-checkout"); + memset(pel, 0, sizeof(*pel)); + result = add_excludes_from_file_to_list(sparse, "", 0, pel, 0); + free(sparse); + return result; +} + +static int mode_to_dtype(unsigned mode) +{ + if (S_ISREG(mode)) + return DT_REG; + if (S_ISDIR(mode) || S_ISGITLINK(mode)) + return DT_DIR; + if (S_ISLNK(mode)) + return DT_LNK; + return DT_UNKNOWN; +} + +static int apply_excludes_1( + struct pu_row **subset, + unsigned int nr, + struct strbuf *prefix, + struct exclude_list *pel, + int defval, + struct pu_vec *vec_out); + +/* apply directory rules. based on clear_ce_flags_dir() */ +static int apply_excludes_dir( + struct pu_row **subset, + unsigned int nr, + struct strbuf *prefix, + char *basename, + struct exclude_list *pel, + int defval, + struct pu_vec *vec_out) +{ + struct pu_row **subset_end; + int dtype = DT_DIR; + int ret = is_excluded_from_list( + prefix->buf, prefix->len, basename, &dtype, pel); + int rc; + + strbuf_addch(prefix, '/'); + + if (ret < 0) + ret = defval; + + for (subset_end = subset; subset_end != subset + nr; subset_end++) { + struct pu_row *row = *subset_end; + if (strncmp(row->fullpath.buf, prefix->buf, prefix->len)) + break; + } + + rc = apply_excludes_1( + subset, subset_end - subset, + prefix, pel, ret, + vec_out); + strbuf_setlen(prefix, prefix->len - 1); + return rc; +} + +/* apply sparse rules to subset[0..nr). based on clear_ce_flags_1() */ +static int apply_excludes_1( + struct pu_row **subset, + unsigned int nr, + struct strbuf *prefix, + struct exclude_list *pel, + int defval, + struct pu_vec *vec_out) +{ + struct pu_row **subset_end = subset + nr; + + while (subset != subset_end) { + struct pu_row *row = *subset; + const char *name, *slash; + int len, dtype, val; + + if (prefix->len && strncmp(row->fullpath.buf, prefix->buf, prefix->len)) + break; + + name = row->fullpath.buf + prefix->len; + slash = strchr(name, '/'); + + if (slash) { + int processed; + + len = slash - name; + strbuf_add(prefix, name, len); + + processed = apply_excludes_dir( + subset, subset_end - subset, + prefix, prefix->buf + prefix->len - len, + pel, defval, + vec_out); + + if (processed) { + subset += processed; + strbuf_setlen(prefix, prefix->len - len); + continue; + } + + strbuf_addch(prefix, '/'); + subset += apply_excludes_1( + subset, subset_end - subset, + prefix, pel, defval, + vec_out); + strbuf_setlen(prefix, prefix->len - len - 1); + continue; + } + + dtype = mode_to_dtype(row->mode); + val = is_excluded_from_list( + row->fullpath.buf, row->fullpath.len, name, &dtype, pel); + if (val < 0) + val = defval; + if (val > 0) { + pu_row_trace(row, "sparse"); + pu_vec_append(vec_out, row); + } + subset++; + } + + return nr - (subset_end - subset); +} + +struct pu_vec *pu_vec_filter_sparse( + const struct pu_vec *vec_in, + struct exclude_list *pel) +{ + struct pu_vec *vec_out; + struct strbuf prefix = STRBUF_INIT; + int defval = 0; + + vec_out = pu_vec_alloc(vec_in->data_nr); + + apply_excludes_1( + vec_in->data, vec_in->data_nr, + &prefix, pel, defval, + vec_out); + + return vec_out; +} + +struct pu_vec *pu_vec_filter_missing( + const struct pu_vec *vec_in) +{ + struct pu_vec *vec_out; + int k; + + vec_out = pu_vec_alloc(vec_in->data_nr); + + for (k = 0; k < vec_in->data_nr; k++) { + struct pu_row *row = vec_in->data[k]; + if (!has_sha1_file(row->oid.hash)) { + pu_row_trace(row, "missing"); + pu_vec_append(vec_out, row); + } + } + + return vec_out; +} diff --git a/partial-utils.h b/partial-utils.h new file mode 100644 index 0000000..3bdf2e4 --- /dev/null +++ b/partial-utils.h @@ -0,0 +1,93 @@ +#ifndef PARTIAL_UTILS_H +#define PARTIAL_UTILS_H + +/* + * A 'partial-utils row' represents a single item in the tree. + * This is conceptually equivalent to a cache_entry, but does + * not require an index_state and lets us operate on any commit + * and not be tied to the current worktree. + */ +struct pu_row +{ + struct strbuf fullpath; + struct object_id oid; + unsigned mode; + unsigned entryname_offset; +}; + +/* + * A 'partial-utils vec' represents a vector of 'pu row' + * values using the normal vector machinery. + */ +struct pu_vec +{ + struct pu_row **data; + unsigned int data_nr; + unsigned int data_alloc; +}; + +#define PU_VEC_DEFAULT_SIZE (1024*1024) + + +void pu_row_trace( + const struct pu_row *row, + const char *label); + +struct pu_row *pu_row_alloc( + const unsigned char *sha1, + const struct strbuf *base, + const char *entryname, + unsigned mode); + +struct pu_vec *pu_vec_alloc( + unsigned int nr_pre_alloc); + +/* + * Append the given row onto the vector WITHOUT + * assuming ownership of the pointer. + */ +void pu_vec_append( + struct pu_vec *vec, + struct pu_row *row); + +/* + * Enumerate the contents of the tree (recursively) into + * a vector of rows. This is essentially "ls-tree -r -t" + * into a vector. + */ +struct pu_vec *pu_vec_ls_tree( + struct tree *tree, + const char *prefix, + const char **argv); + +/* + * Load a sparse-checkout file into (*pel). + * Returns -1 if none or error. + */ +int pu_load_sparse_definitions( + const char *path, + struct exclude_list *pel); + +/* + * Filter the given vector using the sparse-checkout + * definitions and return new vector of just the paths + * that WOULD BE populated. + * + * The returned vector BORROWS rows from the input vector. + * + * This is loosely based upon clear_ce_flags() in unpack-trees.c + */ +struct pu_vec *pu_vec_filter_sparse( + const struct pu_vec *vec_in, + struct exclude_list *pel); + +/* + * Filter the given vector and return the list of blobs + * missing from the local ODB. + * + * The returned vector BORROWS rows from the input vector. + */ +struct pu_vec *pu_vec_filter_missing( + const struct pu_vec *vec_in); + +#endif /* PARTIAL_UTILS_H */ -- 2.7.4