git-packv4-create now becomes test-packv4. Code that will not be used
by pack-objects.c is moved to test-packv4.c. This code may be removed
once the transition to pack-objects is complete.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
---
 Makefile            |   4 +-
 packv4-create.c     | 491 +---------------------------------------------------
 packv4-create.h     |  39 +++++
 test-packv4.c (new) | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 525 insertions(+), 485 deletions(-)
 create mode 100644 test-packv4.c

diff --git a/Makefile b/Makefile
index 22fc276..af2e3e3 100644
--- a/Makefile
+++ b/Makefile
@@ -550,7 +550,6 @@ PROGRAM_OBJS += shell.o
 PROGRAM_OBJS += show-index.o
 PROGRAM_OBJS += upload-pack.o
 PROGRAM_OBJS += remote-testsvn.o
-PROGRAM_OBJS += packv4-create.o
 
 # Binary suffix, set to .exe for Windows builds
 X =
@@ -568,6 +567,7 @@ TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
 TEST_PROGRAMS_NEED_X += test-mergesort
 TEST_PROGRAMS_NEED_X += test-mktemp
+TEST_PROGRAMS_NEED_X += test-packv4
 TEST_PROGRAMS_NEED_X += test-parse-options
 TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-prio-queue
@@ -702,6 +702,7 @@ LIB_H += notes.h
 LIB_H += object.h
 LIB_H += pack-revindex.h
 LIB_H += pack.h
+LIB_H += packv4-create.h
 LIB_H += packv4-parse.h
 LIB_H += parse-options.h
 LIB_H += patch-ids.h
@@ -839,6 +840,7 @@ LIB_OBJS += object.o
 LIB_OBJS += pack-check.o
 LIB_OBJS += pack-revindex.o
 LIB_OBJS += pack-write.o
+LIB_OBJS += packv4-create.o
 LIB_OBJS += packv4-parse.o
 LIB_OBJS += pager.o
 LIB_OBJS += parse-options.o
diff --git a/packv4-create.c b/packv4-create.c
index 920a0b4..cdf82c0 100644
--- a/packv4-create.c
+++ b/packv4-create.c
@@ -18,9 +18,9 @@
 #include "packv4-create.h"
 
 
-static int pack_compression_seen;
-static int pack_compression_level = Z_DEFAULT_COMPRESSION;
-static int min_tree_copy = 1;
+int pack_compression_seen;
+int pack_compression_level = Z_DEFAULT_COMPRESSION;
+int min_tree_copy = 1;
 
 struct data_entry {
        unsigned offset;
@@ -28,17 +28,6 @@ struct data_entry {
        unsigned hits;
 };
 
-struct dict_table {
-       unsigned char *data;
-       unsigned cur_offset;
-       unsigned size;
-       struct data_entry *entry;
-       unsigned nb_entries;
-       unsigned max_entries;
-       unsigned *hash;
-       unsigned hash_size;
-};
-
 struct dict_table *create_dict_table(void)
 {
        return xcalloc(sizeof(struct dict_table), 1);
@@ -139,7 +128,7 @@ static int cmp_dict_entries(const void *a_, const void *b_)
        return diff;
 }
 
-static void sort_dict_entries_by_hits(struct dict_table *t)
+void sort_dict_entries_by_hits(struct dict_table *t)
 {
        qsort(t->entry, t->nb_entries, sizeof(*t->entry), cmp_dict_entries);
        t->hash_size = (t->nb_entries * 4 / 3) / 2;
@@ -208,7 +197,7 @@ int add_commit_dict_entries(struct dict_table *commit_ident_table,
        return 0;
 }
 
-static int add_tree_dict_entries(struct dict_table *tree_path_table,
+int add_tree_dict_entries(struct dict_table *tree_path_table,
                                 void *buf, unsigned long size)
 {
        struct tree_desc desc;
@@ -224,7 +213,7 @@ static int add_tree_dict_entries(struct dict_table *tree_path_table,
        return 0;
 }
 
-void dump_dict_table(struct dict_table *t)
+static void dump_dict_table(struct dict_table *t)
 {
        int i;
 
@@ -241,7 +230,7 @@ void dump_dict_table(struct dict_table *t)
        }
 }
 
-static void dict_dump(struct packv4_tables *v4)
+void dict_dump(struct packv4_tables *v4)
 {
        dump_dict_table(v4->commit_ident_table);
        dump_dict_table(v4->tree_path_table);
@@ -611,103 +600,6 @@ void *pv4_encode_tree(const struct packv4_tables *v4,
        return buffer;
 }
 
-static struct pack_idx_entry *get_packed_object_list(struct packed_git *p)
-{
-       unsigned i, nr_objects = p->num_objects;
-       struct pack_idx_entry *objects;
-
-       objects = xmalloc((nr_objects + 1) * sizeof(*objects));
-       objects[nr_objects].offset = p->pack_size - 20;
-       for (i = 0; i < nr_objects; i++) {
-               hashcpy(objects[i].sha1, nth_packed_object_sha1(p, i));
-               objects[i].offset = nth_packed_object_offset(p, i);
-       }
-
-       return objects;
-}
-
-static int sort_by_offset(const void *e1, const void *e2)
-{
-       const struct pack_idx_entry * const *entry1 = e1;
-       const struct pack_idx_entry * const *entry2 = e2;
-       if ((*entry1)->offset < (*entry2)->offset)
-               return -1;
-       if ((*entry1)->offset > (*entry2)->offset)
-               return 1;
-       return 0;
-}
-
-static struct pack_idx_entry **sort_objs_by_offset(struct pack_idx_entry *list,
-                                                   unsigned nr_objects)
-{
-       unsigned i;
-       struct pack_idx_entry **sorted;
-
-       sorted = xmalloc((nr_objects + 1) * sizeof(*sorted));
-       for (i = 0; i < nr_objects + 1; i++)
-               sorted[i] = &list[i];
-       qsort(sorted, nr_objects + 1, sizeof(*sorted), sort_by_offset);
-
-       return sorted;
-}
-
-static int create_pack_dictionaries(struct packv4_tables *v4,
-                                   struct packed_git *p,
-                                   struct pack_idx_entry **obj_list)
-{
-       struct progress *progress_state;
-       unsigned int i;
-
-       v4->commit_ident_table = create_dict_table();
-       v4->tree_path_table = create_dict_table();
-
-       progress_state = start_progress("Scanning objects", p->num_objects);
-       for (i = 0; i < p->num_objects; i++) {
-               struct pack_idx_entry *obj = obj_list[i];
-               void *data;
-               enum object_type type;
-               unsigned long size;
-               struct object_info oi = {};
-               int (*add_dict_entries)(struct dict_table *, void *, unsigned long);
-               struct dict_table *dict;
-
-               display_progress(progress_state, i+1);
-
-               oi.typep = &type;
-               oi.sizep = &size;
-               if (packed_object_info(p, obj->offset, &oi) < 0)
-                       die("cannot get type of %s from %s",
-                           sha1_to_hex(obj->sha1), p->pack_name);
-
-               switch (type) {
-               case OBJ_COMMIT:
-                       add_dict_entries = add_commit_dict_entries;
-                       dict = v4->commit_ident_table;
-                       break;
-               case OBJ_TREE:
-                       add_dict_entries = add_tree_dict_entries;
-                       dict = v4->tree_path_table;
-                       break;
-               default:
-                       continue;
-               }
-               data = unpack_entry(p, obj->offset, &type, &size);
-               if (!data)
-                       die("cannot unpack %s from %s",
-                           sha1_to_hex(obj->sha1), p->pack_name);
-               if (check_sha1_signature(obj->sha1, data, size, typename(type)))
-                       die("packed %s from %s is corrupt",
-                           sha1_to_hex(obj->sha1), p->pack_name);
-               if (add_dict_entries(dict, data, size) < 0)
-                       die("can't process %s object %s",
-                               typename(type), sha1_to_hex(obj->sha1));
-               free(data);
-       }
-
-       stop_progress(&progress_state);
-       return 0;
-}
-
 static unsigned long write_dict_table(struct sha1file *f, struct dict_table *t)
 {
        unsigned char buffer[1024];
@@ -757,28 +649,6 @@ static unsigned long write_dict_table(struct sha1file *f, struct dict_table *t)
        return hdrlen + datalen;
 }
 
-static struct sha1file * packv4_open(char *path)
-{
-       int fd;
-
-       fd = open(path, O_CREAT|O_EXCL|O_WRONLY, 0600);
-       if (fd < 0)
-               die_errno("unable to create '%s'", path);
-       return sha1fd(fd, path);
-}
-
-static unsigned int packv4_write_header(struct sha1file *f, unsigned nr_objects)
-{
-       struct pack_header hdr;
-
-       hdr.hdr_signature = htonl(PACK_SIGNATURE);
-       hdr.hdr_version = htonl(4);
-       hdr.hdr_entries = htonl(nr_objects);
-       sha1write(f, &hdr, sizeof(hdr));
-
-       return sizeof(hdr);
-}
-
 unsigned long packv4_write_tables(struct sha1file *f,
                                  const struct packv4_tables *v4)
 {
@@ -802,350 +672,3 @@ unsigned long packv4_write_tables(struct sha1file *f,
 
        return written;
 }
-
-static int write_object_header(struct sha1file *f, enum object_type type, unsigned long size)
-{
-       unsigned char buf[16];
-       uint64_t val;
-       int len;
-
-       /*
-        * We really have only one kind of delta object.
-        */
-       if (type == OBJ_OFS_DELTA)
-               type = OBJ_REF_DELTA;
-
-       /*
-        * We allocate 4 bits in the LSB for the object type which should
-        * be good for quite a while, given that we effectively encodes
-        * only 5 object types: commit, tree, blob, delta, tag.
-        */
-       val = size;
-       if (MSB(val, 4))
-               die("fixme: the code doesn't currently cope with big sizes");
-       val <<= 4;
-       val |= type;
-       len = encode_varint(val, buf);
-       sha1write(f, buf, len);
-       return len;
-}
-
-static unsigned long copy_object_data(struct packv4_tables *v4,
-                                     struct sha1file *f, struct packed_git *p,
-                                     off_t offset)
-{
-       struct pack_window *w_curs = NULL;
-       struct revindex_entry *revidx;
-       enum object_type type;
-       unsigned long avail, size, datalen, written;
-       int hdrlen, reflen, idx_nr;
-       unsigned char *src, buf[24];
-
-       revidx = find_pack_revindex(p, offset);
-       idx_nr = revidx->nr;
-       datalen = revidx[1].offset - offset;
-
-       src = use_pack(p, &w_curs, offset, &avail);
-       hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
-
-       written = write_object_header(f, type, size);
-
-       if (type == OBJ_OFS_DELTA) {
-               const unsigned char *cp = src + hdrlen;
-               off_t base_offset = decode_varint(&cp);
-               hdrlen = cp - src;
-               base_offset = offset - base_offset;
-               if (base_offset <= 0 || base_offset >= offset)
-                       die("delta offset out of bound");
-               revidx = find_pack_revindex(p, base_offset);
-               reflen = encode_sha1ref(v4,
-                                       nth_packed_object_sha1(p, revidx->nr),
-                                       buf);
-               sha1write(f, buf, reflen);
-               written += reflen;
-       } else if (type == OBJ_REF_DELTA) {
-               reflen = encode_sha1ref(v4, src + hdrlen, buf);
-               hdrlen += 20;
-               sha1write(f, buf, reflen);
-               written += reflen;
-       }
-
-       if (p->index_version > 1 &&
-           check_pack_crc(p, &w_curs, offset, datalen, idx_nr))
-               die("bad CRC for object at offset %"PRIuMAX" in %s",
-                   (uintmax_t)offset, p->pack_name);
-
-       offset += hdrlen;
-       datalen -= hdrlen;
-
-       while (datalen) {
-               src = use_pack(p, &w_curs, offset, &avail);
-               if (avail > datalen)
-                       avail = datalen;
-               sha1write(f, src, avail);
-               written += avail;
-               offset += avail;
-               datalen -= avail;
-       }
-       unuse_pack(&w_curs);
-
-       return written;
-}
-
-static unsigned char *get_delta_base(struct packed_git *p, off_t offset,
-                                    unsigned char *sha1_buf)
-{
-       struct pack_window *w_curs = NULL;
-       enum object_type type;
-       unsigned long avail, size;
-       int hdrlen;
-       unsigned char *src;
-       const unsigned char *base_sha1 = NULL; ;
-
-       src = use_pack(p, &w_curs, offset, &avail);
-       hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
-
-       if (type == OBJ_OFS_DELTA) {
-               const unsigned char *cp = src + hdrlen;
-               off_t base_offset = decode_varint(&cp);
-               base_offset = offset - base_offset;
-               if (base_offset <= 0 || base_offset >= offset) {
-                       error("delta offset out of bound");
-               } else {
-                       struct revindex_entry *revidx;
-                       revidx = find_pack_revindex(p, base_offset);
-                       base_sha1 = nth_packed_object_sha1(p, revidx->nr);
-               }
-       } else if (type == OBJ_REF_DELTA) {
-               base_sha1 = src + hdrlen;
-       } else
-               error("expected to get a delta but got a %s", typename(type));
-
-       unuse_pack(&w_curs);
-
-       if (!base_sha1)
-               return NULL;
-       hashcpy(sha1_buf, base_sha1);
-       return sha1_buf;
-}
-
-static off_t packv4_write_object(struct packv4_tables *v4,
-                                struct sha1file *f, struct packed_git *p,
-                                struct pack_idx_entry *obj)
-{
-       void *src, *result;
-       struct object_info oi = {};
-       enum object_type type, packed_type;
-       unsigned long obj_size, buf_size;
-       unsigned int hdrlen;
-
-       oi.typep = &type;
-       oi.sizep = &obj_size;
-       packed_type = packed_object_info(p, obj->offset, &oi);
-       if (packed_type < 0)
-               die("cannot get type of %s from %s",
-                   sha1_to_hex(obj->sha1), p->pack_name);
-
-       /* Some objects are copied without decompression */
-       switch (type) {
-       case OBJ_COMMIT:
-       case OBJ_TREE:
-               break;
-       default:
-               return copy_object_data(v4, f, p, obj->offset);
-       }
-
-       /* The rest is converted into their new format */
-       src = unpack_entry(p, obj->offset, &type, &buf_size);
-       if (!src || obj_size != buf_size)
-               die("cannot unpack %s from %s",
-                   sha1_to_hex(obj->sha1), p->pack_name);
-       if (check_sha1_signature(obj->sha1, src, buf_size, typename(type)))
-               die("packed %s from %s is corrupt",
-                   sha1_to_hex(obj->sha1), p->pack_name);
-
-       switch (type) {
-       case OBJ_COMMIT:
-               result = pv4_encode_commit(v4, src, &buf_size);
-               break;
-       case OBJ_TREE:
-               if (packed_type != OBJ_TREE) {
-                       unsigned char sha1_buf[20], *ref_sha1;
-                       void *ref;
-                       enum object_type ref_type;
-                       unsigned long ref_size;
-
-                       ref_sha1 = get_delta_base(p, obj->offset, sha1_buf);
-                       if (!ref_sha1)
-                               die("unable to get delta base sha1 for %s",
-                                               sha1_to_hex(obj->sha1));
-                       ref = read_sha1_file(ref_sha1, &ref_type, &ref_size);
-                       if (!ref || ref_type != OBJ_TREE)
-                               die("cannot obtain delta base for %s",
-                                               sha1_to_hex(obj->sha1));
-                       result = pv4_encode_tree(v4, src, &buf_size,
-                                                ref, ref_size, ref_sha1);
-                       free(ref);
-               } else {
-                       result = pv4_encode_tree(v4, src, &buf_size,
-                                                NULL, 0, NULL);
-               }
-               break;
-       default:
-               die("unexpected object type %d", type);
-       }
-       free(src);
-       if (!result) {
-               warning("can't convert %s object %s",
-                       typename(type), sha1_to_hex(obj->sha1));
-               /* fall back to copy the object in its original form */
-               return copy_object_data(v4, f, p, obj->offset);
-       }
-
-       /* Use bit 3 to indicate a special type encoding */
-       type += 8;
-       hdrlen = write_object_header(f, type, obj_size);
-       sha1write(f, result, buf_size);
-       free(result);
-       return hdrlen + buf_size;
-}
-
-static char *normalize_pack_name(const char *path)
-{
-       char buf[PATH_MAX];
-       int len;
-
-       len = strlcpy(buf, path, PATH_MAX);
-       if (len >= PATH_MAX - 6)
-               die("name too long: %s", path);
-
-       /*
-        * In addition to "foo.idx" we accept "foo.pack" and "foo";
-        * normalize these forms to "foo.pack".
-        */
-       if (has_extension(buf, ".idx")) {
-               strcpy(buf + len - 4, ".pack");
-               len++;
-       } else if (!has_extension(buf, ".pack")) {
-               strcpy(buf + len, ".pack");
-               len += 5;
-       }
-
-       return xstrdup(buf);
-}
-
-static struct packed_git *open_pack(const char *path)
-{
-       char *packname = normalize_pack_name(path);
-       int len = strlen(packname);
-       struct packed_git *p;
-
-       strcpy(packname + len - 5, ".idx");
-       p = add_packed_git(packname, len - 1, 1);
-       if (!p)
-               die("packfile %s not found.", packname);
-
-       install_packed_git(p);
-       if (open_pack_index(p))
-               die("packfile %s index not opened", p->pack_name);
-
-       free(packname);
-       return p;
-}
-
-static void process_one_pack(struct packv4_tables *v4, char *src_pack, char *dst_pack)
-{
-       struct packed_git *p;
-       struct sha1file *f;
-       struct pack_idx_entry *objs, **p_objs;
-       struct pack_idx_option idx_opts;
-       unsigned i, nr_objects;
-       off_t written = 0;
-       char *packname;
-       unsigned char pack_sha1[20];
-       struct progress *progress_state;
-
-       p = open_pack(src_pack);
-       if (!p)
-               die("unable to open source pack");
-
-       nr_objects = p->num_objects;
-       objs = get_packed_object_list(p);
-       p_objs = sort_objs_by_offset(objs, nr_objects);
-
-       create_pack_dictionaries(v4, p, p_objs);
-       sort_dict_entries_by_hits(v4->commit_ident_table);
-       sort_dict_entries_by_hits(v4->tree_path_table);
-
-       packname = normalize_pack_name(dst_pack);
-       f = packv4_open(packname);
-       if (!f)
-               die("unable to open destination pack");
-       written += packv4_write_header(f, nr_objects);
-       written += packv4_write_tables(f, v4);
-
-       /* Let's write objects out, updating the object index list in place */
-       progress_state = start_progress("Writing objects", nr_objects);
-       v4->all_objs = objs;
-       v4->all_objs_nr = nr_objects;
-       for (i = 0; i < nr_objects; i++) {
-               off_t obj_pos = written;
-               struct pack_idx_entry *obj = p_objs[i];
-               crc32_begin(f);
-               written += packv4_write_object(v4, f, p, obj);
-               obj->offset = obj_pos;
-               obj->crc32 = crc32_end(f);
-               display_progress(progress_state, i+1);
-       }
-       stop_progress(&progress_state);
-
-       sha1close(f, pack_sha1, CSUM_CLOSE | CSUM_FSYNC);
-
-       reset_pack_idx_option(&idx_opts);
-       idx_opts.version = 3;
-       strcpy(packname + strlen(packname) - 5, ".idx");
-       write_idx_file(packname, p_objs, nr_objects, &idx_opts, pack_sha1);
-
-       free(packname);
-}
-
-static int git_pack_config(const char *k, const char *v, void *cb)
-{
-       if (!strcmp(k, "pack.compression")) {
-               int level = git_config_int(k, v);
-               if (level == -1)
-                       level = Z_DEFAULT_COMPRESSION;
-               else if (level < 0 || level > Z_BEST_COMPRESSION)
-                       die("bad pack compression level %d", level);
-               pack_compression_level = level;
-               pack_compression_seen = 1;
-               return 0;
-       }
-       return git_default_config(k, v, cb);
-}
-
-int main(int argc, char *argv[])
-{
-       struct packv4_tables v4;
-       char *src_pack, *dst_pack;
-
-       if (argc == 3) {
-               src_pack = argv[1];
-               dst_pack = argv[2];
-       } else if (argc == 4 && !prefixcmp(argv[1], "--min-tree-copy=")) {
-               min_tree_copy = atoi(argv[1] + strlen("--min-tree-copy="));
-               src_pack = argv[2];
-               dst_pack = argv[3];
-       } else {
-               fprintf(stderr, "Usage: %s [--min-tree-copy=<n>] <src_packfile> <dst_packfile>\n", argv[0]);
-               exit(1);
-       }
-
-       git_config(git_pack_config, NULL);
-       if (!pack_compression_seen && core_compression_seen)
-               pack_compression_level = core_compression_level;
-       process_one_pack(&v4, src_pack, dst_pack);
-       if (0)
-               dict_dump(&v4);
-       return 0;
-}
diff --git a/packv4-create.h b/packv4-create.h
index 0c8c77b..c1f32fd 100644
--- a/packv4-create.h
+++ b/packv4-create.h
@@ -8,4 +8,43 @@ struct packv4_tables {
        struct dict_table *tree_path_table;
 };
 
+struct dict_table {
+       unsigned char *data;
+       unsigned cur_offset;
+       unsigned size;
+       struct data_entry *entry;
+       unsigned nb_entries;
+       unsigned max_entries;
+       unsigned *hash;
+       unsigned hash_size;
+};
+
+
+struct sha1file;
+
+struct dict_table *create_dict_table(void);
+int dict_add_entry(struct dict_table *t, int val, const char *str, int str_len);
+void destroy_dict_table(struct dict_table *t);
+void dict_dump(struct packv4_tables *v4);
+
+int add_commit_dict_entries(struct dict_table *commit_ident_table,
+                           void *buf, unsigned long size);
+int add_tree_dict_entries(struct dict_table *tree_path_table,
+                         void *buf, unsigned long size);
+void sort_dict_entries_by_hits(struct dict_table *t);
+
+int encode_sha1ref(const struct packv4_tables *v4,
+                  const unsigned char *sha1, unsigned char *buf);
+unsigned long packv4_write_tables(struct sha1file *f,
+                                 const struct packv4_tables *v4);
+void *pv4_encode_commit(const struct packv4_tables *v4,
+                       void *buffer, unsigned long *sizep);
+void *pv4_encode_tree(const struct packv4_tables *v4,
+                     void *_buffer, unsigned long *sizep,
+                     void *delta, unsigned long delta_size,
+                     const unsigned char *delta_sha1);
+
+void process_one_pack(struct packv4_tables *v4,
+                     char *src_pack, char *dst_pack);
+
 #endif
diff --git a/test-packv4.c b/test-packv4.c
new file mode 100644
index 0000000..3b0d7a2
--- /dev/null
+++ b/test-packv4.c
@@ -0,0 +1,476 @@
+#include "cache.h"
+#include "pack.h"
+#include "pack-revindex.h"
+#include "progress.h"
+#include "varint.h"
+#include "packv4-create.h"
+
+extern int pack_compression_seen;
+extern int pack_compression_level;
+extern int min_tree_copy;
+
+static struct pack_idx_entry *get_packed_object_list(struct packed_git *p)
+{
+       unsigned i, nr_objects = p->num_objects;
+       struct pack_idx_entry *objects;
+
+       objects = xmalloc((nr_objects + 1) * sizeof(*objects));
+       objects[nr_objects].offset = p->pack_size - 20;
+       for (i = 0; i < nr_objects; i++) {
+               hashcpy(objects[i].sha1, nth_packed_object_sha1(p, i));
+               objects[i].offset = nth_packed_object_offset(p, i);
+       }
+
+       return objects;
+}
+
+static int sort_by_offset(const void *e1, const void *e2)
+{
+       const struct pack_idx_entry * const *entry1 = e1;
+       const struct pack_idx_entry * const *entry2 = e2;
+       if ((*entry1)->offset < (*entry2)->offset)
+               return -1;
+       if ((*entry1)->offset > (*entry2)->offset)
+               return 1;
+       return 0;
+}
+
+static struct pack_idx_entry **sort_objs_by_offset(struct pack_idx_entry *list,
+                                                   unsigned nr_objects)
+{
+       unsigned i;
+       struct pack_idx_entry **sorted;
+
+       sorted = xmalloc((nr_objects + 1) * sizeof(*sorted));
+       for (i = 0; i < nr_objects + 1; i++)
+               sorted[i] = &list[i];
+       qsort(sorted, nr_objects + 1, sizeof(*sorted), sort_by_offset);
+
+       return sorted;
+}
+
+static int create_pack_dictionaries(struct packv4_tables *v4,
+                                   struct packed_git *p,
+                                   struct pack_idx_entry **obj_list)
+{
+       struct progress *progress_state;
+       unsigned int i;
+
+       v4->commit_ident_table = create_dict_table();
+       v4->tree_path_table = create_dict_table();
+
+       progress_state = start_progress("Scanning objects", p->num_objects);
+       for (i = 0; i < p->num_objects; i++) {
+               struct pack_idx_entry *obj = obj_list[i];
+               void *data;
+               enum object_type type;
+               unsigned long size;
+               struct object_info oi = {};
+               int (*add_dict_entries)(struct dict_table *, void *, unsigned long);
+               struct dict_table *dict;
+
+               display_progress(progress_state, i+1);
+
+               oi.typep = &type;
+               oi.sizep = &size;
+               if (packed_object_info(p, obj->offset, &oi) < 0)
+                       die("cannot get type of %s from %s",
+                           sha1_to_hex(obj->sha1), p->pack_name);
+
+               switch (type) {
+               case OBJ_COMMIT:
+                       add_dict_entries = add_commit_dict_entries;
+                       dict = v4->commit_ident_table;
+                       break;
+               case OBJ_TREE:
+                       add_dict_entries = add_tree_dict_entries;
+                       dict = v4->tree_path_table;
+                       break;
+               default:
+                       continue;
+               }
+               data = unpack_entry(p, obj->offset, &type, &size);
+               if (!data)
+                       die("cannot unpack %s from %s",
+                           sha1_to_hex(obj->sha1), p->pack_name);
+               if (check_sha1_signature(obj->sha1, data, size, typename(type)))
+                       die("packed %s from %s is corrupt",
+                           sha1_to_hex(obj->sha1), p->pack_name);
+               if (add_dict_entries(dict, data, size) < 0)
+                       die("can't process %s object %s",
+                               typename(type), sha1_to_hex(obj->sha1));
+               free(data);
+       }
+
+       stop_progress(&progress_state);
+       return 0;
+}
+
+static struct sha1file * packv4_open(char *path)
+{
+       int fd;
+
+       fd = open(path, O_CREAT|O_EXCL|O_WRONLY, 0600);
+       if (fd < 0)
+               die_errno("unable to create '%s'", path);
+       return sha1fd(fd, path);
+}
+
+static unsigned int packv4_write_header(struct sha1file *f, unsigned nr_objects)
+{
+       struct pack_header hdr;
+
+       hdr.hdr_signature = htonl(PACK_SIGNATURE);
+       hdr.hdr_version = htonl(4);
+       hdr.hdr_entries = htonl(nr_objects);
+       sha1write(f, &hdr, sizeof(hdr));
+
+       return sizeof(hdr);
+}
+
+static int write_object_header(struct sha1file *f, enum object_type type, unsigned long size)
+{
+       unsigned char buf[16];
+       uint64_t val;
+       int len;
+
+       /*
+        * We really have only one kind of delta object.
+        */
+       if (type == OBJ_OFS_DELTA)
+               type = OBJ_REF_DELTA;
+
+       /*
+        * We allocate 4 bits in the LSB for the object type which should
+        * be good for quite a while, given that we effectively encodes
+        * only 5 object types: commit, tree, blob, delta, tag.
+        */
+       val = size;
+       if (MSB(val, 4))
+               die("fixme: the code doesn't currently cope with big sizes");
+       val <<= 4;
+       val |= type;
+       len = encode_varint(val, buf);
+       sha1write(f, buf, len);
+       return len;
+}
+
+/*
+ * Copy the object stored at 'offset' in pack 'p' to 'f' without
+ * decompressing its payload.
+ *
+ * A new object header is written first.  For delta objects the base
+ * reference is re-encoded with encode_sha1ref(); the remaining bytes of
+ * the object are then copied verbatim from the source pack.
+ *
+ * Returns the number of bytes written to 'f'.  Dies on a malformed
+ * object header, an out-of-bound delta offset or a CRC mismatch.
+ */
+static unsigned long copy_object_data(struct packv4_tables *v4,
+                                     struct sha1file *f, struct packed_git *p,
+                                     off_t offset)
+{
+       struct pack_window *w_curs = NULL;
+       struct revindex_entry *revidx;
+       enum object_type type;
+       unsigned long avail, size, datalen, written;
+       int hdrlen, reflen, idx_nr;
+       unsigned char *src, buf[24];
+
+       /* The following revindex entry's offset marks where this object ends. */
+       revidx = find_pack_revindex(p, offset);
+       idx_nr = revidx->nr;
+       datalen = revidx[1].offset - offset;
+
+       src = use_pack(p, &w_curs, offset, &avail);
+       hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
+       /*
+        * NOTE(review): a zero length is taken to mean the header could
+        * not be parsed; confirm against unpack_object_header_buffer().
+        * Continuing would emit a bogus header and copy from the wrong
+        * position.
+        */
+       if (!hdrlen)
+               die("bad object header at offset %"PRIuMAX" in %s",
+                   (uintmax_t)offset, p->pack_name);
+
+       written = write_object_header(f, type, size);
+
+       if (type == OBJ_OFS_DELTA) {
+               /* Turn the relative base offset into a SHA-1 reference. */
+               const unsigned char *cp = src + hdrlen;
+               off_t base_offset = decode_varint(&cp);
+               hdrlen = cp - src;
+               base_offset = offset - base_offset;
+               if (base_offset <= 0 || base_offset >= offset)
+                       die("delta offset out of bound");
+               revidx = find_pack_revindex(p, base_offset);
+               reflen = encode_sha1ref(v4,
+                                       nth_packed_object_sha1(p, revidx->nr),
+                                       buf);
+               sha1write(f, buf, reflen);
+               written += reflen;
+       } else if (type == OBJ_REF_DELTA) {
+               /* The 20-byte base SHA-1 directly follows the header. */
+               reflen = encode_sha1ref(v4, src + hdrlen, buf);
+               hdrlen += 20;
+               sha1write(f, buf, reflen);
+               written += reflen;
+       }
+
+       /* The CRC is only checked for index versions above 1. */
+       if (p->index_version > 1 &&
+           check_pack_crc(p, &w_curs, offset, datalen, idx_nr))
+               die("bad CRC for object at offset %"PRIuMAX" in %s",
+                   (uintmax_t)offset, p->pack_name);
+
+       /* Skip what was re-encoded above; copy the rest untouched. */
+       offset += hdrlen;
+       datalen -= hdrlen;
+
+       while (datalen) {
+               src = use_pack(p, &w_curs, offset, &avail);
+               if (avail > datalen)
+                       avail = datalen;
+               sha1write(f, src, avail);
+               written += avail;
+               offset += avail;
+               datalen -= avail;
+       }
+       unuse_pack(&w_curs);
+
+       return written;
+}
+
+/*
+ * Find the SHA-1 of the delta base of the object stored at 'offset' in
+ * pack 'p'.
+ *
+ * For an offset delta the base is located through the pack's reverse
+ * index; for a ref delta the SHA-1 is read right after the object
+ * header.  On success the SHA-1 is copied into 'sha1_buf' and 'sha1_buf'
+ * is returned.  If the header cannot be parsed, the delta offset is out
+ * of bound or the object is not a delta, an error is reported and NULL
+ * is returned.
+ */
+static unsigned char *get_delta_base(struct packed_git *p, off_t offset,
+                                    unsigned char *sha1_buf)
+{
+       struct pack_window *w_curs = NULL;
+       enum object_type type;
+       unsigned long avail, size;
+       int hdrlen;
+       unsigned char *src;
+       const unsigned char *base_sha1 = NULL;
+
+       src = use_pack(p, &w_curs, offset, &avail);
+       hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
+
+       if (!hdrlen) {
+               /*
+                * NOTE(review): a zero length is taken to mean the header
+                * could not be parsed; confirm against
+                * unpack_object_header_buffer().
+                */
+               error("bad object header at offset %"PRIuMAX" in %s",
+                     (uintmax_t)offset, p->pack_name);
+       } else if (type == OBJ_OFS_DELTA) {
+               const unsigned char *cp = src + hdrlen;
+               off_t base_offset = decode_varint(&cp);
+               base_offset = offset - base_offset;
+               if (base_offset <= 0 || base_offset >= offset) {
+                       error("delta offset out of bound");
+               } else {
+                       struct revindex_entry *revidx;
+                       revidx = find_pack_revindex(p, base_offset);
+                       base_sha1 = nth_packed_object_sha1(p, revidx->nr);
+               }
+       } else if (type == OBJ_REF_DELTA) {
+               base_sha1 = src + hdrlen;
+       } else
+               error("expected to get a delta but got a %s", typename(type));
+
+       unuse_pack(&w_curs);
+
+       if (!base_sha1)
+               return NULL;
+       hashcpy(sha1_buf, base_sha1);
+       return sha1_buf;
+}
+
+/*
+ * Write object 'obj' from source pack 'p' to 'f'.
+ *
+ * Commits and trees are unpacked, verified against their SHA-1 and
+ * converted with pv4_encode_commit() / pv4_encode_tree().  Every other
+ * object type, and any commit or tree whose conversion fails, is copied
+ * in its original form by copy_object_data().
+ *
+ * Returns the number of bytes written to 'f'.  Dies if the object cannot
+ * be read, is corrupt, or a required delta base cannot be obtained.
+ */
+static off_t packv4_write_object(struct packv4_tables *v4,
+                                struct sha1file *f, struct packed_git *p,
+                                struct pack_idx_entry *obj)
+{
+       void *src, *result;
+       struct object_info oi;
+       enum object_type type, packed_type;
+       unsigned long obj_size, buf_size;
+       unsigned int hdrlen;
+
+       /* An empty "{}" initializer is not valid ISO C before C23. */
+       memset(&oi, 0, sizeof(oi));
+       oi.typep = &type;
+       oi.sizep = &obj_size;
+       packed_type = packed_object_info(p, obj->offset, &oi);
+       if (packed_type < 0)
+               die("cannot get type of %s from %s",
+                   sha1_to_hex(obj->sha1), p->pack_name);
+
+       /* Some objects are copied without decompression */
+       switch (type) {
+       case OBJ_COMMIT:
+       case OBJ_TREE:
+               break;
+       default:
+               return copy_object_data(v4, f, p, obj->offset);
+       }
+
+       /* The rest is converted into their new format */
+       src = unpack_entry(p, obj->offset, &type, &buf_size);
+       if (!src || obj_size != buf_size)
+               die("cannot unpack %s from %s",
+                   sha1_to_hex(obj->sha1), p->pack_name);
+       if (check_sha1_signature(obj->sha1, src, buf_size, typename(type)))
+               die("packed %s from %s is corrupt",
+                   sha1_to_hex(obj->sha1), p->pack_name);
+
+       switch (type) {
+       case OBJ_COMMIT:
+               result = pv4_encode_commit(v4, src, &buf_size);
+               break;
+       case OBJ_TREE:
+               if (packed_type != OBJ_TREE) {
+                       /*
+                        * The tree is not stored as a plain tree in the
+                        * source pack: fetch its delta base and hand it
+                        * to the encoder along with the tree itself.
+                        */
+                       unsigned char sha1_buf[20], *ref_sha1;
+                       void *ref;
+                       enum object_type ref_type;
+                       unsigned long ref_size;
+
+                       ref_sha1 = get_delta_base(p, obj->offset, sha1_buf);
+                       if (!ref_sha1)
+                               die("unable to get delta base sha1 for %s",
+                                               sha1_to_hex(obj->sha1));
+                       ref = read_sha1_file(ref_sha1, &ref_type, &ref_size);
+                       if (!ref || ref_type != OBJ_TREE)
+                               die("cannot obtain delta base for %s",
+                                               sha1_to_hex(obj->sha1));
+                       result = pv4_encode_tree(v4, src, &buf_size,
+                                                ref, ref_size, ref_sha1);
+                       free(ref);
+               } else {
+                       result = pv4_encode_tree(v4, src, &buf_size,
+                                                NULL, 0, NULL);
+               }
+               break;
+       default:
+               die("unexpected object type %d", type);
+       }
+       free(src);
+       if (!result) {
+               warning("can't convert %s object %s",
+                       typename(type), sha1_to_hex(obj->sha1));
+               /* fall back to copy the object in its original form */
+               return copy_object_data(v4, f, p, obj->offset);
+       }
+
+       /* Use bit 3 to indicate a special type encoding */
+       type += 8;
+       hdrlen = write_object_header(f, type, obj_size);
+       sha1write(f, result, buf_size);
+       free(result);
+       return hdrlen + buf_size;
+}
+
+/*
+ * Return a newly allocated copy of 'path' normalized to end in ".pack".
+ *
+ * In addition to "foo.idx" we accept "foo.pack" and "foo"; all three
+ * forms yield "foo.pack".  Dies if the name is too long to leave room
+ * for the suffix.  The caller owns the returned string.
+ */
+static char *normalize_pack_name(const char *path)
+{
+       char buf[PATH_MAX];
+       size_t len;
+
+       /* Keep enough headroom to append ".pack" and its terminator. */
+       len = strlcpy(buf, path, PATH_MAX);
+       if (len >= sizeof(buf) - 6)
+               die("name too long: %s", path);
+
+       if (has_extension(buf, ".idx"))
+               strcpy(buf + len - 4, ".pack");
+       else if (!has_extension(buf, ".pack"))
+               strcpy(buf + len, ".pack");
+
+       return xstrdup(buf);
+}
+
+/*
+ * Register the pack named by 'path' (given as "foo", "foo.pack" or
+ * "foo.idx") and open its index.  Dies if the pack cannot be added or
+ * its index cannot be opened; otherwise returns the packed_git.
+ */
+static struct packed_git *open_pack(const char *path)
+{
+       char *idx_name = normalize_pack_name(path);
+       int pack_len = strlen(idx_name);
+       struct packed_git *pack;
+
+       /* Swap the ".pack" suffix for ".idx", which is one byte shorter. */
+       strcpy(idx_name + pack_len - 5, ".idx");
+
+       pack = add_packed_git(idx_name, pack_len - 1, 1);
+       if (!pack)
+               die("packfile %s not found.", idx_name);
+
+       install_packed_git(pack);
+       if (open_pack_index(pack) != 0)
+               die("packfile %s index not opened", pack->pack_name);
+
+       free(idx_name);
+       return pack;
+}
+
+/*
+ * Convert the pack 'src_pack' into a version 4 pack named after
+ * 'dst_pack', together with its index file.
+ *
+ * The source objects are listed and sorted by offset, the dictionaries
+ * are built and sorted by hits, then the header, the tables and every
+ * object are written out.  Each entry's offset and CRC are updated in
+ * place so the same list can be handed to write_idx_file().
+ */
+void process_one_pack(struct packv4_tables *v4, char *src_pack, char *dst_pack)
+{
+       struct packed_git *pack;
+       struct sha1file *out;
+       struct pack_idx_entry *entries, **by_offset;
+       struct pack_idx_option idx_opts;
+       unsigned n, total;
+       off_t pos = 0;
+       char *out_name;
+       unsigned char pack_sha1[20];
+       struct progress *progress;
+
+       pack = open_pack(src_pack);
+       if (!pack)
+               die("unable to open source pack");
+
+       total = pack->num_objects;
+       entries = get_packed_object_list(pack);
+       by_offset = sort_objs_by_offset(entries, total);
+
+       create_pack_dictionaries(v4, pack, by_offset);
+       sort_dict_entries_by_hits(v4->commit_ident_table);
+       sort_dict_entries_by_hits(v4->tree_path_table);
+
+       out_name = normalize_pack_name(dst_pack);
+       out = packv4_open(out_name);
+       if (!out)
+               die("unable to open destination pack");
+       pos += packv4_write_header(out, total);
+       pos += packv4_write_tables(out, v4);
+
+       /* Write the objects out, updating the index entries in place. */
+       v4->all_objs = entries;
+       v4->all_objs_nr = total;
+       progress = start_progress("Writing objects", total);
+       for (n = 0; n < total; n++) {
+               struct pack_idx_entry *entry = by_offset[n];
+               off_t start = pos;
+
+               crc32_begin(out);
+               /* entry->offset must still be the source offset here. */
+               pos += packv4_write_object(v4, out, pack, entry);
+               entry->crc32 = crc32_end(out);
+               entry->offset = start;
+               display_progress(progress, n + 1);
+       }
+       stop_progress(&progress);
+
+       sha1close(out, pack_sha1, CSUM_CLOSE | CSUM_FSYNC);
+
+       /* The index lives next to the pack: swap ".pack" for ".idx". */
+       reset_pack_idx_option(&idx_opts);
+       idx_opts.version = 3;
+       strcpy(out_name + strlen(out_name) - 5, ".idx");
+       write_idx_file(out_name, by_offset, total, &idx_opts, pack_sha1);
+
+       free(out_name);
+}
+
+/*
+ * Config callback: handle "pack.compression" and pass every other key
+ * on to git_default_config().
+ *
+ * A level of -1 selects Z_DEFAULT_COMPRESSION; any other value outside
+ * 0..Z_BEST_COMPRESSION is fatal.
+ */
+static int git_pack_config(const char *k, const char *v, void *cb)
+{
+       int level;
+
+       if (strcmp(k, "pack.compression") != 0)
+               return git_default_config(k, v, cb);
+
+       level = git_config_int(k, v);
+       if (level == -1)
+               level = Z_DEFAULT_COMPRESSION;
+       else if (level < 0 || level > Z_BEST_COMPRESSION)
+               die("bad pack compression level %d", level);
+
+       pack_compression_seen = 1;
+       pack_compression_level = level;
+       return 0;
+}
+
+/*
+ * Usage: test-packv4 [--min-tree-copy=<n>] <src_packfile> <dst_packfile>
+ *
+ * Converts <src_packfile> into a version 4 pack written to
+ * <dst_packfile>.  The compression level comes from "pack.compression",
+ * falling back to the core compression level when only that one was
+ * configured.
+ */
+int main(int argc, char *argv[])
+{
+       struct packv4_tables v4;
+       char *src_pack, *dst_pack;
+
+       if (argc == 3) {
+               src_pack = argv[1];
+               dst_pack = argv[2];
+       } else if (argc == 4 && !prefixcmp(argv[1], "--min-tree-copy=")) {
+               const char *arg = argv[1] + strlen("--min-tree-copy=");
+               char *end;
+               long val;
+
+               /* atoi() would silently turn garbage into 0. */
+               errno = 0;
+               val = strtol(arg, &end, 10);
+               if (end == arg || *end || errno == ERANGE ||
+                   val < INT_MIN || val > INT_MAX)
+                       die("invalid --min-tree-copy value: %s", arg);
+               min_tree_copy = (int)val;
+               src_pack = argv[2];
+               dst_pack = argv[3];
+       } else {
+               fprintf(stderr, "Usage: %s [--min-tree-copy=<n>] "
+                       "<src_packfile> <dst_packfile>\n", argv[0]);
+               exit(1);
+       }
+
+       git_config(git_pack_config, NULL);
+       if (!pack_compression_seen && core_compression_seen)
+               pack_compression_level = core_compression_level;
+       process_one_pack(&v4, src_pack, dst_pack);
+       /* Never executed; keeps a reference to dict_dump(). */
+       if (0)
+               dict_dump(&v4);
+       return 0;
+}
-- 
1.8.2.83.gc99314b

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to