Thomas Rast <[email protected]> writes:
> Similar to the recursion in packed_object_info(), this leads to
> problems on stack-space-constrained systems in the presence of long
> delta chains.
>
> We proceed in three phases:
>
> 1. Dig through the delta chain, saving each delta object's offsets and
> size on an ad-hoc stack.
>
> 2. Unpack the base object at the bottom.
>
> 3. Unpack and apply the deltas from the stack.
>
> Signed-off-by: Thomas Rast <[email protected]>
> ---
Sounds sensible.
Do we keep the packfiles open that hold the deltas involved in the
chain so that they won't be removed by a concurrent repack? I do
not think this rewrite changes anything with respect to that access
pattern, though.
Will replace and merge to 'next'. Thanks.
> sha1_file.c | 231
> +++++++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 150 insertions(+), 81 deletions(-)
>
> diff --git a/sha1_file.c b/sha1_file.c
> index da41f51..f9191aa 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -1943,68 +1943,6 @@ static void add_delta_base_cache(struct packed_git *p,
> off_t base_offset,
> static void *read_object(const unsigned char *sha1, enum object_type *type,
> unsigned long *size);
>
> -static void *unpack_delta_entry(struct packed_git *p,
> - struct pack_window **w_curs,
> - off_t curpos,
> - unsigned long delta_size,
> - off_t obj_offset,
> - enum object_type *type,
> - unsigned long *sizep)
> -{
> - void *delta_data, *result, *base;
> - unsigned long base_size;
> - off_t base_offset;
> -
> - base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset);
> - if (!base_offset) {
> - error("failed to validate delta base reference "
> - "at offset %"PRIuMAX" from %s",
> - (uintmax_t)curpos, p->pack_name);
> - return NULL;
> - }
> - unuse_pack(w_curs);
> - base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0);
> - if (!base) {
> - /*
> - * We're probably in deep shit, but let's try to fetch
> - * the required base anyway from another pack or loose.
> - * This is costly but should happen only in the presence
> - * of a corrupted pack, and is better than failing outright.
> - */
> - struct revindex_entry *revidx;
> - const unsigned char *base_sha1;
> - revidx = find_pack_revindex(p, base_offset);
> - if (!revidx)
> - return NULL;
> - base_sha1 = nth_packed_object_sha1(p, revidx->nr);
> - error("failed to read delta base object %s"
> - " at offset %"PRIuMAX" from %s",
> - sha1_to_hex(base_sha1), (uintmax_t)base_offset,
> - p->pack_name);
> - mark_bad_packed_object(p, base_sha1);
> - base = read_object(base_sha1, type, &base_size);
> - if (!base)
> - return NULL;
> - }
> -
> - delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size);
> - if (!delta_data) {
> - error("failed to unpack compressed delta "
> - "at offset %"PRIuMAX" from %s",
> - (uintmax_t)curpos, p->pack_name);
> - free(base);
> - return NULL;
> - }
> - result = patch_delta(base, base_size,
> - delta_data, delta_size,
> - sizep);
> - if (!result)
> - die("failed to apply delta");
> - free(delta_data);
> - add_delta_base_cache(p, base_offset, base, base_size, *type);
> - return result;
> -}
> -
> static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
> {
> static FILE *log_file;
> @@ -2025,48 +1963,179 @@ static void write_pack_access_log(struct packed_git
> *p, off_t obj_offset)
>
> int do_check_packed_object_crc;
>
> +#define UNPACK_ENTRY_STACK_PREALLOC 64
> +struct unpack_entry_stack_ent {
> + off_t obj_offset;
> + off_t curpos;
> + unsigned long size;
> +};
> +
> void *unpack_entry(struct packed_git *p, off_t obj_offset,
> - enum object_type *type, unsigned long *sizep)
> + enum object_type *final_type, unsigned long *final_size)
> {
> struct pack_window *w_curs = NULL;
> off_t curpos = obj_offset;
> - void *data;
> + void *data = NULL;
> + unsigned long size;
> + enum object_type type;
> + struct unpack_entry_stack_ent
> small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
> + struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
> + int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
> + int base_from_cache = 0;
>
> if (log_pack_access)
> write_pack_access_log(p, obj_offset);
>
> - if (do_check_packed_object_crc && p->index_version > 1) {
> - struct revindex_entry *revidx = find_pack_revindex(p,
> obj_offset);
> - unsigned long len = revidx[1].offset - obj_offset;
> - if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
> - const unsigned char *sha1 =
> - nth_packed_object_sha1(p, revidx->nr);
> - error("bad packed object CRC for %s",
> - sha1_to_hex(sha1));
> - mark_bad_packed_object(p, sha1);
> - unuse_pack(&w_curs);
> - return NULL;
> + /* PHASE 1: drill down to the innermost base object */
> + for (;;) {
> + off_t base_offset;
> + int i;
> + struct delta_base_cache_entry *ent;
> +
> + if (do_check_packed_object_crc && p->index_version > 1) {
> + struct revindex_entry *revidx = find_pack_revindex(p,
> obj_offset);
> + unsigned long len = revidx[1].offset - obj_offset;
> + if (check_pack_crc(p, &w_curs, obj_offset, len,
> revidx->nr)) {
> + const unsigned char *sha1 =
> + nth_packed_object_sha1(p, revidx->nr);
> + error("bad packed object CRC for %s",
> + sha1_to_hex(sha1));
> + mark_bad_packed_object(p, sha1);
> + unuse_pack(&w_curs);
> + return NULL;
> + }
> + }
> +
> + ent = get_delta_base_cache_entry(p, curpos);
> + if (eq_delta_base_cache_entry(ent, p, curpos)) {
> + type = ent->type;
> + data = ent->data;
> + size = ent->size;
> + clear_delta_base_cache_entry(ent);
> + base_from_cache = 1;
> + break;
> + }
> +
> + type = unpack_object_header(p, &w_curs, &curpos, &size);
> + if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
> + break;
> +
> + base_offset = get_delta_base(p, &w_curs, &curpos, type,
> obj_offset);
> + if (!base_offset) {
> + error("failed to validate delta base reference "
> + "at offset %"PRIuMAX" from %s",
> + (uintmax_t)curpos, p->pack_name);
> + /* bail to phase 2, in hopes of recovery */
> + data = NULL;
> + break;
> }
> +
> + /* push object, proceed to base */
> + if (delta_stack_nr >= delta_stack_alloc
> + && delta_stack == small_delta_stack) {
> + delta_stack_alloc = alloc_nr(delta_stack_nr);
> + delta_stack =
> xmalloc(sizeof(*delta_stack)*delta_stack_alloc);
> + memcpy(delta_stack, small_delta_stack,
> + sizeof(*delta_stack)*delta_stack_nr);
> + } else {
> + ALLOC_GROW(delta_stack, delta_stack_nr+1,
> delta_stack_alloc);
> + }
> + i = delta_stack_nr++;
> + delta_stack[i].obj_offset = obj_offset;
> + delta_stack[i].curpos = curpos;
> + delta_stack[i].size = size;
> +
> + curpos = obj_offset = base_offset;
> }
>
> - *type = unpack_object_header(p, &w_curs, &curpos, sizep);
> - switch (*type) {
> + /* PHASE 2: handle the base */
> + switch (type) {
> case OBJ_OFS_DELTA:
> case OBJ_REF_DELTA:
> - data = unpack_delta_entry(p, &w_curs, curpos, *sizep,
> - obj_offset, type, sizep);
> + if (data)
> + die("BUG in unpack_entry: left loop at a valid delta");
> break;
> case OBJ_COMMIT:
> case OBJ_TREE:
> case OBJ_BLOB:
> case OBJ_TAG:
> - data = unpack_compressed_entry(p, &w_curs, curpos, *sizep);
> + if (!base_from_cache)
> + data = unpack_compressed_entry(p, &w_curs, curpos,
> size);
> break;
> default:
> data = NULL;
> error("unknown object type %i at offset %"PRIuMAX" in %s",
> - *type, (uintmax_t)obj_offset, p->pack_name);
> + type, (uintmax_t)obj_offset, p->pack_name);
> }
> +
> + /* PHASE 3: apply deltas in order */
> +
> + /* invariants:
> + * 'data' holds the base data, or NULL if there was corruption
> + */
> + while (delta_stack_nr) {
> + void *delta_data;
> + void *base = data;
> + unsigned long delta_size, base_size = size;
> + int i;
> +
> + data = NULL;
> +
> + if (base)
> + add_delta_base_cache(p, obj_offset, base, base_size,
> type);
> +
> + if (!base) {
> + /*
> + * We're probably in deep shit, but let's try to fetch
> + * the required base anyway from another pack or loose.
> + * This is costly but should happen only in the presence
> + * of a corrupted pack, and is better than failing
> outright.
> + */
> + struct revindex_entry *revidx;
> + const unsigned char *base_sha1;
> + revidx = find_pack_revindex(p, obj_offset);
> + if (revidx) {
> + base_sha1 = nth_packed_object_sha1(p,
> revidx->nr);
> + error("failed to read delta base object %s"
> + " at offset %"PRIuMAX" from %s",
> + sha1_to_hex(base_sha1),
> (uintmax_t)obj_offset,
> + p->pack_name);
> + mark_bad_packed_object(p, base_sha1);
> + base = read_object(base_sha1, &type,
> &base_size);
> + }
> + }
> +
> + i = --delta_stack_nr;
> + obj_offset = delta_stack[i].obj_offset;
> + curpos = delta_stack[i].curpos;
> + delta_size = delta_stack[i].size;
> +
> + if (!base)
> + continue;
> +
> + delta_data = unpack_compressed_entry(p, &w_curs, curpos,
> delta_size);
> +
> + if (!delta_data) {
> + error("failed to unpack compressed delta "
> + "at offset %"PRIuMAX" from %s",
> + (uintmax_t)curpos, p->pack_name);
> + free(base);
> + data = NULL;
> + continue;
> + }
> +
> + data = patch_delta(base, base_size,
> + delta_data, delta_size,
> + &size);
> + if (!data)
> + die("failed to apply delta");
> +
> + free (delta_data);
> + }
> +
> + *final_type = type;
> + *final_size = size;
> +
> unuse_pack(&w_curs);
> return data;
> }
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html