Am 11.05.2018 um 19:42 schrieb Jeff King: > On Fri, May 11, 2018 at 03:34:19PM +0200, Duy Nguyen wrote: > >> On Fri, May 11, 2018 at 03:11:46PM +0200, Duy Nguyen wrote: >>> Back to fast-export, can we just allocate a new int on heap and point >>> it there? Allocating small pieces becomes quite cheap and fast with >>> mem-pool.h and we can avoid this storing integer in pointer business. >> >> Something like this seems to work, but we use 4-ish more bytes per >> object, or 100MB overhead on a repo with 25M objects. I think it's a >> reasonable trade off. > > I'm not sure I agree. 4 bytes per object certainly isn't the end of the > world, but what was the problem we were solving in the first place? Just > that we weren't comfortable with the round-trip from uintptr_t to void > and back? Is this actually a problem on real platforms? If not, it seems > silly to incur a run-time cost.
Storing integer values in pointers is a trick that seems to have worked so far for fast-export. A portable way to avoid that trick without requiring more memory would be to use a union. Or we could roll our own custom hash map, as I mused in an earlier post. That would duplicate quite a bit of code; are there reusable pieces hidden within that could be extracted into common functions? --- builtin/fast-export.c | 105 ++++++++++++++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 24 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 530df12f05..627b0032f3 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -14,7 +14,6 @@ #include "diffcore.h" #include "log-tree.h" #include "revision.h" -#include "decorate.h" #include "string-list.h" #include "utf8.h" #include "parse-options.h" @@ -71,9 +70,65 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt, return 0; } -static struct decoration idnums; +struct object_mark_entry { + const struct object *base; + uint32_t mark; +}; + +struct object_marks { + unsigned int size; + unsigned int nr; + struct object_mark_entry *entries; +}; + +static struct object_marks idnums; static uint32_t last_idnum; +static unsigned int hash_obj(const struct object *obj, unsigned int n) +{ + return sha1hash(obj->oid.hash) % n; +} + +static void set_object_mark(struct object_marks *n, const struct object *base, + uint32_t mark) +{ + unsigned int size = n->size; + struct object_mark_entry *entries = n->entries; + unsigned int j = hash_obj(base, size); + + while (entries[j].base) { + if (entries[j].base == base) { + entries[j].mark = mark; + return; + } + if (++j >= size) + j = 0; + } + entries[j].base = base; + entries[j].mark = mark; + n->nr++; +} + +static void grow_object_marks(struct object_marks *n) +{ + unsigned int i; + unsigned int old_size = n->size; + struct object_mark_entry *old_entries = n->entries; + + n->size = (old_size + 1000) * 3 / 2; + n->entries = xcalloc(n->size, sizeof(n->entries[0])); + n->nr = 0; + + for (i = 0; i < old_size; i++) { + const struct object *base = old_entries[i].base; + uint32_t mark = old_entries[i].mark; + + if (mark) + set_object_mark(n, base, mark); + } + free(old_entries); +} + static int has_unshown_parent(struct commit *commit) { struct commit_list *parent; @@ -156,20 +211,13 @@ static void anonymize_path(struct strbuf *out, const char *path, } } -/* Since intptr_t is C99, we do not use it here */ -static inline uint32_t *mark_to_ptr(uint32_t mark) -{ - return ((uint32_t *)NULL) + mark; -} - -static inline uint32_t ptr_to_mark(void * mark) -{ - return (uint32_t *)mark - (uint32_t *)NULL; -} - static inline void mark_object(struct object *object, uint32_t mark) { - add_decoration(&idnums, object, mark_to_ptr(mark)); + unsigned int nr = idnums.nr + 1; + + if (nr > idnums.size * 2 / 3) + grow_object_marks(&idnums); + return set_object_mark(&idnums, object, mark); } static inline void mark_next_object(struct object *object) @@ -179,10 +227,21 @@ static inline void mark_next_object(struct object *object) static int get_object_mark(struct object *object) { - void *decoration = lookup_decoration(&idnums, object); - if (!decoration) + unsigned int j; + + /* nothing to lookup */ + if (!idnums.size) return 0; - return ptr_to_mark(decoration); + j = hash_obj(object, idnums.size); + for (;;) { + struct object_mark_entry *ref = idnums.entries + j; + if (ref->base == object) + return ref->mark; + if (!ref->base) + return 0; + if (++j == idnums.size) + j = 0; + } } static void show_progress(void) @@ -897,8 +956,7 @@ static void handle_tags_and_duplicates(void) static void export_marks(char *file) { unsigned int i; - uint32_t mark; - struct decoration_entry *deco = idnums.entries; + struct object_mark_entry *entry = idnums.entries; FILE *f; int e = 0; @@ -907,15 +965,14 @@ static void export_marks(char *file) die_errno("Unable to open marks file %s for writing.", file); for (i = 0; i < idnums.size; i++) { - if (deco->base && deco->base->type == 1) { - mark = ptr_to_mark(deco->decoration); - if (fprintf(f, ":%"PRIu32" %s\n", mark, - oid_to_hex(&deco->base->oid)) < 0) { + if (entry->base && entry->base->type == 1) { + if (fprintf(f, ":%"PRIu32" %s\n", entry->mark, + oid_to_hex(&entry->base->oid)) < 0) { e = 1; break; } } - deco++; + entry++; } e |= ferror(f); -- 2.17.0