Re: [RFC/WIP PATCH] object store classification

Junio C Hamano Thu, 06 Jul 2017 19:34:16 -0700

Stefan Beller <[email protected]> writes:

> Subject: Re: [RFC/WIP PATCH] object store classification


I thought you are writing different object-store backends and
classifying them into many categories (e.g. local, networked,
telepathic, etc.)

It is a logical next step to put per-repository things into
the_repository.  I skimmed the patch and the changes smelled sane,
but I didn't read it really carefully with fine toothed comb.

Thanks.  The remainder kept for reference, but no additional
comments in there.

> This continues the efforts of bw/repo-object, making our code base
> more object oriented by adding the object store to the the repository struct.
>
> Long term goal of the series that would evolve from this discussion:
> * Easier to implement submodule features as it can be in the same process.
>
> Short term goal:
> * get rid of 'add_submodule_odb' in submodule.c
>   When fetching or pushing we need to determine if a submodule needs 
> processing
>   by finding out if certain commits are present.  To do this we add the 
> submodule
>   objects as an alternate object database and do the processing in the same
>   process. In case of multiple submodules, this would pollute the object store
>   hence being slower and increasing memory usage, too.
>
> This patch only changes the object store code to be object oriented based
> on the repository struct.
>
> To go for the short term goal we'd need to convert a few more places, mostly
> all the construction (blob.c, tree.c, commit.c)
>
> This is marked RFC as I'd want to gather feedback if the approach, presented
> in the header files is sound.
>
> Thanks!
>
> Signed-off-by: Stefan Beller <[email protected]>
> ---
>
>   Peff, this area of the code seems performance sensitive for a variety of
>   use cases, specifically yours. So I cc'd you here. :)
>
>  object.c     | 77 
> +++++++++++++++++++++++++++++++++++-------------------------
>  object.h     | 51 ++++++++++++++++++++++++++++++++--------
>  repository.h |  6 +++++
>  3 files changed, 92 insertions(+), 42 deletions(-)
>
> diff --git a/object.c b/object.c
> index 06ba3a11d8..b5ec0bb2f9 100644
> --- a/object.c
> +++ b/object.c
> @@ -5,17 +5,15 @@
>  #include "commit.h"
>  #include "tag.h"
>  
> -static struct object **obj_hash;
> -static int nr_objs, obj_hash_size;
> -
> -unsigned int get_max_object_index(void)
> +unsigned int object_store_get_max_index(struct repository *r)
>  {
> -     return obj_hash_size;
> +     return r->objects.obj_hash_size;
>  }
>  
> -struct object *get_indexed_object(unsigned int idx)
> +struct object *object_store_get_indexed(struct repository *r,
> +                                     unsigned int idx)
>  {
> -     return obj_hash[idx];
> +     return r->objects.obj_hash[idx];
>  }
>  
>  static const char *object_type_strings[] = {
> @@ -82,11 +80,15 @@ static void insert_obj_hash(struct object *obj, struct 
> object **hash, unsigned i
>   * Look up the record for the given sha1 in the hash map stored in
>   * obj_hash.  Return NULL if it was not found.
>   */
> -struct object *lookup_object(const unsigned char *sha1)
> +struct object *object_store_lookup(struct repository *r,
> +                                const unsigned char *sha1)
>  {
>       unsigned int i, first;
>       struct object *obj;
>  
> +     struct object **obj_hash = r->objects.obj_hash;
> +     int obj_hash_size = r->objects.obj_hash_size;
> +
>       if (!obj_hash)
>               return NULL;
>  
> @@ -114,29 +116,31 @@ struct object *lookup_object(const unsigned char *sha1)
>   * power of 2 (but at least 32).  Copy the existing values to the new
>   * hash map.
>   */
> -static void grow_object_hash(void)
> +static void grow_object_hash(struct repository *r)
>  {
>       int i;
>       /*
>        * Note that this size must always be power-of-2 to match hash_obj
>        * above.
>        */
> -     int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
> +     int new_hash_size = r->objects.obj_hash_size < 32 ?
> +                     32 : 2 * r->objects.obj_hash_size;
>       struct object **new_hash;
>  
>       new_hash = xcalloc(new_hash_size, sizeof(struct object *));
> -     for (i = 0; i < obj_hash_size; i++) {
> -             struct object *obj = obj_hash[i];
> +     for (i = 0; i < r->objects.obj_hash_size; i++) {
> +             struct object *obj = r->objects.obj_hash[i];
>               if (!obj)
>                       continue;
>               insert_obj_hash(obj, new_hash, new_hash_size);
>       }
> -     free(obj_hash);
> -     obj_hash = new_hash;
> -     obj_hash_size = new_hash_size;
> +     free(r->objects.obj_hash);
> +     r->objects.obj_hash = new_hash;
> +     r->objects.obj_hash_size = new_hash_size;
>  }
>  
> -void *create_object(const unsigned char *sha1, void *o)
> +void *object_store_create(struct repository *r,
> +                       const unsigned char *sha1, void *o)
>  {
>       struct object *obj = o;
>  
> @@ -145,15 +149,17 @@ void *create_object(const unsigned char *sha1, void *o)
>       obj->flags = 0;
>       hashcpy(obj->oid.hash, sha1);
>  
> -     if (obj_hash_size - 1 <= nr_objs * 2)
> -             grow_object_hash();
> +     if (r->objects.obj_hash_size - 1 <= r->objects.nr_objs * 2)
> +             grow_object_hash(r);
>  
> -     insert_obj_hash(obj, obj_hash, obj_hash_size);
> -     nr_objs++;
> +     insert_obj_hash(obj, r->objects.obj_hash, r->objects.obj_hash_size);
> +     r->objects.nr_objs++;
>       return obj;
>  }
>  
> -void *object_as_type(struct object *obj, enum object_type type, int quiet)
> +void *object_as_type(struct object *obj,
> +                  enum object_type type,
> +                  int quiet)
>  {
>       if (obj->type == type)
>               return obj;
> @@ -172,15 +178,20 @@ void *object_as_type(struct object *obj, enum 
> object_type type, int quiet)
>       }
>  }
>  
> -struct object *lookup_unknown_object(const unsigned char *sha1)
> +struct object *object_store_lookup_unknown_object(struct repository *r,
> +                                               const unsigned char *sha1)
>  {
> -     struct object *obj = lookup_object(sha1);
> +     struct object *obj = object_store_lookup(r, sha1);
>       if (!obj)
> -             obj = create_object(sha1, alloc_object_node());
> +             obj = object_store_create(r, sha1, alloc_object_node());
>       return obj;
>  }
>  
> -struct object *parse_object_buffer(const struct object_id *oid, enum 
> object_type type, unsigned long size, void *buffer, int *eaten_p)
> +struct object *object_store_parse_object_buffer(struct repository *r,
> +                                             const struct object_id *oid,
> +                                             enum object_type type,
> +                                             unsigned long size,
> +                                             void *buffer, int *eaten_p)
>  {
>       struct object *obj;
>       *eaten_p = 0;
> @@ -230,17 +241,19 @@ struct object *parse_object_buffer(const struct 
> object_id *oid, enum object_type
>       return obj;
>  }
>  
> -struct object *parse_object_or_die(const struct object_id *oid,
> -                                const char *name)
> +struct object *object_store_parse_object_or_die(struct repository *r,
> +                                             const struct object_id *oid,
> +                                             const char *name)
>  {
> -     struct object *o = parse_object(oid);
> +     struct object *o = object_store_parse_object(r, oid);
>       if (o)
>               return o;
>  
>       die(_("unable to parse object: %s"), name ? name : oid_to_hex(oid));
>  }
>  
> -struct object *parse_object(const struct object_id *oid)
> +struct object *object_store_parse_object(struct repository *r,
> +                                      const struct object_id *oid)
>  {
>       unsigned long size;
>       enum object_type type;
> @@ -413,12 +426,12 @@ void object_array_remove_duplicates(struct object_array 
> *array)
>       }
>  }
>  
> -void clear_object_flags(unsigned flags)
> +void object_store_clear_object_flags(struct repository *r, unsigned flags)
>  {
>       int i;
>  
> -     for (i=0; i < obj_hash_size; i++) {
> -             struct object *obj = obj_hash[i];
> +     for (i = 0; i < r->objects.obj_hash_size; i++) {
> +             struct object *obj = r->objects.obj_hash[i];
>               if (obj)
>                       obj->flags &= ~flags;
>       }
> diff --git a/object.h b/object.h
> index 33e5cc9943..1b2ab8ee88 100644
> --- a/object.h
> +++ b/object.h
> @@ -1,6 +1,8 @@
>  #ifndef OBJECT_H
>  #define OBJECT_H
>  
> +#include "repository.h"
> +
>  struct object_list {
>       struct object *item;
>       struct object_list *next;
> @@ -59,12 +61,15 @@ extern int type_from_string_gently(const char *str, 
> ssize_t, int gentle);
>  /*
>   * Return the current number of buckets in the object hashmap.
>   */
> -extern unsigned int get_max_object_index(void);
> +extern unsigned int object_store_get_max_index(struct repository *r);
> +#define get_max_object_index() object_store_get_max_index(the_repository)
>  
>  /*
>   * Return the object from the specified bucket in the object hashmap.
>   */
> -extern struct object *get_indexed_object(unsigned int);
> +extern struct object *object_store_get_indexed(struct repository *r,
> +                                            unsigned int);
> +#define get_indexed_object(i) object_store_get_indexed(the_repository, i)
>  
>  /*
>   * This can be used to see if we have heard of the object before, but
> @@ -78,34 +83,59 @@ extern struct object *get_indexed_object(unsigned int);
>   * half-initialised objects, the caller is expected to initialize them
>   * by calling parse_object() on them.
>   */
> -struct object *lookup_object(const unsigned char *sha1);
> +extern struct object *object_store_lookup(struct repository *r,
> +                                       const unsigned char *sha1);
> +#define lookup_object(hash) object_store_lookup(the_repository, hash)
>  
> -extern void *create_object(const unsigned char *sha1, void *obj);
> +extern void *object_store_create(struct repository *r,
> +                              const unsigned char *sha1,
> +                              void *obj);
> +#define create_object(sha1, obj) object_store_create(the_repository, sha1, 
> obj)
>  
> -void *object_as_type(struct object *obj, enum object_type type, int quiet);
> +extern void *object_as_type(struct object *obj,
> +                         enum object_type type,
> +                         int quiet);
>  
>  /*
>   * Returns the object, having parsed it to find out what it is.
>   *
>   * Returns NULL if the object is missing or corrupt.
>   */
> -struct object *parse_object(const struct object_id *oid);
> +extern struct object *object_store_parse_object(struct repository *r,
> +                                             const struct object_id *oid);
> +#define parse_object(sha1) \
> +     object_store_parse_object(the_repository, sha1)
>  
>  /*
>   * Like parse_object, but will die() instead of returning NULL. If the
>   * "name" parameter is not NULL, it is included in the error message
>   * (otherwise, the hex object ID is given).
>   */
> -struct object *parse_object_or_die(const struct object_id *oid, const char 
> *name);
> +extern struct object *object_store_parse_object_or_die(struct repository *r,
> +                                                    const struct object_id 
> *oid,
> +                                                    const char *name);
> +#define parse_object_or_die(oid, name) \
> +     object_store_parse_object_or_die(the_repository, oid, name)
>  
>  /* Given the result of read_sha1_file(), returns the object after
>   * parsing it.  eaten_p indicates if the object has a borrowed copy
>   * of buffer and the caller should not free() it.
>   */
> -struct object *parse_object_buffer(const struct object_id *oid, enum 
> object_type type, unsigned long size, void *buffer, int *eaten_p);
> +struct object *object_store_parse_object_buffer(struct repository *r,
> +                                             const struct object_id *oid,
> +                                             enum object_type type,
> +                                             unsigned long size,
> +                                             void *buffer,
> +                                             int *eaten_p);
> +#define parse_object_buffer(oid, type, size, buffer, eaten_p) \
> +     object_store_parse_object_buffer(the_repository, oid, type, \
> +                                      size, buffer, eaten_p)
>  
>  /** Returns the object, with potentially excess memory allocated. **/
> -struct object *lookup_unknown_object(const unsigned  char *sha1);
> +struct object *object_store_lookup_unknown_object(struct repository *r,
> +                                               const unsigned char *sha1);
> +#define lookup_unknown_object(sha1) \
> +     object_store_lookup_unknown_object(the_repository, sha1)
>  
>  struct object_list *object_list_insert(struct object *item,
>                                      struct object_list **list_p);
> @@ -138,6 +168,7 @@ void object_array_remove_duplicates(struct object_array 
> *array);
>   */
>  void object_array_clear(struct object_array *array);
>  
> -void clear_object_flags(unsigned flags);
> +extern void object_store_clear_object_flags(struct repository *r, unsigned 
> flags);
> +#define clear_object_flags(flags) 
> object_store_clear_object_flags(the_repository, flags);
>  
>  #endif /* OBJECT_H */
> diff --git a/repository.h b/repository.h
> index 417787f3ef..aeb50a19bc 100644
> --- a/repository.h
> +++ b/repository.h
> @@ -1,6 +1,8 @@
>  #ifndef REPOSITORY_H
>  #define REPOSITORY_H
>  
> +#include "object.h"
> +
>  struct config_set;
>  struct index_state;
>  struct submodule_cache;
> @@ -24,6 +26,10 @@ struct repository {
>        * Cannot be NULL after initialization.
>        */
>       char *objectdir;
> +     struct object_store {
> +             struct object **obj_hash;
> +             int nr_objs, obj_hash_size;
> +     } objects;
>  
>       /*
>        * Path to the repository's graft file.

Re: [RFC/WIP PATCH] object store classification

Reply via email to