On Sat, Jul 13, 2013 at 12:26 AM, Thomas Gummerer <[email protected]> wrote:
> +struct directory_entry {
> + struct directory_entry *next;
> + struct directory_entry *next_hash;
> + struct cache_entry *ce;
> + struct cache_entry *ce_last;
> + struct conflict_entry *conflict;
> + struct conflict_entry *conflict_last;
> + unsigned int conflict_size;
> + unsigned int de_foffset;
> + unsigned int de_cr;
> + unsigned int de_ncr;
> + unsigned int de_nsubtrees;
> + unsigned int de_nfiles;
> + unsigned int de_nentries;
> + unsigned char sha1[20];
> + unsigned short de_flags;
> + unsigned int de_pathlen;
> + char pathname[FLEX_ARRAY];
> +};
> +
> +struct conflict_part {
> + struct conflict_part *next;
> + unsigned short flags;
> + unsigned short entry_mode;
> + unsigned char sha1[20];
> +};
> +
> +struct conflict_entry {
> + struct conflict_entry *next;
> + unsigned int nfileconflicts;
> + struct conflict_part *entries;
> + unsigned int namelen;
> + unsigned int pathlen;
> + char name[FLEX_ARRAY];
> +};
> +
> +struct ondisk_conflict_part {
> + unsigned short flags;
> + unsigned short entry_mode;
> + unsigned char sha1[20];
> +};
These new structs should probably be in read-cache-v5.c, or read-cache.h
> #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
> +#define directory_entry_size(len) (offsetof(struct directory_entry,pathname)
> + (len) + 1)
> +#define conflict_entry_size(len) (offsetof(struct conflict_entry,name) +
> (len) + 1)
These are used by read-cache-v5.c only so far. I'd say move them to
read-cache.h or read-cache-v5.c together with the new structs.
> +struct ondisk_cache_entry {
> + unsigned short flags;
> + unsigned short mode;
> + struct cache_time mtime;
> + unsigned int size;
> + int stat_crc;
> + unsigned char sha1[20];
> +};
> +
> +struct ondisk_directory_entry {
> + unsigned int foffset;
> + unsigned int cr;
> + unsigned int ncr;
> + unsigned int nsubtrees;
> + unsigned int nfiles;
> + unsigned int nentries;
> + unsigned char sha1[20];
> + unsigned short flags;
> +};
Perhaps use uint32_t, uint16_t and friends for all on-disk structures?
> +static struct directory_entry *read_directories(unsigned int *dir_offset,
> + unsigned int *dir_table_offset,
> + void *mmap,
> + int mmap_size)
> +{
> + int i, ondisk_directory_size;
> + uint32_t *filecrc, *beginning, *end;
> + struct directory_entry *current = NULL;
> + struct ondisk_directory_entry *disk_de;
> + struct directory_entry *de;
> + unsigned int data_len, len;
> + char *name;
> +
> + /*
> + * Length of pathname + nul byte for termination + size of
> + * members of ondisk_directory_entry. (Just using the size
> + * of the struct doesn't work, because there may be padding
> + * bytes for the struct)
> + */
> + ondisk_directory_size = sizeof(disk_de->flags)
> + + sizeof(disk_de->foffset)
> + + sizeof(disk_de->cr)
> + + sizeof(disk_de->ncr)
> + + sizeof(disk_de->nsubtrees)
> + + sizeof(disk_de->nfiles)
> + + sizeof(disk_de->nentries)
> + + sizeof(disk_de->sha1);
> + name = ptr_add(mmap, *dir_offset);
> + beginning = ptr_add(mmap, *dir_table_offset);
> + end = ptr_add(mmap, *dir_table_offset + 4);
> + len = ntoh_l(*end) - ntoh_l(*beginning) - ondisk_directory_size - 5;
> + disk_de = ptr_add(mmap, *dir_offset + len + 1);
> + de = directory_entry_from_ondisk(disk_de, name, len);
> + de->next = NULL;
> +
> + data_len = len + 1 + ondisk_directory_size;
> + filecrc = ptr_add(mmap, *dir_offset + data_len);
> + if (!check_crc32(0, ptr_add(mmap, *dir_offset), data_len,
> ntoh_l(*filecrc)))
> + goto unmap;
> +
> + *dir_table_offset += 4;
> + *dir_offset += data_len + 4; /* crc code */
> +
> + current = de;
> + for (i = 0; i < de->de_nsubtrees; i++) {
> + current->next = read_directories(dir_offset, dir_table_offset,
> + mmap, mmap_size);
> + while (current->next)
> + current = current->next;
> + }
> +
> + return de;
> +unmap:
> + munmap(mmap, mmap_size);
> + die("directory crc doesn't match for '%s'", de->pathname);
> +}
You don't have to munmap when you die() anway. I'm not sure if flatten
the directory hierarchy into a list (linked by next pointer) is a good
idea, or we should maintain the tree structure in memory. Still a lot
of reading to figure that out..
I skipped from here..
> +static void ce_queue_push(struct cache_entry **head,
> + struct cache_entry **tail,
> + struct cache_entry *ce)
> +{
...
> +static int read_conflicts(struct conflict_entry **head,
> + struct directory_entry *de,
> + void **mmap, unsigned long mmap_size)
> +{
till the end of this function. Not interested in conflict stuff yet.
> +static struct directory_entry *read_head_directories(struct index_state
> *istate,
> + unsigned int
> *entry_offset,
> + unsigned int
> *foffsetblock,
> + unsigned int *ndirs,
> + void *mmap, unsigned
> long mmap_size)
> +{
Maybe read_all_directories is a better nam.
> +static int read_index_filtered_v5(struct index_state *istate, void *mmap,
> + unsigned long mmap_size, struct filter_opts
> *opts)
> +{
> + unsigned int entry_offset, ndirs, foffsetblock, nr = 0;
> + struct directory_entry *root_directory, *de;
> + const char **adjusted_pathspec = NULL;
> + int need_root = 0, i, n;
> + char *oldpath, *seen;
> +
> + ...
> +
> + de = root_directory;
> + while (de) {
> + if (need_root ||
> + match_pathspec(adjusted_pathspec, de->pathname,
> de->de_pathlen, 0, NULL)) {
> + unsigned int subdir_foffsetblock = de->de_foffset +
> foffsetblock;
> + unsigned int *off = mmap + subdir_foffsetblock;
> + unsigned int subdir_entry_offset = entry_offset +
> ntoh_l(*off);
> + oldpath = de->pathname;
> + do {
> + if (read_entries(istate, &de,
> &subdir_entry_offset,
> + &mmap, mmap_size, &nr,
> + &subdir_foffsetblock) < 0)
> + return -1;
> + } while (de && !prefixcmp(de->pathname, oldpath));
> + } else
> + de = de->next;
> + }
Hm.. if we maintain a tree structure here (one link to the first
subdir, one link to the next sibling), I think the "do" loop could be
done without prefixcmp. Just check if "de" returned by read_entries is
the next sibling "de" (iow the end of current directory recursively).
> + istate->cache_nr = nr;
> + return 0;
> +}
> +
> +static int read_index_v5(struct index_state *istate, void *mmap,
> + unsigned long mmap_size, struct filter_opts *opts)
> +{
> + unsigned int entry_offset, ndirs, foffsetblock, nr = 0;
> + struct directory_entry *root_directory, *de;
> +
> + if (opts != NULL)
> + return read_index_filtered_v5(istate, mmap, mmap_size, opts);
> +
> + root_directory = read_head_directories(istate, &entry_offset,
> + &foffsetblock, &ndirs,
> + mmap, mmap_size);
> + de = root_directory;
> + while (de)
> + if (read_entries(istate, &de, &entry_offset, &mmap,
> + mmap_size, &nr, &foffsetblock) < 0)
> + return -1;
> + istate->cache_nr = nr;
> + return 0;
> +}
Make it call read_index_filtered_v5 with an empty pathspec instead.
match_pathspec* returns true immediately if pathspec is empty. Without
the removal of prefixcmp() in the "do" loop mentioned above,
read_index_filtered_v5 can't be more expensive than this version.
That was it! Lunch time! Maybe I'll read the rest in the afternoon, or
someday next week.
--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html