Duy Nguyen <[email protected]> writes:
> On Sat, Jul 13, 2013 at 12:26 AM, Thomas Gummerer <[email protected]>
> wrote:
>> +struct directory_entry {
>> + struct directory_entry *next;
>> + struct directory_entry *next_hash;
>> + struct cache_entry *ce;
>> + struct cache_entry *ce_last;
>> + struct conflict_entry *conflict;
>> + struct conflict_entry *conflict_last;
>> + unsigned int conflict_size;
>> + unsigned int de_foffset;
>> + unsigned int de_cr;
>> + unsigned int de_ncr;
>> + unsigned int de_nsubtrees;
>> + unsigned int de_nfiles;
>> + unsigned int de_nentries;
>> + unsigned char sha1[20];
>> + unsigned short de_flags;
>> + unsigned int de_pathlen;
>> + char pathname[FLEX_ARRAY];
>> +};
>> +
>> +struct conflict_part {
>> + struct conflict_part *next;
>> + unsigned short flags;
>> + unsigned short entry_mode;
>> + unsigned char sha1[20];
>> +};
>> +
>> +struct conflict_entry {
>> + struct conflict_entry *next;
>> + unsigned int nfileconflicts;
>> + struct conflict_part *entries;
>> + unsigned int namelen;
>> + unsigned int pathlen;
>> + char name[FLEX_ARRAY];
>> +};
>> +
>> +struct ondisk_conflict_part {
>> + unsigned short flags;
>> + unsigned short entry_mode;
>> + unsigned char sha1[20];
>> +};
>
> These new structs should probably be in read-cache-v5.c, or read-cache.h
Makes sense, thanks.
>> #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) +
>> 1)
>> +#define directory_entry_size(len) (offsetof(struct
>> directory_entry,pathname) + (len) + 1)
>> +#define conflict_entry_size(len) (offsetof(struct conflict_entry,name) +
>> (len) + 1)
>
> These are used by read-cache-v5.c only so far. I'd say move them to
> read-cache.h or read-cache-v5.c together with the new structs.
Thanks.
>> +struct ondisk_cache_entry {
>> + unsigned short flags;
>> + unsigned short mode;
>> + struct cache_time mtime;
>> + unsigned int size;
>> + int stat_crc;
>> + unsigned char sha1[20];
>> +};
>> +
>> +struct ondisk_directory_entry {
>> + unsigned int foffset;
>> + unsigned int cr;
>> + unsigned int ncr;
>> + unsigned int nsubtrees;
>> + unsigned int nfiles;
>> + unsigned int nentries;
>> + unsigned char sha1[20];
>> + unsigned short flags;
>> +};
>
> Perhaps use uint32_t, uint16_t and friends for all on-disk structures?
We got this in the makefile, so I think we should be fine without it.
It still makes sense for clarity though I think.
ifdef NO_UINTMAX_T
BASIC_CFLAGS += -Duintmax_t=uint32_t
endif
While at it I'll make the code for v[234] use them too.
>> +static struct directory_entry *read_directories(unsigned int *dir_offset,
>> + unsigned int *dir_table_offset,
>> + void *mmap,
>> + int mmap_size)
>> +{
>> + int i, ondisk_directory_size;
>> + uint32_t *filecrc, *beginning, *end;
>> + struct directory_entry *current = NULL;
>> + struct ondisk_directory_entry *disk_de;
>> + struct directory_entry *de;
>> + unsigned int data_len, len;
>> + char *name;
>> +
>> + /*
>> + * Length of pathname + nul byte for termination + size of
>> + * members of ondisk_directory_entry. (Just using the size
>> + * of the struct doesn't work, because there may be padding
>> + * bytes for the struct)
>> + */
>> + ondisk_directory_size = sizeof(disk_de->flags)
>> + + sizeof(disk_de->foffset)
>> + + sizeof(disk_de->cr)
>> + + sizeof(disk_de->ncr)
>> + + sizeof(disk_de->nsubtrees)
>> + + sizeof(disk_de->nfiles)
>> + + sizeof(disk_de->nentries)
>> + + sizeof(disk_de->sha1);
>> + name = ptr_add(mmap, *dir_offset);
>> + beginning = ptr_add(mmap, *dir_table_offset);
>> + end = ptr_add(mmap, *dir_table_offset + 4);
>> + len = ntoh_l(*end) - ntoh_l(*beginning) - ondisk_directory_size - 5;
>> + disk_de = ptr_add(mmap, *dir_offset + len + 1);
>> + de = directory_entry_from_ondisk(disk_de, name, len);
>> + de->next = NULL;
>> +
>> + data_len = len + 1 + ondisk_directory_size;
>> + filecrc = ptr_add(mmap, *dir_offset + data_len);
>> + if (!check_crc32(0, ptr_add(mmap, *dir_offset), data_len,
>> ntoh_l(*filecrc)))
>> + goto unmap;
>> +
>> + *dir_table_offset += 4;
>> + *dir_offset += data_len + 4; /* crc code */
>> +
>> + current = de;
>> + for (i = 0; i < de->de_nsubtrees; i++) {
>> + current->next = read_directories(dir_offset,
>> dir_table_offset,
>> + mmap, mmap_size);
>> + while (current->next)
>> + current = current->next;
>> + }
>> +
>> + return de;
>> +unmap:
>> + munmap(mmap, mmap_size);
>> + die("directory crc doesn't match for '%s'", de->pathname);
>> +}
>
> You don't have to munmap when you die() anway.
Will change that in the re-roll.
> I'm not sure if flatten
> the directory hierarchy into a list (linked by next pointer) is a good
> idea, or we should maintain the tree structure in memory. Still a lot
> of reading to figure that out..
>
> I skipped from here..
>
>> +static void ce_queue_push(struct cache_entry **head,
>> + struct cache_entry **tail,
>> + struct cache_entry *ce)
>> +{
>
> ...
>
>> +static int read_conflicts(struct conflict_entry **head,
>> + struct directory_entry *de,
>> + void **mmap, unsigned long mmap_size)
>> +{
>
> till the end of this function. Not interested in conflict stuff yet.
>
>
>> +static struct directory_entry *read_head_directories(struct index_state
>> *istate,
>> + unsigned int
>> *entry_offset,
>> + unsigned int
>> *foffsetblock,
>> + unsigned int *ndirs,
>> + void *mmap, unsigned
>> long mmap_size)
>> +{
>
> Maybe read_all_directories is a better nam.
Makes sense, thanks.
>> +static int read_index_filtered_v5(struct index_state *istate, void *mmap,
>> + unsigned long mmap_size, struct
>> filter_opts *opts)
>> +{
>> + unsigned int entry_offset, ndirs, foffsetblock, nr = 0;
>> + struct directory_entry *root_directory, *de;
>> + const char **adjusted_pathspec = NULL;
>> + int need_root = 0, i, n;
>> + char *oldpath, *seen;
>> +
>> + ...
>> +
>> + de = root_directory;
>> + while (de) {
>> + if (need_root ||
>> + match_pathspec(adjusted_pathspec, de->pathname,
>> de->de_pathlen, 0, NULL)) {
>> + unsigned int subdir_foffsetblock = de->de_foffset +
>> foffsetblock;
>> + unsigned int *off = mmap + subdir_foffsetblock;
>> + unsigned int subdir_entry_offset = entry_offset +
>> ntoh_l(*off);
>> + oldpath = de->pathname;
>> + do {
>> + if (read_entries(istate, &de,
>> &subdir_entry_offset,
>> + &mmap, mmap_size, &nr,
>> + &subdir_foffsetblock) < 0)
>> + return -1;
>> + } while (de && !prefixcmp(de->pathname, oldpath));
>> + } else
>> + de = de->next;
>> + }
>
> Hm.. if we maintain a tree structure here (one link to the first
> subdir, one link to the next sibling), I think the "do" loop could be
> done without prefixcmp. Just check if "de" returned by read_entries is
> the next sibling "de" (iow the end of current directory recursively).
Yes, the tree-structure makes sense. I've implemented it a bit
differently though, instead of using two pointers, I'm using one pointer
to an array of directory entries, which can be iterated over.
>> + istate->cache_nr = nr;
>> + return 0;
>> +}
>> +
>> +static int read_index_v5(struct index_state *istate, void *mmap,
>> + unsigned long mmap_size, struct filter_opts *opts)
>> +{
>> + unsigned int entry_offset, ndirs, foffsetblock, nr = 0;
>> + struct directory_entry *root_directory, *de;
>> +
>> + if (opts != NULL)
>> + return read_index_filtered_v5(istate, mmap, mmap_size, opts);
>> +
>> + root_directory = read_head_directories(istate, &entry_offset,
>> + &foffsetblock, &ndirs,
>> + mmap, mmap_size);
>> + de = root_directory;
>> + while (de)
>> + if (read_entries(istate, &de, &entry_offset, &mmap,
>> + mmap_size, &nr, &foffsetblock) < 0)
>> + return -1;
>> + istate->cache_nr = nr;
>> + return 0;
>> +}
>
> Make it call read_index_filtered_v5 with an empty pathspec instead.
> match_pathspec* returns true immediately if pathspec is empty. Without
> the removal of prefixcmp() in the "do" loop mentioned above,
> read_index_filtered_v5 can't be more expensive than this version.
Yes right, will change in the re-roll.
> That was it! Lunch time! Maybe I'll read the rest in the afternoon, or
> someday next week.
Thanks a lot for taking the time to review my code.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html