In 2004, an allocation optimization was added to the file list handling code which preallocates 32k file_struct pointers in each file_list. This optimization predates the incremental recursion feature, for which it is no longer appropriate. When copying a tree containing a large number of small directories using incremental recursion, rsync allocates many short file_lists. As a result, the unused file_struct pointers can easily account for 90-95% of the memory allocated by rsync.
This can be easily reproduced by using valgrind --tool=massif ./rsync -anx /usr /tmp/ and checking the memory profile of the first (sender) process. This patch changes the flist functions to start only with 32 entries for the partial file lists, instead of 32 * 1024. It also modifies the condition for the debug notification that the allocated memory moved, so it does not depend on the initial allocation size. Instead, it now simply checks if the file list has been already allocated. --- flist.c | 9 +++++++-- rsync.h | 5 +++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/flist.c b/flist.c index 3442d868..530d336e 100644 --- a/flist.c +++ b/flist.c @@ -305,7 +305,7 @@ static void flist_expand(struct file_list *flist, int extra) new_ptr = realloc_array(flist->files, struct file_struct *, flist->malloced); - if (DEBUG_GTE(FLIST, 1) && flist->malloced != FLIST_START) { + if (DEBUG_GTE(FLIST, 1) && flist->files) { rprintf(FCLIENT, "[%s] expand file_list pointer array to %s bytes, did%s move\n", who_am_i(), big_num(sizeof flist->files[0] * flist->malloced), @@ -2186,8 +2186,10 @@ struct file_list *send_file_list(int f, int argc, char *argv[]) #endif flist = cur_flist = flist_new(0, "send_file_list"); + flist_expand(flist, FLIST_START_LARGE); if (inc_recurse) { dir_flist = flist_new(FLIST_TEMP, "send_file_list"); + flist_expand(dir_flist, FLIST_START_LARGE); flags |= FLAG_DIVERT_DIRS; } else dir_flist = cur_flist; @@ -2541,10 +2543,13 @@ struct file_list *recv_file_list(int f, int dir_ndx) #endif flist = flist_new(0, "recv_file_list"); + flist_expand(flist, FLIST_START_LARGE); if (inc_recurse) { - if (flist->ndx_start == 1) + if (flist->ndx_start == 1) { dir_flist = flist_new(FLIST_TEMP, "recv_file_list"); + flist_expand(dir_flist, FLIST_START_LARGE); + } dstart = dir_flist->used; } else { dir_flist = flist; diff --git a/rsync.h b/rsync.h index 88319732..f8fcbffb 100644 --- a/rsync.h +++ b/rsync.h @@ -918,8 +918,9 @@ extern int xattrs_ndx; * Start the 
flist array at FLIST_START entries and grow it * by doubling until FLIST_LINEAR then grow by FLIST_LINEAR */ -#define FLIST_START (32 * 1024) -#define FLIST_LINEAR (FLIST_START * 512) +#define FLIST_START (32) +#define FLIST_START_LARGE (32 * 1024) +#define FLIST_LINEAR (FLIST_START_LARGE * 512) /* * Extent size for allocation pools: A minimum size of 128KB -- 2.33.0 -- Please use reply-all for most replies to avoid omitting the mailing list. To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html