On Sun, Jan 04, 2004 at 06:35:03AM -0600, John Van Essen wrote: > Lester, > > You articulated your situation clearly enough for me. Thanks. > > I'll address your issue that when rsync is running locally for /vol/N > to /vol/N_mirror syncing, it exhausts all of the RAM and swap. > > If you haven't read jw schultz's "How Rsync Works" page, here's the link: > > http://www.pegasys.ws/how-rsync-works.html > > The sender, receiver, and generator each have a full copy of the file > list (each file's entry uses 100 bytes on average). > > Additionally, the --hard-links option creates yet *another* full copy of > the file list in the receiver, so that's even more memory consumed. > > So you are in a world o' hurt rsyncing an entire /vol/N internally > with --hard-links, since there will be FOUR copies of the file list. > > I'd suggest breaking the /vol/N rsync up into separate rsyncs for each > of the maxdepth 1 hierarchies. If I understand your situation correctly, > all hard link groups are self-contained within each of those hierarchies > so you will be OK. > > I've modified hlink.c to use a list of file struct pointers instead of > copies of the actual file structs themselves, so that will save memory. > I'll submit that patch for review in a day or two after I've tested it.
I've just done the same. It reduces the memory requirements of the hlink list to 1/18th. It is also somewhat faster to build that way because we don't have to walk the list. If we built the hlink_list one element at a time, the way we do the file_list, putting only those files that we might link into it, it would be smaller, but building it would be slower. I've only done a little testing, but it seems to be working and, warnings about theory vs. practice aside, it should be good. =================================================================== RCS file: /data/cvs/rsync/hlink.c,v retrieving revision 1.23 diff -p -u -r1.23 hlink.c --- hlink.c 2 Jan 2004 07:34:49 -0000 1.23 +++ hlink.c 4 Jan 2004 13:21:14 -0000 @@ -24,45 +24,43 @@ extern int dry_run; extern int verbose; #if SUPPORT_HARD_LINKS -static int hlink_compare(struct file_struct *f1, struct file_struct *f2) +static int hlink_compare(struct file_struct **f1, struct file_struct **f2) { - if (!S_ISREG(f1->mode) && !S_ISREG(f2->mode)) + if (!S_ISREG((*f1)->mode) && !S_ISREG((*f2)->mode)) return 0; - if (!S_ISREG(f1->mode)) + if (!S_ISREG((*f1)->mode)) return -1; - if (!S_ISREG(f2->mode)) + if (!S_ISREG((*f2)->mode)) return 1; - if (f1->dev != f2->dev) - return (int) (f1->dev > f2->dev ? 1 : -1); + if ((*f1)->dev != (*f2)->dev) + return (int) ((*f1)->dev > (*f2)->dev ? 1 : -1); - if (f1->inode != f2->inode) - return (int) (f1->inode > f2->inode ? 1 : -1); + if ((*f1)->inode != (*f2)->inode) + return (int) ((*f1)->inode > (*f2)->inode ? 
1 : -1); - return file_compare(&f1, &f2); + + return file_compare(f1, f2); } -static struct file_struct *hlink_list; +static struct file_struct **hlink_list; static int hlink_count; #endif void init_hard_links(struct file_list *flist) { #if SUPPORT_HARD_LINKS - int i; if (flist->count < 2) return; if (hlink_list) free(hlink_list); - if (!(hlink_list = new_array(struct file_struct, flist->count))) + if (!(hlink_list = new_array(struct file_struct *, flist->count))) out_of_memory("init_hard_links"); - - for (i = 0; i < flist->count; i++) - memcpy(&hlink_list[i], flist->files[i], - sizeof(hlink_list[0])); + + memcpy(hlink_list, flist->files, sizeof(hlink_list[0]) * flist->count); qsort(hlink_list, flist->count, sizeof(hlink_list[0]), (int (*)()) hlink_compare); @@ -84,7 +82,7 @@ int check_hard_link(struct file_struct * while (low != high) { int mid = (low + high) / 2; - ret = hlink_compare(&hlink_list[mid], file); + ret = hlink_compare(&hlink_list[mid], &file); if (ret == 0) { low = mid; break; @@ -97,16 +95,16 @@ int check_hard_link(struct file_struct * /* XXX: To me this looks kind of dodgy -- why do we use [low] * here and [low-1] below? 
-- mbp */ - if (hlink_compare(&hlink_list[low], file) != 0) + if (hlink_compare(&hlink_list[low], &file) != 0) return 0; if (low > 0 && - S_ISREG(hlink_list[low - 1].mode) && - file->dev == hlink_list[low - 1].dev && - file->inode == hlink_list[low - 1].inode) { + S_ISREG(hlink_list[low - 1]->mode) && + file->dev == hlink_list[low - 1]->dev && + file->inode == hlink_list[low - 1]->inode) { if (verbose >= 2) { rprintf(FINFO, "check_hard_link: \"%s\" is a hard link to file %d, \"%s\"\n", - f_name(file), low-1, f_name(&hlink_list[low-1])); + f_name(file), low-1, f_name(hlink_list[low-1])); } return 1; } @@ -120,12 +118,12 @@ int check_hard_link(struct file_struct * static void hard_link_one(int i) { STRUCT_STAT st1, st2; - char *hlink2, *hlink1 = f_name(&hlink_list[i - 1]); + char *hlink2, *hlink1 = f_name(hlink_list[i - 1]); if (link_stat(hlink1, &st1) != 0) return; - hlink2 = f_name(&hlink_list[i]); + hlink2 = f_name(hlink_list[i]); if (link_stat(hlink2, &st2) != 0) { if (do_link(hlink1, hlink2)) { if (verbose > 0) { @@ -166,11 +164,11 @@ void do_hard_links(void) return; for (i = 1; i < hlink_count; i++) { - if (S_ISREG(hlink_list[i].mode) && - S_ISREG(hlink_list[i - 1].mode) && - hlink_list[i].basename && hlink_list[i - 1].basename && - hlink_list[i].dev == hlink_list[i - 1].dev && - hlink_list[i].inode == hlink_list[i - 1].inode) { + if (S_ISREG(hlink_list[i]->mode) && + S_ISREG(hlink_list[i - 1]->mode) && + hlink_list[i]->basename && hlink_list[i - 1]->basename && + hlink_list[i]->dev == hlink_list[i - 1]->dev && + hlink_list[i]->inode == hlink_list[i - 1]->inode) { hard_link_one(i); } } -- To unsubscribe or change options: http://lists.samba.org/mailman/listinfo/rsync Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html