On Sun, Jan 04, 2004 at 06:35:03AM -0600, John Van Essen wrote:
> Lester,
> 
> You articulated your situation clearly enough for me.  Thanks.
> 
> I'll address the issue you raised: when rsync runs locally to sync /vol/N
> to /vol/N_mirror, it exhausts all of the RAM and swap.
> 
> If you haven't read jw schultz's "How Rsync Works" page, here's the link:
> 
>   http://www.pegasys.ws/how-rsync-works.html
> 
> The sender, receiver, and generator each have a full copy of the file
> list (each file's entry uses 100 bytes on average).
> 
> Additionally, the --hard-links option creates *another* full copy of
> the file list in the receiver, so that's even more memory consumed.
> 
> So you are in a world o' hurt rsyncing an entire /vol/N internally
> with --hard-links, since there will be FOUR copies of the file list.
> 
> I'd suggest breaking the /vol/N rsync up into separate rsyncs for each
> of the maxdepth 1 hierarchies.  If I understand your situation correctly,
> all hard link groups are self contained within each of those hierarchies
> so you will be OK.
> 
> I've modified hlink.c to use a list of file struct pointers instead of
> copies of the actual file structs themselves, so that will save memory.
> I'll submit that patch for review in a day or two after I've tested it.

I've just done the same.  It reduces the memory requirements
of the hlink list to 1/18th.  It is also somewhat faster to
build that way because we don't have to walk the list.

If we built the hlink_list one element at a time, the way we
do the file_list, putting in only those files that we might
link, it would be smaller, but building it would be slower.

I've only done a little testing, but it seems to be working
and, warnings about theory v. practice aside, it should be
good.


===================================================================
RCS file: /data/cvs/rsync/hlink.c,v
retrieving revision 1.23
diff -p -u -r1.23 hlink.c
--- hlink.c     2 Jan 2004 07:34:49 -0000       1.23
+++ hlink.c     4 Jan 2004 13:21:14 -0000
@@ -24,45 +24,43 @@ extern int dry_run;
 extern int verbose;
 
 #if SUPPORT_HARD_LINKS
-static int hlink_compare(struct file_struct *f1, struct file_struct *f2)
+static int hlink_compare(struct file_struct **f1, struct file_struct **f2)
 {
-       if (!S_ISREG(f1->mode) && !S_ISREG(f2->mode))
+       if (!S_ISREG((*f1)->mode) && !S_ISREG((*f2)->mode))
                return 0;
-       if (!S_ISREG(f1->mode))
+       if (!S_ISREG((*f1)->mode))
                return -1;
-       if (!S_ISREG(f2->mode))
+       if (!S_ISREG((*f2)->mode))
                return 1;
 
-       if (f1->dev != f2->dev)
-               return (int) (f1->dev > f2->dev ? 1 : -1);
+       if ((*f1)->dev != (*f2)->dev)
+               return (int) ((*f1)->dev > (*f2)->dev ? 1 : -1);
 
-       if (f1->inode != f2->inode)
-               return (int) (f1->inode > f2->inode ? 1 : -1);
+       if ((*f1)->inode != (*f2)->inode)
+               return (int) ((*f1)->inode > (*f2)->inode ? 1 : -1);
 
-       return file_compare(&f1, &f2);
+
+       return file_compare(f1, f2);
 }
 
 
-static struct file_struct *hlink_list;
+static struct file_struct **hlink_list;
 static int hlink_count;
 #endif
 
 void init_hard_links(struct file_list *flist)
 {
 #if SUPPORT_HARD_LINKS
-       int i;
        if (flist->count < 2)
                return;
 
        if (hlink_list)
                free(hlink_list);
 
-       if (!(hlink_list = new_array(struct file_struct, flist->count)))
+       if (!(hlink_list = new_array(struct file_struct *, flist->count)))
                out_of_memory("init_hard_links");
-
-       for (i = 0; i < flist->count; i++)
-               memcpy(&hlink_list[i], flist->files[i],
-                      sizeof(hlink_list[0]));
+       
+       memcpy(hlink_list, flist->files, sizeof(hlink_list[0]) * flist->count); 
 
        qsort(hlink_list, flist->count,
              sizeof(hlink_list[0]), (int (*)()) hlink_compare);
@@ -84,7 +82,7 @@ int check_hard_link(struct file_struct *
 
        while (low != high) {
                int mid = (low + high) / 2;
-               ret = hlink_compare(&hlink_list[mid], file);
+               ret = hlink_compare(&hlink_list[mid], &file);
                if (ret == 0) {
                        low = mid;
                        break;
@@ -97,16 +95,16 @@ int check_hard_link(struct file_struct *
 
        /* XXX: To me this looks kind of dodgy -- why do we use [low]
         * here and [low-1] below? -- mbp */
-       if (hlink_compare(&hlink_list[low], file) != 0)
+       if (hlink_compare(&hlink_list[low], &file) != 0)
                return 0;
 
        if (low > 0 &&
-           S_ISREG(hlink_list[low - 1].mode) &&
-           file->dev == hlink_list[low - 1].dev &&
-           file->inode == hlink_list[low - 1].inode) {
+           S_ISREG(hlink_list[low - 1]->mode) &&
+           file->dev == hlink_list[low - 1]->dev &&
+           file->inode == hlink_list[low - 1]->inode) {
                if (verbose >= 2) {
                        rprintf(FINFO, "check_hard_link: \"%s\" is a hard link to file %d, \"%s\"\n",
-                               f_name(file), low-1, f_name(&hlink_list[low-1]));
+                               f_name(file), low-1, f_name(hlink_list[low-1]));
                }
                return 1;
        }
@@ -120,12 +118,12 @@ int check_hard_link(struct file_struct *
 static void hard_link_one(int i)
 {
        STRUCT_STAT st1, st2;
-       char *hlink2, *hlink1 = f_name(&hlink_list[i - 1]);
+       char *hlink2, *hlink1 = f_name(hlink_list[i - 1]);
 
        if (link_stat(hlink1, &st1) != 0)
                return;
 
-       hlink2 = f_name(&hlink_list[i]);
+       hlink2 = f_name(hlink_list[i]);
        if (link_stat(hlink2, &st2) != 0) {
                if (do_link(hlink1, hlink2)) {
                        if (verbose > 0) {
@@ -166,11 +164,11 @@ void do_hard_links(void)
                return;
 
        for (i = 1; i < hlink_count; i++) {
-               if (S_ISREG(hlink_list[i].mode) &&
-                   S_ISREG(hlink_list[i - 1].mode) &&
-                   hlink_list[i].basename && hlink_list[i - 1].basename &&
-                   hlink_list[i].dev == hlink_list[i - 1].dev &&
-                   hlink_list[i].inode == hlink_list[i - 1].inode) {
+               if (S_ISREG(hlink_list[i]->mode) &&
+                   S_ISREG(hlink_list[i - 1]->mode) &&
+                   hlink_list[i]->basename && hlink_list[i - 1]->basename &&
+                   hlink_list[i]->dev == hlink_list[i - 1]->dev &&
+                   hlink_list[i]->inode == hlink_list[i - 1]->inode) {
                        hard_link_one(i);
                }
        }
-- 
To unsubscribe or change options: http://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html

Reply via email to