Hi,

when copying a whole directory tree with standard tools, e.g.
tar cf - . | ( cd $DEST && tar xf - ) or cpio -p ...,
the source disk is busy seeking. That is noisy and, above all, slow.
I've written a small Python program which outputs the file names in
inode order. If this list is fed into tar or cpio, almost no seeks are
needed during the copy. I've tested it by comparing the resulting
copied tree to one created by tar | tar. But since it is meant for
backing up data, its correctness is critical. Therefore I'd like to
ask for comments.

Thanks for any comments,
Helmut
#!/usr/bin/python3
import os, sys, stat

def walktree(top):
    '''recursively descend the directory tree rooted at top,
       collecting an (inode, pathname) pair for every entry
       that lives on the same device as the top directory'''
    for f in os.listdir(top):
        pathname = os.path.join(top, f)
        Stat = os.lstat(pathname)
        Dev = Stat.st_dev
        if Dev != Root_Dev:            # don't cross filesystem boundaries
            continue
        Ino = Stat.st_ino
        mode = Stat.st_mode
        if stat.S_ISDIR(mode):
            # it's a directory: record it, then recurse into it
            FN_List.append((Ino, pathname))
            walktree(pathname)
        else:
            # regular file, symlink, device node, ...: just record it
            FN_List.append((Ino, pathname))

if len(sys.argv) != 2:
    print('''usage: TreeWalk_I_Sorted <TOPDIR>
# generates a list of files in inode order
# example with tar:
TreeWalk_I_Sorted <TOPDIR> | tar --no-recursion -c -j -T- -f XXX.tar.bz2
# example with cpio:
TreeWalk_I_Sorted <TOPDIR> | cpio -padmu <DESTDIR>
''')
    sys.exit(1)

TOP = sys.argv[1]
Stat = os.lstat(TOP)
Root_Dev = Stat.st_dev
FN_List = [(Stat.st_ino, TOP)]

# import resource
# print("at Start in kB ", resource.getrusage(0).ru_maxrss)
# uses about 500 bytes per file

walktree(TOP)
FN_List.sort()                         # sort by inode number
# print("*** starting ...", file=sys.stderr)

for I, F in FN_List:
    print(F)
    # print(I, " -> ", F)

# print("after loading", len(FN_List), " items : ", resource.getrusage(0).ru_maxrss)
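
As a side note, the same walk could also be written on top of os.scandir()
(Python 3.6+). On Unix, DirEntry.inode() reads the inode number straight out
of the directory entry, so no lstat per plain file is needed, which would save
some more seeks during the scan itself. This is only a rough sketch of that
variant, not the script above; it assumes the filesystem boundary only has to
be checked when descending into a directory:

#!/usr/bin/python3
# Sketch of the same inode-sorted walk using os.scandir().
# Assumption: the device check is only needed when descending into a
# directory, since filesystems are crossed at directory mount points.
import os, sys

def walktree(top, root_dev, fn_list):
    with os.scandir(top) as entries:
        for entry in entries:
            # inode() comes from the directory entry itself on Unix
            fn_list.append((entry.inode(), entry.path))
            if entry.is_dir(follow_symlinks=False):
                if entry.stat(follow_symlinks=False).st_dev != root_dev:
                    fn_list.pop()      # other filesystem: drop it, don't descend
                    continue
                walktree(entry.path, root_dev, fn_list)

if len(sys.argv) != 2:
    sys.exit("usage: TreeWalk_I_Sorted_scandir <TOPDIR>")
top_stat = os.lstat(sys.argv[1])
fn_list = [(top_stat.st_ino, sys.argv[1])]
walktree(sys.argv[1], top_stat.st_dev, fn_list)
fn_list.sort()                         # sort by inode number
for _ino, path in fn_list:
    print(path)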
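
The comparison against the tar | tar copy could itself be scripted, e.g.
roughly like this with filecmp (only a sketch; dircmp compares os.stat()
signatures, not file contents, so a byte-for-byte check would additionally
need filecmp.cmp(..., shallow=False) on each file pair):

#!/usr/bin/python3
# Rough cross-check sketch: report anything that differs between the
# original tree and the copy.
import sys, filecmp

def report(dc):
    '''recursively print the differences found by dircmp'''
    if dc.left_only or dc.right_only or dc.diff_files or dc.common_funny:
        print(dc.left, "<->", dc.right)
        print("  only in original:", dc.left_only)
        print("  only in copy    :", dc.right_only)
        print("  differing       :", dc.diff_files)
        print("  not comparable  :", dc.common_funny)
    for sub in dc.subdirs.values():
        report(sub)

if len(sys.argv) != 3:
    sys.exit("usage: CompareTrees <ORIGDIR> <COPYDIR>")
report(filecmp.dircmp(sys.argv[1], sys.argv[2]))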