This email is follow up to <https://bugs.gnu.org/8601> dated 2011-05-01. Jeff, thanks for reporting the problem. (There's a good chance this email will bounce but I'll send it to your 2011 email address anyway.)
I recently ran into the same issue and derived the attached patches independently. I then found your bug report, made sure the attached patches fixed every problem that your proposal did, and installed the attached patches into Savannah. The attached patches 1-3 merely fix typos and refactor. Patch 4 corresponds to your proposal; however, it differs in that its basic idea is to use the FIEMAP code only as a fallback if SEEK_DATA doesn't work, rather than try to add to the already-too-complicated code that fiddles with FIEMAPs. (I don't observe any significant performance advantage to the FIEMAP stuff, but maybe that's just me.) Patch 5 adds opportunistic use of the copy_file_range syscall introduced in Linux kernel 4.5 (2016) and reworked in 5.3 (2019). This should improve 'cp' performance on kernels and file systems that support copy_file_range.
>From 4fe5259ab6c9e459a6db5938d143a9c65be113d9 Mon Sep 17 00:00:00 2001 From: Paul Eggert <egg...@cs.ucla.edu> Date: Thu, 25 Jun 2020 18:10:49 -0700 Subject: [PATCH 1/5] maint: typo fix * NEWS: Fix typo. --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index d36259641..d713fa724 100644 --- a/NEWS +++ b/NEWS @@ -17,7 +17,7 @@ GNU coreutils NEWS -*- outline -*- cp and install now default to copy-on-write (COW) if available. - On GNU/Linux systems, ls no longer issues an error message on + On GNU/Linux systems, ls no longer issues an error message on a directory merely because it was removed. This reverts a change that was made in release 8.32. -- 2.25.4
>From 51981008f9892d44231c432535deac4f9b3cbe5e Mon Sep 17 00:00:00 2001 From: Paul Eggert <egg...@cs.ucla.edu> Date: Tue, 23 Jun 2020 19:18:04 -0700 Subject: [PATCH 2/5] cp: refactor extent_copy * src/copy.c (extent_copy): New arg SCAN, replacing REQUIRE_NORMAL_COPY. All callers changed. (enum scantype): New type. (infer_scantype): Rename from is_probably_sparse and return the new type. Add args FD and SCAN. All callers changed. --- src/copy.c | 119 +++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 64 deletions(-) diff --git a/src/copy.c b/src/copy.c index 54601ce07..f694f913f 100644 --- a/src/copy.c +++ b/src/copy.c @@ -422,9 +422,8 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, size_t hole_size, off_t src_total_size, enum Sparse_type sparse_mode, char const *src_name, char const *dst_name, - bool *require_normal_copy) + struct extent_scan *scan) { - struct extent_scan scan; off_t last_ext_start = 0; off_t last_ext_len = 0; @@ -432,45 +431,25 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, We may need this at the end, for a final ftruncate. */ off_t dest_pos = 0; - extent_scan_init (src_fd, &scan); - - *require_normal_copy = false; bool wrote_hole_at_eof = true; - do + while (true) { - bool ok = extent_scan_read (&scan); - if (! ok) - { - if (scan.hit_final_extent) - break; - - if (scan.initial_scan_failed) - { - *require_normal_copy = true; - return false; - } - - error (0, errno, _("%s: failed to get extents info"), - quotef (src_name)); - return false; - } - bool empty_extent = false; - for (unsigned int i = 0; i < scan.ei_count || empty_extent; i++) + for (unsigned int i = 0; i < scan->ei_count || empty_extent; i++) { off_t ext_start; off_t ext_len; off_t ext_hole_size; - if (i < scan.ei_count) + if (i < scan->ei_count) { - ext_start = scan.ext_info[i].ext_logical; - ext_len = scan.ext_info[i].ext_length; + ext_start = scan->ext_info[i].ext_logical; + ext_len = scan->ext_info[i].ext_length; } else /* empty extent at EOF. */ { i--; - ext_start = last_ext_start + scan.ext_info[i].ext_length; + ext_start = last_ext_start + scan->ext_info[i].ext_length; ext_len = 0; } @@ -498,7 +477,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, { error (0, errno, _("cannot lseek %s"), quoteaf (src_name)); fail: - extent_scan_free (&scan); + extent_scan_free (scan); return false; } @@ -539,7 +518,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, /* For now, do not treat FIEMAP_EXTENT_UNWRITTEN specially, because that (in combination with no sync) would lead to data loss at least on XFS and ext4 when using 2.6.39-rc3 kernels. */ - if (0 && (scan.ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN)) + if (0 && (scan->ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN)) { empty_extent = true; last_ext_len = 0; @@ -571,16 +550,23 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, extents beyond the apparent size. */ if (dest_pos == src_total_size) { - scan.hit_final_extent = true; + scan->hit_final_extent = true; break; } } /* Release the space allocated to scan->ext_info. */ - extent_scan_free (&scan); + extent_scan_free (scan); + if (scan->hit_final_extent) + break; + if (! extent_scan_read (scan) && ! scan->hit_final_extent) + { + error (0, errno, _("%s: failed to get extents info"), + quotef (src_name)); + return false; + } } - while (! scan.hit_final_extent); /* When the source file ends with a hole, we have to do a little more work, since the above copied only up to and including the final extent. @@ -1021,16 +1007,35 @@ fchmod_or_lchmod (int desc, char const *name, mode_t mode) # define HAVE_STRUCT_STAT_ST_BLOCKS 0 #endif +/* Type of scan being done on the input when looking for sparseness. */ +enum scantype + { + /* No fancy scanning; just read and write. */ + PLAIN_SCANTYPE, + + /* Read and examine data looking for zero blocks; useful when + attempting to create sparse output. */ + ZERO_SCANTYPE, + + /* Extent information is available. */ + EXTENT_SCANTYPE + }; + /* Use a heuristic to determine whether stat buffer SB comes from a file with sparse blocks. If the file has fewer blocks than would normally be needed for a file of its size, then at least one of the blocks in the file is a hole. In that case, return true. */ -static bool -is_probably_sparse (struct stat const *sb) +static enum scantype +infer_scantype (int fd, struct stat const *sb, struct extent_scan *scan) { - return (HAVE_STRUCT_STAT_ST_BLOCKS - && S_ISREG (sb->st_mode) - && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE); + if (! (HAVE_STRUCT_STAT_ST_BLOCKS + && S_ISREG (sb->st_mode) + && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE)) + return PLAIN_SCANTYPE; + + extent_scan_init (fd, scan); + extent_scan_read (scan); + return scan->initial_scan_failed ? ZERO_SCANTYPE : EXTENT_SCANTYPE; } @@ -1061,6 +1066,7 @@ copy_reg (char const *src_name, char const *dst_name, mode_t src_mode = src_sb->st_mode; struct stat sb; struct stat src_open_sb; + struct extent_scan scan; bool return_val = true; bool data_copy_required = x->data_copy_required; @@ -1260,23 +1266,13 @@ copy_reg (char const *src_name, char const *dst_name, fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL); /* Deal with sparse files. */ - bool make_holes = false; - bool sparse_src = is_probably_sparse (&src_open_sb); - - if (S_ISREG (sb.st_mode)) - { - /* Even with --sparse=always, try to create holes only - if the destination is a regular file. */ - if (x->sparse_mode == SPARSE_ALWAYS) - make_holes = true; - - /* Use a heuristic to determine whether SRC_NAME contains any sparse - blocks. If the file has fewer blocks than would normally be - needed for a file of its size, then at least one of the blocks in - the file is a hole. */ - if (x->sparse_mode == SPARSE_AUTO && sparse_src) - make_holes = true; - } + enum scantype scantype = infer_scantype (source_desc, &src_open_sb, + &scan); + bool make_holes + = (S_ISREG (sb.st_mode) + && (x->sparse_mode == SPARSE_ALWAYS + || (x->sparse_mode == SPARSE_AUTO + && scantype != PLAIN_SCANTYPE))); /* If not making a sparse file, try to use a more-efficient buffer size. */ @@ -1305,10 +1301,8 @@ copy_reg (char const *src_name, char const *dst_name, buf_alloc = xmalloc (buf_size + buf_alignment); buf = ptr_align (buf_alloc, buf_alignment); - if (sparse_src) + if (scantype == EXTENT_SCANTYPE) { - bool normal_copy_required; - /* Perform an efficient extent-based copy, falling back to the standard copy only if the initial extent scan fails. If the '--sparse=never' option is specified, write all data but use @@ -1316,14 +1310,11 @@ copy_reg (char const *src_name, char const *dst_name, if (extent_copy (source_desc, dest_desc, buf, buf_size, hole_size, src_open_sb.st_size, make_holes ? x->sparse_mode : SPARSE_NEVER, - src_name, dst_name, &normal_copy_required)) + src_name, dst_name, &scan)) goto preserve_metadata; - if (! normal_copy_required) - { - return_val = false; - goto close_src_and_dst_desc; - } + return_val = false; + goto close_src_and_dst_desc; } off_t n_read; -- 2.25.4
>From 35a55effa81c5f4f8fe7414e8aede0acccee470f Mon Sep 17 00:00:00 2001 From: Paul Eggert <egg...@cs.ucla.edu> Date: Wed, 24 Jun 2020 17:05:20 -0700 Subject: [PATCH 3/5] cp: avoid copy_reg goto * src/copy.c (copy_reg): Redo to avoid label and goto. --- src/copy.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/src/copy.c b/src/copy.c index f694f913f..b382cfa4d 100644 --- a/src/copy.c +++ b/src/copy.c @@ -1301,29 +1301,18 @@ copy_reg (char const *src_name, char const *dst_name, buf_alloc = xmalloc (buf_size + buf_alignment); buf = ptr_align (buf_alloc, buf_alignment); - if (scantype == EXTENT_SCANTYPE) - { - /* Perform an efficient extent-based copy, falling back to the - standard copy only if the initial extent scan fails. If the - '--sparse=never' option is specified, write all data but use - any extents to read more efficiently. */ - if (extent_copy (source_desc, dest_desc, buf, buf_size, hole_size, - src_open_sb.st_size, - make_holes ? x->sparse_mode : SPARSE_NEVER, - src_name, dst_name, &scan)) - goto preserve_metadata; - - return_val = false; - goto close_src_and_dst_desc; - } - off_t n_read; - bool wrote_hole_at_eof; - if (! sparse_copy (source_desc, dest_desc, buf, buf_size, - make_holes ? hole_size : 0, - x->sparse_mode == SPARSE_ALWAYS, src_name, dst_name, - UINTMAX_MAX, &n_read, - &wrote_hole_at_eof)) + bool wrote_hole_at_eof = false; + if (! (scantype == EXTENT_SCANTYPE + ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size, + src_open_sb.st_size, + make_holes ? x->sparse_mode : SPARSE_NEVER, + src_name, dst_name, &scan) + : sparse_copy (source_desc, dest_desc, buf, buf_size, + make_holes ? hole_size : 0, + x->sparse_mode == SPARSE_ALWAYS, + src_name, dst_name, UINTMAX_MAX, &n_read, + &wrote_hole_at_eof))) { return_val = false; goto close_src_and_dst_desc; @@ -1336,7 +1325,6 @@ copy_reg (char const *src_name, char const *dst_name, } } -preserve_metadata: if (x->preserve_timestamps) { struct timespec timespec[2]; -- 2.25.4
>From 6fc41029fff6e955a87312d8f5f967f0d01e390c Mon Sep 17 00:00:00 2001 From: Paul Eggert <egg...@cs.ucla.edu> Date: Thu, 25 Jun 2020 16:31:44 -0700 Subject: [PATCH 4/5] cp: use SEEK_DATA/SEEK_HOLE if available If it works, prefer lseek with SEEK_DATA and SEEK_HOLE to FIEMAP, as lseek is simpler and more portable (will be in next POSIX). Problem reported in 2011 by Jeff Liu (Bug#8061). * NEWS: Mention this. * src/copy.c (lseek_copy) [SEEK_HOLE]: New function. (enum scantype): New constants ERROR_SCANTYPE, LSEEK_SCANTYPE. (union scan_inference): New type. (infer_scantype): Last arg is now union scan_inference *, not struct extent_scan *. All callers changed. Prefer SEEK_HOLE to FIEMAP if both work, since SEEK_HOLE is simpler and more portable. (copy_reg): Do the fdadvise after initial scan, in case the scan fails. Report an error if the initial scan fails. (copy_reg) [SEEK_HOLE]: Use lseek_copy if scantype says so. --- NEWS | 3 + src/copy.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 201 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index d713fa724..63cb47d10 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ GNU coreutils NEWS -*- outline -*- cp and install now default to copy-on-write (COW) if available. + cp, install and mv now prefer lseek+SEEK_HOLE to ioctl+FS_IOC_FIEMAP + on sparse files, as lseek is simpler and more portable. + On GNU/Linux systems, ls no longer issues an error message on a directory merely because it was removed. This reverts a change that was made in release 8.32. diff --git a/src/copy.c b/src/copy.c index b382cfa4d..d88f8cf93 100644 --- a/src/copy.c +++ b/src/copy.c @@ -416,7 +416,12 @@ write_zeros (int fd, off_t n_bytes) Upon a successful copy, return true. If the initial extent scan fails, set *NORMAL_COPY_REQUIRED to true and return false. Upon any other failure, set *NORMAL_COPY_REQUIRED to false and - return false. */ + return false. + + FIXME: Once we no longer need to support Linux kernel versions + before 3.1 (2011), this function can be retired as it is superseded + by lseek_copy. That is, we no longer need extent-scan.h and can + remove any of the code that uses it. */ static bool extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, size_t hole_size, off_t src_total_size, @@ -595,6 +600,150 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, return true; } +#ifdef SEEK_HOLE +/* Perform an efficient extent copy, if possible. This avoids + the overhead of detecting holes in hole-introducing/preserving + copy, and thus makes copying sparse files much more efficient. + Copy from SRC_FD to DEST_FD, using BUF (of size BUF_SIZE) for a buffer. + Look for holes of size HOLE_SIZE in the input. + The input file is of size SRC_TOTAL_SIZE. + Use SPARSE_MODE to determine whether to create holes in the output. + SRC_NAME and DST_NAME are the input and output file names. + Return true if successful, false (with a diagnostic) otherwise. */ + +static bool +lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, + size_t hole_size, off_t ext_start, off_t src_total_size, + enum Sparse_type sparse_mode, + char const *src_name, char const *dst_name) +{ + off_t last_ext_start = 0; + off_t last_ext_len = 0; + off_t dest_pos = 0; + bool wrote_hole_at_eof = true; + + while (0 <= ext_start) + { + off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE); + if (ext_end < 0) + { + if (errno != ENXIO) + goto cannot_lseek; + ext_end = src_total_size; + if (ext_end <= ext_start) + { + /* The input file grew; get its current size. */ + src_total_size = lseek (src_fd, 0, SEEK_END); + if (src_total_size < 0) + goto cannot_lseek; + + /* If the input file shrank after growing, stop copying. */ + if (src_total_size <= ext_start) + break; + + ext_end = src_total_size; + } + } + /* If the input file must have grown, increase its measured size. */ + if (src_total_size < ext_end) + src_total_size = ext_end; + + if (lseek (src_fd, ext_start, SEEK_SET) < 0) + goto cannot_lseek; + + wrote_hole_at_eof = false; + off_t ext_hole_size = ext_start - last_ext_start - last_ext_len; + + if (ext_hole_size) + { + if (sparse_mode != SPARSE_NEVER) + { + if (! create_hole (dest_fd, dst_name, + sparse_mode == SPARSE_ALWAYS, + ext_hole_size)) + return false; + wrote_hole_at_eof = true; + } + else + { + /* When not inducing holes and when there is a hole between + the end of the previous extent and the beginning of the + current one, write zeros to the destination file. */ + if (! write_zeros (dest_fd, ext_hole_size)) + { + error (0, errno, _("%s: write failed"), + quotef (dst_name)); + return false; + } + } + } + + off_t ext_len = ext_end - ext_start; + last_ext_start = ext_start; + last_ext_len = ext_len; + + /* Copy this extent, looking for further opportunities to not + bother to write zeros unless --sparse=never, since SEEK_HOLE + is conservative and may miss some holes. */ + off_t n_read; + bool read_hole; + if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size, + sparse_mode == SPARSE_NEVER ? 0 : hole_size, + true, src_name, dst_name, ext_len, &n_read, + &read_hole)) + return false; + + dest_pos = ext_start + n_read; + if (n_read) + wrote_hole_at_eof = read_hole; + if (n_read < ext_len) + { + /* The input file shrank. */ + src_total_size = dest_pos; + break; + } + + ext_start = lseek (src_fd, dest_pos, SEEK_DATA); + if (ext_start < 0) + { + if (errno != ENXIO) + goto cannot_lseek; + break; + } + } + + /* When the source file ends with a hole, we have to do a little more work, + since the above copied only up to and including the final extent. + In order to complete the copy, we may have to insert a hole or write + zeros in the destination corresponding to the source file's hole-at-EOF. + + In addition, if the final extent was a block of zeros at EOF and we've + just converted them to a hole in the destination, we must call ftruncate + here in order to record the proper length in the destination. */ + if ((dest_pos < src_total_size || wrote_hole_at_eof) + && ! (sparse_mode == SPARSE_NEVER + ? write_zeros (dest_fd, src_total_size - dest_pos) + : ftruncate (dest_fd, src_total_size) == 0)) + { + error (0, errno, _("failed to extend %s"), quoteaf (dst_name)); + return false; + } + + if (sparse_mode == SPARSE_ALWAYS && dest_pos < src_total_size + && punch_hole (dest_fd, dest_pos, src_total_size - dest_pos) < 0) + { + error (0, errno, _("error deallocating %s"), quoteaf (dst_name)); + return false; + } + + return true; + + cannot_lseek: + error (0, errno, _("cannot lseek %s"), quoteaf (src_name)); + return false; +} +#endif + /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -1010,6 +1159,9 @@ fchmod_or_lchmod (int desc, char const *name, mode_t mode) /* Type of scan being done on the input when looking for sparseness. */ enum scantype { + /* An error was found when determining scantype. */ + ERROR_SCANTYPE, + /* No fancy scanning; just read and write. */ PLAIN_SCANTYPE, @@ -1017,22 +1169,44 @@ enum scantype attempting to create sparse output. */ ZERO_SCANTYPE, + /* lseek information is available. */ + LSEEK_SCANTYPE, + /* Extent information is available. */ EXTENT_SCANTYPE }; -/* Use a heuristic to determine whether stat buffer SB comes from a file - with sparse blocks. If the file has fewer blocks than would normally - be needed for a file of its size, then at least one of the blocks in - the file is a hole. In that case, return true. */ +/* Result of infer_scantype. */ +union scan_inference +{ + /* Used if infer_scantype returns LSEEK_SCANTYPE. This is the + offset of the first data block, or -1 if the file has no data. */ + off_t ext_start; + + /* Used if infer_scantype returns EXTENT_SCANTYPE. */ + struct extent_scan extent_scan; +}; + +/* Return how to scan a file with descriptor FD and stat buffer SB. + Store any information gathered into *SCAN. */ static enum scantype -infer_scantype (int fd, struct stat const *sb, struct extent_scan *scan) +infer_scantype (int fd, struct stat const *sb, + union scan_inference *scan_inference) { if (! (HAVE_STRUCT_STAT_ST_BLOCKS && S_ISREG (sb->st_mode) && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE)) return PLAIN_SCANTYPE; +#ifdef SEEK_HOLE + scan_inference->ext_start = lseek (fd, 0, SEEK_DATA); + if (0 <= scan_inference->ext_start) + return LSEEK_SCANTYPE; + else if (errno != EINVAL && errno != ENOTSUP) + return errno == ENXIO ? LSEEK_SCANTYPE : ERROR_SCANTYPE; +#endif + + struct extent_scan *scan = &scan_inference->extent_scan; extent_scan_init (fd, scan); extent_scan_read (scan); return scan->initial_scan_failed ? ZERO_SCANTYPE : EXTENT_SCANTYPE; @@ -1066,7 +1240,7 @@ copy_reg (char const *src_name, char const *dst_name, mode_t src_mode = src_sb->st_mode; struct stat sb; struct stat src_open_sb; - struct extent_scan scan; + union scan_inference scan_inference; bool return_val = true; bool data_copy_required = x->data_copy_required; @@ -1263,17 +1437,23 @@ copy_reg (char const *src_name, char const *dst_name, size_t buf_size = io_blksize (sb); size_t hole_size = ST_BLKSIZE (sb); - fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL); - /* Deal with sparse files. */ enum scantype scantype = infer_scantype (source_desc, &src_open_sb, - &scan); + &scan_inference); + if (scantype == ERROR_SCANTYPE) + { + error (0, errno, _("cannot lseek %s"), quoteaf (src_name)); + return_val = false; + goto close_src_and_dst_desc; + } bool make_holes = (S_ISREG (sb.st_mode) && (x->sparse_mode == SPARSE_ALWAYS || (x->sparse_mode == SPARSE_AUTO && scantype != PLAIN_SCANTYPE))); + fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL); + /* If not making a sparse file, try to use a more-efficient buffer size. */ if (! make_holes) @@ -1307,7 +1487,14 @@ copy_reg (char const *src_name, char const *dst_name, ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size, src_open_sb.st_size, make_holes ? x->sparse_mode : SPARSE_NEVER, - src_name, dst_name, &scan) + src_name, dst_name, &scan_inference.extent_scan) +#ifdef SEEK_HOLE + : scantype == LSEEK_SCANTYPE + ? lseek_copy (source_desc, dest_desc, buf, buf_size, hole_size, + scan_inference.ext_start, src_open_sb.st_size, + make_holes ? x->sparse_mode : SPARSE_NEVER, + src_name, dst_name) +#endif : sparse_copy (source_desc, dest_desc, buf, buf_size, make_holes ? hole_size : 0, x->sparse_mode == SPARSE_ALWAYS, -- 2.25.4
>From 81849f05b94068f873299bd9fc01b4eebdb31f64 Mon Sep 17 00:00:00 2001 From: Paul Eggert <egg...@cs.ucla.edu> Date: Thu, 25 Jun 2020 17:34:23 -0700 Subject: [PATCH 5/5] cp: use copy_file_range if available * NEWS: Mention this. * bootstrap.conf (gnulib_modules): Add copy-file-range. * src/copy.c (sparse_copy): Try copy_file_range if not looking for holes. --- NEWS | 5 +++-- bootstrap.conf | 1 + src/copy.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 63cb47d10..1c3f6378d 100644 --- a/NEWS +++ b/NEWS @@ -17,8 +17,9 @@ GNU coreutils NEWS -*- outline -*- cp and install now default to copy-on-write (COW) if available. - cp, install and mv now prefer lseek+SEEK_HOLE to ioctl+FS_IOC_FIEMAP - on sparse files, as lseek is simpler and more portable. + cp, install and mv now use the copy_file_range syscall if available. + Also, they prefer lseek+SEEK_HOLE to ioctl+FS_IOC_FIEMAP on sparse + files, as lseek is simpler and more portable. On GNU/Linux systems, ls no longer issues an error message on a directory merely because it was removed. This reverts a change diff --git a/bootstrap.conf b/bootstrap.conf index 12e2d831a..2506f0db4 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -54,6 +54,7 @@ gnulib_modules=" closeout config-h configmake + copy-file-range crypto/md5 crypto/sha1 crypto/sha256 diff --git a/src/copy.c b/src/copy.c index d88f8cf93..4050f6953 100644 --- a/src/copy.c +++ b/src/copy.c @@ -265,6 +265,46 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, { *last_write_made_hole = false; *total_n_read = 0; + + /* If not looking for holes, use copy_file_range if available. */ + if (!hole_size) + while (max_n_read) + { + /* Copy at most COPY_MAX bytes at a time; this is min + (PTRDIFF_MAX, SIZE_MAX) truncated to a value that is + surely aligned well. */ + ssize_t ssize_max = TYPE_MAXIMUM (ssize_t); + ptrdiff_t copy_max = MIN (ssize_max, SIZE_MAX) >> 30 << 30; + ssize_t n_copied = copy_file_range (src_fd, NULL, dest_fd, NULL, + MIN (max_n_read, copy_max), 0); + if (n_copied == 0) + { + /* copy_file_range incorrectly returns 0 when reading from + the proc file system on the Linux kernel through at + least 5.6.19 (2020), so fall back on 'read' if the + input file seems empty. */ + if (*total_n_read == 0) + break; + return true; + } + if (n_copied < 0) + { + if (errno == ENOSYS || errno == EINVAL + || errno == EBADF || errno == EXDEV) + break; + if (errno == EINTR) + n_copied = 0; + else + { + error (0, errno, _("error copying %s to %s"), + quoteaf_n (0, src_name), quoteaf_n (1, dst_name)); + return false; + } + } + max_n_read -= n_copied; + *total_n_read += n_copied; + } + bool make_hole = false; off_t psize = 0; -- 2.25.4