This email is a follow-up to <https://bugs.gnu.org/8601> dated 2011-05-01. Jeff,
thanks for reporting the problem. (There's a good chance this email will bounce
but I'll send it to your 2011 email address anyway.)

I recently ran into the same issue and derived the attached patches
independently. I then found your bug report, made sure the attached patches
fixed every problem that your proposal did, and installed the attached patches
into Savannah.

The attached patches 1-3 merely fix typos and refactor.

Patch 4 corresponds to your proposal; however, it differs in that its basic idea
is to use the FIEMAP code only as a fallback if SEEK_DATA doesn't work, rather
than try to add to the already-too-complicated code that fiddles with FIEMAPs.
(I don't observe any significant performance advantage to the FIEMAP stuff, but
maybe that's just me.)

Patch 5 adds opportunistic use of the copy_file_range syscall introduced in
Linux kernel 4.5 (2016) and reworked in 5.3 (2019). This should improve 'cp'
performance on kernels and file systems that support copy_file_range.
>From 4fe5259ab6c9e459a6db5938d143a9c65be113d9 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 25 Jun 2020 18:10:49 -0700
Subject: [PATCH 1/5] maint: typo fix

* NEWS: Fix typo.
---
 NEWS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index d36259641..d713fa724 100644
--- a/NEWS
+++ b/NEWS
@@ -17,7 +17,7 @@ GNU coreutils NEWS                                    -*- outline -*-
 
   cp and install now default to copy-on-write (COW) if available.
 
-  On GNU/Linux systems, ls no longer issues an error message on
+  On GNU/Linux systems, ls no longer issues an error message on a
   directory merely because it was removed.  This reverts a change
   that was made in release 8.32.
 
-- 
2.25.4

>From 51981008f9892d44231c432535deac4f9b3cbe5e Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 23 Jun 2020 19:18:04 -0700
Subject: [PATCH 2/5] cp: refactor extent_copy

* src/copy.c (extent_copy): New arg SCAN, replacing
REQUIRE_NORMAL_COPY.  All callers changed.
(enum scantype): New type.
(infer_scantype): Rename from is_probably_sparse and return
the new type.  Add args FD and SCAN.  All callers changed.
---
 src/copy.c | 119 +++++++++++++++++++++++++----------------------------
 1 file changed, 55 insertions(+), 64 deletions(-)

diff --git a/src/copy.c b/src/copy.c
index 54601ce07..f694f913f 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -422,9 +422,8 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
              size_t hole_size, off_t src_total_size,
              enum Sparse_type sparse_mode,
              char const *src_name, char const *dst_name,
-             bool *require_normal_copy)
+             struct extent_scan *scan)
 {
-  struct extent_scan scan;
   off_t last_ext_start = 0;
   off_t last_ext_len = 0;
 
@@ -432,45 +431,25 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
      We may need this at the end, for a final ftruncate.  */
   off_t dest_pos = 0;
 
-  extent_scan_init (src_fd, &scan);
-
-  *require_normal_copy = false;
   bool wrote_hole_at_eof = true;
-  do
+  while (true)
     {
-      bool ok = extent_scan_read (&scan);
-      if (! ok)
-        {
-          if (scan.hit_final_extent)
-            break;
-
-          if (scan.initial_scan_failed)
-            {
-              *require_normal_copy = true;
-              return false;
-            }
-
-          error (0, errno, _("%s: failed to get extents info"),
-                 quotef (src_name));
-          return false;
-        }
-
       bool empty_extent = false;
-      for (unsigned int i = 0; i < scan.ei_count || empty_extent; i++)
+      for (unsigned int i = 0; i < scan->ei_count || empty_extent; i++)
         {
           off_t ext_start;
           off_t ext_len;
           off_t ext_hole_size;
 
-          if (i < scan.ei_count)
+          if (i < scan->ei_count)
             {
-              ext_start = scan.ext_info[i].ext_logical;
-              ext_len = scan.ext_info[i].ext_length;
+              ext_start = scan->ext_info[i].ext_logical;
+              ext_len = scan->ext_info[i].ext_length;
             }
           else /* empty extent at EOF.  */
             {
               i--;
-              ext_start = last_ext_start + scan.ext_info[i].ext_length;
+              ext_start = last_ext_start + scan->ext_info[i].ext_length;
               ext_len = 0;
             }
 
@@ -498,7 +477,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
                 {
                   error (0, errno, _("cannot lseek %s"), quoteaf (src_name));
                 fail:
-                  extent_scan_free (&scan);
+                  extent_scan_free (scan);
                   return false;
                 }
 
@@ -539,7 +518,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
           /* For now, do not treat FIEMAP_EXTENT_UNWRITTEN specially,
              because that (in combination with no sync) would lead to data
              loss at least on XFS and ext4 when using 2.6.39-rc3 kernels.  */
-          if (0 && (scan.ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN))
+          if (0 && (scan->ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN))
             {
               empty_extent = true;
               last_ext_len = 0;
@@ -571,16 +550,23 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
              extents beyond the apparent size.  */
           if (dest_pos == src_total_size)
             {
-              scan.hit_final_extent = true;
+              scan->hit_final_extent = true;
               break;
             }
         }
 
       /* Release the space allocated to scan->ext_info.  */
-      extent_scan_free (&scan);
+      extent_scan_free (scan);
 
+      if (scan->hit_final_extent)
+        break;
+      if (! extent_scan_read (scan) && ! scan->hit_final_extent)
+        {
+          error (0, errno, _("%s: failed to get extents info"),
+                 quotef (src_name));
+          return false;
+        }
     }
-  while (! scan.hit_final_extent);
 
   /* When the source file ends with a hole, we have to do a little more work,
      since the above copied only up to and including the final extent.
@@ -1021,16 +1007,35 @@ fchmod_or_lchmod (int desc, char const *name, mode_t mode)
 # define HAVE_STRUCT_STAT_ST_BLOCKS 0
 #endif
 
+/* Type of scan being done on the input when looking for sparseness.  */
+enum scantype
+  {
+   /* No fancy scanning; just read and write.  */
+   PLAIN_SCANTYPE,
+
+   /* Read and examine data looking for zero blocks; useful when
+      attempting to create sparse output.  */
+   ZERO_SCANTYPE,
+
+   /* Extent information is available.  */
+   EXTENT_SCANTYPE
+  };
+
 /* Use a heuristic to determine whether stat buffer SB comes from a file
    with sparse blocks.  If the file has fewer blocks than would normally
    be needed for a file of its size, then at least one of the blocks in
    the file is a hole.  In that case, return true.  */
-static bool
-is_probably_sparse (struct stat const *sb)
+static enum scantype
+infer_scantype (int fd, struct stat const *sb, struct extent_scan *scan)
 {
-  return (HAVE_STRUCT_STAT_ST_BLOCKS
-          && S_ISREG (sb->st_mode)
-          && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE);
+  if (! (HAVE_STRUCT_STAT_ST_BLOCKS
+         && S_ISREG (sb->st_mode)
+         && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE))
+    return PLAIN_SCANTYPE;
+
+  extent_scan_init (fd, scan);
+  extent_scan_read (scan);
+  return scan->initial_scan_failed ? ZERO_SCANTYPE : EXTENT_SCANTYPE;
 }
 
 
@@ -1061,6 +1066,7 @@ copy_reg (char const *src_name, char const *dst_name,
   mode_t src_mode = src_sb->st_mode;
   struct stat sb;
   struct stat src_open_sb;
+  struct extent_scan scan;
   bool return_val = true;
   bool data_copy_required = x->data_copy_required;
 
@@ -1260,23 +1266,13 @@ copy_reg (char const *src_name, char const *dst_name,
       fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL);
 
       /* Deal with sparse files.  */
-      bool make_holes = false;
-      bool sparse_src = is_probably_sparse (&src_open_sb);
-
-      if (S_ISREG (sb.st_mode))
-        {
-          /* Even with --sparse=always, try to create holes only
-             if the destination is a regular file.  */
-          if (x->sparse_mode == SPARSE_ALWAYS)
-            make_holes = true;
-
-          /* Use a heuristic to determine whether SRC_NAME contains any sparse
-             blocks.  If the file has fewer blocks than would normally be
-             needed for a file of its size, then at least one of the blocks in
-             the file is a hole.  */
-          if (x->sparse_mode == SPARSE_AUTO && sparse_src)
-            make_holes = true;
-        }
+      enum scantype scantype = infer_scantype (source_desc, &src_open_sb,
+                                               &scan);
+      bool make_holes
+        = (S_ISREG (sb.st_mode)
+           && (x->sparse_mode == SPARSE_ALWAYS
+               || (x->sparse_mode == SPARSE_AUTO
+                   && scantype != PLAIN_SCANTYPE)));
 
       /* If not making a sparse file, try to use a more-efficient
          buffer size.  */
@@ -1305,10 +1301,8 @@ copy_reg (char const *src_name, char const *dst_name,
       buf_alloc = xmalloc (buf_size + buf_alignment);
       buf = ptr_align (buf_alloc, buf_alignment);
 
-      if (sparse_src)
+      if (scantype == EXTENT_SCANTYPE)
         {
-          bool normal_copy_required;
-
           /* Perform an efficient extent-based copy, falling back to the
              standard copy only if the initial extent scan fails.  If the
              '--sparse=never' option is specified, write all data but use
@@ -1316,14 +1310,11 @@ copy_reg (char const *src_name, char const *dst_name,
           if (extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
                            src_open_sb.st_size,
                            make_holes ? x->sparse_mode : SPARSE_NEVER,
-                           src_name, dst_name, &normal_copy_required))
+                           src_name, dst_name, &scan))
             goto preserve_metadata;
 
-          if (! normal_copy_required)
-            {
-              return_val = false;
-              goto close_src_and_dst_desc;
-            }
+          return_val = false;
+          goto close_src_and_dst_desc;
         }
 
       off_t n_read;
-- 
2.25.4

>From 35a55effa81c5f4f8fe7414e8aede0acccee470f Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 24 Jun 2020 17:05:20 -0700
Subject: [PATCH 3/5] cp: avoid copy_reg goto

* src/copy.c (copy_reg): Redo to avoid label and goto.
---
 src/copy.c | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/src/copy.c b/src/copy.c
index f694f913f..b382cfa4d 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -1301,29 +1301,18 @@ copy_reg (char const *src_name, char const *dst_name,
       buf_alloc = xmalloc (buf_size + buf_alignment);
       buf = ptr_align (buf_alloc, buf_alignment);
 
-      if (scantype == EXTENT_SCANTYPE)
-        {
-          /* Perform an efficient extent-based copy, falling back to the
-             standard copy only if the initial extent scan fails.  If the
-             '--sparse=never' option is specified, write all data but use
-             any extents to read more efficiently.  */
-          if (extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
-                           src_open_sb.st_size,
-                           make_holes ? x->sparse_mode : SPARSE_NEVER,
-                           src_name, dst_name, &scan))
-            goto preserve_metadata;
-
-          return_val = false;
-          goto close_src_and_dst_desc;
-        }
-
       off_t n_read;
-      bool wrote_hole_at_eof;
-      if (! sparse_copy (source_desc, dest_desc, buf, buf_size,
-                         make_holes ? hole_size : 0,
-                         x->sparse_mode == SPARSE_ALWAYS, src_name, dst_name,
-                         UINTMAX_MAX, &n_read,
-                         &wrote_hole_at_eof))
+      bool wrote_hole_at_eof = false;
+      if (! (scantype == EXTENT_SCANTYPE
+             ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
+                            src_open_sb.st_size,
+                            make_holes ? x->sparse_mode : SPARSE_NEVER,
+                            src_name, dst_name, &scan)
+             : sparse_copy (source_desc, dest_desc, buf, buf_size,
+                            make_holes ? hole_size : 0,
+                            x->sparse_mode == SPARSE_ALWAYS,
+                            src_name, dst_name, UINTMAX_MAX, &n_read,
+                            &wrote_hole_at_eof)))
         {
           return_val = false;
           goto close_src_and_dst_desc;
@@ -1336,7 +1325,6 @@ copy_reg (char const *src_name, char const *dst_name,
         }
     }
 
-preserve_metadata:
   if (x->preserve_timestamps)
     {
       struct timespec timespec[2];
-- 
2.25.4

>From 6fc41029fff6e955a87312d8f5f967f0d01e390c Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 25 Jun 2020 16:31:44 -0700
Subject: [PATCH 4/5] cp: use SEEK_DATA/SEEK_HOLE if available

If it works, prefer lseek with SEEK_DATA and SEEK_HOLE to FIEMAP,
as lseek is simpler and more portable (will be in next POSIX).
Problem reported in 2011 by Jeff Liu (Bug#8601).
* NEWS: Mention this.
* src/copy.c (lseek_copy) [SEEK_HOLE]: New function.
(enum scantype): New constants ERROR_SCANTYPE, LSEEK_SCANTYPE.
(union scan_inference): New type.
(infer_scantype): Last arg is now union scan_inference *,
not struct extent_scan *.  All callers changed.
Prefer SEEK_HOLE to FIEMAP if both work, since
SEEK_HOLE is simpler and more portable.
(copy_reg): Do the fdadvise after initial scan, in case the scan
fails.  Report an error if the initial scan fails.
(copy_reg) [SEEK_HOLE]: Use lseek_copy if scantype says so.
---
 NEWS       |   3 +
 src/copy.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 201 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index d713fa724..63cb47d10 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,9 @@ GNU coreutils NEWS                                    -*- outline -*-
 
   cp and install now default to copy-on-write (COW) if available.
 
+  cp, install and mv now prefer lseek+SEEK_HOLE to ioctl+FS_IOC_FIEMAP
+  on sparse files, as lseek is simpler and more portable.
+
   On GNU/Linux systems, ls no longer issues an error message on a
   directory merely because it was removed.  This reverts a change
   that was made in release 8.32.
diff --git a/src/copy.c b/src/copy.c
index b382cfa4d..d88f8cf93 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -416,7 +416,12 @@ write_zeros (int fd, off_t n_bytes)
    Upon a successful copy, return true.  If the initial extent scan
    fails, set *NORMAL_COPY_REQUIRED to true and return false.
    Upon any other failure, set *NORMAL_COPY_REQUIRED to false and
-   return false.  */
+   return false.
+
+   FIXME: Once we no longer need to support Linux kernel versions
+   before 3.1 (2011), this function can be retired as it is superseded
+   by lseek_copy.  That is, we no longer need extent-scan.h and can
+   remove any of the code that uses it.  */
 static bool
 extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
              size_t hole_size, off_t src_total_size,
@@ -595,6 +600,150 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
   return true;
 }
 
+#ifdef SEEK_HOLE
+/* Perform an efficient extent copy, if possible.  This avoids
+   the overhead of detecting holes in hole-introducing/preserving
+   copy, and thus makes copying sparse files much more efficient.
+   Copy from SRC_FD to DEST_FD, using BUF (of size BUF_SIZE) for a buffer.
+   Look for holes of size HOLE_SIZE in the input.
+   The input file is of size SRC_TOTAL_SIZE.
+   Use SPARSE_MODE to determine whether to create holes in the output.
+   SRC_NAME and DST_NAME are the input and output file names.
+   Return true if successful, false (with a diagnostic) otherwise.  */
+
+static bool
+lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
+            size_t hole_size, off_t ext_start, off_t src_total_size,
+            enum Sparse_type sparse_mode,
+            char const *src_name, char const *dst_name)
+{
+  off_t last_ext_start = 0;
+  off_t last_ext_len = 0;
+  off_t dest_pos = 0;
+  bool wrote_hole_at_eof = true;
+
+  while (0 <= ext_start)
+    {
+      off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
+      if (ext_end < 0)
+        {
+          if (errno != ENXIO)
+            goto cannot_lseek;
+          ext_end = src_total_size;
+          if (ext_end <= ext_start)
+            {
+              /* The input file grew; get its current size.  */
+              src_total_size = lseek (src_fd, 0, SEEK_END);
+              if (src_total_size < 0)
+                goto cannot_lseek;
+
+              /* If the input file shrank after growing, stop copying.  */
+              if (src_total_size <= ext_start)
+                break;
+
+              ext_end = src_total_size;
+            }
+        }
+      /* If the input file must have grown, increase its measured size.  */
+      if (src_total_size < ext_end)
+        src_total_size = ext_end;
+
+      if (lseek (src_fd, ext_start, SEEK_SET) < 0)
+        goto cannot_lseek;
+
+      wrote_hole_at_eof = false;
+      off_t ext_hole_size = ext_start - last_ext_start - last_ext_len;
+
+      if (ext_hole_size)
+        {
+          if (sparse_mode != SPARSE_NEVER)
+            {
+              if (! create_hole (dest_fd, dst_name,
+                                 sparse_mode == SPARSE_ALWAYS,
+                                 ext_hole_size))
+                return false;
+              wrote_hole_at_eof = true;
+            }
+          else
+            {
+              /* When not inducing holes and when there is a hole between
+                 the end of the previous extent and the beginning of the
+                 current one, write zeros to the destination file.  */
+              if (! write_zeros (dest_fd, ext_hole_size))
+                {
+                  error (0, errno, _("%s: write failed"),
+                         quotef (dst_name));
+                  return false;
+                }
+            }
+        }
+
+      off_t ext_len = ext_end - ext_start;
+      last_ext_start = ext_start;
+      last_ext_len = ext_len;
+
+      /* Copy this extent, looking for further opportunities to not
+         bother to write zeros unless --sparse=never, since SEEK_HOLE
+         is conservative and may miss some holes.  */
+      off_t n_read;
+      bool read_hole;
+      if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size,
+                          sparse_mode == SPARSE_NEVER ? 0 : hole_size,
+                          true, src_name, dst_name, ext_len, &n_read,
+                          &read_hole))
+        return false;
+
+      dest_pos = ext_start + n_read;
+      if (n_read)
+        wrote_hole_at_eof = read_hole;
+      if (n_read < ext_len)
+        {
+          /* The input file shrank.  */
+          src_total_size = dest_pos;
+          break;
+        }
+
+      ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
+      if (ext_start < 0)
+        {
+          if (errno != ENXIO)
+            goto cannot_lseek;
+          break;
+        }
+    }
+
+  /* When the source file ends with a hole, we have to do a little more work,
+     since the above copied only up to and including the final extent.
+     In order to complete the copy, we may have to insert a hole or write
+     zeros in the destination corresponding to the source file's hole-at-EOF.
+
+     In addition, if the final extent was a block of zeros at EOF and we've
+     just converted them to a hole in the destination, we must call ftruncate
+     here in order to record the proper length in the destination.  */
+  if ((dest_pos < src_total_size || wrote_hole_at_eof)
+      && ! (sparse_mode == SPARSE_NEVER
+            ? write_zeros (dest_fd, src_total_size - dest_pos)
+            : ftruncate (dest_fd, src_total_size) == 0))
+    {
+      error (0, errno, _("failed to extend %s"), quoteaf (dst_name));
+      return false;
+    }
+
+  if (sparse_mode == SPARSE_ALWAYS && dest_pos < src_total_size
+      && punch_hole (dest_fd, dest_pos, src_total_size - dest_pos) < 0)
+    {
+      error (0, errno, _("error deallocating %s"), quoteaf (dst_name));
+      return false;
+    }
+
+  return true;
+
+ cannot_lseek:
+  error (0, errno, _("cannot lseek %s"), quoteaf (src_name));
+  return false;
+}
+#endif
+
 /* FIXME: describe */
 /* FIXME: rewrite this to use a hash table so we avoid the quadratic
    performance hit that's probably noticeable only on trees deeper
@@ -1010,6 +1159,9 @@ fchmod_or_lchmod (int desc, char const *name, mode_t mode)
 /* Type of scan being done on the input when looking for sparseness.  */
 enum scantype
   {
+   /* An error was found when determining scantype.  */
+   ERROR_SCANTYPE,
+
    /* No fancy scanning; just read and write.  */
    PLAIN_SCANTYPE,
 
@@ -1017,22 +1169,44 @@ enum scantype
       attempting to create sparse output.  */
    ZERO_SCANTYPE,
 
+   /* lseek information is available.  */
+   LSEEK_SCANTYPE,
+
    /* Extent information is available.  */
    EXTENT_SCANTYPE
   };
 
-/* Use a heuristic to determine whether stat buffer SB comes from a file
-   with sparse blocks.  If the file has fewer blocks than would normally
-   be needed for a file of its size, then at least one of the blocks in
-   the file is a hole.  In that case, return true.  */
+/* Result of infer_scantype.  */
+union scan_inference
+{
+  /* Used if infer_scantype returns LSEEK_SCANTYPE.  This is the
+     offset of the first data block, or -1 if the file has no data.  */
+  off_t ext_start;
+
+  /* Used if infer_scantype returns EXTENT_SCANTYPE.  */
+  struct extent_scan extent_scan;
+};
+
+/* Return how to scan a file with descriptor FD and stat buffer SB.
+   Store any information gathered into *SCAN.  */
 static enum scantype
-infer_scantype (int fd, struct stat const *sb, struct extent_scan *scan)
+infer_scantype (int fd, struct stat const *sb,
+                union scan_inference *scan_inference)
 {
   if (! (HAVE_STRUCT_STAT_ST_BLOCKS
          && S_ISREG (sb->st_mode)
          && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE))
     return PLAIN_SCANTYPE;
 
+#ifdef SEEK_HOLE
+  scan_inference->ext_start = lseek (fd, 0, SEEK_DATA);
+  if (0 <= scan_inference->ext_start)
+    return LSEEK_SCANTYPE;
+  else if (errno != EINVAL && errno != ENOTSUP)
+    return errno == ENXIO ? LSEEK_SCANTYPE : ERROR_SCANTYPE;
+#endif
+
+  struct extent_scan *scan = &scan_inference->extent_scan;
   extent_scan_init (fd, scan);
   extent_scan_read (scan);
   return scan->initial_scan_failed ? ZERO_SCANTYPE : EXTENT_SCANTYPE;
@@ -1066,7 +1240,7 @@ copy_reg (char const *src_name, char const *dst_name,
   mode_t src_mode = src_sb->st_mode;
   struct stat sb;
   struct stat src_open_sb;
-  struct extent_scan scan;
+  union scan_inference scan_inference;
   bool return_val = true;
   bool data_copy_required = x->data_copy_required;
 
@@ -1263,17 +1437,23 @@ copy_reg (char const *src_name, char const *dst_name,
       size_t buf_size = io_blksize (sb);
       size_t hole_size = ST_BLKSIZE (sb);
 
-      fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL);
-
       /* Deal with sparse files.  */
       enum scantype scantype = infer_scantype (source_desc, &src_open_sb,
-                                               &scan);
+                                               &scan_inference);
+      if (scantype == ERROR_SCANTYPE)
+        {
+          error (0, errno, _("cannot lseek %s"), quoteaf (src_name));
+          return_val = false;
+          goto close_src_and_dst_desc;
+        }
       bool make_holes
         = (S_ISREG (sb.st_mode)
            && (x->sparse_mode == SPARSE_ALWAYS
                || (x->sparse_mode == SPARSE_AUTO
                    && scantype != PLAIN_SCANTYPE)));
 
+      fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL);
+
       /* If not making a sparse file, try to use a more-efficient
          buffer size.  */
       if (! make_holes)
@@ -1307,7 +1487,14 @@ copy_reg (char const *src_name, char const *dst_name,
              ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
                             src_open_sb.st_size,
                             make_holes ? x->sparse_mode : SPARSE_NEVER,
-                            src_name, dst_name, &scan)
+                            src_name, dst_name, &scan_inference.extent_scan)
+#ifdef SEEK_HOLE
+             : scantype == LSEEK_SCANTYPE
+             ? lseek_copy (source_desc, dest_desc, buf, buf_size, hole_size,
+                           scan_inference.ext_start, src_open_sb.st_size,
+                           make_holes ? x->sparse_mode : SPARSE_NEVER,
+                           src_name, dst_name)
+#endif
              : sparse_copy (source_desc, dest_desc, buf, buf_size,
                             make_holes ? hole_size : 0,
                             x->sparse_mode == SPARSE_ALWAYS,
-- 
2.25.4

>From 81849f05b94068f873299bd9fc01b4eebdb31f64 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 25 Jun 2020 17:34:23 -0700
Subject: [PATCH 5/5] cp: use copy_file_range if available

* NEWS: Mention this.
* bootstrap.conf (gnulib_modules): Add copy-file-range.
* src/copy.c (sparse_copy): Try copy_file_range if not
looking for holes.
---
 NEWS           |  5 +++--
 bootstrap.conf |  1 +
 src/copy.c     | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index 63cb47d10..1c3f6378d 100644
--- a/NEWS
+++ b/NEWS
@@ -17,8 +17,9 @@ GNU coreutils NEWS                                    -*- outline -*-
 
   cp and install now default to copy-on-write (COW) if available.
 
-  cp, install and mv now prefer lseek+SEEK_HOLE to ioctl+FS_IOC_FIEMAP
-  on sparse files, as lseek is simpler and more portable.
+  cp, install and mv now use the copy_file_range syscall if available.
+  Also, they prefer lseek+SEEK_HOLE to ioctl+FS_IOC_FIEMAP on sparse
+  files, as lseek is simpler and more portable.
 
   On GNU/Linux systems, ls no longer issues an error message on a
   directory merely because it was removed.  This reverts a change
diff --git a/bootstrap.conf b/bootstrap.conf
index 12e2d831a..2506f0db4 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -54,6 +54,7 @@ gnulib_modules="
   closeout
   config-h
   configmake
+  copy-file-range
   crypto/md5
   crypto/sha1
   crypto/sha256
diff --git a/src/copy.c b/src/copy.c
index d88f8cf93..4050f6953 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -265,6 +265,46 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
 {
   *last_write_made_hole = false;
   *total_n_read = 0;
+
+  /* If not looking for holes, use copy_file_range if available.  */
+  if (!hole_size)
+    while (max_n_read)
+      {
+        /* Copy at most COPY_MAX bytes at a time; this is min
+           (PTRDIFF_MAX, SIZE_MAX) truncated to a value that is
+           surely aligned well.  */
+        ssize_t ssize_max = TYPE_MAXIMUM (ssize_t);
+        ptrdiff_t copy_max = MIN (ssize_max, SIZE_MAX) >> 30 << 30;
+        ssize_t n_copied = copy_file_range (src_fd, NULL, dest_fd, NULL,
+                                            MIN (max_n_read, copy_max), 0);
+        if (n_copied == 0)
+          {
+            /* copy_file_range incorrectly returns 0 when reading from
+               the proc file system on the Linux kernel through at
+               least 5.6.19 (2020), so fall back on 'read' if the
+               input file seems empty.  */
+            if (*total_n_read == 0)
+              break;
+            return true;
+          }
+        if (n_copied < 0)
+          {
+            if (errno == ENOSYS || errno == EINVAL
+                || errno == EBADF || errno == EXDEV)
+              break;
+            if (errno == EINTR)
+              n_copied = 0;
+            else
+              {
+                error (0, errno, _("error copying %s to %s"),
+                       quoteaf_n (0, src_name), quoteaf_n (1, dst_name));
+                return false;
+              }
+          }
+        max_n_read -= n_copied;
+        *total_n_read += n_copied;
+      }
+
   bool make_hole = false;
   off_t psize = 0;
 
-- 
2.25.4

Reply via email to