On Wed, 22 Mar 2023 at 12:14, Jonathan Wakely wrote:

>
>
> On Mon, 20 Mar 2023 at 22:30, Jonathan Wakely via Libstdc++ <
> libstd...@gcc.gnu.org> wrote:
>
>> On 20/03/23 22:27 +0000, Jonathan Wakely wrote:
>> >On 06/03/23 20:52 +0100, Jannik Glückert wrote:
>> >>we were previously only using sendfile for files smaller than 2GB, as
>> >>sendfile needs to be called repeatedly for files bigger than that.
>> >>
>> >>some quick numbers, copying a 16GB file, average of 10 repetitions:
>> >>   old:
>> >>       real: 13.4s
>> >>       user: 0.14s
>> >>       sys : 7.43s
>> >>   new:
>> >>       real: 8.90s
>> >>       user: 0.00s
>> >>       sys : 3.68s
>> >>
>> >>Additionally, this fixes
>> >>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108178
>> >>
>> >>libstdc++-v3/ChangeLog:
>> >>
>> >>       * acinclude.m4 (_GLIBCXX_HAVE_LSEEK): define
>> >>       * config.h.in: Regenerate.
>> >>       * configure: Regenerate.
>> >>       * src/filesystem/ops-common.h: enable sendfile for files
>> >>         >2GB in std::filesystem::copy_file, skip zero-length files
>>
>> Also, the ChangeLog entry needs to be indented with tabs, name the
>> changed functions, and should be complete sentences, e.g.
>>
>>         * acinclude.m4 (_GLIBCXX_HAVE_LSEEK): Define.
>>         * config.h.in: Regenerate.
>>         * configure: Regenerate.
>>         * src/filesystem/ops-common.h (copy_file_sendfile): Define new
>>         function for sendfile logic. Loop to support large files. Skip
>>         zero-length files.
>>         (do_copy_file): Use it.
>>
>>
> Here's what I plan to commit in a few weeks when GCC 14 Stage 1 opens.
>
>
>
And similarly for the copy_file_range change.
commit 2ad500e358c03ef63af1540d44645df582a4809c
Author: Jannik Glückert <jannik.glueck...@gmail.com>
Date:   Wed Mar 8 18:37:43 2023

    libstdc++: Use copy_file_range for filesystem::copy_file
    
    copy_file_range is a recent-ish syscall for copying files. It is similar
    to sendfile but allows filesystem-specific optimizations. Common ones are:
    Reflinks: BTRFS, XFS, ZFS (which does not implement the syscall yet)
    Server-side copy: NFS, SMB, Ceph
    
    If copy_file_range is not available for the given files, fall back to
    sendfile / userspace copy.
    
    libstdc++-v3/ChangeLog:
    
            * acinclude.m4 (_GLIBCXX_USE_COPY_FILE_RANGE): Define.
            * config.h.in: Regenerate.
            * configure: Regenerate.
            * src/filesystem/ops-common.h (copy_file_copy_file_range):
            Define new function.
            (do_copy_file): Use it.
    
    Signed-off-by: Jannik Glückert <jannik.glueck...@gmail.com>

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 85a09a5a869..4cf02dc6e4e 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4581,6 +4581,7 @@ dnl  _GLIBCXX_USE_UTIMENSAT
 dnl  _GLIBCXX_USE_ST_MTIM
 dnl  _GLIBCXX_USE_FCHMOD
 dnl  _GLIBCXX_USE_FCHMODAT
+dnl  _GLIBCXX_USE_COPY_FILE_RANGE
 dnl  _GLIBCXX_USE_SENDFILE
 dnl  HAVE_LINK
 dnl  HAVE_LSEEK
@@ -4779,6 +4780,25 @@ dnl
   if test $glibcxx_cv_truncate = yes; then
     AC_DEFINE(HAVE_TRUNCATE, 1, [Define if truncate is available in <unistd.h>.])
   fi
+dnl
+  AC_CACHE_CHECK([for copy_file_range that can copy files],
+    glibcxx_cv_copy_file_range, [dnl
+    case "${target_os}" in
+      linux*)
+       GCC_TRY_COMPILE_OR_LINK(
+         [#include <unistd.h>],
+         [copy_file_range(1, nullptr, 2, nullptr, 1, 0);],
+         [glibcxx_cv_copy_file_range=yes],
+         [glibcxx_cv_copy_file_range=no])
+       ;;
+      *)
+       glibcxx_cv_copy_file_range=no
+       ;;
+    esac
+  ])
+  if test $glibcxx_cv_copy_file_range = yes; then
+    AC_DEFINE(_GLIBCXX_USE_COPY_FILE_RANGE, 1, [Define if copy_file_range is available in <unistd.h>.])
+  fi
 dnl
   AC_CACHE_CHECK([for sendfile that can copy files],
     glibcxx_cv_sendfile, [dnl
diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h
index 7874a95488a..906436b484e 100644
--- a/libstdc++-v3/src/filesystem/ops-common.h
+++ b/libstdc++-v3/src/filesystem/ops-common.h
@@ -49,6 +49,9 @@
 #ifdef NEED_DO_COPY_FILE
 # include <filesystem>
 # include <ext/stdio_filebuf.h>
+# ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+#  include <unistd.h> // copy_file_range
+# endif
 # ifdef _GLIBCXX_USE_SENDFILE
 #  include <sys/sendfile.h> // sendfile
 #  include <unistd.h> // lseek
@@ -359,6 +362,32 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
   }
 
 #ifdef NEED_DO_COPY_FILE
+#ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+  bool
+  copy_file_copy_file_range(int fd_in, int fd_out, size_t length) noexcept
+  {
+    // a zero-length file is either empty, or not copyable by this syscall
+    // return early to avoid the syscall cost
+    if (length == 0)
+      {
+        errno = EINVAL;
+        return false;
+      }
+    size_t bytes_left = length;
+    off64_t off_in = 0, off_out = 0;
+    ssize_t bytes_copied;
+    do
+      {
+       bytes_copied = ::copy_file_range(fd_in, &off_in, fd_out, &off_out,
+                                        bytes_left, 0);
+       bytes_left -= bytes_copied;
+      }
+    while (bytes_left > 0 && bytes_copied > 0);
+    if (bytes_copied < 0)
+      return false;
+    return true;
+  }
+#endif
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
   bool
   copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept
@@ -529,6 +558,33 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 
     bool has_copied = false;
 
+#ifdef _GLIBCXX_USE_COPY_FILE_RANGE
+    if (!has_copied)
+      has_copied = copy_file_copy_file_range(in.fd, out.fd, from_st->st_size);
+    if (!has_copied)
+      {
+       // EINVAL: src and dst are the same file (this is not cheaply
+       // detectable from userspace)
+       // EINVAL: copy_file_range is unsupported for this file type by the
+       // underlying filesystem
+       // ENOTSUP: undocumented, can arise with old kernels and NFS
+       // EOPNOTSUPP: filesystem does not implement copy_file_range
+       // ETXTBSY: src or dst is an active swapfile (nonsensical, but allowed
+       // with normal copying)
+       // EXDEV: src and dst are on different filesystems that do not support
+       // cross-fs copy_file_range
+       // ENOENT: undocumented, can arise with CIFS
+       // ENOSYS: unsupported by kernel or blocked by seccomp
+        if (errno != EINVAL && errno != ENOTSUP && errno != EOPNOTSUPP
+             && errno != ETXTBSY && errno != EXDEV && errno != ENOENT
+             && errno != ENOSYS)
+          {
+            ec.assign(errno, std::generic_category());
+            return false;
+          }
+      }
+#endif
+
 #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS
     if (!has_copied)
       has_copied = copy_file_sendfile(in.fd, out.fd, from_st->st_size);

Reply via email to