It turns out that lseek(3, 0, SEEK_HOLE) returns end-of-file for a
sparse file being copied from a mounted Linux squashfs filesystem.
This breaks the --sparse=auto heuristic that detects a sparse file.

I have a fix for you to consider.

To reproduce:

First, create a squashfs image containing a sparse file, output_file.bin.

mkdir squashfs-root
cd squashfs-root

Then run the following script mkhole.sh:

--------------------------------------------------------------
#!/bin/bash
OUTPUT="output_file.bin"

# Remove file if it exists
rm -f "$OUTPUT"

# Write 4KB of 'A'
dd if=<(yes A | tr -d '\n' | head -c 4096) of="$OUTPUT" bs=4096 count=1

# Create a 4k*100 hole followed by 4KB of zeros
dd if=/dev/zero of="$OUTPUT" bs=4096 count=1 seek=101

# Write another 4KB of 'A' after the hole (overwriting the 4k of zeros)
dd if=<(yes A | tr -d '\n' | head -c 4096) of="$OUTPUT" bs=4096 count=1 seek=101
--------------------------------------------------------------

Now create the mysquashfs.img file to mount:

cd ..
mksquashfs squashfs-root mysquashfs.img
sudo mount -o loop  mysquashfs.img /mnt

Check that /mnt/output_file.bin is sparse:

ls -lh /mnt/output_file.bin
du -sh /mnt/output_file.bin

(the second value, the disk usage reported by du, should be smaller).

Now use a newly built cp command from coreutils to copy this file to a
local filesystem.

mkdir ~/tmp
cd ~/tmp
~/src/coreutils/src/cp --reflink=never /mnt/output_file.bin nonsparse

Even though --sparse=auto is in effect (the default) and the file is
sparse, it is not detected as such. This can be confirmed by running:

strace ~/src/coreutils/src/cp --reflink=never /mnt/output_file.bin nonsparse

and you will see:

lseek(3, 0, SEEK_DATA)                  = 0
fadvise64(3, 0, 0, POSIX_FADV_SEQUENTIAL) = 0
lseek(3, 0, SEEK_HOLE)                  = 417792

The following diff fixes this for me, and still passes "make check".

diff --git a/src/copy.c b/src/copy.c
index 77f0c561e..91136cd7c 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -592,7 +592,7 @@ lseek_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size,
           else if (sparse_mode != SPARSE_NEVER)
             {
               if (! create_hole (dest_fd, dst_name,
-                                 sparse_mode == SPARSE_ALWAYS,
+                                 sparse_mode != SPARSE_NEVER,
                                  ext_hole_size))
                 return false;
             }
@@ -621,7 +621,7 @@ lseek_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size,
       if ( ! sparse_copy (src_fd, dest_fd, abuf, buf_size,
                           true, allow_reflink, src_name, dst_name,
                           ext_len,
-                          sparse_mode == SPARSE_ALWAYS ? hole_size : nullptr,
+                          sparse_mode != SPARSE_NEVER ? hole_size : nullptr,
                           &n_read))
         return false;

@@ -1576,7 +1576,7 @@ copy_reg (char const *src_name, char const *dst_name,
              :
 #endif
                sparse_copy (source_desc, dest_desc, &buf, buf_size,
-                            x->sparse_mode == SPARSE_ALWAYS,
+                            x->sparse_mode != SPARSE_NEVER,
                             x->reflink_mode != REFLINK_NEVER,
                             src_name, dst_name, UINTMAX_MAX,
                             make_holes ? &hole_size : nullptr, &n_read)))

Thanks !

Jeremy Allison,
CIQ Inc.



Reply via email to