Ping, any updates here? Thanks.
On Mon, Feb 15, 2021 at 5:07 PM Ondrej Dubaj <odu...@redhat.com> wrote: > Gentle ping > > On Mon, Jan 18, 2021 at 12:02 PM Ondrej Dubaj <odu...@redhat.com> wrote: > >> One of the customer faced I/O errors while archiving a huge file 11 TB and >> observed that after Tar had hit read I/O error due to xfs filesystem, it >> still continue writing 0's to the file using strace. However there was no >> indication for tar that it was writing 0's when the error occurred. >> >> Later it was found that it is expected behavior to write 0's as the file >> header is already written. Hence, it need to be padded with 0's. >> >> Using the reproducing steps provided by customer, we can see this behavior. >> >> Padding 0's is expected behavior however it does so silently (for Read error >> at byte...), it should say it is Padding with zeros similar to how it >> reports "File Shrank , padding with zeroes" >> >> During the reproducer steps provided by customer we see that sometimes tar >> report "Read I/O errors" as "File shrank, padding with 0" , we see in the >> step(2) provided. >> >> Reproducer available here: >> >> #!/bin/bash >> # Reproducer "tardust" >> # >> # When "tar create" reads a file there are several shortcomings when it hits >> read error >> # >> # 1) When read() returns 0 bytes due to read error, then this happens >> # read(4, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # read(4, 0x563adef7b000, 3584) = -1 EIO (Input/output error) >> # write(2, "tar: ", 5tar: ) = 5 >> # write(2, "/mntx/testfile: Read error at by"..., 70/mntx/testfile: Read >> error at byte 260653056, while reading 3584 bytes) = 70 >> # write(2, ": Input/output error", 20: Input/output error) = 20 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # Actual behaviour: it prints a message about "Read error", but it conceals >> the information it will pad the output with zeros >> # Expected behaviour: it should also print the information "padding with >> zero" >> # 2) There is a 2nd shortcoming about tar not differentiate between "read >> error" and "file shrinkage" >> # That means when it sees a short read due to read error, it does not report >> read error. >> # It looks like this: >> # read(4, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # read(4, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 2560 <<< HERE >> # write(2, "tar: ", 5tar: ) = 5 >> # write(2, "/mntx/testfile: File shrank by 5"..., 65/mntx/testfile: File >> shrank by 53927936 bytes; padding with zeros) = 65 >> # write(2, "\n", 1 >> # ) = 1 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # Summary: A read error is not reported here. At least it now says "padding >> with zeros" >> # Expected behaviour: it should report a read error, so the user knows what >> it going on. >> # >> # 3) Side-Note: >> # The blocking factor is applied to the output. When reading a file, all >> reads are misaligned by 512 bytes. >> # This is because it writes a 512 header for every archived file. >> # That means the first read from file is 512bytes too short: >> # Running with tar-blocking-factor=7 >> # fstat(1, {st_mode=S_IFREG|0644, st_size=17827, ...}) = 0 >> # write(1, "/mntx/testfile\n", 15/mntx/testfile >> # ) = 15 >> # read(4, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3072) >> = 3072 #1st read 512bytes too short >> # write(3, "mntx/testfile\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) = >> 3584 >> # read(4, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # write(3, >> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3584) >> = 3584 >> # >> # 4) Reproducer overview: >> # - Create a 500MB testimage, then create a testfile1 in the image >> # - Use losetup/dmsetup with the "dust" target type >> # - you can inject IO errors at specified block number in "dust" >> # - You must hit a 4K boundary to see EIO, so use tar-blocking-factor=7 and >> # - vary the bad blocknumber to find the case (1) >> echo Step 1 Create disk image >> dd if=/dev/zero of=/tmp/testimage bs=1M count=500 || exit >> echo Step 2 Create XFS in image >> mkfs.xfs /tmp/testimage || exit >> echo Step 3 Use losetup so the file can be used a block device >> losetup /dev/loop1 /tmp/testimage || exit >> losetup >> echo Step 6 Now create the testfile, this will have read error injected later >> mkdir /mntx >> mount /dev/loop1 /mntx || exit >> dd if=/dev/zero of=/mntx/testfile bs=1M count=300 || exit >> umount /mntx >> echo Step7 Now iterating through bad blocks >> echo As result, there are strace output file a1000 ... a1040 >> for i in `seq 1000 1 1040` >> do >> echo >> echo Badblock $i >> let ERR=i >> let ERR1=i+1 >> let NUMSECTOR2=1024000-ERR1 >> #echo ERR1 is $ERR1 >> #echo NUMSECTOR2 is $NUMSECTOR2 >> dmsetup create tardust <<EOF >> 0 $ERR linear /dev/loop1 0 >> $ERR 1 error >> $ERR1 $NUMSECTOR2 linear /dev/loop1 $ERR1 >> EOF >> #dmsetup ls >> #dmsetup status >> #dmsetup table >> mount /dev/mapper/tardust /mntx || exit >> strace tar cvbf 7 /tmp/tardust.tar /mntx/testfile >&/tmp/a$i >> umount /mntx >> dmsetup remove tardust >> grep -e error -e shrank /tmp/a$i >> done >> echo "Done: inspect the strace output file for error behaviour (grep error ; >> Look at last read()-call )" >> losetup -d /dev/loop1 >> >> ================= >> >> Actual results: >> - When tar hits a disk read error when reading file from disk and creating >> an archive, then it prints "file shrank" >> - then it writes zeros (aka padding) according to initial file size (but >> does not print that message) >> - This happens in most cases (due to tar-block-size / disk-block-size / >> read-shift-by-512-bytes interaction) >> - I provided a reproducer which shows under which circumstances it correctly >> prints "Read error at byte…" >> >> Expected results: >> - When there is a read error, THEN tar shall report a read error >> - When there is a read error, THEN tar shall NOT report a "file shrank" >> - In addition it SHALL print "Padding with zeros". This is missing currently. >> >> >> Regards, >> >> Ondrej Dubaj >> >>