Configuration Information [Automatically generated, do not change]:
Machine: x86_64
OS: linux-gnu
Compiler: gcc
Compilation CFLAGS: -DPROGRAM='bash' -DCONF_HOSTTYPE='x86_64' -DCONF_OSTYPE='linux-gnu' -DCONF_MACHTYPE='x86_64-unknown-linux-gnu' -DCONF_VENDOR='unknown' -DLOCALEDIR='/usr/local/share/locale' -DPACKAGE='bash' -DSHELL -DHAVE_CONFIG_H -I. -I. -I./include -I./lib -g -O2
uname output: Linux bohr 2.6.35-30-generic #54-Ubuntu SMP Tue Jun 7 18:41:54 UTC 2011 x86_64 GNU/Linux
Machine Type: x86_64-unknown-linux-gnu
Bash Version: 4.2
Patch Level: 10
Release Status: release

Description:
	"read" builtin corrupts buffered input after byte 2**31.

Repeat-By:
	To demonstrate this error, use "read" to read a large text file with size > 2**31, with non-128-multiple line lengths, through a file descriptor, and compare against expected input. Attached is a bash script to perform that test.

Fix:
	The problem is caused by an improper error test on lseek(2) in the zsyncfd() function in lib/sh/zread.c. lseek() returns an "off_t" but only an "int" was used, so when the file offset goes beyond 2**31, the offset appears to be negative, and hence the "lbuf" index and offset variables are not reset. Using the proper type and the proper error check fixes it.

	The following patch corrects the problem:

--- lib/sh/zread.c.00 2009-03-02 05:54:45.000000000 -0800
+++ lib/sh/zread.c 2011-07-24 17:07:03.747260237 -0700
@@ -161,13 +161,13 @@
      int fd;
 {
   off_t off;
-  int r;
+  off_t r;

   off = lused - lind;
   r = 0;
   if (off > 0)
     r = lseek (fd, -off, SEEK_CUR);

-  if (r >= 0)
+  if (r != (off_t)-1)
     lused = lind = 0;
 }

--
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Gregory H. Margo
gmargo at yahoo/com, gmail/com, pacbell/net; greg at margofamily/org
#!/bin/bash
# Test script for 2**31 read bug.
#
# Generates a text file slightly larger than 2**31 bytes, then reads it back
# line by line with the "read" builtin through a redirected file descriptor,
# comparing every line against the value it is expected to hold.
#
# This is _really_ slow.
# Takes about 30 minutes on 2.8GHz i7
# G. Margo 2011-07-26

# Flags
input_generate=1    # Generate input file
input_check=1       # Check input file against expected input
input_remove=1      # Remove input file

# Constants
INPUTFILE=readtest_in.txt
limit=16909400      # With line length=127, errors out after 16909321
fmt="%09d-%s-%09d"
padding="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB"

###########################################################################
# Generate input file with line length 127.
if [ "$input_generate" -ne 0 ]; then
    rm -f "$INPUTFILE"
    echo "Generating input file"

    # The shell way, _really_ slow (14 minutes)
    #for (( line=1 ; line <= limit ; line++ ))
    #do
    #    printf "$fmt\n" $line $padding $line
    #done > "$INPUTFILE"

    # The perl way (20 seconds)
    perl -e "printf \"$fmt\\n\", \$_, \"$padding\", \$_ foreach 1 .. $limit;" > "$INPUTFILE"
fi

###########################################################################
# Check input against expected
if [ "$input_check" -ne 0 ]; then
    echo "Checking input against expected"
    count=0
    for (( line=1 ; line <= limit ; line++ ))
    do
        # IFS= and -r keep the line exactly as stored in the file (no
        # whitespace trimming, no backslash processing), so the comparison
        # below is a byte-for-byte check of what "read" returned.
        if ! IFS= read -r rline; then
            echo "Error: early EOF on input"
            (( count++ ))
            break
        fi

        # Rebuild the line the generator wrote for this line number.
        printf -v eline "$fmt" "$line" "$padding" "$line"
        if [ "$rline" != "$eline" ]; then
            echo "Error: line $line: expected: $eline"
            echo "Error: line $line: received: $rline"
            (( count++ ))
        fi
    done < "$INPUTFILE"
    echo "There were $count errors in $limit lines"
fi

###########################################################################
# Remove input file
if [ "$input_remove" -ne 0 ]; then
    rm -f "$INPUTFILE"
fi