Configuration Information [Automatically generated, do not change]:
Machine: x86_64
OS: linux-gnu
Compiler: gcc
Compilation CFLAGS:  -DPROGRAM='bash' -DCONF_HOSTTYPE='x86_64' 
-DCONF_OSTYPE='linux-gnu' -DCONF_MACHTYPE='x86_64-unknown-linux-gnu' 
-DCONF_VENDOR='unknown' -DLOCALEDIR='/usr/local/share/locale' -DPACKAGE='bash' 
-DSHELL -DHAVE_CONFIG_H   -I.  -I. -I./include -I./lib   -g -O2
uname output: Linux bohr 2.6.35-30-generic #54-Ubuntu SMP Tue Jun 7 18:41:54 
UTC 2011 x86_64 GNU/Linux
Machine Type: x86_64-unknown-linux-gnu

Bash Version: 4.2
Patch Level: 10
Release Status: release

Description:

"read" builtin corrupts buffered input after byte 2**31.


Repeat-By:

To demonstrate this error, use "read" to read a large text file (size
greater than 2**31 bytes) whose line lengths are not a multiple of 128,
through a file descriptor, and compare what is read against the expected
input.

Attached is a bash script to perform that test.


Fix:

The problem is caused by an improper error test on lseek(2)
in the zsyncfd() function in lib/sh/zread.c.

lseek() returns an "off_t", but the result was stored in an "int", so once
the file offset goes beyond 2**31 the truncated value appears to be
negative, the "r >= 0" test fails, and hence the "lbuf" index and offset
variables are not reset.
Using the proper type and the proper error check fixes it.

The following patch corrects the problem:

--- lib/sh/zread.c.00   2009-03-02 05:54:45.000000000 -0800
+++ lib/sh/zread.c      2011-07-24 17:07:03.747260237 -0700
@@ -161,13 +161,13 @@
      int fd;
 {
   off_t off;
-  int r;
+  off_t r;
 
   off = lused - lind;
   r = 0;
   if (off > 0)
     r = lseek (fd, -off, SEEK_CUR);
 
-  if (r >= 0)
+  if (r != (off_t)-1)
     lused = lind = 0;
 }


-- 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Gregory H. Margo
gmargo at yahoo/com, gmail/com, pacbell/net; greg at margofamily/org
#!/bin/bash

# Test script for 2**31 read bug.
# This is _really_ slow.
# Takes about 30 minutes on 2.8GHz i7
# G. Margo 2011-07-26

# Flags (non-zero enables the step).  Each one may be preset in the
# environment to override its default without editing this script, e.g.
#       input_remove=0 bash thisscript
input_generate=${input_generate:-1}     # Generate input file
input_check=${input_check:-1}           # Check input file against expected input
input_remove=${input_remove:-1}         # Remove input file

# Constants
# INPUTFILE and limit may also be preset in the environment.  With the
# default limit the input file is limit * 127 bytes, i.e. just over 2**31;
# a much smaller limit never reaches that offset and is only a smoke test.
INPUTFILE=${INPUTFILE:-readtest_in.txt}
limit=${limit:-16909400}  # With line length=127, errors out after 16909321
# One line is: 9-digit line number, "-", padding, "-", 9-digit line number.
fmt="%09d-%s-%09d"
# 106 filler characters: 9 + 1 + 106 + 1 + 9 + newline = 127 bytes per line.
padding="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzAB"

###########################################################################
# Generate input file with line length 127.
# Line N (for N = 1 .. $limit) is "$fmt" applied to N, $padding, N, followed
# by a newline.  Any previous input file is replaced.
if (( input_generate != 0 )); then
        rm -f -- "$INPUTFILE"
        echo "Generating input file"

        # The shell way, _really_ slow (14 minutes)
        #for (( line=1 ; line <= limit ; line++ ))
        #do
        #       printf "$fmt\n" "$line" "$padding" "$line"
        #done > "$INPUTFILE"

        # The perl way (20 seconds)
        # The format, padding and line count are handed over as arguments
        # instead of being pasted into the perl source, so no shell value is
        # ever parsed as perl code.
        if ! perl -e 'my ($fmt, $pad, $max) = @ARGV;
                      printf($fmt . "\n", $_, $pad, $_) foreach 1 .. $max;' \
                "$fmt" "$padding" "$limit" > "$INPUTFILE"
        then
                # Without a complete input file every later check result would
                # be meaningless, so stop here and do not leave a partial file.
                echo "Error: could not generate $INPUTFILE" >&2
                rm -f -- "$INPUTFILE"
                exit 1
        fi
fi

###########################################################################
# Check input against expected
# Reads $INPUTFILE one line at a time with the "read" builtin (via a
# redirection on the loop) and compares each line with the line rebuilt from
# $fmt and $padding.  Every mismatch is reported and counted; an early EOF
# counts as one error and ends the check.  An unreadable input file is
# reported as one error instead of a misleading "0 errors".
if (( input_check != 0 )); then
        echo "Checking input against expected"
        count=0
        if [[ ! -r "$INPUTFILE" ]]; then
                echo "Error: cannot read input file $INPUTFILE"
                count=1
        else
                for (( line=1 ; line <= limit ; line++ ))
                do
                        # IFS= and -r make "read" return the line untouched
                        # (no whitespace trimming, no backslash processing),
                        # so the comparison below is exact.
                        if ! IFS= read -r rline; then
                                echo "Error: early EOF on input"
                                count=$(( count + 1 ))
                                break
                        fi
                        printf -v eline "$fmt" "$line" "$padding" "$line"
                        if [[ "$rline" != "$eline" ]]; then
                                echo "Error: line $line: expected: $eline"
                                echo "Error: line $line: received: $rline"
                                count=$(( count + 1 ))
                        fi
                done < "$INPUTFILE"
        fi
        echo "There were $count errors in $limit lines"
fi

###########################################################################
# Remove input file
# Skipped when input_remove is 0, which leaves the file in place.
(( input_remove == 0 )) || rm -f "$INPUTFILE"

Reply via email to