On 7/1/20 6:22 PM, Tom Lane wrote: > Joe Conway <m...@joeconway.com> writes: >> The only downside is that the max filesize is reduced to (MaxAllocSize - >> MIN_READ_SIZE - 1) compared to MaxAllocSize with the old method. > > Hm, I was expecting that the last successful iteration of > enlargeStringInfo would increase the buffer size to MaxAllocSize, > so that we'd really only be losing one byte (which we can't avoid > if we use stringinfo). But you're right that it's most likely moot > since later manipulations of such a result would risk hitting overflows. > > I marked the CF entry as RFC.
Sorry to open this can of worms again, but I couldn't get my head past the fact that reading the entire file would have a different size limit than reading the exact number of bytes in the file. So, inspired by what you did (and StringInfo itself) I came up with the attached. This version performs equivalently to your patch (and HEAD), and allows files up to and including (MaxAllocSize - VARHDRSZ) -- i.e. exactly the same as the specified-length case and legacy behavior for the full file read. But if you object I will just go with your version barring any other opinions. Joe -- Crunchy Data - http://crunchydata.com PostgreSQL Support for Secure Enterprises Consulting, Training, & Open Source Development
diff --git a/contrib/adminpack/expected/adminpack.out b/contrib/adminpack/expected/adminpack.out index 5738b0f..edf3ebf 100644 *** a/contrib/adminpack/expected/adminpack.out --- b/contrib/adminpack/expected/adminpack.out *************** SELECT pg_file_rename('test_file1', 'tes *** 79,85 **** (1 row) SELECT pg_read_file('test_file1'); -- not there ! ERROR: could not stat file "test_file1": No such file or directory SELECT pg_read_file('test_file2'); pg_read_file -------------- --- 79,85 ---- (1 row) SELECT pg_read_file('test_file1'); -- not there ! ERROR: could not open file "test_file1" for reading: No such file or directory SELECT pg_read_file('test_file2'); pg_read_file -------------- *************** SELECT pg_file_rename('test_file2', 'tes *** 108,114 **** (1 row) SELECT pg_read_file('test_file2'); -- not there ! ERROR: could not stat file "test_file2": No such file or directory SELECT pg_read_file('test_file3'); pg_read_file -------------- --- 108,114 ---- (1 row) SELECT pg_read_file('test_file2'); -- not there ! ERROR: could not open file "test_file2" for reading: No such file or directory SELECT pg_read_file('test_file3'); pg_read_file -------------- diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c index ceaa618..1515032 100644 *** a/src/backend/utils/adt/genfile.c --- b/src/backend/utils/adt/genfile.c *************** read_binary_file(const char *filename, i *** 106,138 **** bool missing_ok) { bytea *buf; ! size_t nbytes; FILE *file; ! if (bytes_to_read < 0) ! { ! if (seek_offset < 0) ! bytes_to_read = -seek_offset; ! else ! { ! struct stat fst; ! ! if (stat(filename, &fst) < 0) ! { ! if (missing_ok && errno == ENOENT) ! return NULL; ! else ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not stat file \"%s\": %m", filename))); ! } ! ! bytes_to_read = fst.st_size - seek_offset; ! } ! } ! ! /* not sure why anyone thought that int64 length was a good idea */ ! if (bytes_to_read > (MaxAllocSize - VARHDRSZ)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("requested length too large"))); --- 106,116 ---- bool missing_ok) { bytea *buf; ! size_t nbytes = 0; FILE *file; ! /* clamp request size to what we can actually deliver */ ! if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("requested length too large"))); *************** read_binary_file(const char *filename, i *** 154,162 **** (errcode_for_file_access(), errmsg("could not seek in file \"%s\": %m", filename))); ! buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ); ! nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file); if (ferror(file)) ereport(ERROR, --- 132,194 ---- (errcode_for_file_access(), errmsg("could not seek in file \"%s\": %m", filename))); ! if (bytes_to_read >= 0) ! { ! /* If passed explicit read size just do it */ ! buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ); ! nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file); ! } ! else ! { ! /* Negative read size, read rest of file */ ! ! /* Chunk size for reads and allocations */ ! #define READ_BLOCK_SIZE 4096 ! /* VARDATA maximum allowed size */ ! int64 alloc_max = MaxAllocSize - VARHDRSZ; ! /* VARDATA allocated size */ ! Size maxdatlen = READ_BLOCK_SIZE - VARHDRSZ; ! ! /* initialize the buffer */ ! buf = (bytea *) palloc(0); ! ! while (!(feof(file) || ferror(file))) ! { ! /* ! * If not at end of file, and nbytes is equal to alloc_max, ! * then either the file is too large, or there is nothing left ! * to read. Attempt to read one more byte to see if the end ! * of file has been reached. If not, the file is too large; ! * we'd rather give the error message for that ourselves. ! */ ! if (nbytes == alloc_max) ! { ! char rbuf[1]; ! ! fread(rbuf, 1, 1, file); ! if (!feof(file)) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("requested length too large"))); ! else ! break; ! } ! ! buf = (bytea *) repalloc(buf, maxdatlen + VARHDRSZ); ! nbytes += fread(VARDATA(buf) + nbytes, 1, ! (size_t) maxdatlen - nbytes, file); ! ! /* ! * Double the data size allocation each iteration, but ! * clamp at alloc_max. Calculate such that memory ! * allocations are multiples of READ_BLOCK_SIZE. ! */ ! maxdatlen = (maxdatlen * 2) + VARHDRSZ; ! if (maxdatlen > alloc_max) ! maxdatlen = alloc_max; ! } ! } if (ferror(file)) ereport(ERROR,
signature.asc
Description: OpenPGP digital signature