On 6/28/20 6:00 PM, Tom Lane wrote: > Joe Conway <m...@joeconway.com> writes: >> All good stuff -- I believe the attached checks all the boxes. > > Looks okay to me, except I think you want > > ! if (bytes_to_read > 0) > > to be > > ! if (bytes_to_read >= 0)
Yep -- thanks. I did some performance testing of the worst case/largest possible file and found that skipping the stat and bulk read does cause a significant regression. Current HEAD takes about 400ms on my desktop, and with that version of the patch more like 1100ms. In the attached patch I was able to get most of the performance degradation back -- ~600ms. Hopefully you don't think what I did was "too cute by half" :-). Do you think this is good enough or should we go back to using the stat file size when it is > 0? As noted in the comment, the downside of that method is that the largest supported file size is 1 byte smaller when "reading the entire file" versus "reading a specified size" due to StringInfo reserving the last byte for a trailing null. Joe -- Crunchy Data - http://crunchydata.com PostgreSQL Support for Secure Enterprises Consulting, Training, & Open Source Development
diff --git a/contrib/adminpack/expected/adminpack.out b/contrib/adminpack/expected/adminpack.out index 5738b0f..edf3ebf 100644 *** a/contrib/adminpack/expected/adminpack.out --- b/contrib/adminpack/expected/adminpack.out *************** SELECT pg_file_rename('test_file1', 'tes *** 79,85 **** (1 row) SELECT pg_read_file('test_file1'); -- not there ! ERROR: could not stat file "test_file1": No such file or directory SELECT pg_read_file('test_file2'); pg_read_file -------------- --- 79,85 ---- (1 row) SELECT pg_read_file('test_file1'); -- not there ! ERROR: could not open file "test_file1" for reading: No such file or directory SELECT pg_read_file('test_file2'); pg_read_file -------------- *************** SELECT pg_file_rename('test_file2', 'tes *** 108,114 **** (1 row) SELECT pg_read_file('test_file2'); -- not there ! ERROR: could not stat file "test_file2": No such file or directory SELECT pg_read_file('test_file3'); pg_read_file -------------- --- 108,114 ---- (1 row) SELECT pg_read_file('test_file2'); -- not there ! ERROR: could not open file "test_file2" for reading: No such file or directory SELECT pg_read_file('test_file3'); pg_read_file -------------- diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c index ceaa618..33db576 100644 *** a/src/backend/utils/adt/genfile.c --- b/src/backend/utils/adt/genfile.c *************** *** 36,41 **** --- 36,42 ---- #include "utils/syscache.h" #include "utils/timestamp.h" + #define READBUF_SIZE 8192 /* * Convert a "text" filename argument to C string, and check it's allowable. *************** read_binary_file(const char *filename, i *** 106,138 **** bool missing_ok) { bytea *buf; ! size_t nbytes; FILE *file; ! if (bytes_to_read < 0) ! { ! if (seek_offset < 0) ! bytes_to_read = -seek_offset; ! else ! { ! struct stat fst; ! ! if (stat(filename, &fst) < 0) ! { ! if (missing_ok && errno == ENOENT) ! return NULL; ! else ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not stat file \"%s\": %m", filename))); ! } ! ! bytes_to_read = fst.st_size - seek_offset; ! } ! } ! ! /* not sure why anyone thought that int64 length was a good idea */ ! if (bytes_to_read > (MaxAllocSize - VARHDRSZ)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("requested length too large"))); --- 107,117 ---- bool missing_ok) { bytea *buf; ! size_t nbytes = 0; FILE *file; ! /* clamp request size to what we can actually deliver */ ! if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("requested length too large"))); *************** read_binary_file(const char *filename, i *** 154,162 **** (errcode_for_file_access(), errmsg("could not seek in file \"%s\": %m", filename))); ! buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ); ! nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file); if (ferror(file)) ereport(ERROR, --- 133,173 ---- (errcode_for_file_access(), errmsg("could not seek in file \"%s\": %m", filename))); ! if (bytes_to_read >= 0) ! { ! /* If passed explicit read size just do it */ ! buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ); ! nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file); ! } ! else ! { ! /* Negative read size, read rest of file */ ! StringInfoData sbuf; ! size_t rbytes = 0; ! char rbuf[READBUF_SIZE]; ! ! initStringInfo(&sbuf); ! buf = (bytea *) sbuf.data; ! sbuf.len += VARHDRSZ; ! ! while (!(feof(file) || ferror(file))) ! { ! rbytes = fread(rbuf, 1, (size_t) READBUF_SIZE, file); ! nbytes += rbytes; ! ! /* ! * Note that StringInfo reserves the last byte ! * for a trailing null ! */ ! if (nbytes > (MaxAllocSize - VARHDRSZ - 1)) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("requested length too large"))); ! ! appendBinaryStringInfo(&sbuf, rbuf, rbytes); ! } ! } if (ferror(file)) ereport(ERROR,
signature.asc
Description: OpenPGP digital signature