On 7/1/20 6:22 PM, Tom Lane wrote:
> Joe Conway <m...@joeconway.com> writes:
>> The only downside is that the max filesize is reduced to (MaxAllocSize -
>> MIN_READ_SIZE - 1) compared to MaxAllocSize with the old method.
> 
> Hm, I was expecting that the last successful iteration of
> enlargeStringInfo would increase the buffer size to MaxAllocSize,
> so that we'd really only be losing one byte (which we can't avoid
> if we use stringinfo).  But you're right that it's most likely moot
> since later manipulations of such a result would risk hitting overflows.
> 
> I marked the CF entry as RFC.

Sorry to open this can of worms again, but I couldn't get my head past the fact
that reading the entire file would have a different size limit than reading the
exact number of bytes in the file.

So, inspired by what you did (and StringInfo itself) I came up with the
attached. This version performs equivalently to your patch (and HEAD), and
allows files up to and including (MaxAllocSize - VARHDRSZ) -- i.e. exactly the
same as the specified-length case and legacy behavior for the full file read.

But if you object I will just go with your version barring any other opinions.

Joe

-- 
Crunchy Data - http://crunchydata.com
PostgreSQL Support for Secure Enterprises
Consulting, Training, & Open Source Development
diff --git a/contrib/adminpack/expected/adminpack.out b/contrib/adminpack/expected/adminpack.out
index 5738b0f..edf3ebf 100644
*** a/contrib/adminpack/expected/adminpack.out
--- b/contrib/adminpack/expected/adminpack.out
*************** SELECT pg_file_rename('test_file1', 'tes
*** 79,85 ****
  (1 row)
  
  SELECT pg_read_file('test_file1');  -- not there
! ERROR:  could not stat file "test_file1": No such file or directory
  SELECT pg_read_file('test_file2');
   pg_read_file 
  --------------
--- 79,85 ----
  (1 row)
  
  SELECT pg_read_file('test_file1');  -- not there
! ERROR:  could not open file "test_file1" for reading: No such file or directory
  SELECT pg_read_file('test_file2');
   pg_read_file 
  --------------
*************** SELECT pg_file_rename('test_file2', 'tes
*** 108,114 ****
  (1 row)
  
  SELECT pg_read_file('test_file2');  -- not there
! ERROR:  could not stat file "test_file2": No such file or directory
  SELECT pg_read_file('test_file3');
   pg_read_file 
  --------------
--- 108,114 ----
  (1 row)
  
  SELECT pg_read_file('test_file2');  -- not there
! ERROR:  could not open file "test_file2" for reading: No such file or directory
  SELECT pg_read_file('test_file3');
   pg_read_file 
  --------------
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index ceaa618..1515032 100644
*** a/src/backend/utils/adt/genfile.c
--- b/src/backend/utils/adt/genfile.c
*************** read_binary_file(const char *filename, i
*** 106,138 ****
  				 bool missing_ok)
  {
  	bytea	   *buf;
! 	size_t		nbytes;
  	FILE	   *file;
  
! 	if (bytes_to_read < 0)
! 	{
! 		if (seek_offset < 0)
! 			bytes_to_read = -seek_offset;
! 		else
! 		{
! 			struct stat fst;
! 
! 			if (stat(filename, &fst) < 0)
! 			{
! 				if (missing_ok && errno == ENOENT)
! 					return NULL;
! 				else
! 					ereport(ERROR,
! 							(errcode_for_file_access(),
! 							 errmsg("could not stat file \"%s\": %m", filename)));
! 			}
! 
! 			bytes_to_read = fst.st_size - seek_offset;
! 		}
! 	}
! 
! 	/* not sure why anyone thought that int64 length was a good idea */
! 	if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
  		ereport(ERROR,
  				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  				 errmsg("requested length too large")));
--- 106,116 ----
  				 bool missing_ok)
  {
  	bytea	   *buf;
! 	size_t		nbytes = 0;
  	FILE	   *file;
  
! 	/* clamp request size to what we can actually deliver */
! 	if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
  		ereport(ERROR,
  				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  				 errmsg("requested length too large")));
*************** read_binary_file(const char *filename, i
*** 154,162 ****
  				(errcode_for_file_access(),
  				 errmsg("could not seek in file \"%s\": %m", filename)));
  
! 	buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
  
! 	nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
  
  	if (ferror(file))
  		ereport(ERROR,
--- 132,194 ----
  				(errcode_for_file_access(),
  				 errmsg("could not seek in file \"%s\": %m", filename)));
  
! 	if (bytes_to_read >= 0)
! 	{
! 		/* If passed explicit read size just do it */
! 		buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
  
! 		nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
! 	}
! 	else
! 	{
! 		/* Negative read size, read rest of file */
! 
! 		/* Chunk size for reads and allocations */
! #define READ_BLOCK_SIZE 4096
! 		/* VARDATA maximum allowed size */
! 		int64	alloc_max = MaxAllocSize - VARHDRSZ;
! 		/* VARDATA allocated size */
! 		Size	maxdatlen = READ_BLOCK_SIZE - VARHDRSZ;
! 
! 		/* initialize the buffer */
! 		buf = (bytea *) palloc(0);
! 
! 		while (!(feof(file) || ferror(file)))
! 		{
! 			/*
! 			 * If not at end of file, and nbytes is equal to alloc_max,
! 			 * then either the file is too large, or there is nothing left
! 			 * to read. Attempt to read one more byte to see if the end
! 			 * of file has been reached. If not, the file is too large;
! 			 * we'd rather give the error message for that ourselves.
! 			 */
! 			if (nbytes == alloc_max)
! 			{
! 				char	rbuf[1]; 
! 
! 				fread(rbuf, 1, 1, file);
! 				if (!feof(file))
! 					ereport(ERROR,
! 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! 							 errmsg("requested length too large")));
! 				else
! 					break;
! 			}
! 
! 			buf = (bytea *) repalloc(buf, maxdatlen + VARHDRSZ);
! 			nbytes += fread(VARDATA(buf) + nbytes, 1,
! 						   (size_t) maxdatlen - nbytes, file);
! 
! 			/*
! 			 * Double the data size allocation each iteration, but
! 			 * clamp at alloc_max. Calculate such that memory
! 			 * allocations are multiples of READ_BLOCK_SIZE.
! 			 */
! 			maxdatlen = (maxdatlen * 2) + VARHDRSZ;
! 			if (maxdatlen > alloc_max)
! 				maxdatlen = alloc_max;
! 		}
! 	}
  
  	if (ferror(file))
  		ereport(ERROR,

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to