Recently we had a gripe about how you can't read very many files concurrently with contrib/file_fdw: http://www.postgresql.org/message-id/of419b5767.8a3c9adb-on85257b79.005491e9-85257b79.0054f...@isn.rtss.qc.ca
The reason for this is that file_fdw goes through the COPY code, which uses AllocateFile(), which has a wired-in assumption that not very many files need to be open concurrently. I thought for a bit about trying to get COPY to use a "virtual" file descriptor such as is provided by the rest of fd.c, but that didn't look too easy, and in any case probably nobody would be excited about adding additional overhead to the COPY code path. What seems more practical is to relax the hard-coded limit on the number of files concurrently open through AllocateFile(). Now, we could certainly turn that array into some open-ended list structure, but that still wouldn't let us have an arbitrary number of files open, because at some point we'd run into platform-specific EMFILE or ENFILE limits on the number of open file descriptors. In practice we want to keep it under the max_safe_fds limit that fd.c goes to a lot of trouble to determine. So it seems like the most useful compromise is to keep the allocatedDescs[] array data structure as-is, but allow its size to depend on max_safe_fds. In the attached proposed patch I limit it to max_safe_fds / 2, so that there's still a reasonable number of FDs available for fd.c's main pool of virtual FDs. On typical modern platforms that should be at least a couple of hundred, thus making the effective limit about an order of magnitude more than it is currently (32). Barring objections or better ideas, I'd like to back-patch this as far as 9.1 where file_fdw was introduced. regards, tom lane
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 78c7d41ac48d95d6648baa3951563ba9e4ad3496..7de93647759ce8dc58d0a9f3c8aad9ed7f36f5e6 100644 *** a/src/backend/storage/file/fd.c --- b/src/backend/storage/file/fd.c *************** *** 43,50 **** * wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively. * They behave like the corresponding native functions, except that the handle * is registered with the current subtransaction, and will be automatically ! * closed at abort. These are intended for short operations like reading a ! * configuration file, and there is a fixed limit on the number of files that * can be opened using these functions at any one time. * * Finally, BasicOpenFile is just a thin wrapper around open() that can --- 43,50 ---- * wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively. * They behave like the corresponding native functions, except that the handle * is registered with the current subtransaction, and will be automatically ! * closed at abort. These are intended mainly for short operations like ! * reading a configuration file; there is a limit on the number of files that * can be opened using these functions at any one time. * * Finally, BasicOpenFile is just a thin wrapper around open() that can *************** static uint64 temporary_files_size = 0; *** 198,210 **** /* * List of OS handles opened with AllocateFile, AllocateDir and * OpenTransientFile. - * - * Since we don't want to encourage heavy use of those functions, - * it seems OK to put a pretty small maximum limit on the number of - * simultaneously allocated descs. */ - #define MAX_ALLOCATED_DESCS 32 - typedef enum { AllocateDescFile, --- 198,204 ---- *************** typedef enum *** 216,232 **** typedef struct { AllocateDescKind kind; union { FILE *file; DIR *dir; int fd; } desc; - SubTransactionId create_subid; } AllocateDesc; static int numAllocatedDescs = 0; ! 
static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS]; /* * Number of temporary files opened during the current session; --- 210,227 ---- typedef struct { AllocateDescKind kind; + SubTransactionId create_subid; union { FILE *file; DIR *dir; int fd; } desc; } AllocateDesc; static int numAllocatedDescs = 0; ! static int maxAllocatedDescs = 0; ! static AllocateDesc *allocatedDescs = NULL; /* * Number of temporary files opened during the current session; *************** static void FreeVfd(File file); *** 284,289 **** --- 279,286 ---- static int FileAccess(File file); static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError); + static bool reserveAllocatedDesc(void); + static int FreeDesc(AllocateDesc *desc); static void AtProcExit_Files(int code, Datum arg); static void CleanupTempFiles(bool isProcExit); static void RemovePgTempFilesInDir(const char *tmpdirname); *************** FilePathName(File file) *** 1491,1496 **** --- 1488,1553 ---- /* + * Create/enlarge the allocatedDescs[] array if needed and possible. + * Returns true if an array element is available. + */ + static bool + reserveAllocatedDesc(void) + { + AllocateDesc *newDescs; + int newMax; + + /* Quick out if array already has a free slot. */ + if (numAllocatedDescs < maxAllocatedDescs) + return true; + + /* + * If the array hasn't yet been created in the current process, initialize + * it with FD_MINFREE / 2 elements. In many scenarios this is as many as + * we will ever need, anyway. We don't want to look at max_safe_fds + * immediately because set_max_safe_fds() may not have run yet. + */ + if (allocatedDescs == NULL) + { + newMax = FD_MINFREE / 2; + newDescs = (AllocateDesc *) malloc(newMax * sizeof(AllocateDesc)); + /* Out of memory already? Treat as fatal error. 
*/ + if (newDescs == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + allocatedDescs = newDescs; + maxAllocatedDescs = newMax; + return true; + } + + /* + * Consider enlarging the array beyond the initial allocation used above. + * By the time this happens, max_safe_fds should be known accurately. + * + * We mustn't let allocated descriptors hog all the available FDs, and in + * practice we'd better leave a reasonable number of FDs for VFD use. So + * set the maximum to max_safe_fds / 2. (This should certainly be at + * least as large as the initial size, FD_MINFREE / 2.) + */ + newMax = max_safe_fds / 2; + if (newMax > maxAllocatedDescs) + { + newDescs = (AllocateDesc *) realloc(allocatedDescs, + newMax * sizeof(AllocateDesc)); + /* Treat out-of-memory as a non-fatal error. */ + if (newDescs == NULL) + return false; + allocatedDescs = newDescs; + maxAllocatedDescs = newMax; + return true; + } + + /* Can't enlarge allocatedDescs[] any more. */ + return false; + } + + /* * Routines that want to use stdio (ie, FILE*) should use AllocateFile * rather than plain fopen(). This lets fd.c deal with freeing FDs if * necessary to open the file. When done, call FreeFile rather than fclose. *************** AllocateFile(const char *name, const cha *** 1516,1530 **** numAllocatedDescs, name)); /* ! * The test against MAX_ALLOCATED_DESCS prevents us from overflowing ! * allocatedFiles[]; the test against max_safe_fds prevents AllocateFile ! * from hogging every one of the available FDs, which'd lead to infinite ! * looping. */ ! if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || ! numAllocatedDescs >= max_safe_fds - 1) ! elog(ERROR, "exceeded MAX_ALLOCATED_DESCS while trying to open file \"%s\"", ! name); TryAgain: if ((file = fopen(name, mode)) != NULL) --- 1573,1587 ---- numAllocatedDescs, name)); /* ! * This test protects us both against overrunning the allocatedDescs[] ! 
* array and against trying to eat more than max_safe_fds file descriptors ! * for allocatedDescs (which would result in an infinite loop below). */ ! if (!reserveAllocatedDesc()) ! ereport(ERROR, ! (errcode(ERRCODE_INSUFFICIENT_RESOURCES), ! errmsg("exceeded maxAllocatedDescs (%d) while trying to open file \"%s\"", ! maxAllocatedDescs, name))); TryAgain: if ((file = fopen(name, mode)) != NULL) *************** OpenTransientFile(FileName fileName, int *** 1567,1581 **** numAllocatedDescs, fileName)); /* ! * The test against MAX_ALLOCATED_DESCS prevents us from overflowing ! * allocatedFiles[]; the test against max_safe_fds prevents BasicOpenFile ! * from hogging every one of the available FDs, which'd lead to infinite ! * looping. */ ! if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || ! numAllocatedDescs >= max_safe_fds - 1) ! elog(ERROR, "exceeded MAX_ALLOCATED_DESCS while trying to open file \"%s\"", ! fileName); fd = BasicOpenFile(fileName, fileFlags, fileMode); --- 1624,1638 ---- numAllocatedDescs, fileName)); /* ! * This test protects us both against overrunning the allocatedDescs[] ! * array and against trying to eat more than max_safe_fds file descriptors ! * for allocatedDescs (which would result in an infinite loop below). */ ! if (!reserveAllocatedDesc()) ! ereport(ERROR, ! (errcode(ERRCODE_INSUFFICIENT_RESOURCES), ! errmsg("exceeded maxAllocatedDescs (%d) while trying to open file \"%s\"", ! maxAllocatedDescs, fileName))); fd = BasicOpenFile(fileName, fileFlags, fileMode); *************** OpenPipeStream(const char *command, cons *** 1608,1622 **** numAllocatedDescs, command)); /* ! * The test against MAX_ALLOCATED_DESCS prevents us from overflowing ! * allocatedFiles[]; the test against max_safe_fds prevents AllocateFile ! * from hogging every one of the available FDs, which'd lead to infinite ! * looping. */ ! if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || ! numAllocatedDescs >= max_safe_fds - 1) ! 
elog(ERROR, "exceeded MAX_ALLOCATED_DESCS while trying to execute command \"%s\"", ! command); TryAgain: fflush(stdout); --- 1665,1679 ---- numAllocatedDescs, command)); /* ! * This test protects us both against overrunning the allocatedDescs[] ! * array and against trying to eat more than max_safe_fds file descriptors ! * for allocatedDescs (which would result in an infinite loop below). */ ! if (!reserveAllocatedDesc()) ! ereport(ERROR, ! (errcode(ERRCODE_INSUFFICIENT_RESOURCES), ! errmsg("exceeded maxAllocatedDescs (%d) while trying to execute command \"%s\"", ! maxAllocatedDescs, command))); TryAgain: fflush(stdout); *************** AllocateDir(const char *dirname) *** 1760,1774 **** numAllocatedDescs, dirname)); /* ! * The test against MAX_ALLOCATED_DESCS prevents us from overflowing ! * allocatedDescs[]; the test against max_safe_fds prevents AllocateDir ! * from hogging every one of the available FDs, which'd lead to infinite ! * looping. */ ! if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || ! numAllocatedDescs >= max_safe_fds - 1) ! elog(ERROR, "exceeded MAX_ALLOCATED_DESCS while trying to open directory \"%s\"", ! dirname); TryAgain: if ((dir = opendir(dirname)) != NULL) --- 1817,1831 ---- numAllocatedDescs, dirname)); /* ! * This test protects us both against overrunning the allocatedDescs[] ! * array and against trying to eat more than max_safe_fds file descriptors ! * for allocatedDescs (which would result in an infinite loop below). */ ! if (!reserveAllocatedDesc()) ! ereport(ERROR, ! (errcode(ERRCODE_INSUFFICIENT_RESOURCES), ! errmsg("exceeded maxAllocatedDescs (%d) while trying to open directory \"%s\"", ! maxAllocatedDescs, dirname))); TryAgain: if ((dir = opendir(dirname)) != NULL)
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers