Here's a new version that also adjusts the code that just landed in da722699:
- /* - * Each IO handle can have an PG_IOV_MAX long iovec. - * - * XXX: Right now the amount of space available for each IO is PG_IOV_MAX. - * While it's tempting to use the io_combine_limit GUC, that's - * PGC_USERSET, so we can't allocate shared memory based on that. - */ + /* each IO handle can have up to io_max_combine_limit iovec objects */ return mul_size(sizeof(struct iovec), - mul_size(mul_size(PG_IOV_MAX, AioProcs()), + mul_size(mul_size(io_max_combine_limit, AioProcs()), io_max_concurrency)); While doing that, this juxtaposition jumped out at me: + uint32 per_backend_iovecs = io_max_concurrency * max_io_combine_limit; Surely one of these names has to give! First commit wins, so the new name in this version is "io_max_combine_limit". I was contemplating making the same change to the MAX_IO_COMBINE_LIMIT macro when it occurred to me that we could just delete it. It has few references and they could just use PG_IOV_MAX instead; it's perhaps a little less clear as names go, but it's also pretty confusing that there's a macro with the same name as a GUC...
From 1200802f3d647a8c9107b2148595dab4b6a1bd82 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Wed, 12 Feb 2025 13:52:22 +1300 Subject: [PATCH v4 1/3] Introduce io_max_combine_limit. The existing io_combine_limit setting can be changed by users. The new io_max_combine_limit setting is defined only at server startup time, and applies as a silent cap. This allows aio_init.c to know what the maximum could possibly be when allocating shared memory at startup, instead of having to assume PG_IOV_MAX (which a later commit will quadruple). Since our GUC system doesn't allow dependencies between GUCs, the user-settable one now assigns a "raw" value to io_combine_limit_guc, and the lower of io_combine_limit_guc and io_max_combine_limit is maintained in io_combine_limit. Most code should continue to refer to io_combine_limit directly, and io_combine_limit_guc is an implementation detail that could eventually be removed if the GUC system ever learns how to deal with dependencies. Reviewed-by: Andres Freund <and...@anarazel.de> Discussion: https://postgr.es/m/CA%2BhUKG%2B2T9p-%2BzM6Eeou-RAJjTML6eit1qn26f9twznX59qtCA%40mail.gmail.com --- doc/src/sgml/config.sgml | 23 ++++++++++++++++++- src/backend/commands/variable.c | 18 +++++++++++++++ src/backend/storage/aio/aio_init.c | 17 +++++--------- src/backend/storage/buffer/bufmgr.c | 5 +++- src/backend/utils/misc/guc_tables.c | 16 ++++++++++++- src/backend/utils/misc/postgresql.conf.sample | 2 ++ src/include/storage/bufmgr.h | 4 +++- src/include/utils/guc_hooks.h | 2 ++ 8 files changed, 72 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 7ec18bb7627..d1080dac97f 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2625,6 +2625,24 @@ include_dir 'conf.d' </listitem> </varlistentry> + <varlistentry id="guc-io-max-combine-limit" xreflabel="io_max_combine_limit"> + <term><varname>io_max_combine_limit</varname> (<type>integer</type>) + <indexterm> + <primary><varname>io_max_combine_limit</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Controls the largest I/O size in operations that combine I/O, and silently + limits the user-settable parameter <varname>io_combine_limit</varname>. + This parameter can only be set in + the <filename>postgresql.conf</filename> file or on the server + command line. + The default is 128kB. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-io-combine-limit" xreflabel="io_combine_limit"> <term><varname>io_combine_limit</varname> (<type>integer</type>) <indexterm> @@ -2633,7 +2651,10 @@ include_dir 'conf.d' </term> <listitem> <para> - Controls the largest I/O size in operations that combine I/O. + Controls the largest I/O size in operations that combine I/O. If set + higher than the <varname>io_max_combine_limit</varname> parameter, the + smaller value will silently be used instead, so both may need to be raised + to increase the I/O size. The default is 128kB. </para> </listitem> diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c index 4ad6e236d69..f550a3c0c63 100644 --- a/src/backend/commands/variable.c +++ b/src/backend/commands/variable.c @@ -1156,6 +1156,24 @@ assign_maintenance_io_concurrency(int newval, void *extra) #endif } +/* + * GUC assign hooks that recompute io_combine_limit whenever + * io_combine_limit_guc and io_max_combine_limit are changed. These are needed + * because the GUC subsystem doesn't support dependencies between GUCs, and + * they may be assigned in either order. + */ +void +assign_io_max_combine_limit(int newval, void *extra) +{ + io_max_combine_limit = newval; + io_combine_limit = Min(io_max_combine_limit, io_combine_limit_guc); +} +void +assign_io_combine_limit(int newval, void *extra) +{ + io_combine_limit_guc = newval; + io_combine_limit = Min(io_max_combine_limit, io_combine_limit_guc); +} /* * These show hooks just exist because we want to show the values in octal. diff --git a/src/backend/storage/aio/aio_init.c b/src/backend/storage/aio/aio_init.c index 6fe55510fae..93aba6e03ba 100644 --- a/src/backend/storage/aio/aio_init.c +++ b/src/backend/storage/aio/aio_init.c @@ -18,6 +18,7 @@ #include "storage/aio.h" #include "storage/aio_internal.h" #include "storage/aio_subsys.h" +#include "storage/bufmgr.h" #include "storage/ipc.h" #include "storage/proc.h" #include "storage/shmem.h" @@ -66,15 +67,9 @@ AioHandleShmemSize(void) static Size AioHandleIOVShmemSize(void) { - /* - * Each IO handle can have an PG_IOV_MAX long iovec. - * - * XXX: Right now the amount of space available for each IO is PG_IOV_MAX. - * While it's tempting to use the io_combine_limit GUC, that's - * PGC_USERSET, so we can't allocate shared memory based on that. - */ + /* each IO handle can have up to io_max_combine_limit iovec objects */ return mul_size(sizeof(struct iovec), - mul_size(mul_size(PG_IOV_MAX, AioProcs()), + mul_size(mul_size(io_max_combine_limit, AioProcs()), io_max_concurrency)); } @@ -83,7 +78,7 @@ AioHandleDataShmemSize(void) { /* each buffer referenced by an iovec can have associated data */ return mul_size(sizeof(uint64), - mul_size(mul_size(PG_IOV_MAX, AioProcs()), + mul_size(mul_size(io_max_combine_limit, AioProcs()), io_max_concurrency)); } @@ -154,7 +149,7 @@ AioShmemInit(void) bool found; uint32 io_handle_off = 0; uint32 iovec_off = 0; - uint32 per_backend_iovecs = io_max_concurrency * PG_IOV_MAX; + uint32 per_backend_iovecs = io_max_concurrency * io_max_combine_limit; pgaio_ctl = (PgAioCtl *) ShmemInitStruct("AioCtl", AioCtlShmemSize(), &found); @@ -207,7 +202,7 @@ AioShmemInit(void) ConditionVariableInit(&ioh->cv); dclist_push_tail(&bs->idle_ios, &ioh->node); - iovec_off += PG_IOV_MAX; + iovec_off += io_max_combine_limit; } } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 79ca9d18d07..d04afa5ab9c 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -160,9 +160,12 @@ int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY; /* * Limit on how many blocks should be handled in single I/O operations. * StartReadBuffers() callers should respect it, as should other operations - * that call smgr APIs directly. + * that call smgr APIs directly. It is computed as the minimum of underlying + * GUCs io_combine_limit_guc and io_max_combine_limit. */ int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT; +int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT; +int io_max_combine_limit = DEFAULT_IO_COMBINE_LIMIT; /* * GUC variables about triggering kernel writeback for buffers written; OS diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 0d3ebf06a95..720f28a5180 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -3241,6 +3241,20 @@ struct config_int ConfigureNamesInt[] = NULL }, + { + {"io_max_combine_limit", + PGC_POSTMASTER, + RESOURCES_IO, + gettext_noop("System-wide limit applied to io_combine_limit."), + NULL, + GUC_UNIT_BLOCKS + }, + &io_max_combine_limit, + DEFAULT_IO_COMBINE_LIMIT, + 1, MAX_IO_COMBINE_LIMIT, + NULL, assign_io_max_combine_limit, NULL + }, + { {"io_combine_limit", PGC_USERSET, @@ -3252,7 +3266,7 @@ struct config_int ConfigureNamesInt[] = &io_combine_limit, DEFAULT_IO_COMBINE_LIMIT, 1, MAX_IO_COMBINE_LIMIT, - NULL, NULL, NULL + NULL, assign_io_combine_limit, NULL }, { diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 43c2ec2153e..bd9a3507135 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -200,6 +200,8 @@ #backend_flush_after = 0 # measured in pages, 0 disables #effective_io_concurrency = 16 # 1-1000; 0 disables prefetching #maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching +#io_max_combine_limit = 128kB # usually 1-32 blocks (depends on OS) + # (change requires restart) #io_combine_limit = 128kB # usually 1-32 blocks (depends on OS) #io_method = sync # sync (change requires restart) diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 79a89f87fcc..71b3af61b6a 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -163,7 +163,9 @@ extern PGDLLIMPORT int maintenance_io_concurrency; #define MAX_IO_COMBINE_LIMIT PG_IOV_MAX #define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ) -extern PGDLLIMPORT int io_combine_limit; +extern PGDLLIMPORT int io_combine_limit; /* min of the two GUCs below */ +extern PGDLLIMPORT int io_combine_limit_guc; +extern PGDLLIMPORT int io_max_combine_limit; extern PGDLLIMPORT int checkpoint_flush_after; extern PGDLLIMPORT int backend_flush_after; diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h index a3eba8fbe21..0f1e74f96c9 100644 --- a/src/include/utils/guc_hooks.h +++ b/src/include/utils/guc_hooks.h @@ -86,6 +86,8 @@ extern const char *show_log_timezone(void); extern bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source); extern void assign_maintenance_io_concurrency(int newval, void *extra); +extern void assign_io_max_combine_limit(int newval, void *extra); +extern void assign_io_combine_limit(int newval, void *extra); extern bool check_max_slot_wal_keep_size(int *newval, void **extra, GucSource source); extern void assign_max_wal_size(int newval, void *extra); -- 2.39.5
From 4abac6442061744b791031a25a31cd45563cc111 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Tue, 21 Jan 2025 11:44:35 +1300 Subject: [PATCH v4 2/3] Increase the maximum I/O combine size to 1MB. The default value of 128kB is not changed, but the upper limit is changed from 32 blocks to 128 blocks (1MB with 8kB blocks), assuming the operating system's IOV_MAX doesn't limit us to a smaller size. This is around where some other RDBMSes seem to cap their buffer pool I/O size, and it seems like to good idea to allow experiments with that. The concrete change is to our definition of PG_IOV_MAX, which provides the maximum limit for io_combine_limit and io_max_combine_limit. It also affects a couple of other places that work with arrays of struct iovec or smaller objects on the stack, so we still don't want to use the system IOV_MAX directly without a clamp: it is not under our control and likely to be 1024. 128 seems acceptable for all our current use cases. The last Unix on our target list known to have a low IOV_MAX was Solaris before 11.4 SRU72 (it was 16, the minimum requirement for POSIX conformance, but is now 1024, matching all other systems I looked at). For Windows, we can't use real scatter/gather yet (though it's possible, in later work), so we continue to define our own IOV_MAX value of 16 and emulate preadv()/pwritev() with loops there. Someone would need to research the trade-off. This change also makes it possible for read_stream.c's internal cap of INT16_MAX to be hit, so adjust comments about that. With *_io_concurrent and io_combine_limit set to their maximum, it would want to be able to pin 128K buffers at once (= 1GB of data), but the choice of data type limits streams to 32K buffers. That could be revisited in future, but you'll probably hit other limits long before that one in your quest to run 1,000 concurrent I/Os of size 1MB. Suggested-by: Tomas Vondra <to...@vondra.me> Discussion: https://postgr.es/m/CA%2BhUKG%2B2T9p-%2BzM6Eeou-RAJjTML6eit1qn26f9twznX59qtCA%40mail.gmail.com --- doc/src/sgml/config.sgml | 4 ++++ src/backend/storage/aio/read_stream.c | 7 ++++--- src/backend/utils/misc/postgresql.conf.sample | 4 ++-- src/include/port/pg_iovec.h | 8 ++++++-- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d1080dac97f..93eea7f96d2 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2638,6 +2638,8 @@ include_dir 'conf.d' This parameter can only be set in the <filename>postgresql.conf</filename> file or on the server command line. + The maximum possible size depends on the operating system and block + size, but is typically 1MB on Unix and 128kB on Windows. The default is 128kB. </para> </listitem> @@ -2655,6 +2657,8 @@ include_dir 'conf.d' higher than the <varname>io_max_combine_limit</varname> parameter, the smaller value will silently be used instead, so both may need to be raised to increase the I/O size. + The maximum possible size depends on the operating system and block + size, but is typically 1MB on Unix and 128kB on Windows. The default is 128kB. </para> </listitem> diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c index d65fa07b44c..45bdf819d57 100644 --- a/src/backend/storage/aio/read_stream.c +++ b/src/backend/storage/aio/read_stream.c @@ -515,9 +515,10 @@ read_stream_begin_impl(int flags, * finishes we don't want to have to wait for its buffers to be consumed * before starting a new one. * - * Be careful not to allow int16 to overflow (even though that's not - * possible with the current GUC range limits), allowing also for the - * spare entry and the overflow space. + * Be careful not to allow int16 to overflow. That is possible with the + * current GUC range limits, so this is an artificial limit of ~32k + * buffers and we'd need to adjust the types to exceed that. We also have + * to allow for the spare entry and the overflow space. */ max_pinned_buffers = (max_ios + 1) * io_combine_limit; max_pinned_buffers = Min(max_pinned_buffers, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index bd9a3507135..e43d803b278 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -200,9 +200,9 @@ #backend_flush_after = 0 # measured in pages, 0 disables #effective_io_concurrency = 16 # 1-1000; 0 disables prefetching #maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching -#io_max_combine_limit = 128kB # usually 1-32 blocks (depends on OS) +#io_max_combine_limit = 128kB # usually 1-128 blocks (depends on OS) # (change requires restart) -#io_combine_limit = 128kB # usually 1-32 blocks (depends on OS) +#io_combine_limit = 128kB # usually 1-128 blocks (depends on OS) #io_method = sync # sync (change requires restart) #io_max_concurrency = -1 # Max number of IOs that one process diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h index d9891d3805d..df40c7208be 100644 --- a/src/include/port/pg_iovec.h +++ b/src/include/port/pg_iovec.h @@ -33,8 +33,12 @@ struct iovec #endif -/* Define a reasonable maximum that is safe to use on the stack. */ -#define PG_IOV_MAX Min(IOV_MAX, 32) +/* + * Define a reasonable maximum that is safe to use on the stack in arrays of + * struct iovec and other small types. The operating system could limit us to + * a number as low as 16, but most systems have 1024. + */ +#define PG_IOV_MAX Min(IOV_MAX, 128) /* * Like preadv(), but with a prefix to remind us of a side-effect: on Windows -- 2.39.5
From c33e3b50be396819a73aca805d33f32d88560ace Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Tue, 18 Mar 2025 16:12:43 +1300 Subject: [PATCH v4 3/3] Remove MAX_IO_COMBINE_LIMIT macro. Now that we have a setting io_max_combine_limit, the macro is a bit confusing. It only has a few references, and they can be replaced with PG_IOV_MAX. --- src/backend/storage/buffer/bufmgr.c | 6 +++--- src/backend/utils/misc/guc_tables.c | 4 ++-- src/include/storage/bufmgr.h | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index d04afa5ab9c..5e4701d6f19 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1259,7 +1259,7 @@ StartReadBuffersImpl(ReadBuffersOperation *operation, int maxcombine = 0; Assert(*nblocks > 0); - Assert(*nblocks <= MAX_IO_COMBINE_LIMIT); + Assert(*nblocks <= PG_IOV_MAX); for (int i = 0; i < actual_nblocks; ++i) { @@ -1452,8 +1452,8 @@ WaitReadBuffers(ReadBuffersOperation *operation) for (int i = 0; i < nblocks; ++i) { int io_buffers_len; - Buffer io_buffers[MAX_IO_COMBINE_LIMIT]; - void *io_pages[MAX_IO_COMBINE_LIMIT]; + Buffer io_buffers[PG_IOV_MAX]; + void *io_pages[PG_IOV_MAX]; instr_time io_start; BlockNumber io_first_block; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 720f28a5180..d9540d31c8e 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -3251,7 +3251,7 @@ struct config_int ConfigureNamesInt[] = }, &io_max_combine_limit, DEFAULT_IO_COMBINE_LIMIT, - 1, MAX_IO_COMBINE_LIMIT, + 1, PG_IOV_MAX, NULL, assign_io_max_combine_limit, NULL }, @@ -3265,7 +3265,7 @@ struct config_int ConfigureNamesInt[] = }, &io_combine_limit, DEFAULT_IO_COMBINE_LIMIT, - 1, MAX_IO_COMBINE_LIMIT, + 1, PG_IOV_MAX, NULL, assign_io_combine_limit, NULL }, diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 71b3af61b6a..0452fe1f1e8 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -161,8 +161,7 @@ extern PGDLLIMPORT bool track_io_timing; extern PGDLLIMPORT int effective_io_concurrency; extern PGDLLIMPORT int maintenance_io_concurrency; -#define MAX_IO_COMBINE_LIMIT PG_IOV_MAX -#define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ) +#define DEFAULT_IO_COMBINE_LIMIT Min(PG_IOV_MAX, (128 * 1024) / BLCKSZ) extern PGDLLIMPORT int io_combine_limit; /* min of the two GUCs below */ extern PGDLLIMPORT int io_combine_limit_guc; extern PGDLLIMPORT int io_max_combine_limit; -- 2.39.5