On Tue, Dec 17, 2013 at 12:35 PM, Jeff Janes <jeff.ja...@gmail.com> wrote: > Since it doesn't use directIO, you can't warm the PG buffers without also > warming FS cache as a side effect. That is why I like 'buffer' as the > default--if the data fits in shared_buffers, it warm those, otherwise it at > least warms the FS. If you want to only warm the FS cache, you can use > either the 'prefetch' or 'read' modes instead.
All right, here is an updated patch. I swapped the second and third arguments, because I think overriding the prewarm mode will be a lot more common than overriding the relation fork. I also added defaults, so you can do this: SELECT pg_prewarm('pgbench_accounts'); Or this: SELECT pg_prewarm('pgbench_accounts', 'read'); I also fixed some oversights in the error checks. I'm not inclined to wait for the next CommitFest to commit this, because it's a very simple patch and has already had a lot more field testing than most patches get before they're committed. And it's just a contrib module, so the damage it can do if there is in fact a bug is pretty limited. All that having been said, any review is appreciated. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
diff --git a/contrib/Makefile b/contrib/Makefile index 8a2a937..dd2683b 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -32,6 +32,7 @@ SUBDIRS = \ pg_archivecleanup \ pg_buffercache \ pg_freespacemap \ + pg_prewarm \ pg_standby \ pg_stat_statements \ pg_test_fsync \ diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile new file mode 100644 index 0000000..176a29a --- /dev/null +++ b/contrib/pg_prewarm/Makefile @@ -0,0 +1,18 @@ +# contrib/pg_prewarm/Makefile + +MODULE_big = pg_prewarm +OBJS = pg_prewarm.o + +EXTENSION = pg_prewarm +DATA = pg_prewarm--1.0.sql + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_prewarm +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_prewarm/pg_prewarm--1.0.sql b/contrib/pg_prewarm/pg_prewarm--1.0.sql new file mode 100644 index 0000000..2bec776 --- /dev/null +++ b/contrib/pg_prewarm/pg_prewarm--1.0.sql @@ -0,0 +1,14 @@ +/* contrib/pg_prewarm/pg_prewarm--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pg_prewarm" to load this file. \quit + +-- Register the function. +CREATE FUNCTION pg_prewarm(regclass, + mode text default 'buffer', + fork text default 'main', + first_block int8 default null, + last_block int8 default null) +RETURNS int8 +AS 'MODULE_PATHNAME', 'pg_prewarm' +LANGUAGE C; diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c new file mode 100644 index 0000000..10317f3 --- /dev/null +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -0,0 +1,205 @@ +/*------------------------------------------------------------------------- + * + * pg_prewarm.c + * prewarming utilities + * + * Copyright (c) 2010-2012, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_prewarm/pg_prewarm.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <sys/stat.h> +#include <unistd.h> + +#include "access/heapam.h" +#include "catalog/catalog.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/smgr.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +PG_MODULE_MAGIC; + +extern Datum pg_prewarm(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(pg_prewarm); + +typedef enum +{ + PREWARM_PREFETCH, + PREWARM_READ, + PREWARM_BUFFER +} PrewarmType; + +static char blockbuffer[BLCKSZ]; + +/* + * pg_prewarm(regclass, mode text, fork text, + * first_block int8, last_block int8) + * + * The first argument is the relation to be prewarmed; the second controls + * how prewarming is done; legal options are 'prefetch', 'read', and 'buffer'. + * The third is the name of the relation fork to be prewarmed. The fourth + * and fifth arguments specify the first and last block to be prewarmed. + * If the fourth argument is NULL, it will be taken as 0; if the fifth argument + * is NULL, it will be taken as the number of blocks in the relation. The + * return value is the number of blocks successfully prewarmed. + */ +Datum +pg_prewarm(PG_FUNCTION_ARGS) +{ + Oid relOid; + text *forkName; + text *type; + int64 first_block; + int64 last_block; + int64 nblocks; + int64 blocks_done = 0; + int64 block; + Relation rel; + ForkNumber forkNumber; + char *forkString; + char *ttype; + PrewarmType ptype; + AclResult aclresult; + + /* Basic sanity checking. */ + if (PG_ARGISNULL(0)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("relation cannot be null"))); + relOid = PG_GETARG_OID(0); + if (PG_ARGISNULL(1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + (errmsg("prewarm type cannot be null")))); + type = PG_GETARG_TEXT_P(1); + ttype = text_to_cstring(type); + if (!strcmp(ttype, "prefetch")) + ptype = PREWARM_PREFETCH; + else if (!strcmp(ttype, "read")) + ptype = PREWARM_READ; + else if (!strcmp(ttype, "buffer")) + ptype = PREWARM_BUFFER; + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid prewarm type"), + errhint("Valid prewarm types are \"prefetch\", \"read\", and \"buffer\"."))); + PG_RETURN_INT64(0); /* Placate compiler. */ + } + if (PG_ARGISNULL(2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + (errmsg("relation fork cannot be null")))); + forkName = PG_GETARG_TEXT_P(2); + forkString = text_to_cstring(forkName); + forkNumber = forkname_to_number(forkString); + + /* Open relation and check privileges. */ + rel = relation_open(relOid, AccessShareLock); + aclresult = pg_class_aclcheck(relOid, GetUserId(), ACL_SELECT); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_CLASS, get_rel_name(relOid)); + + /* Check that the fork exists. */ + RelationOpenSmgr(rel); + if (!smgrexists(rel->rd_smgr, forkNumber)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("fork \"%s\" does not exist for this relation", + forkString))); + + /* Validate block numbers, or handle nulls. */ + nblocks = RelationGetNumberOfBlocksInFork(rel, forkNumber); + if (PG_ARGISNULL(3)) + first_block = 0; + else + { + first_block = PG_GETARG_INT64(3); + if (first_block < 0 || first_block >= nblocks) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("starting block number must be between 0 and " INT64_FORMAT, + nblocks - 1))); + } + if (PG_ARGISNULL(4)) + last_block = nblocks - 1; + else + { + last_block = PG_GETARG_INT64(4); + if (last_block < 0 || last_block >= nblocks) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ending block number must be between 0 and " INT64_FORMAT, + nblocks - 1))); + } + + /* Now we're ready to do the real work. */ + if (ptype == PREWARM_PREFETCH) + { +#ifdef USE_PREFETCH + /* + * In prefetch mode, we just hint the OS to read the blocks, but we + * don't know whether it really does it, and we don't wait for it + * to finish. + * + * It would probably be better to pass our prefetch requests in + * chunks of a megabyte or maybe even a whole segment at a time, + * but there's no practical way to do that at present without + * a gross modularity violation, so we just do this. + */ + for (block = first_block; block <= last_block; ++block) + { + PrefetchBuffer(rel, forkNumber, block); + ++blocks_done; + } +#else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("prefetch is not supported by this build"))); +#endif + } + else if (ptype == PREWARM_READ) + { + /* + * In read mode, we actually read the blocks, but not into shared + * buffers. This is more portable than prefetch mode (it works + * everywhere) and is synchronous. + */ + RelationOpenSmgr(rel); + for (block = first_block; block <= last_block; ++block) + { + smgrread(rel->rd_smgr, forkNumber, block, blockbuffer); + ++blocks_done; + } + } + else if (ptype == PREWARM_BUFFER) + { + /* + * In buffer mode, we actually pull the data into shared_buffers. + */ + for (block = first_block; block <= last_block; ++block) + { + Buffer buf; + + buf = ReadBufferExtended(rel, forkNumber, block, RBM_NORMAL, NULL); + ReleaseBuffer(buf); + ++blocks_done; + } + } + + /* Close relation, release lock. */ + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(blocks_done); +} diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control new file mode 100644 index 0000000..442a5b8 --- /dev/null +++ b/contrib/pg_prewarm/pg_prewarm.control @@ -0,0 +1,5 @@ +# pg_buffercache extension +comment = 'prewarm relation data' +default_version = '1.0' +module_pathname = '$libdir/pg_prewarm' +relocatable = true diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index dd8e09e..2892fa1 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -128,6 +128,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged; &pgbuffercache; &pgcrypto; &pgfreespacemap; + &pgprewarm; &pgrowlocks; &pgstatstatements; &pgstattuple; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index d1b7dc6..552c3aa 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -126,6 +126,7 @@ <!ENTITY pgbuffercache SYSTEM "pgbuffercache.sgml"> <!ENTITY pgcrypto SYSTEM "pgcrypto.sgml"> <!ENTITY pgfreespacemap SYSTEM "pgfreespacemap.sgml"> +<!ENTITY pgprewarm SYSTEM "pgprewarm.sgml"> <!ENTITY pgrowlocks SYSTEM "pgrowlocks.sgml"> <!ENTITY pgstandby SYSTEM "pgstandby.sgml"> <!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml"> diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml new file mode 100644 index 0000000..559ccf2 --- /dev/null +++ b/doc/src/sgml/pgprewarm.sgml @@ -0,0 +1,68 @@ +<!-- doc/src/sgml/pgprewarm.sgml --> + +<sect1 id="pgprewarm" xreflabel="pg_prewarm"> + <title>pg_prewarm</title> + + <indexterm zone="pgprewarm"> + <primary>pg_prewarm</primary> + </indexterm> + + <para> + The <filename>pg_prewarm</filename> module provides a convenient way + to load relation data into either the operating system buffer cache + or the <productname>PostgreSQL</productname> buffer cache. + </para> + + <sect2> + <title>Functions</title> + +<synopsis> +pg_prewarm(regclass, mode text default 'buffer', fork text default 'main', + first_block int8 default null, + last_block int8 default null) RETURNS int8 +</synopsis> + + <para> + The first argument is the relation to be prewarmed. The second argument + is the prewarming method to be used, as further discussed below; the third + is the relation fork to be prewarmed, usually <literal>main</literal>. + The fourth argument is the first block number to prewarm + (<literal>NULL</literal> is accepted as a synonym for zero). The fifth + argument is the last block number to prewarm (<literal>NULL</literal> + means prewarm through the last block in the relation). The return value + is the number of blocks prewarmed. + </para> + + <para> + There are three available prewarming methods. <literal>prefetch</literal> + issues asynchronous prefetch requests to the operating system, if this is + supported, or throws an error otherwise. <literal>read</literal> reads + the requested range of blocks; unlike <literal>prefetch</literal>, this is + synchronous and supported on all platforms and builds, but may be slower. + <literal>buffer</literal> reads the requested range of blocks into the + database buffer cache. + </para> + + <para> + Note that with any of these methods, attempting to prewarm more blocks than + can be cached - by the OS when using <literal>prefetch</literal> or + <literal>read</literal>, or by <productname>PostgreSQL</productname> when + using <literal>buffer</literal> - will likely result in lower-numbered + blocks being evicted as higher numbered blocks are read in. Prewarmed data + also enjoys no special projection from cache evictions, so it is possible + for other system activity may evict the newly prewarmed blocks shortly after + they are read; conversely, prewarming may also evict other data from cache. + For these reasons, prewarming is typically most useful at startup, when + caches are largely empty. + </para> + </sect2> + + <sect2> + <title>Author</title> + + <para> + Robert Haas <email>rh...@postgresql.org</email> + </para> + </sect2> + +</sect1>
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers