Forking thread "WAL logging problem in 9.4.3?" for this tangent: On Mon, Dec 09, 2019 at 06:04:06PM +0900, Kyotaro Horiguchi wrote: > I don't understand why mdclose checks for (v->mdfd_vfd >= 0) of open > segment but anyway mdimmedsync is believing that that won't happen and > I follow the assumption. (I suspect that the if condition in mdclose > should be an assertion..)
That check helps when data_sync_retry=on and FileClose() raised an error in a previous mdclose() invocation. However, the check is not sufficient to make that case work; the attached test case (not for commit) gets an assertion failure or SIGSEGV. I am inclined to fix this by decrementing md_num_open_segs before modifying md_seg_fds (second attachment). An alternative would be to call _fdvec_resize() after every FileClose(), like mdtruncate() does; however, the repalloc() overhead could be noticeable. (mdclose() is called much more frequently than mdtruncate().) Incidentally, _mdfd_openseg() has this: if (segno <= reln->md_num_open_segs[forknum]) _fdvec_resize(reln, forknum, segno + 1); That should be >=, not <=. If the less-than case happened, this would delete the record of a vfd for a higher-numbered segno. There's no live bug, because only segno == reln->md_num_open_segs[forknum] actually happens. I am inclined to make an assertion of that and remove the condition: Assert(segno == reln->md_num_open_segs[forknum]); _fdvec_resize(reln, forknum, segno + 1);
diff --git a/configure b/configure index 9de5037..a1f09e8 100755 --- a/configure +++ b/configure @@ -3725,7 +3725,7 @@ $as_echo "${segsize}GB" >&6; } cat >>confdefs.h <<_ACEOF -#define RELSEG_SIZE ${RELSEG_SIZE} +#define RELSEG_SIZE 2 _ACEOF diff --git a/configure.in b/configure.in index 9c5e5e7..86f3604 100644 --- a/configure.in +++ b/configure.in @@ -289,7 +289,7 @@ RELSEG_SIZE=`expr '(' 1024 / ${blocksize} ')' '*' ${segsize} '*' 1024` test $? -eq 0 || exit 1 AC_MSG_RESULT([${segsize}GB]) -AC_DEFINE_UNQUOTED([RELSEG_SIZE], ${RELSEG_SIZE}, [ +AC_DEFINE_UNQUOTED([RELSEG_SIZE], 2, [ RELSEG_SIZE is the maximum number of blocks allowed in one disk file. Thus, the maximum size of a single file is RELSEG_SIZE * BLCKSZ; relations bigger than that are divided into multiple files. diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index f4ab19f..350ce7f 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -1741,6 +1741,8 @@ PathNameDeleteTemporaryFile(const char *path, bool error_on_failure) return true; } +char *fail_to_close_file; + /* * close a file when done with it */ @@ -1751,8 +1753,11 @@ FileClose(File file) Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileClose: %d (%s)", - file, VfdCache[file].fileName)); + elog(LOG, "FileClose: %d (%s)", + file, VfdCache[file].fileName); + + if (strcmp(VfdCache[file].fileName, fail_to_close_file) == 0) + elog(ERROR, "failing to close %s, as requested", fail_to_close_file); vfdP = &VfdCache[file]; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 8d951ce..c9f5f53 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3504,6 +3504,16 @@ static struct config_real ConfigureNamesReal[] = static struct config_string ConfigureNamesString[] = { { + {"fail_to_close_file", PGC_SUSET, DEVELOPER_OPTIONS, + gettext_noop("Raise an error when attempt to close this file."), + gettext_noop("This is a temporary GUC to demonstrate a bug."), + GUC_NOT_IN_SAMPLE + }, + &fail_to_close_file, + "", + NULL, NULL, NULL + }, + { {"archive_command", PGC_SIGHUP, WAL_ARCHIVING, gettext_noop("Sets the shell command that will be called to archive a WAL file."), NULL diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 625fbc3..8c21d76 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -48,6 +48,7 @@ typedef int File; /* GUC parameter */ extern PGDLLIMPORT int max_files_per_process; extern PGDLLIMPORT bool data_sync_retry; +extern PGDLLIMPORT char *fail_to_close_file; /* * This is private to fd.c, but exported for save/restore_backend_variables() diff --git a/src/test/regress/sql/boolean.sql b/src/test/regress/sql/boolean.sql index df61fa4..50a4550 100644 --- a/src/test/regress/sql/boolean.sql +++ b/src/test/regress/sql/boolean.sql @@ -2,6 +2,22 @@ -- BOOLEAN -- +-- Row count must be large enough to create more than one segment. This value +-- is enough at RELSEG_SIZE=2. That is unrealistic for production use, but it +-- makes the test finish quickly, with low disk consumption. +create table fileclose (c) as select * from generate_series(1, 2000); + +-- This closes segno==1, then fails the attempt to close segno==0. This +-- leaves reln->md_num_open_segs[MAIN_FORKNUM]==2 despite +-- reln->md_seg_fds[MAIN_FORKNUM][1]==-1; md.c can't cope with that. +begin; +select set_config('fail_to_close_file', pg_relation_filepath('fileclose'), true); +analyze fileclose; +commit; + +analyze fileclose; -- TRAP: FailedAssertion("FileIsValid(file)", File: "fd.c", Line: 2039) +drop table fileclose; + -- -- sanity check - if this fails go insane! --
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 82442db..02983cd 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -516,12 +516,9 @@ mdclose(SMgrRelation reln, ForkNumber forknum) { MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1]; - /* if not closed already */ - if (v->mdfd_vfd >= 0) - { - FileClose(v->mdfd_vfd); - v->mdfd_vfd = -1; - } + FileClose(v->mdfd_vfd); + reln->md_num_open_segs[forknum] = nopensegs - 1; + v->mdfd_vfd = -1; nopensegs--; }