On Fri, Sep 3, 2021 at 2:01 PM Kyotaro Horiguchi <horikyota....@gmail.com> wrote: > Might be stupid, if a delete-pending'ed file can obstruct something, > couldn't we change unlink on Windows to rename to a temporary random > name then remove it? We do something like it explicitly while WAL > file removal. (It may cause degradation on bulk file deletion, and we > may need further fix so that such being-deleted files are excluded > while running a directory scan, though..) > > However, looking [1], with that strategy there may be a case where > such "deleted" files may be left alone forever, though.
It's a good idea. I tested it and it certainly does fix the basebackup problem I've seen (experimental patch attached). But, yeah, I'm also a bit worried that that path could be fragile and need special handling in lots of places. I also tried writing a new open() wrapper using the lower level NtCreateFile() interface, and then an updated stat() wrapper built on top of that. As a non-Windows person, getting that to (mostly) work involved a fair amount of suffering. I can share that if someone is interested, but while learning about that family of interfaces, I realised we could keep the existing Win32-based code, but also retrieve the NT status, leading to a very small change (experimental patch attached). The best idea is probably to set FILE_DISPOSITION_DELETE | FILE_DISPOSITION_POSIX_SEMANTICS before unlinking. This appears to be a silver bullet, but isn't available on ancient Windows releases that we support, or file systems other than local NTFS. So maybe we need a combination of that + STATUS_DELETE_PENDING as shown in the attached. I'll look into that next.
From 5990d14bb4f9a0ca83fd1b6c7c6e0a6a23786a0d Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Sun, 5 Sep 2021 17:07:44 +1200 Subject: [PATCH] Fix Windows basebackup by renaming before unlinking. Rename files before unlinking on Windows. This solves a problem where basebackup could fail because other backends have a file handle to a deleted file, so stat() and open() fail with ERROR_ACCESS_DENIED. Suggested by Kyotaro Horiguchi <horikyota....@gmail.com>. XXX This is only a rough sketch to try the idea out! Discussion: https://postgr.es/m/CA%2BhUKGJz_pZTF9mckn6XgSv69%2BjGwdgLkxZ6b3NWGLBCVjqUZA%40mail.gmail.com --- src/backend/replication/basebackup.c | 7 +++++ src/backend/storage/file/fd.c | 38 +++++++++++++++++++++++++--- src/backend/storage/smgr/md.c | 6 ++--- src/include/storage/fd.h | 1 + 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index e71d7406ed..7961a7041b 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -19,6 +19,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" #include "common/file_perm.h" +#include "common/string.h" #include "commands/progress.h" #include "lib/stringinfo.h" #include "libpq/libpq.h" @@ -1268,6 +1269,12 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, strlen(PG_TEMP_FILE_PREFIX)) == 0) continue; +#ifdef WIN32 + /* Skip unlinked files */ + if (pg_str_endswith(de->d_name, ".unlinked")) + continue; +#endif + /* * Check if the postmaster has signaled us to exit, and abort with an * error in that case. The error handler further up will call diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 501652654e..43f24d4ceb 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -72,6 +72,7 @@ #include "postgres.h" +#include <common/string.h> #include <dirent.h> #include <sys/file.h> #include <sys/param.h> @@ -683,6 +684,35 @@ pg_truncate(const char *path, off_t length) #endif } +/* + * Unlink a file. On Windows, rename to a temporary filename before unlinking + * so that "path" is available for new files immediately even if other backends + * hold descriptors to the one we unlink. + */ +int +pg_unlink(const char *path) +{ +#ifdef WIN32 + for (;;) + { + char tmp_path[MAXPGPATH]; + + snprintf(tmp_path, sizeof(tmp_path), "%s.%ld.unlinked", path, random()); + if (!MoveFileEx(path, tmp_path, 0)) + { + if (GetLastError() == ERROR_FILE_EXISTS) + continue; /* try a new random number */ + _dosmaperr(GetLastError()); + return -1; + } + + return unlink(tmp_path); + } +#else + return unlink(path); +#endif +} + /* * fsync_fname -- fsync a file or directory, handling errors properly * @@ -3269,8 +3299,9 @@ CleanupTempFiles(bool isCommit, bool isProcExit) * postmaster session * * This should be called during postmaster startup. It will forcibly - * remove any leftover files created by OpenTemporaryFile and any leftover - * temporary relation files created by mdcreate. + * remove any leftover files created by OpenTemporaryFile, any leftover + * temporary relation files created by mdcreate, and anything left + * behind by pg_unlink() on crash. * * During post-backend-crash restart cycle, this routine is called when * remove_temp_files_after_crash GUC is enabled. Multiple crashes while @@ -3448,7 +3479,8 @@ RemovePgTempRelationFilesInDbspace(const char *dbspacedirname) while ((de = ReadDirExtended(dbspace_dir, dbspacedirname, LOG)) != NULL) { - if (!looks_like_temp_rel_name(de->d_name)) + if (!looks_like_temp_rel_name(de->d_name) && + !pg_str_endswith(de->d_name, ".unlinked")) continue; snprintf(rm_path, sizeof(rm_path), "%s/%s", diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 8592d47e95..d519855d59 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -374,7 +374,7 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) /* Next unlink the file, unless it was already found to be missing */ if (ret == 0 || errno != ENOENT) { - ret = unlink(path); + ret = pg_unlink(path); if (ret < 0 && errno != ENOENT) ereport(WARNING, (errcode_for_file_access(), @@ -422,7 +422,7 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) register_forget_request(rnode, forkNum, segno); } - if (unlink(segpath) < 0) + if (pg_unlink(segpath) < 0) { /* ENOENT is expected after the last segment... */ if (errno != ENOENT) @@ -1753,7 +1753,7 @@ mdunlinkfiletag(const FileTag *ftag, char *path) pfree(p); /* Try to unlink the file. */ - return unlink(path); + return pg_unlink(path); } /* diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 329c7eba9a..be38f6a979 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -189,6 +189,7 @@ extern ssize_t pg_pwritev_with_retry(int fd, int iovcnt, off_t offset); extern int pg_truncate(const char *path, off_t length); +extern int pg_unlink(const char *path); extern void fsync_fname(const char *fname, bool isdir); extern int fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel); extern int durable_rename(const char *oldfile, const char *newfile, int loglevel); -- 2.30.2
From 10768af742ccf3c7dcdc8373a7b7f693edff6b81 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Sun, 5 Sep 2021 23:49:23 +1200 Subject: [PATCH] Handle STATUS_DELETE_PENDING on Windows. 1. Update our open() wrapper to check for STATUS_DELETE_PENDING and translate it to appropriate errors. 2. Remove non-working code that intended to do the same thing in our stat() wrapper, and build on top of open() instead. XXX Work in progress, see TODO notes in the code Discussion: https://postgr.es/m/CA%2BhUKGJz_pZTF9mckn6XgSv69%2BjGwdgLkxZ6b3NWGLBCVjqUZA%40mail.gmail.com --- src/include/port.h | 1 + src/port/open.c | 72 +++++++++++------ src/port/win32stat.c | 180 ++++--------------------------------------- 3 files changed, 65 insertions(+), 188 deletions(-) diff --git a/src/include/port.h b/src/include/port.h index 82f63de325..88b2711121 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -290,6 +290,7 @@ extern bool rmtree(const char *path, bool rmtopdir); * passing of other special options. */ #define O_DIRECT 0x80000000 +#define O_X_STAT 0x40000000 extern int pgwin32_open(const char *, int,...); extern FILE *pgwin32_fopen(const char *, const char *); #define open(a,b,c) pgwin32_open(a,b,c) diff --git a/src/port/open.c b/src/port/open.c index 14c6debba9..de976fc7e3 100644 --- a/src/port/open.c +++ b/src/port/open.c @@ -23,6 +23,34 @@ #include <assert.h> #include <sys/stat.h> +#include <ntstatus.h> + +typedef NTSTATUS (__stdcall *RtlGetLastNtStatus_t)(void); + +static RtlGetLastNtStatus_t pg_RtlGetLastNtStatus; + +static int +initialize_ntdll(void) +{ + HMODULE module; + + if (pg_RtlGetLastNtStatus) + return 0; + module = LoadLibraryEx("ntdll.dll", NULL, 0); + if (!module) + { + _dosmaperr(GetLastError()); + return -1; + } + pg_RtlGetLastNtStatus = (RtlGetLastNtStatus_t) (pg_funcptr_t) + GetProcAddress(module, "RtlGetLastNtStatus"); + if (!pg_RtlGetLastNtStatus) + { + _dosmaperr(GetLastError()); + return -1; + } + return 0; +} static int openFlagsToCreateFileFlags(int openFlags) @@ -66,13 +94,18 @@ pgwin32_open(const char *fileName, int fileFlags,...) SECURITY_ATTRIBUTES sa; int loops = 0; + if (initialize_ntdll() < 0) + return -1; + /* Check that we can handle the request */ assert((fileFlags & ((O_RDONLY | O_WRONLY | O_RDWR) | O_APPEND | (O_RANDOM | O_SEQUENTIAL | O_TEMPORARY) | _O_SHORT_LIVED | O_DSYNC | O_DIRECT | + O_X_STAT | (O_CREAT | O_TRUNC | O_EXCL) | (O_TEXT | O_BINARY))) == fileFlags); #ifndef FRONTEND - Assert(pgwin32_signal_event != NULL); /* small chance of pg_usleep() */ + /* XXX When called by stat very early on, this fails! */ + //Assert(pgwin32_signal_event != NULL); /* small chance of pg_usleep() */ #endif #ifdef FRONTEND @@ -102,6 +135,7 @@ pgwin32_open(const char *fileName, int fileFlags,...) &sa, openFlagsToCreateFileFlags(fileFlags), FILE_ATTRIBUTE_NORMAL | + ((fileFlags & O_X_STAT) ? FILE_FLAG_BACKUP_SEMANTICS : 0) | ((fileFlags & O_RANDOM) ? FILE_FLAG_RANDOM_ACCESS : 0) | ((fileFlags & O_SEQUENTIAL) ? FILE_FLAG_SEQUENTIAL_SCAN : 0) | ((fileFlags & _O_SHORT_LIVED) ? FILE_ATTRIBUTE_TEMPORARY : 0) | @@ -140,32 +174,20 @@ pgwin32_open(const char *fileName, int fileFlags,...) /* * ERROR_ACCESS_DENIED is returned if the file is deleted but not yet * gone (Windows NT status code is STATUS_DELETE_PENDING). In that - * case we want to wait a bit and try again, giving up after 1 second - * (since this condition should never persist very long). However, - * there are other commonly-hit cases that return ERROR_ACCESS_DENIED, - * so care is needed. In particular that happens if we try to open a - * directory, or of course if there's an actual file-permissions - * problem. To distinguish these cases, try a stat(). In the - * delete-pending case, it will either also get STATUS_DELETE_PENDING, - * or it will see the file as gone and fail with ENOENT. In other - * cases it will usually succeed. The only somewhat-likely case where - * this coding will uselessly wait is if there's a permissions problem - * with a containing directory, which we hope will never happen in any - * performance-critical code paths. + * case, we'd better ask for the NT status too so we can translate it + * to a more Unix-like error. + * + * If there's no O_CREAT flag, then we'll pretend the file is + * invisible. With O_CREAT, we have no choice but to report that + * there's a file in the way (which wouldn't happen on Unix). */ - if (err == ERROR_ACCESS_DENIED) + if (err == ERROR_ACCESS_DENIED && + pg_RtlGetLastNtStatus() == STATUS_DELETE_PENDING) { - if (loops < 10) - { - struct stat st; - - if (stat(fileName, &st) != 0) - { - pg_usleep(100000); - loops++; - continue; - } - } + if (fileFlags & O_CREAT) + err = ERROR_FILE_EXISTS; + else + err = ERROR_FILE_NOT_FOUND; } _dosmaperr(err); diff --git a/src/port/win32stat.c b/src/port/win32stat.c index 2ad8ee1359..efe5c3221c 100644 --- a/src/port/win32stat.c +++ b/src/port/win32stat.c @@ -18,53 +18,6 @@ #include "c.h" #include <windows.h> -/* - * In order to support MinGW and MSVC2013 we use NtQueryInformationFile as an - * alternative for GetFileInformationByHandleEx. It is loaded from the ntdll - * library. - */ -#if _WIN32_WINNT < 0x0600 -#include <winternl.h> - -#if !defined(__MINGW32__) && !defined(__MINGW64__) -/* MinGW includes this in <winternl.h>, but it is missing in MSVC */ -typedef struct _FILE_STANDARD_INFORMATION -{ - LARGE_INTEGER AllocationSize; - LARGE_INTEGER EndOfFile; - ULONG NumberOfLinks; - BOOLEAN DeletePending; - BOOLEAN Directory; -} FILE_STANDARD_INFORMATION; -#define FileStandardInformation 5 -#endif /* !defined(__MINGW32__) && - * !defined(__MINGW64__) */ - -typedef NTSTATUS (NTAPI * PFN_NTQUERYINFORMATIONFILE) - (IN HANDLE FileHandle, - OUT PIO_STATUS_BLOCK IoStatusBlock, - OUT PVOID FileInformation, - IN ULONG Length, - IN FILE_INFORMATION_CLASS FileInformationClass); - -static PFN_NTQUERYINFORMATIONFILE _NtQueryInformationFile = NULL; - -static HMODULE ntdll = NULL; - -/* - * Load DLL file just once regardless of how many functions we load/call in it. - */ -static void -LoadNtdll(void) -{ - if (ntdll != NULL) - return; - ntdll = LoadLibraryEx("ntdll.dll", NULL, 0); -} - -#endif /* _WIN32_WINNT < 0x0600 */ - - /* * Convert a FILETIME struct into a 64 bit time_t. */ @@ -161,125 +114,31 @@ fileinfo_to_stat(HANDLE hFile, struct stat *buf) int _pgstat64(const char *name, struct stat *buf) { + int fd; + int rc; + int save_errno; + /* - * We must use a handle so lstat() returns the information of the target - * file. To have a reliable test for ERROR_DELETE_PENDING, we use - * NtQueryInformationFile from Windows 2000 or - * GetFileInformationByHandleEx from Server 2008 / Vista. + * Our open wrapper will report STATUS_DELETE_PENDING as ENOENT. We pass + * in a special private flag to say that it's _pgstat64() calling, to + * activate a mode that allows directories to be opened for limited + * purposes. + * + * XXX Think about fd pressure, since we're opening an fd? */ - SECURITY_ATTRIBUTES sa; - HANDLE hFile; - int ret; -#if _WIN32_WINNT < 0x0600 - IO_STATUS_BLOCK ioStatus; - FILE_STANDARD_INFORMATION standardInfo; -#else - FILE_STANDARD_INFO standardInfo; -#endif - - if (name == NULL || buf == NULL) - { - errno = EINVAL; - return -1; - } + fd = pgwin32_open(name, O_RDONLY | O_X_STAT, 0); - /* fast not-exists check */ - if (GetFileAttributes(name) == INVALID_FILE_ATTRIBUTES) - { - _dosmaperr(GetLastError()); + if (fd < 0) return -1; - } - /* get a file handle as lightweight as we can */ - sa.nLength = sizeof(SECURITY_ATTRIBUTES); - sa.bInheritHandle = TRUE; - sa.lpSecurityDescriptor = NULL; - hFile = CreateFile(name, - GENERIC_READ, - (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE), - &sa, - OPEN_EXISTING, - (FILE_FLAG_NO_BUFFERING | FILE_FLAG_BACKUP_SEMANTICS | - FILE_FLAG_OVERLAPPED), - NULL); - if (hFile == INVALID_HANDLE_VALUE) - { - DWORD err = GetLastError(); + rc = _pgfstat64(fd, buf); + save_errno = errno; - CloseHandle(hFile); - _dosmaperr(err); - return -1; - } - - memset(&standardInfo, 0, sizeof(standardInfo)); - -#if _WIN32_WINNT < 0x0600 - if (_NtQueryInformationFile == NULL) - { - /* First time through: load ntdll.dll and find NtQueryInformationFile */ - LoadNtdll(); - if (ntdll == NULL) - { - DWORD err = GetLastError(); - - CloseHandle(hFile); - _dosmaperr(err); - return -1; - } - - _NtQueryInformationFile = (PFN_NTQUERYINFORMATIONFILE) (pg_funcptr_t) - GetProcAddress(ntdll, "NtQueryInformationFile"); - if (_NtQueryInformationFile == NULL) - { - DWORD err = GetLastError(); - - CloseHandle(hFile); - _dosmaperr(err); - return -1; - } - } + close(fd); - if (!NT_SUCCESS(_NtQueryInformationFile(hFile, &ioStatus, &standardInfo, - sizeof(standardInfo), - FileStandardInformation))) - { - DWORD err = GetLastError(); + errno = save_errno; - CloseHandle(hFile); - _dosmaperr(err); - return -1; - } -#else - if (!GetFileInformationByHandleEx(hFile, FileStandardInfo, &standardInfo, - sizeof(standardInfo))) - { - DWORD err = GetLastError(); - - CloseHandle(hFile); - _dosmaperr(err); - return -1; - } -#endif /* _WIN32_WINNT < 0x0600 */ - - if (standardInfo.DeletePending) - { - /* - * File has been deleted, but is not gone from the filesystem yet. - * This can happen when some process with FILE_SHARE_DELETE has it - * open, and it will be fully removed once that handle is closed. - * Meanwhile, we can't open it, so indicate that the file just doesn't - * exist. - */ - CloseHandle(hFile); - errno = ENOENT; - return -1; - } - - /* At last we can invoke fileinfo_to_stat */ - ret = fileinfo_to_stat(hFile, buf); - - CloseHandle(hFile); - return ret; + return rc; } /* @@ -296,11 +155,6 @@ _pgfstat64(int fileno, struct stat *buf) return -1; } - /* - * Since we already have a file handle there is no need to check for - * ERROR_DELETE_PENDING. - */ - return fileinfo_to_stat(hFile, buf); } -- 2.30.2