2017-04-29 Bruno Haible <br...@clisp.org> stat: Fix time_t values and other problems on native Windows platforms. * doc/posix-functions/stat.texi: Mention the problem with the Microsoft implementations of stat(). * lib/stat.c: Include filename.h instead of dosname.h. Include malloca.h, stat-w32.h. (is_unc_root): New function. (rpl_stat): New implementation for native Windows. Remove REPLACE_FUNC_STAT_DIR code. * m4/stat.m4 (gl_FUNC_STAT): On native Windows, set REPLACE_STAT always. Don't define REPLACE_FUNC_STAT_DIR. (gl_PREREQ_STAT): Require gl_HEADER_SYS_STAT_H. * modules/stat (Files): Add lib/stat-w32.h, lib/stat-w32.c. (Depends-on): Remove dosname. Add filename, malloca. (configure.ac): Also compile lib/stat-w32.c.
diff --git a/doc/posix-functions/stat.texi b/doc/posix-functions/stat.texi index 33af95d..c30e7e1 100644 --- a/doc/posix-functions/stat.texi +++ b/doc/posix-functions/stat.texi @@ -13,6 +13,15 @@ On platforms where @code{off_t} is a 32-bit type, @code{stat} may not correctly report the size of files or block devices larger than 2 GB. (Cf. @code{AC_SYS_LARGEFILE}.) @item +The @code{st_atime}, @code{st_ctime}, @code{st_mtime} field are affected by +the current time zone and by the DST flag of the current time zone on some +platforms: +mingw, MSVC 14 (when the environment variable @code{TZ} is set). +@item +On MSVC 14, this function fails with error @code{ENOENT} +on files such as @samp{C:\pagefile.sys} and +on directories such as @samp{C:\System Volume Information}. +@item On some platforms, @code{stat("link-to-file/",buf)} succeeds instead of failing with @code{ENOTDIR}. FreeBSD 7.2, AIX 7.1, Solaris 9, mingw64. diff --git a/lib/stat.c b/lib/stat.c index 7e7c2f6..1954449 100644 --- a/lib/stat.c +++ b/lib/stat.c @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/* written by Eric Blake */ +/* Written by Eric Blake and Bruno Haible. */ /* If the user's config.h happens to include <sys/stat.h>, let it include only the system's <sys/stat.h> here, so that orig_stat doesn't recurse to @@ -28,6 +28,7 @@ #undef __need_system_sys_stat_h #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# define WINDOWS_NATIVE # if _GL_WINDOWS_64_BIT_ST_SIZE # undef stat /* avoid warning on mingw64 with _FILE_OFFSET_BITS=64 */ # define stat _stati64 @@ -42,12 +43,16 @@ # endif #endif +#if !defined WINDOWS_NATIVE + static int orig_stat (const char *filename, struct stat *buf) { return stat (filename, buf); } +#endif + /* Specification. */ /* Write "sys/stat.h" here, not <sys/stat.h>, otherwise OSF/1 5.1 DTK cc eliminates this include because of the preliminary #include <sys/stat.h> @@ -58,9 +63,16 @@ orig_stat (const char *filename, struct stat *buf) #include <limits.h> #include <stdbool.h> #include <string.h> -#include "dosname.h" +#include "filename.h" +#include "malloca.h" #include "verify.h" +#ifdef WINDOWS_NATIVE +# define WIN32_LEAN_AND_MEAN +# include <windows.h> +# include "stat-w32.h" +#endif + #if REPLACE_FUNC_STAT_DIR # include "pathmax.h" /* The only known systems where REPLACE_FUNC_STAT_DIR is needed also @@ -70,6 +82,34 @@ orig_stat (const char *filename, struct stat *buf) # endif #endif +#ifdef WINDOWS_NATIVE +/* Return TRUE if the given file name denotes an UNC root. */ +static BOOL +is_unc_root (const char *rname) +{ + /* Test whether it has the syntax '\\server\share'. */ + if (ISSLASH (rname[0]) && ISSLASH (rname[1])) + { + /* It starts with two slashes. Find the next slash. */ + const char *p = rname + 2; + const char *q = p; + while (*q != '\0' && !ISSLASH (*q)) + q++; + if (q > p && *q != '\0') + { + /* Found the next slash at q. */ + q++; + const char *r = q; + while (*r != '\0' && !ISSLASH (*r)) + r++; + if (r > q && *r == '\0') + return TRUE; + } + } + return FALSE; +} +#endif + /* Store information about NAME into ST. Work around bugs with trailing slashes. Mingw has other bugs (such as st_ino always being 0 on success) which this wrapper does not work around. But @@ -77,62 +117,296 @@ orig_stat (const char *filename, struct stat *buf) correctly. */ int -rpl_stat (char const *name, struct stat *st) +rpl_stat (char const *name, struct stat *buf) { - int result = orig_stat (name, st); -#if REPLACE_FUNC_STAT_FILE - /* Solaris 9 mistakenly succeeds when given a non-directory with a - trailing slash. */ - if (result == 0 && !S_ISDIR (st->st_mode)) +#ifdef WINDOWS_NATIVE + /* Fill the fields ourselves, because the original stat function returns + values for st_atime, st_mtime, st_ctime that depend on the current time + zone. See + <https://lists.gnu.org/archive/html/bug-gnulib/2017-04/msg00134.html> */ + /* XXX Should we convert to wchar_t* and prepend '\\?\', in order to work + around length limitations + <https://msdn.microsoft.com/en-us/library/aa365247.aspx> ? */ + + /* POSIX <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13> + specifies: "More than two leading <slash> characters shall be treated as + a single <slash> character." */ + if (ISSLASH (name[0]) && ISSLASH (name[1]) && ISSLASH (name[2])) { - size_t len = strlen (name); - if (ISSLASH (name[len - 1])) + name += 2; + while (ISSLASH (name[1])) + name++; + } + + size_t len = strlen (name); + size_t drive_prefix_len = (HAS_DEVICE (name) ? 2 : 0); + + /* Remove trailing slashes (except the very first one, at position + drive_prefix_len), but remember their presence. */ + size_t rlen; + bool check_dir = false; + + rlen = len; + while (rlen > drive_prefix_len && ISSLASH (name[rlen-1])) + { + check_dir = true; + if (rlen == drive_prefix_len + 1) + break; + rlen--; + } + + /* Handle '' and 'C:'. */ + if (!check_dir && rlen == drive_prefix_len) + { + errno = ENOENT; + return -1; + } + + /* Handle '\\'. */ + if (rlen == 1 && ISSLASH (name[0]) && len >= 2) + { + errno = ENOENT; + return -1; + } + + const char *rname; + char *malloca_rname; + if (rlen == len) + { + rname = name; + malloca_rname = NULL; + } + else + { + malloca_rname = malloca (rlen + 1); + if (malloca_rname == NULL) { - errno = ENOTDIR; + errno = ENOMEM; return -1; } + memcpy (malloca_rname, name, rlen); + malloca_rname[rlen] = '\0'; + rname = malloca_rname; } -#endif /* REPLACE_FUNC_STAT_FILE */ -#if REPLACE_FUNC_STAT_DIR - if (result == -1 && errno == ENOENT) + /* There are two ways to get at the requested information: + - by scanning the parent directory and examining the relevant + directory entry, + - by opening the file directly. + The first approach fails for root directories (e.g. 'C:\') and + UNC root directories (e.g. '\\server\share'). + The second approach fails for some system files (e.g. 'C:\pagefile.sys' + and 'C:\hiberfil.sys'): ERROR_SHARING_VIOLATION. + So we use the first approach for nearly all files, and the second one + only for root and UNC root directories. */ + { + int ret; + if (!((rlen == drive_prefix_len + 1 && ISSLASH (rname[drive_prefix_len])) + || is_unc_root (rname))) + { + /* Approach based on the directory entry. */ + + if (strchr (rname, '?') != NULL || strchr (rname, '*') != NULL) + { + /* Other Windows API functions would fail with error + ERROR_INVALID_NAME. */ + if (malloca_rname != NULL) + freea (malloca_rname); + errno = ENOENT; + return -1; + } + + /* Get the details about the directory entry. */ + WIN32_FIND_DATA info; + HANDLE h = FindFirstFile (rname, &info); + if (h == INVALID_HANDLE_VALUE) + goto failed; + + /* Test for error conditions before starting to fill *buf. */ + if (sizeof (buf->st_size) <= 4 && info.nFileSizeHigh > 0) + { + FindClose (h); + if (malloca_rname != NULL) + freea (malloca_rname); + errno = EOVERFLOW; + return -1; + } + + /* st_ino is not wide enough for identifying a file on a device. + Without st_ino, st_dev is pointless. */ + buf->st_dev = 0; + buf->st_ino = 0; + + /* st_mode. */ + unsigned int mode = + /* XXX How to handle FILE_ATTRIBUTE_REPARSE_POINT ? */ + ((info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ? _S_IFDIR | S_IEXEC_UGO : _S_IFREG) + | S_IREAD_UGO + | ((info.dwFileAttributes & FILE_ATTRIBUTE_READONLY) ? 0 : S_IWRITE_UGO); + if (!(info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + { + /* Determine whether the file is executable by looking at the file + name suffix. */ + if (info.nFileSizeHigh > 0 || info.nFileSizeLow > 0) + { + const char *last_dot = NULL; + const char *p; + for (p = info.cFileName; *p != '\0'; p++) + if (*p == '.') + last_dot = p; + if (last_dot != NULL) + { + const char *suffix = last_dot + 1; + if (_stricmp (suffix, "exe") == 0 + || _stricmp (suffix, "bat") == 0 + || _stricmp (suffix, "cmd") == 0 + || _stricmp (suffix, "com") == 0) + mode |= S_IEXEC_UGO; + } + } + } + buf->st_mode = mode; + + /* st_nlink. Ignore hard links here. */ + buf->st_nlink = 1; + + /* There's no easy way to map the Windows SID concept to an integer. */ + buf->st_uid = 0; + buf->st_gid = 0; + + /* st_rdev is irrelevant for normal files and directories. */ + buf->st_rdev = 0; + + /* st_size. */ + if (sizeof (buf->st_size) <= 4) + /* Range check already done above. */ + buf->st_size = info.nFileSizeLow; + else + buf->st_size = ((long long) info.nFileSizeHigh << 32) | (long long) info.nFileSizeLow; + + /* st_atime, st_mtime, st_ctime. */ + buf->st_atime = _gl_convert_FILETIME_to_POSIX (&info.ftLastAccessTime); + buf->st_mtime = _gl_convert_FILETIME_to_POSIX (&info.ftLastWriteTime); + buf->st_ctime = _gl_convert_FILETIME_to_POSIX (&info.ftCreationTime); + + FindClose (h); + + ret = 0; + } + else + { + /* Approach based on the file. */ + + /* Open a handle to the file. + CreateFile + <https://msdn.microsoft.com/en-us/library/aa363858.aspx> + <https://msdn.microsoft.com/en-us/library/aa363874.aspx> */ + HANDLE h = + CreateFile (rname, + FILE_READ_ATTRIBUTES, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, + OPEN_EXISTING, + /* FILE_FLAG_POSIX_SEMANTICS (treat file names that differ only + in case as different) makes sense only when applied to *all* + filesystem operations. */ + FILE_FLAG_BACKUP_SEMANTICS /* | FILE_FLAG_POSIX_SEMANTICS */, + NULL); + if (h == INVALID_HANDLE_VALUE) + goto failed; + + ret = _gl_fstat_by_handle (h, rname, buf); + CloseHandle (h); + } + + if (ret >= 0 && check_dir && !S_ISDIR (buf->st_mode)) + { + errno = ENOTDIR; + ret = -1; + } + if (malloca_rname != NULL) + { + int saved_errno = errno; + freea (malloca_rname); + errno = saved_errno; + } + return ret; + } + + failed: + { + DWORD error = GetLastError (); + #if 0 + fprintf (stderr, "rpl_stat error 0x%x\n", (unsigned int) error); + #endif + + if (malloca_rname != NULL) + freea (malloca_rname); + + switch (error) + { + /* Some of these errors probably cannot happen with the specific flags + that we pass to CreateFile. But who knows... */ + case ERROR_FILE_NOT_FOUND: /* The last component of rname does not exist. */ + case ERROR_PATH_NOT_FOUND: /* Some directory component in rname does not exist. */ + case ERROR_BAD_PATHNAME: /* rname is such as '\\server'. */ + case ERROR_BAD_NET_NAME: /* rname is such as '\\server\nonexistentshare'. */ + case ERROR_INVALID_NAME: /* rname contains wildcards, misplaced colon, etc. */ + case ERROR_DIRECTORY: + errno = ENOENT; + break; + + case ERROR_ACCESS_DENIED: /* rname is such as 'C:\System Volume Information\foo'. */ + case ERROR_SHARING_VIOLATION: /* rname is such as 'C:\pagefile.sys' (second approach only). */ + /* XXX map to EACCESS or EPERM? */ + errno = EACCES; + break; + + case ERROR_OUTOFMEMORY: + errno = ENOMEM; + break; + + case ERROR_WRITE_PROTECT: + errno = EROFS; + break; + + case ERROR_WRITE_FAULT: + case ERROR_READ_FAULT: + case ERROR_GEN_FAILURE: + errno = EIO; + break; + + case ERROR_BUFFER_OVERFLOW: + case ERROR_FILENAME_EXCED_RANGE: + errno = ENAMETOOLONG; + break; + + case ERROR_DELETE_PENDING: /* XXX map to EACCESS or EPERM? */ + errno = EPERM; + break; + + default: + errno = EINVAL; + break; + } + + return -1; + } +#else + int result = orig_stat (name, buf); +# if REPLACE_FUNC_STAT_FILE + /* Solaris 9 mistakenly succeeds when given a non-directory with a + trailing slash. */ + if (result == 0 && !S_ISDIR (buf->st_mode)) { - /* Due to mingw's oddities, there are some directories (like - c:\) where stat() only succeeds with a trailing slash, and - other directories (like c:\windows) where stat() only - succeeds without a trailing slash. But we want the two to be - synonymous, since chdir() manages either style. Likewise, Mingw also - reports ENOENT for names longer than PATH_MAX, when we want - ENAMETOOLONG, and for stat("file/"), when we want ENOTDIR. - Fortunately, mingw PATH_MAX is small enough for stack - allocation. */ - char fixed_name[PATH_MAX + 1] = {0}; size_t len = strlen (name); - bool check_dir = false; - verify (PATH_MAX <= 4096); - if (PATH_MAX <= len) - errno = ENAMETOOLONG; - else if (len) + if (ISSLASH (name[len - 1])) { - strcpy (fixed_name, name); - if (ISSLASH (fixed_name[len - 1])) - { - check_dir = true; - while (len && ISSLASH (fixed_name[len - 1])) - fixed_name[--len] = '\0'; - if (!len) - fixed_name[0] = '/'; - } - else - fixed_name[len++] = '/'; - result = orig_stat (fixed_name, st); - if (result == 0 && check_dir && !S_ISDIR (st->st_mode)) - { - result = -1; - errno = ENOTDIR; - } + errno = ENOTDIR; + return -1; } } -#endif /* REPLACE_FUNC_STAT_DIR */ +# endif /* REPLACE_FUNC_STAT_FILE */ return result; +#endif } diff --git a/m4/stat.m4 b/m4/stat.m4 index 9ff77df..b3172d8 100644 --- a/m4/stat.m4 +++ b/m4/stat.m4 @@ -1,4 +1,4 @@ -# serial 11 +# serial 12 # Copyright (C) 2009-2017 Free Software Foundation, Inc. # @@ -11,61 +11,57 @@ AC_DEFUN([gl_FUNC_STAT], AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles AC_REQUIRE([gl_SYS_STAT_H_DEFAULTS]) AC_CHECK_FUNCS_ONCE([lstat]) - dnl mingw is the only known platform where stat(".") and stat("./") differ - AC_CACHE_CHECK([whether stat handles trailing slashes on directories], - [gl_cv_func_stat_dir_slash], - [AC_RUN_IFELSE( - [AC_LANG_PROGRAM( - [[#include <sys/stat.h> -]], [[struct stat st; return stat (".", &st) != stat ("./", &st);]])], - [gl_cv_func_stat_dir_slash=yes], [gl_cv_func_stat_dir_slash=no], - [case $host_os in - mingw*) gl_cv_func_stat_dir_slash="guessing no";; - *) gl_cv_func_stat_dir_slash="guessing yes";; - esac])]) - dnl AIX 7.1, Solaris 9, mingw64 mistakenly succeed on stat("file/"). - dnl (For mingw, this is due to a broken stat() override in libmingwex.a.) - dnl FreeBSD 7.2 mistakenly succeeds on stat("link-to-file/"). - AC_CACHE_CHECK([whether stat handles trailing slashes on files], - [gl_cv_func_stat_file_slash], - [touch conftest.tmp - # Assume that if we have lstat, we can also check symlinks. - if test $ac_cv_func_lstat = yes; then - ln -s conftest.tmp conftest.lnk - fi - AC_RUN_IFELSE( - [AC_LANG_PROGRAM( - [[#include <sys/stat.h> + case "$host_os" in + mingw*) + dnl On this platform, the original stat() returns st_atime, st_mtime, + dnl st_ctime values that are affected by the time zone. + REPLACE_STAT=1 + ;; + *) + dnl AIX 7.1, Solaris 9, mingw64 mistakenly succeed on stat("file/"). + dnl (For mingw, this is due to a broken stat() override in libmingwex.a.) + dnl FreeBSD 7.2 mistakenly succeeds on stat("link-to-file/"). + AC_CACHE_CHECK([whether stat handles trailing slashes on files], + [gl_cv_func_stat_file_slash], + [touch conftest.tmp + # Assume that if we have lstat, we can also check symlinks. + if test $ac_cv_func_lstat = yes; then + ln -s conftest.tmp conftest.lnk + fi + AC_RUN_IFELSE( + [AC_LANG_PROGRAM( + [[#include <sys/stat.h> ]], [[int result = 0; - struct stat st; - if (!stat ("conftest.tmp/", &st)) - result |= 1; + struct stat st; + if (!stat ("conftest.tmp/", &st)) + result |= 1; #if HAVE_LSTAT - if (!stat ("conftest.lnk/", &st)) - result |= 2; + if (!stat ("conftest.lnk/", &st)) + result |= 2; #endif - return result; - ]])], - [gl_cv_func_stat_file_slash=yes], [gl_cv_func_stat_file_slash=no], - [case "$host_os" in - # Guess yes on glibc systems. - *-gnu*) gl_cv_func_stat_file_slash="guessing yes" ;; - # If we don't know, assume the worst. - *) gl_cv_func_stat_file_slash="guessing no" ;; - esac - ]) - rm -f conftest.tmp conftest.lnk]) - case $gl_cv_func_stat_dir_slash in - *no) REPLACE_STAT=1 - AC_DEFINE([REPLACE_FUNC_STAT_DIR], [1], [Define to 1 if stat needs - help when passed a directory name with a trailing slash]);; - esac - case $gl_cv_func_stat_file_slash in - *no) REPLACE_STAT=1 - AC_DEFINE([REPLACE_FUNC_STAT_FILE], [1], [Define to 1 if stat needs - help when passed a file name with a trailing slash]);; + return result; + ]])], + [gl_cv_func_stat_file_slash=yes], [gl_cv_func_stat_file_slash=no], + [case "$host_os" in + # Guess yes on glibc systems. + *-gnu*) gl_cv_func_stat_file_slash="guessing yes" ;; + # If we don't know, assume the worst. + *) gl_cv_func_stat_file_slash="guessing no" ;; + esac + ]) + rm -f conftest.tmp conftest.lnk]) + case $gl_cv_func_stat_file_slash in + *no) + REPLACE_STAT=1 + AC_DEFINE([REPLACE_FUNC_STAT_FILE], [1], [Define to 1 if stat needs + help when passed a file name with a trailing slash]);; + esac + ;; esac ]) -# Prerequisites of lib/stat.c. -AC_DEFUN([gl_PREREQ_STAT], [:]) +# Prerequisites of lib/stat.c and lib/stat-w32.c. +AC_DEFUN([gl_PREREQ_STAT], [ + AC_REQUIRE([gl_HEADER_SYS_STAT_H]) + : +]) diff --git a/modules/stat b/modules/stat index e309d73..de9e7cd 100644 --- a/modules/stat +++ b/modules/stat @@ -3,12 +3,15 @@ stat() function: query file information Files: lib/stat.c +lib/stat-w32.h +lib/stat-w32.c m4/stat.m4 Depends-on: sys_stat largefile -dosname [test $REPLACE_STAT = 1] +filename [test $REPLACE_STAT = 1] +malloca [test $REPLACE_STAT = 1] pathmax [test $REPLACE_STAT = 1] stdbool [test $REPLACE_STAT = 1] verify [test $REPLACE_STAT = 1] @@ -17,6 +20,7 @@ configure.ac: gl_FUNC_STAT if test $REPLACE_STAT = 1; then AC_LIBOBJ([stat]) + AC_LIBOBJ([stat-w32]) gl_PREREQ_STAT fi gl_SYS_STAT_MODULE_INDICATOR([stat])