> grep snapshot:
> http://meyering.net/grep/grep-ss.tar.xz 1.4 MB
> http://meyering.net/grep/grep-ss.tar.xz.sig
> http://meyering.net/grep/grep-2.26.39-ae3f.tar.xz
This release, built for mingw, is hardly usable:
  - 33 out of 107 tests fail,
  - A simple "grep.exe o xx > yy" fails with error
      grep.exe: input file 'xx' is also the output

More details:
  - This happens both in a Cygwin mintty.exe window and in a cmd.exe window.
  - It's the same for 32-bit mingw builds and 64-bit mingw builds
    (recipe: http://git.savannah.gnu.org/gitweb/?p=gperf.git;a=blob_plain;f=README.windows;hb=HEAD )
  - The error is signalled in grep.c:1874. At this point, 'st' (of type
    'struct _stat64') contains
      {
        st_dev = 0,
        st_ino = 0,
        st_mode = 0x81B6 = _S_IFREG | _S_IREAD | _S_IWRITE | 0x36,
        st_nlink = 1,
        st_uid = 0,
        st_gid = 0,
        st_rdev = 0,
        st_size = 4,
        st_atime = 1481099615,
        st_mtime = 1481099615,
        st_ctime = 1481099615
      }
    Obviously, such a struct cannot reliably distinguish two different
    regular files. In other words, SAME_INODE cannot work.
  - So, how do you determine identity of files in Windows?
    http://stackoverflow.com/questions/562701/best-way-to-determine-if-two-path-reference-to-same-file-in-windows
    But even this is wrong: the use of a BY_HANDLE_FILE_INFORMATION is not
    sufficient, because it contains only 64-bit identifiers for files. See
    https://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx
    The best approach is to use GetFileInformationByHandleEx to produce a
    FILE_ID_INFO.

Find attached a proof-of-concept patch. (Really rough - needs
-D_WIN32_WINNT=_WIN32_WINNT_WIN8, and lacks good error handling.)

With it, I get:

  $ ./grep.exe o xx > yy
  $ ./grep.exe o xx > xx
  grep.exe: input file 'xx' is also the output

That is, now the detection of identical regular files works.

How can we go forward from here?

I would propose a gnulib module that defines a data structure that combines
a 'struct stat' with the FILE_ID_INFO for native Windows, and rebase the
'same-inode' module on it.
The other approach, to override mingw's 'struct stat' and the stat/fstat/lstat() functions, would impose a performance hit on all stat calls, even those that never access the st_ino field.

Bruno
--- grep.c.orig 2016-11-21 18:31:31.000000000 +0100 +++ grep.c 2016-12-09 16:12:51.294888100 +0100 @@ -27,6 +27,11 @@ #include <stdarg.h> #include <stdio.h> #include "system.h" +#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# define WIN32_LEAN_AND_MEAN /* avoid conflict due to DATADIR */ +# include <io.h> +# include <windows.h> +#endif #include "argmatch.h" #include "c-ctype.h" @@ -62,6 +67,9 @@ information here, so that we can automatically skip it, thus avoiding a potential (racy) infinite loop. */ static struct stat out_stat; +#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +static FILE_ID_INFO out_id; +#endif /* if non-zero, display usage information and exit */ static int show_help; @@ -1868,13 +1876,26 @@ input==output, while there is no risk of infloop, there is a race condition that could result in "alternate" output. */ if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count - && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat)) + && S_ISREG (st.st_mode)) { - if (! suppress_errors) - error (0, 0, _("input file %s is also the output"), - quote (input_filename ())); - errseen = true; - goto closeout; +#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ + FILE_ID_INFO desc_id; + if (!GetFileInformationByHandleEx (_get_osfhandle (desc), FileIdInfo, &desc_id, sizeof (desc_id))) + { + fprintf (stderr, "GetFileInformationByHandleEx failed -> %d\n", GetLastError ()); + } + if (desc_id.VolumeSerialNumber == out_id.VolumeSerialNumber + && memcmp (&desc_id.FileId, &out_id.FileId, sizeof (FILE_ID_128)) == 0) +#else + if (SAME_INODE (st, out_stat)) +#endif + { + if (! suppress_errors) + error (0, 0, _("input file %s is also the output"), + quote (input_filename ())); + errseen = true; + goto closeout; + } } /* Set input to binary mode. Pipes are simulated with files @@ -2763,7 +2784,15 @@ if (! 
exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0) { if (S_ISREG (tmp_stat.st_mode)) - out_stat = tmp_stat; + { + out_stat = tmp_stat; +#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ + if (!GetFileInformationByHandleEx (_get_osfhandle (STDOUT_FILENO), FileIdInfo, &out_id, sizeof (out_id))) + { + fprintf (stderr, "GetFileInformationByHandleEx failed -> %d\n", GetLastError ()); + } +#endif + } else if (S_ISCHR (tmp_stat.st_mode)) { struct stat null_stat;