> grep snapshot:
>   http://meyering.net/grep/grep-ss.tar.xz      1.4 MB
>   http://meyering.net/grep/grep-ss.tar.xz.sig
>   http://meyering.net/grep/grep-2.26.39-ae3f.tar.xz

This release, built for mingw, is hardly usable:
  - 33 out of 107 tests fail,
  - A simple "grep.exe o xx > yy" fails with the error
    grep.exe: input file 'xx' is also the output

More details:
- This happens both in a Cygwin mintty.exe window and in a cmd.exe window.
- It's the same for 32-bit mingw builds and 64-bit mingw builds
  (recipe:
   http://git.savannah.gnu.org/gitweb/?p=gperf.git;a=blob_plain;f=README.windows;hb=HEAD )
- The error is signalled in grep.c:1874.
  At this point, 'st' (of type 'struct _stat64') contains
    { st_dev = 0, st_ino = 0,
      st_mode = 0x81B6 = _S_IFREG | _S_IREAD | _S_IWRITE | 0x36,
      st_nlink = 1,
      st_uid = 0, st_gid = 0, st_rdev = 0, st_size = 4,
      st_atime = 1481099615, st_mtime = 1481099615, st_ctime = 1481099615 }
  Obviously, such a struct, in which st_dev and st_ino are both 0, cannot
  reliably distinguish two different regular files.
  In other words, SAME_INODE cannot work.
- So, how do you determine identity of files in Windows?
  http://stackoverflow.com/questions/562701/best-way-to-determine-if-two-path-reference-to-same-file-in-windows
  But even this is wrong: the use of a BY_HANDLE_FILE_INFORMATION
  is not sufficient, because it contains only 64-bit identifiers for
  files. See
  https://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx
  The best approach is to use GetFileInformationByHandleEx to produce a
  FILE_ID_INFO, which carries a volume serial number and a 128-bit file
  identifier.

Find attached a proof-of-concept patch. (Really rough - needs
-D_WIN32_WINNT=_WIN32_WINNT_WIN8, and lacks good error handling.)

With it, I get:
$ ./grep.exe o xx > yy
$ ./grep.exe o xx > xx
grep.exe: input file 'xx' is also the output

That is, now the detection of identical regular files works.

How can we go forward from here? I would propose a gnulib module that defines
a data structure that combines a 'struct stat' with the FILE_ID_INFO for native
Windows, and rebase the 'same-inode' module on it.

The other approach, to override mingw's 'struct stat' and stat/fstat/lstat()
functions, would imply a performance hit to all stat calls, even those that
don't want to access the st_ino field.

Bruno

--- grep.c.orig	2016-11-21 18:31:31.000000000 +0100
+++ grep.c	2016-12-09 16:12:51.294888100 +0100
@@ -27,6 +27,11 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include "system.h"
+#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
+# define WIN32_LEAN_AND_MEAN /* avoid conflict due to DATADIR */
+# include <io.h>
+# include <windows.h>
+#endif
 
 #include "argmatch.h"
 #include "c-ctype.h"
@@ -62,6 +67,9 @@
    information here, so that we can automatically skip it, thus
    avoiding a potential (racy) infinite loop.  */
 static struct stat out_stat;
+#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
+static FILE_ID_INFO out_id;
+#endif
 
 /* if non-zero, display usage information and exit */
 static int show_help;
@@ -1868,13 +1876,26 @@
      input==output, while there is no risk of infloop, there is a race
      condition that could result in "alternate" output.  */
   if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
-      && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
+      && S_ISREG (st.st_mode))
     {
-      if (! suppress_errors)
-        error (0, 0, _("input file %s is also the output"),
-               quote (input_filename ()));
-      errseen = true;
-      goto closeout;
+#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
+      FILE_ID_INFO desc_id;
+      if (!GetFileInformationByHandleEx (_get_osfhandle (desc), FileIdInfo, &desc_id, sizeof (desc_id)))
+        {
+          fprintf (stderr, "GetFileInformationByHandleEx failed -> %d\n", GetLastError ());
+        }
+      if (desc_id.VolumeSerialNumber == out_id.VolumeSerialNumber
+          && memcmp (&desc_id.FileId, &out_id.FileId, sizeof (FILE_ID_128)) == 0)
+#else
+      if (SAME_INODE (st, out_stat))
+#endif
+        {
+          if (! suppress_errors)
+            error (0, 0, _("input file %s is also the output"),
+                   quote (input_filename ()));
+          errseen = true;
+          goto closeout;
+        }
     }
 
   /* Set input to binary mode.  Pipes are simulated with files
@@ -2763,7 +2784,15 @@
   if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
     {
       if (S_ISREG (tmp_stat.st_mode))
-        out_stat = tmp_stat;
+        {
+          out_stat = tmp_stat;
+#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
+          if (!GetFileInformationByHandleEx (_get_osfhandle (STDOUT_FILENO), FileIdInfo, &out_id, sizeof (out_id)))
+            {
+              fprintf (stderr, "GetFileInformationByHandleEx failed -> %d\n", GetLastError ());
+            }
+#endif
+        }
       else if (S_ISCHR (tmp_stat.st_mode))
         {
           struct stat null_stat;

Reply via email to