Corinna Vinschen via Cygwin wrote:
On Jun 25 16:59, Christian Franke via Cygwin wrote:
On Sun, 15 Sep 2024 19:47:11 +0200, Christian Franke wrote:
If a file name contains an invalid (truncated) UTF-8 sequence, open()
does not refuse to create the file. Later readdir() returns a different
name which could not be used to access the file.

Testcase with U+1F321 (Thermometer):

$ uname -r
3.5.4-1.x86_64

$ printf $'\U0001F321' | od -A none -t x1
  f0 9f 8c a1

$ touch 'file1-'$'\xf0\x9f\x8c\xa1''.ext'

$ touch 'file2-'$'\xf0\x9f\x8c''.ext'

$ touch 'file3-'$'\xf0\x9f\x8c'

$ ls -1
ls: cannot access 'file2-.?ext': No such file or directory
ls: cannot access 'file3-': No such file or directory
'file1-'$'\360\237\214\241''.ext'
file2-.?ext
file3-


Name mapping according to "fhandler_disk_file::readdir" strace lines:

"file1-\xF0\x9F\x8C\xA1.ext" -(open)-> L"file1-\xD83C\xDF21.ext"
-(readdir)->
"file1-\xF0\x9F\x8C\xA1.ext"

"file2-\xF0\x9f\x8C.ext" -(open)-> L"file2-\xD83C\xF02Eext" -(readdir)->
"file2-.\xE1\x9E\xB3ext"

"file3-\xF0\x9F\x8C" -(open)-> L"file3-\xD83C\xF000" -(readdir)->
"file3-"
I don't know exactly where this happens, but the input of the
conversion is invalid UTF-8 because it's missing the 4th byte.
There's no way to represent these filenames on Windows
filesystems storing filenames as UTF-16 values.

So the problem here is that the conversion somehow misses that
the 4th byte is invalid and just plods forward and converts the
leading three bytes into the matching high surrogate value and
then stumbles over the conversion for the low surrogate.

It would be really helpful to have an STC for this problem.


With some trial and error, I found a testcase for the more serious problem which I reported yesterday but which is not quoted above:


In cases like file3-... above, the converted Windows path ends with 0xF000. This suggests that this is an accidental conversion of the terminating null to the 0xF0xx range.

In some cases, the created Windows file name has random garbage after the 0xF000. Then even Cygwin is not able to access or unlink the file after creation.

Testcase (attached):

$ uname -r
3.7.0-0.160.g922719ba36e0.x86_64

$ gcc -o badname badname.c

$ ./badname
unlink() failed, errno=2, Win path: L"t-\xda01\xf000a"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000b"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000c"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000d"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000e"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000f"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000g"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000h"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000i"
unlink() failed, errno=2, Win path: L"t-\xda01\xf000j"

Conclusion: The terminating null char is accidentally converted to 0xF000 and no new null is appended. A trailing fragment of a previously used path appears.

Fortunately, only in very rare cases is the created Windows file inaccessible from the Win32 layer itself, because its name looks like
  L"file3-\xD83C\xF000garbage."
or
  L"file3-\xD83C\xF000garbage "
which is invalid at the Win32 layer due to the trailing '.' or space. In that case, a tool which removes the file via the Nt*() layer is required.

Testcase: enable one of the "DON'T DO THIS" lines and make sure that a suitable file removal tool is available :-)

--
Regards,
Christian

#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <wchar.h>
#include <windows.h>

/*
 * Write the wide string s to f in the form of a C wide string literal.
 *
 * The output starts with L" and ends with ".  A double quote or a
 * backslash is preceded by a backslash, characters from space through
 * '~' are written unchanged, and every other character is written as
 * \xNNNN using the low 16 bits of its value.
 */
static void print_w(FILE * f, const wchar_t * s)
{
  fputs("L\"", f);
  for (const wchar_t * p = s; *p != L'\0'; p++) {
    const wchar_t ch = *p;
    /* Characters that would end or corrupt the literal get a backslash. */
    if (ch == L'\\' || ch == L'"') {
      fprintf(f, "\\%c", ch);
      continue;
    }
    /* Anything outside printable ASCII is shown as a hex escape. */
    if (ch < L' ' || ch > L'~') {
      fprintf(f, "\\x%04x", ch & 0xffff);
      continue;
    }
    fputc(ch, f);
  }
  fputc('"', f);
}

static void get_winname(wchar_t * name)
{
  WIN32_FIND_DATAW e;
  HANDLE h = FindFirstFileW(L"*", &e);
  if (h == INVALID_HANDLE_VALUE) {
    fprintf(stderr, "FindFirstFileW(): Error=%u\n", GetLastError());
    exit(1);
  }
  int i = 0;
  do {
    if (!wcscmp(e.cFileName, L".") || !wcscmp(e.cFileName, L".."))
      continue;
    if (++i > 1) {
      fprintf(stderr, "Error: more than one Win32 file found\n");
      exit(1);
    }
    wcscpy(name, e.cFileName);
  } while (FindNextFileW(h, &e));
  FindClose(h);
}

/*
 * Create a file called name, then try to remove it again.
 *
 * The file is created with open(O_WRONLY|O_CREAT) and closed right
 * away.  Its Win32 name is then read back with get_winname() before
 * unlink() is attempted.  If unlink() fails, errno and the Win32 name
 * are printed and the file is removed with DeleteFileW() instead; the
 * process exits with status 1 if that fails as well.
 *
 * If open() fails, only a message is printed.
 */
static void testname(const char * name)
{
  const int fd = open(name, O_WRONLY|O_CREAT, 0666);
  if (fd < 0) {
    printf("open() failed, errno=%d\n", errno);
    return;
  }
  close(fd);

  /* Record what the file is really called on the Windows side. */
  wchar_t winname[MAX_PATH];
  get_winname(winname);

  if (unlink(name) != 0) {
    printf("unlink() failed, errno=%d, Win path: ", errno);
    print_w(stdout, winname);
    printf("\n");

    /* Clean up through Win32 using the name that was actually created. */
    if (DeleteFileW(winname))
      return;
    printf("FATAL: DeleteFileW() failed, error=%u\n", GetLastError());
    exit(1);
  }
}

/*
 * Test driver: creates the scratch directory "test.tmp", changes into
 * it and runs testname() ten times with a file name that ends in a
 * truncated UTF-8 sequence.
 *
 * Returns 0 when the loop has completed and 1 if the scratch directory
 * could not be created or entered.  The scratch directory is left in
 * place; an empty one from an earlier run is removed at startup.
 */
int main(void)
{
  const char * dir = "test.tmp";
  rmdir(dir); /* result ignored: the directory usually does not exist yet */
  /* A directory needs the search (x) bits to be entered and used. */
  if (mkdir(dir, 0777)) {
    perror(dir); return 1;
  }
  if (chdir(dir)) {
    perror(dir); return 1;
  }

  for (int i = 0; i < 10; i++) {
    /* 0xF2 starts a 4-byte UTF-8 sequence, but only two continuation
       bytes follow, so the name ends in a truncated sequence. */
    const char name[] = "t-\xf2\x90\x90";
    /* prev is a run of 'X' as long as name (including its null),
       followed by one marker letter that changes per iteration. */
    char prev[sizeof(name)+2];
    memset(prev, 'X', sizeof(prev)-2); prev[sizeof(prev)-1] = 0;
    prev[sizeof(name)] = 'a' + (i % 26);
  //prev[sizeof(name)] = '.'; // DON'T DO THIS!
  //prev[sizeof(name)] = ' '; // DON'T DO THIS!

    /* Result ignored.  NOTE(review): presumably this call only serves
       to leave the converted form of prev behind in a path buffer that
       the conversion of name reuses - confirm against Cygwin sources. */
    access(prev, 0);
    testname(name);
  }
  return 0;
}
-- 
Problem reports:      https://cygwin.com/problems.html
FAQ:                  https://cygwin.com/faq/
Documentation:        https://cygwin.com/docs.html
Unsubscribe info:     https://cygwin.com/ml/#unsubscribe-simple

Reply via email to