Corinna Vinschen wrote:
Hi Christian,

Looks good, but I just realized that I was already wondering about the
sanitization and forgot to talk about it:

On Nov 21 12:24, Christian Franke wrote:
diff --git a/winsup/cygwin/fhandler/dev_disk.cc 
b/winsup/cygwin/fhandler/dev_disk.cc
index c5d72816f..d12ac52fa 100644
--- a/winsup/cygwin/fhandler/dev_disk.cc
+++ b/winsup/cygwin/fhandler/dev_disk.cc
@@ -64,10 +64,12 @@ sanitize_label_string (WCHAR *s)
    /* Linux does not skip leading spaces. */
    return sanitize_string (s, L'\0', L' ', L'_', [] (WCHAR c) -> bool
      {
-      /* Labels may contain characters not allowed in filenames.
-        Linux replaces spaces with \x20 which is not an option here. */
+      /* Labels may contain characters not allowed in filenames.  Also
Apart from slash and backslash, we don't have this problem in Cygwin,
usually.  Even control characters are no problem.  All chars not allowed
in filenames are just transposed into the Unicode private use area, as
per strfuncs.cc, line 20ff on the way to storage, and back when reading
the names from storage.  This, and especially in a virtual filesystem
like /proc, there's no reason to avoid these characters.

Thanks for clarification.



+         replace '#' to avoid that duplicate markers introduce new
+        duplicates.  Linux replaces spaces with \x20 which is not an
+        option here. */
        return !((0 <= c && c <= L' ') || c == L':' || c == L'/' || c == L'\\'
-             || c == L'"');
+             || c == L'#' || c == L'"');
If you really want to avoid chars not allowed in DOS filenames, the
list seems incomplete, missing '<', '>', '?', '*', '|'.

But as I said, there's really no reason for that.  I simply reduced the
above expression to

   return !(c == L'/' || c == L'\\' || c == L'#');

and created a disk label

   test"foo*bar?baz:"

It works nicely, including stuff like

   $ ls *\"*
   $ ls *\**

So, I can push it as is, or we just allow everything and the kitchen sink
as per the reduced filter expression.  What do you prefer?

The latter - patch attached.

Christian

From ecc54356adbe7768bd5fd5561c78c67cd5725183 Mon Sep 17 00:00:00 2001
From: Christian Franke <christian.fra...@t-online.de>
Date: Tue, 21 Nov 2023 19:28:02 +0100
Subject: [PATCH] Cygwin: /dev/disk: Append '#N' if the same name appears more
 than once

No longer drop ranges of identical link names.  Append '#0, #1, ...'
to each name instead.  Enhance charset allowed in label names.
No longer ignore null volume serial numbers.

Signed-off-by: Christian Franke <christian.fra...@t-online.de>
---
 winsup/cygwin/fhandler/dev_disk.cc | 54 ++++++++++++++++++------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/winsup/cygwin/fhandler/dev_disk.cc 
b/winsup/cygwin/fhandler/dev_disk.cc
index c5d72816f..29af9de95 100644
--- a/winsup/cygwin/fhandler/dev_disk.cc
+++ b/winsup/cygwin/fhandler/dev_disk.cc
@@ -64,10 +64,11 @@ sanitize_label_string (WCHAR *s)
   /* Linux does not skip leading spaces. */
   return sanitize_string (s, L'\0', L' ', L'_', [] (WCHAR c) -> bool
     {
-      /* Labels may contain characters not allowed in filenames.
-        Linux replaces spaces with \x20 which is not an option here. */
-      return !((0 <= c && c <= L' ') || c == L':' || c == L'/' || c == L'\\'
-             || c == L'"');
+      /* Labels may contain characters not allowed in filenames.  Also
+         replace '#' to avoid that duplicate markers introduce new
+        duplicates.  Linux replaces spaces with \x20 which is not an
+        option here. */
+      return !(c == L'/' || c == L'\\' || c == L'#');
     }
   );
 }
@@ -304,8 +305,7 @@ partition_to_label_or_uuid(bool uuid, const UNICODE_STRING 
*drive_uname,
   const NTFS_VOLUME_DATA_BUFFER *nvdb =
     reinterpret_cast<const NTFS_VOLUME_DATA_BUFFER *>(ioctl_buf);
   if (uuid && DeviceIoControl (volhdl, FSCTL_GET_NTFS_VOLUME_DATA, nullptr, 0,
-                              ioctl_buf, NT_MAX_PATH, &bytes_read, nullptr)
-      && nvdb->VolumeSerialNumber.QuadPart)
+                              ioctl_buf, NT_MAX_PATH, &bytes_read, nullptr))
     {
       /* Print without any separator as on Linux. */
       __small_sprintf (name, "%016X", nvdb->VolumeSerialNumber.QuadPart);
@@ -327,13 +327,9 @@ partition_to_label_or_uuid(bool uuid, const UNICODE_STRING 
*drive_uname,
   FILE_FS_VOLUME_INFORMATION *ffvi =
     reinterpret_cast<FILE_FS_VOLUME_INFORMATION *>(ioctl_buf);
   if (uuid)
-    {
-      if (!ffvi->VolumeSerialNumber)
-       return false;
-      /* Print with separator as on Linux. */
-      __small_sprintf (name, "%04x-%04x", ffvi->VolumeSerialNumber >> 16,
-                      ffvi->VolumeSerialNumber & 0xffff);
-    }
+    /* Print with separator as on Linux. */
+    __small_sprintf (name, "%04x-%04x", ffvi->VolumeSerialNumber >> 16,
+                    ffvi->VolumeSerialNumber & 0xffff);
   else
     {
       /* Label is not null terminated. */
@@ -361,6 +357,20 @@ by_id_compare_name (const void *a, const void *b)
   return strcmp (ap->name, bp->name);
 }
 
+static int
+by_id_compare_name_drive_part (const void *a, const void *b)
+{
+  const by_id_entry *ap = reinterpret_cast<const by_id_entry *>(a);
+  const by_id_entry *bp = reinterpret_cast<const by_id_entry *>(b);
+  int cmp = strcmp (ap->name, bp->name);
+  if (cmp)
+    return cmp;
+  cmp = ap->drive - bp->drive;
+  if (cmp)
+    return cmp;
+  return ap->part - bp->part;
+}
+
 static by_id_entry *
 by_id_realloc (by_id_entry *p, size_t n)
 {
@@ -610,8 +620,9 @@ get_by_id_table (by_id_entry * &table, 
fhandler_dev_disk::dev_disk_location loc)
   if (!table)
     return (errno_set ? -1 : 0);
 
-  /* Sort by name and remove duplicates. */
-  qsort (table, table_size, sizeof (*table), by_id_compare_name);
+  /* Sort by {name, drive, part} to ensure stable sort order. */
+  qsort (table, table_size, sizeof (*table), by_id_compare_name_drive_part);
+  /* Mark duplicate names. */
   for (unsigned i = 0; i < table_size; i++)
     {
       unsigned j = i + 1;
@@ -619,12 +630,13 @@ get_by_id_table (by_id_entry * &table, 
fhandler_dev_disk::dev_disk_location loc)
        j++;
       if (j == i + 1)
        continue;
-      /* Duplicate(s) found, remove all entries with this name. */
-      debug_printf ("removing duplicates %d-%d: '%s'", i, j - 1, 
table[i].name);
-      if (j < table_size)
-       memmove (table + i, table + j, (table_size - j) * sizeof (*table));
-      table_size -= j - i;
-      i--;
+      /* Duplicate(s) found, append "#N" to all entries.  This never
+        introduces new duplicates because '#' never occurs in the
+        original names. */
+      debug_printf ("mark duplicates %u-%u of '%s'", i, j - 1, table[i].name);
+      size_t len = strlen (table[i].name);
+      for (unsigned k = i; k < j; k++)
+       __small_sprintf (table[k].name + len, "#%u", k - i);
     }
 
   debug_printf ("table_size: %d", table_size);
-- 
2.42.1

Reply via email to