v3:
* moves files to x86_64/fastcwd.cc and aarch64/fastcwd.cc
* reflows commit message in patch 3
* adds comments & assert to x86_64/fastcwd.cc
* removes Windows 8.0 case (I didn't realize Windows 8 was no longer supported)
* adds tracking of the lea destination register until it is moved into rcx
  for the call to RtlEnterCriticalSection
* switches from memcmp to comparing the members of opr that are relevant.
* minor formatting tweaks to aarch64/fastcwd.cc

I tested the x86_64 code on every released Windows build from 9600 to 26100.
Interestingly, the machine code of the "use_cwd" function
(RtlpReferenceCurrentDirectory) didn't seem to change until 26100.

(I previously tested the prototype aarch64 code on 16299, 19045, 22631,
and 26100, but only 22000+ supports x86_64 emulation).

Jeremy Drake (5):
  Cygwin: factor out find_fast_cwd_pointer to arch-specific file.
  Cygwin: vendor libudis86 1.7.2/libudis86
  Cygwin: patch libudis86 to build as part of Cygwin
  Cygwin: use udis86 to find fast cwd pointer on x64
  Cygwin: add find_fast_cwd_pointer_aarch64.

 winsup/cygwin/Makefile.am        |   14 +-
 winsup/cygwin/aarch64/fastcwd.cc |  203 +
 winsup/cygwin/path.cc            |  145 +-
 winsup/cygwin/udis86/decode.c    | 1113 ++++
 winsup/cygwin/udis86/decode.h    |  195 +
 winsup/cygwin/udis86/extern.h    |  109 +
 winsup/cygwin/udis86/itab.c      | 8404 ++++++++++++++++++++++++++++++
 winsup/cygwin/udis86/itab.h      |  680 +++
 winsup/cygwin/udis86/types.h     |  260 +
 winsup/cygwin/udis86/udint.h     |   91 +
 winsup/cygwin/udis86/udis86.c    |  464 ++
 winsup/cygwin/x86_64/fastcwd.cc  |  200 +
 12 files changed, 11755 insertions(+), 123 deletions(-)
 create mode 100644 winsup/cygwin/aarch64/fastcwd.cc
 create mode 100644 winsup/cygwin/udis86/decode.c
 create mode 100644 winsup/cygwin/udis86/decode.h
 create mode 100644 winsup/cygwin/udis86/extern.h
 create mode 100644 winsup/cygwin/udis86/itab.c
 create mode 100644 winsup/cygwin/udis86/itab.h
 create mode 100644 winsup/cygwin/udis86/types.h
 create mode 100644 winsup/cygwin/udis86/udint.h
 create mode 100644 winsup/cygwin/udis86/udis86.c
 create mode 100644 winsup/cygwin/x86_64/fastcwd.cc

Range-diff against v2:
1:  25a8b233f5 ! 1:  a1c9f722d7 Cygwin: factor out find_fast_cwd_pointer to 
arch-specific file.
    @@ winsup/cygwin/Makefile.am: LIB_NAME=libcygwin.a
      if TARGET_X86_64
      TARGET_FILES= \
        x86_64/bcopy.S \
    -+  x86_64/fastcwd_x86_64.cc \
    ++  x86_64/fastcwd.cc \
        x86_64/memchr.S \
        x86_64/memcpy.S \
        x86_64/memmove.S \
    @@ winsup/cygwin/path.cc: find_fast_cwd ()
          small_printf ("Cygwin WARNING:\n"
      "  Couldn't compute FAST_CWD pointer.  This typically occurs if you're 
using\n"

    - ## winsup/cygwin/x86_64/fastcwd_x86_64.cc (new) ##
    + ## winsup/cygwin/x86_64/fastcwd.cc (new) ##
     @@
    -+/* fastcwd_x86_64.cc: find fast cwd pointer on x86_64 hosts.
    ++/* x86_64/fastcwd.cc: find fast cwd pointer on x86_64 hosts.
     +
     +  This file is part of Cygwin.
     +
2:  faa2688d1f = 2:  1c290dbc53 Cygwin: vendor libudis86 1.7.2/libudis86
3:  04f7a44f59 ! 3:  bd2dca35eb Cygwin: patch libudis86 to build as part of 
Cygwin
    @@ Metadata
      ## Commit message ##
         Cygwin: patch libudis86 to build as part of Cygwin

    -    This ifdefs out the large table of
    -    opcode strings (and the function that references it) since we're only
    -    interested in walking machine code, not generating disassembly, and
    -    makes a couple of other tables "const" so that they end up in .rdata
    -    instead of .data.
    +    This ifdefs out the large table of opcode strings (and the function 
that
    +    references it) since we're only interested in walking machine code, not
    +    generating disassembly, and makes a couple of other tables "const" so
    +    that they end up in .rdata instead of .data.

         Signed-off-by: Jeremy Drake <cyg...@jdrake.com>

4:  0f06e96562 ! 4:  140a61c9e1 Cygwin: use udis86 to find fast cwd pointer on 
x64
    @@ Commit message

         Signed-off-by: Jeremy Drake <cyg...@jdrake.com>

    - ## winsup/cygwin/x86_64/fastcwd_x86_64.cc ##
    + ## winsup/cygwin/x86_64/fastcwd.cc ##
     @@
        details. */

      #include "winsup.h"
    ++#include <assert.h>
     +#include "udis86/types.h"
     +#include "udis86/extern.h"

      class fcwd_access_t;

     -#define peek32(x) (*(int32_t *)(x))
    ++/* Helper function to get the absolute address of an rip-relative 
instruction
    ++   by summing the current instruction's pc (rip), the current 
instruction's
    ++   length, and the signed 32-bit displacement in the operand.  
Optionally, an
    ++   additional offset is subtracted to deal with the case where a member 
of a
    ++   struct is being referenced by the instruction but the address of the 
struct
    ++   is desired.
    ++*/
     +static inline const void *
     +rip_rel_offset (const ud_t *ud_obj, const ud_operand_t *opr, int 
sub_off=0)
     +{
    ++  assert ((opr->type == UD_OP_JIMM && opr->size == 32) ||
    ++    (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
    ++     opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32));
    ++
     +  return (const void *) (ud_insn_off (ud_obj) + ud_insn_len (ud_obj) +
     +                   opr->lval.sdword - sub_off);
     +}

      /* This function scans the code in ntdll.dll to find the address of the
         global variable used to access the CWD.  While the pointer is global,
    -@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
    +@@ winsup/cygwin/x86_64/fastcwd.cc: find_fast_cwd_pointer_x86_64 ()
                            GetProcAddress (ntdll, "RtlEnterCriticalSection");
        if (!get_dir || !ent_crit)
          return NULL;
    ++  /* Initialize udis86 */
     +  ud_t ud_obj;
     +  ud_init (&ud_obj);
    ++  /* Set 64-bit mode */
     +  ud_set_mode (&ud_obj, 64);
     +  ud_set_input_buffer (&ud_obj, get_dir, 80);
    -+  ud_set_pc (&ud_obj, (const uint64_t) get_dir);
    -+  const ud_operand_t *opr;
    ++  /* Set pc (rip) so that subsequent calls to ud_insn_off will return the 
pc of
    ++     the instruction, saving us the hassle of tracking it ourselves */
    ++  ud_set_pc (&ud_obj, (uint64_t) get_dir);
    ++  const ud_operand_t *opr, *opr0;
     +  ud_mnemonic_code_t insn;
    ++  ud_type_t reg = UD_NONE;
        /* Search first relative call instruction in RtlGetCurrentDirectory_U. 
*/
     -  const uint8_t *rcall = (const uint8_t *) memchr (get_dir, 0xe8, 80);
     -  if (!rcall)
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     -     performs some other actions, not important to us. */
     -  const uint8_t *use_cwd = rcall + 5 + peek32 (rcall + 1);
     +  ud_set_input_buffer (&ud_obj, use_cwd, 120);
    -+  ud_set_pc (&ud_obj, (const uint64_t) use_cwd);
    ++  ud_set_pc (&ud_obj, (uint64_t) use_cwd);
     +
        /* Next we search for the locking mechanism and perform a sanity check.
     -     On Pre-Windows 8 we basically look for the RtlEnterCriticalSection 
call.
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     -                        memmem ((const char *) use_cwd, 80,
     -                                "\xf0\x0f\xba\x35", 4);
     -  if (lock)
    -+     On Pre- (or Post-) Windows 8 we basically look for the
    -+     RtlEnterCriticalSection call.  Windows 8 does not call
    -+     RtlEnterCriticalSection.  The code manipulates the FastPebLock 
manually,
    -+     probably because RtlEnterCriticalSection has been converted to an 
inline
    -+     function.  Either way, we test if the code uses the FastPebLock. */
    ++     we basically look for the RtlEnterCriticalSection call and test if 
the
    ++     code uses the FastPebLock. */
     +  PRTL_CRITICAL_SECTION lockaddr = NULL;
     +
    -+  /* both cases have an `lea rel(%rip)` on the lock */
     +  while (ud_disassemble (&ud_obj) &&
     +      (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
     +      insn != UD_Ijmp)
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     -                                         "\x48\x8b\x1d", 3);
     +      if (insn == UD_Ilea)
     +  {
    -+    /* this seems to follow intel syntax, in that operand 0 is the
    ++    /* udis86 seems to follow intel syntax, in that operand 0 is the
     +       dest and 1 is the src */
    ++    opr0 = ud_insn_opr (&ud_obj, 0);
     +    opr = ud_insn_opr (&ud_obj, 1);
     +    if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
    -+        opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32)
    ++        opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 &&
    ++        opr0->type == UD_OP_REG && opr0->size == 64)
     +      {
     +        lockaddr = (PRTL_CRITICAL_SECTION) rip_rel_offset (&ud_obj, opr);
    ++        reg = opr0->base;
     +        break;
     +      }
     +  }
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     +  if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
     +    return NULL;
     +
    -+  /* Next is either the `callq RtlEnterCriticalSection', or on Windows 8,
    -+     a `lock btr` */
    ++  /* Find where the lock address is loaded into rcx as the first 
parameter of
    ++     a function call */
     +  bool found = false;
    -+  while (ud_disassemble (&ud_obj) &&
    -+      (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
    -+      insn != UD_Ijmp)
    ++  if (reg != UD_R_RCX)
          {
     -      /* Usually the callq RtlEnterCriticalSection follows right after
     -   fetching the lock address. */
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     -      lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
     -                                       "\x48\x8d\x0d", 3);
     -      if (!lock)
    -+      if (insn == UD_Icall)
    ++      while (ud_disassemble (&ud_obj) &&
    ++    (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
    ++    insn != UD_Ijmp)
        {
     -    /* Windows 8.1 Preview calls `lea rel(rip),%r12' then some unrelated
     -       ops, then `mov %r12,%rcx', then `callq RtlEnterCriticalSection'. */
     -    lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
     -                                     "\x4c\x8d\x25", 3);
     -    call_rtl_offset = 14;
    -+    opr = ud_insn_opr (&ud_obj, 0);
    -+    if (opr->type == UD_OP_JIMM && opr->size == 32)
    ++    if (insn == UD_Imov)
     +      {
    -+        if (ent_crit != rip_rel_offset (&ud_obj, opr))
    -+          return NULL;
    -+        found = true;
    -+        break;
    ++        opr0 = ud_insn_opr (&ud_obj, 0);
    ++        opr = ud_insn_opr (&ud_obj, 1);
    ++        if (opr->type == UD_OP_REG && opr->size == 64 &&
    ++            opr->base == reg && opr0->type == UD_OP_REG &&
    ++            opr0->size == 64 && opr0->base == UD_R_RCX)
    ++          {
    ++            found = true;
    ++            break;
    ++          }
     +      }
        }
    --
    ++      if (!found)
    ++  return NULL;
    ++    }
    +
     -      if (!lock)
    -+      else if (insn == UD_Ibtr && ud_obj.pfx_lock)
    ++  /* Next is the `callq RtlEnterCriticalSection' */
    ++  found = false;
    ++  while (ud_disassemble (&ud_obj) &&
    ++      (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
    ++      insn != UD_Ijmp)
    ++    {
    ++      if (insn == UD_Icall)
        {
     -    /* A recent Windows 11 Preview calls `lea rel(rip),%r13' then
     -       some unrelated instructions, then `callq RtlEnterCriticalSection'.
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     -    lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
     -                                     "\x4c\x8d\x2d", 3);
     -    call_rtl_offset = 24;
    -+    /* for Windows 8 */
     +    opr = ud_insn_opr (&ud_obj, 0);
    -+    if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
    -+        opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 &&
    -+        opr->size == 32)
    ++    if (opr->type == UD_OP_JIMM && opr->size == 32)
     +      {
    -+        if (lockaddr != rip_rel_offset (&ud_obj, opr,
    -+                            offsetof (RTL_CRITICAL_SECTION, LockCount)))
    ++        if (ent_crit != rip_rel_offset (&ud_obj, opr))
     +          return NULL;
     +        found = true;
     +        break;
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()

     -      if (!lock)
     +  fcwd_access_t **f_cwd_ptr = NULL;
    -+  ud_type_t reg = UD_NONE;
    -+  /* now we're looking for a movq rel(%rip) */
    ++  /* now we're looking for a mov rel(%rip), %<reg64> */
     +  while (ud_disassemble (&ud_obj) &&
     +      (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
     +      insn != UD_Ijmp)
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     +      if (insn == UD_Imov)
        {
     -    return NULL;
    -+    const ud_operand_t *opr0 = ud_insn_opr (&ud_obj, 0);
    ++    opr0 = ud_insn_opr (&ud_obj, 0);
     +    opr = ud_insn_opr (&ud_obj, 1);
    -+    if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
    -+        opr->index == UD_NONE && opr->scale == 0 &&
    -+        opr->offset == 32 && opr->size == 64 &&
    -+        opr0->type == UD_OP_REG)
    ++    if (opr->type == UD_OP_MEM && opr->size == 64 &&
    ++        opr->base == UD_R_RIP && opr->index == UD_NONE &&
    ++        opr->scale == 0 && opr->offset == 32 &&
    ++        opr0->type == UD_OP_REG && opr0->size == 64)
     +      {
     +        f_cwd_ptr = (fcwd_access_t **) rip_rel_offset (&ud_obj, opr);
     +        reg = opr0->base;
    @@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
     -      movrbx = lock + 5;
          }
     -  if (!movrbx)
    --    return NULL;
    -   /* Check that the next instruction tests if the fetched value is NULL. 
*/
    ++  /* Check that the next instruction is a test. */
    ++  if (!f_cwd_ptr || !ud_disassemble (&ud_obj) ||
    ++      ud_insn_mnemonic (&ud_obj) != UD_Itest)
    +     return NULL;
    +-  /* Check that the next instruction tests if the fetched value is NULL. 
*/
     -  const uint8_t *testrbx = (const uint8_t *)
     -                     memmem (movrbx + 7, 3, "\x48\x85\xdb", 3);
     -  if (!testrbx)
    -+  if (!f_cwd_ptr || !ud_disassemble (&ud_obj) ||
    -+      ud_insn_mnemonic (&ud_obj) != UD_Itest)
    -+    return NULL;
     +
    -+  opr = ud_insn_opr (&ud_obj, 0);
    -+  if (opr->type != UD_OP_REG || opr->base != reg ||
    -+      memcmp (opr, ud_insn_opr (&ud_obj, 1), offsetof (ud_operand_t, 
_legacy)))
    ++  /* ... and that it's testing the same register that the mov above 
loaded the
    ++     f_cwd_ptr into against itself */
    ++  opr0 = ud_insn_opr (&ud_obj, 0);
    ++  opr = ud_insn_opr (&ud_obj, 1);
    ++  if (opr->type != UD_OP_REG || opr->size != 64 || opr->base != reg ||
    ++      opr0->type != opr->type || opr0->size != 64 || opr0->base != 
opr->base)
          return NULL;
     -  /* Compute address of the fcwd_access_t ** pointer. */
     -  return (fcwd_access_t **) (testrbx + peek32 (movrbx + 3));
5:  e3adc20c9f ! 5:  87f2bcf895 Cygwin: add find_fast_cwd_pointer_aarch64.
    @@ Commit message
         Signed-off-by: Jeremy Drake <cyg...@jdrake.com>

      ## winsup/cygwin/Makefile.am ##
    -@@ winsup/cygwin/Makefile.am: DLL_FILES= \
    -   exceptions.cc \
    -   exec.cc \
    -   external.cc \
    -+  fastcwd_aarch64.cc \
    -   fcntl.cc \
    -   fenv.c \
    -   flock.cc \
    +@@ winsup/cygwin/Makefile.am: if TARGET_X86_64
    + TARGET_FILES= \
    +   x86_64/bcopy.S \
    +   x86_64/fastcwd.cc \
    ++  aarch64/fastcwd.cc \
    +   x86_64/memchr.S \
    +   x86_64/memcpy.S \
    +   x86_64/memmove.S \

    - ## winsup/cygwin/fastcwd_aarch64.cc (new) ##
    + ## winsup/cygwin/aarch64/fastcwd.cc (new) ##
     @@
    -+/* fastcwd_aarch64.cc: find the fast cwd pointer on aarch64 hosts.
    ++/* aarch64/fastcwd.cc: find the fast cwd pointer on aarch64 hosts.
     +
     +  This file is part of Cygwin.
     +
    @@ winsup/cygwin/fastcwd_aarch64.cc (new)
     +  Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
     +  details. */
     +
    -+/* You might well wonder why this file is not in an aarch64 
target-specific
    -+   directory, like fastcwd_x86_64.cc.  It turns out that this code works 
when
    -+   built for i686, x86_64, or aarch64 with just the small #if/#elif block 
in
    ++/* You might well wonder why this file is included in x86_64 target files
    ++   in Makefile.am.  It turns out that this code works when built for i686,
    ++   x86_64, or aarch64 with just the small #if/#elif block in
     +   GetArm64ProcAddress below caring which. */
     +
     +#include "winsup.h"
    -+#include "assert.h"
    ++#include <assert.h>
     +
     +class fcwd_access_t;
     +
     +static LPCVOID
     +GetArm64ProcAddress (HMODULE hModule, LPCSTR procname)
     +{
    -+  const BYTE * proc = (const BYTE *) GetProcAddress (hModule, procname);
    ++  const BYTE *proc = (const BYTE *) GetProcAddress (hModule, procname);
     +#if defined (__aarch64__)
     +  return proc;
     +#else
    @@ winsup/cygwin/fastcwd_aarch64.cc (new)
     +
     +/* this would work for either bl or b, but we only use it for bl */
     +static inline LPCVOID
    -+extract_bl_target (const uint32_t * pc)
    ++extract_bl_target (const uint32_t *pc)
     +{
     +  assert (IS_INSN (pc, bl) || IS_INSN (pc, b));
     +  int32_t offset = *pc & ~bl_mask;
    @@ winsup/cygwin/fastcwd_aarch64.cc (new)
     +}
     +
     +static inline uint64_t
    -+extract_adrp_address (const uint32_t * pc)
    ++extract_adrp_address (const uint32_t *pc)
     +{
     +  assert (IS_INSN (pc, adrp));
     +  uint64_t adrp_base = (uint64_t) pc & ~0xFFF;
-- 
2.48.1.windows.1

Reply via email to