On Tue, 18 Mar 2025, Corinna Vinschen wrote:

> Subdir of winsup/cygwin, probably.  What I'm most curious about is the
> size it adds to the DLL.  I wonder if, say, an extra 32K is really
> usefully spent, given it only checks a small part of ntdll.dll, and only
> once per process tree, too.

I did this with msys-2.0.dll, but that shouldn't matter for the delta.
All sizes below are for the stripped msys-2.0.dll.
start:
3,246,118 bytes
with udis86 vendored, but not called:
3,247,142 bytes
with find_fast_cwd_pointer rewritten to use udis86:
3,328,550 bytes

(I know the second one isn't realistic, the linker could exclude unused
code, I was just kind of curious)

This is with all the "translate to assembly text, intel or at&t syntax"
and "table of strings for opcodes" stuff removed to try to save space;
it is still a net increase of 82,432 bytes.

Here's the new find_fast_cwd_pointer function:

static fcwd_access_t **
find_fast_cwd_pointer ()
{
  /* Fetch entry points of relevant functions in ntdll.dll. */
  HMODULE ntdll = GetModuleHandle ("ntdll.dll");
  if (!ntdll)
    return NULL;
  const uint8_t *get_dir = (const uint8_t *)
                           GetProcAddress (ntdll, "RtlGetCurrentDirectory_U");
  const uint8_t *ent_crit = (const uint8_t *)
                            GetProcAddress (ntdll, "RtlEnterCriticalSection");
  if (!get_dir || !ent_crit)
    return NULL;
  ud_t ud_obj;
  ud_init (&ud_obj);
  ud_set_mode (&ud_obj, 64);
  ud_set_input_buffer (&ud_obj, (const uint8_t *) get_dir, 80);
  ud_set_pc (&ud_obj, (const uint64_t) get_dir);
  const ud_operand_t *opr;
  /* Search first relative call instruction in RtlGetCurrentDirectory_U. */
  const uint8_t *use_cwd = NULL;
  while (ud_disassemble (&ud_obj))
    {
      if (ud_insn_mnemonic (&ud_obj) == UD_Icall)
        {
          opr = ud_insn_opr (&ud_obj, 0);
          if (opr->type == UD_OP_JIMM && opr->size == 32)
            {
              /* Fetch offset from instruction and compute address of called
                 function.  This function actually fetches the current FAST_CWD
                 instance and performs some other actions, not important to us.
               */
              use_cwd = (const uint8_t *) (ud_insn_off (&ud_obj) +
                                           ud_insn_len (&ud_obj) +
                                           opr->lval.sdword);
              break;
            }
        }
    }
  if (!use_cwd)
    return NULL;
  ud_set_input_buffer (&ud_obj, (const uint8_t *) use_cwd, 120);
  ud_set_pc (&ud_obj, (const uint64_t) use_cwd);

  /* Next we search for the locking mechanism and perform a sanity check.
     On Pre- (or Post-) Windows 8 we basically look for the
     RtlEnterCriticalSection call.  Windows 8 does not call
     RtlEnterCriticalSection.  The code manipulates the FastPebLock manually,
     probably because RtlEnterCriticalSection has been converted to an inline
     function.  Either way, we test if the code uses the FastPebLock. */
  PRTL_CRITICAL_SECTION lockaddr = NULL;

  /* both cases have an `lea rel(%rip)` on the lock */
  while (ud_disassemble (&ud_obj))
    {
      if (ud_insn_mnemonic (&ud_obj) == UD_Ilea)
        {
          /* this seems to follow intel syntax, in that operand 0 is the
             dest and 1 is the src */
          opr = ud_insn_opr (&ud_obj, 1);
          if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
              opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32)
            {
              lockaddr = (PRTL_CRITICAL_SECTION) (ud_insn_off (&ud_obj) +
                                                  ud_insn_len (&ud_obj) +
                                                  opr->lval.sdword);
              break;
            }
        }
    }

  /* Test if lock address is FastPebLock. */
  if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
    return NULL;

  /* Next is either the `callq RtlEnterCriticalSection', or on Windows 8,
     a `lock btr` */
  while (ud_disassemble (&ud_obj))
    {
      ud_mnemonic_code_t insn = ud_insn_mnemonic (&ud_obj);
      if (insn == UD_Icall)
        {
          opr = ud_insn_opr (&ud_obj, 0);
          if (opr->type == UD_OP_JIMM && opr->size == 32)
            {
              if (ent_crit != (const uint8_t *) (ud_insn_off (&ud_obj) +
                                                 ud_insn_len (&ud_obj) +
                                                 opr->lval.sdword))
                return NULL;
              break;
            }
        }
      else if (insn == UD_Ibtr && ud_obj.pfx_lock)
        {
          /* for Windows 8 */
          opr = ud_insn_opr (&ud_obj, 0);
          if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
              opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 &&
              opr->size == 32)
            {
              if (lockaddr != (PRTL_CRITICAL_SECTION) (ud_insn_off (&ud_obj) +
                                ud_insn_len (&ud_obj) + opr->lval.sdword -
                                offsetof (RTL_CRITICAL_SECTION, LockCount)))
                return NULL;

              break;
            }
        }
    }

  fcwd_access_t **f_cwd_ptr = NULL;
  ud_type_t reg = UD_NONE;
  /* now we're looking for a movq rel(%rip) */
  while (ud_disassemble (&ud_obj))
    {
      if (ud_insn_mnemonic (&ud_obj) == UD_Imov)
        {
          const ud_operand_t *opr0 = ud_insn_opr (&ud_obj, 0);
          opr = ud_insn_opr (&ud_obj, 1);
          if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
              opr->index == UD_NONE && opr->scale == 0 &&
              opr->offset == 32 && opr->size == 64 &&
              opr0->type == UD_OP_REG)
            {
              f_cwd_ptr = (fcwd_access_t **) (ud_insn_off (&ud_obj) +
                                              ud_insn_len (&ud_obj) +
                                              opr->lval.sdword);
              reg = opr0->base;
              break;
            }
        }
    }
  /* Check that the next instruction tests if the fetched value is NULL. */
  if (!ud_disassemble (&ud_obj) || ud_insn_mnemonic (&ud_obj) != UD_Itest)
    return NULL;

  opr = ud_insn_opr (&ud_obj, 0);
  if (opr->type != UD_OP_REG || opr->base != reg ||
      memcmp (opr, ud_insn_opr (&ud_obj, 1), offsetof (ud_operand_t, _legacy)))
    return NULL;
  return f_cwd_ptr;
}

Reply via email to