On Tue, 18 Mar 2025, Corinna Vinschen wrote:
> Subdir of winsup/cygwin, probably. What I'm most curious about is the
> size it adds to the DLL. I wonder if, say, an extra 32K is really
> usefully spent, given it only checks a small part of ntdll.dll, and only
> once per process tree, too.
I did this with msys-2.0.dll, but it shouldn't matter as a delta. all are stripped msys-2.0.dll size start: 3,246,118 bytes with udis86 vendored, but not called: 3,247,142 bytes with find_fast_cwd_pointer rewritten to use udis86: 3,328,550 bytes (I know the second one isn't realistic, the linker could exclude unused code, I was just kind of curious) This is with all the "translate to assembly text, intel or at&t syntax" and "table of strings for opcodes" stuff removed to try to save space, still a net increase of 82,432 bytes. Here's the new find_fast_cwd_pointer function: static fcwd_access_t ** find_fast_cwd_pointer () { /* Fetch entry points of relevant functions in ntdll.dll. */ HMODULE ntdll = GetModuleHandle ("ntdll.dll"); if (!ntdll) return NULL; const uint8_t *get_dir = (const uint8_t *) GetProcAddress (ntdll, "RtlGetCurrentDirectory_U"); const uint8_t *ent_crit = (const uint8_t *) GetProcAddress (ntdll, "RtlEnterCriticalSection"); if (!get_dir || !ent_crit) return NULL; ud_t ud_obj; ud_init (&ud_obj); ud_set_mode (&ud_obj, 64); ud_set_input_buffer (&ud_obj, (const uint8_t *) get_dir, 80); ud_set_pc (&ud_obj, (const uint64_t) get_dir); const ud_operand_t *opr; /* Search first relative call instruction in RtlGetCurrentDirectory_U. */ const uint8_t *use_cwd = NULL; while (ud_disassemble (&ud_obj)) { if (ud_insn_mnemonic (&ud_obj) == UD_Icall) { opr = ud_insn_opr (&ud_obj, 0); if (opr->type == UD_OP_JIMM && opr->size == 32) { /* Fetch offset from instruction and compute address of called function. This function actually fetches the current FAST_CWD instance and performs some other actions, not important to us. */ use_cwd = (const uint8_t *) (ud_insn_off (&ud_obj) + ud_insn_len (&ud_obj) + opr->lval.sdword); break; } } } if (!use_cwd) return NULL; ud_set_input_buffer (&ud_obj, (const uint8_t *) use_cwd, 120); ud_set_pc (&ud_obj, (const uint64_t) use_cwd); /* Next we search for the locking mechanism and perform a sanity check. 
On Pre- (or Post-) Windows 8 we basically look for the RtlEnterCriticalSection call. Windows 8 does not call RtlEnterCriticalSection. The code manipulates the FastPebLock manually, probably because RtlEnterCriticalSection has been converted to an inline function. Either way, we test if the code uses the FastPebLock. */ PRTL_CRITICAL_SECTION lockaddr = NULL; /* both cases have an `lea rel(%rip)` on the lock */ while (ud_disassemble (&ud_obj)) { if (ud_insn_mnemonic (&ud_obj) == UD_Ilea) { /* this seems to follow intel syntax, in that operand 0 is the dest and 1 is the src */ opr = ud_insn_opr (&ud_obj, 1); if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP && opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32) { lockaddr = (PRTL_CRITICAL_SECTION) (ud_insn_off (&ud_obj) + ud_insn_len (&ud_obj) + opr->lval.sdword); break; } } } /* Test if lock address is FastPebLock. */ if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock) return NULL; /* Next is either the `callq RtlEnterCriticalSection', or on Windows 8, a `lock btr` */ while (ud_disassemble (&ud_obj)) { ud_mnemonic_code_t insn = ud_insn_mnemonic (&ud_obj); if (insn == UD_Icall) { opr = ud_insn_opr (&ud_obj, 0); if (opr->type == UD_OP_JIMM && opr->size == 32) { if (ent_crit != (const uint8_t *) (ud_insn_off (&ud_obj) + ud_insn_len (&ud_obj) + opr->lval.sdword)) return NULL; break; } } else if (insn == UD_Ibtr && ud_obj.pfx_lock) { /* for Windows 8 */ opr = ud_insn_opr (&ud_obj, 0); if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP && opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 && opr->size == 32) { if (lockaddr != (PRTL_CRITICAL_SECTION) (ud_insn_off (&ud_obj) + ud_insn_len (&ud_obj) + opr->lval.sdword - offsetof (RTL_CRITICAL_SECTION, LockCount))) return NULL; break; } } } fcwd_access_t **f_cwd_ptr = NULL; ud_type_t reg = UD_NONE; /* now we're looking for a movq rel(%rip) */ while (ud_disassemble (&ud_obj)) { if (ud_insn_mnemonic (&ud_obj) == UD_Imov) { const ud_operand_t *opr0 = 
ud_insn_opr (&ud_obj, 0); opr = ud_insn_opr (&ud_obj, 1); if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP && opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 && opr->size == 64 && opr0->type == UD_OP_REG) { f_cwd_ptr = (fcwd_access_t **) (ud_insn_off (&ud_obj) + ud_insn_len (&ud_obj) + opr->lval.sdword); reg = opr0->base; break; } } } /* Check that the next instruction tests if the fetched value is NULL. */ if (!ud_disassemble (&ud_obj) || ud_insn_mnemonic (&ud_obj) != UD_Itest) return NULL; opr = ud_insn_opr (&ud_obj, 0); if (opr->type != UD_OP_REG || opr->base != reg || memcmp (opr, ud_insn_opr (&ud_obj, 1), offsetof (ud_operand_t, _legacy))) return NULL; return f_cwd_ptr; }