On Thu, 19 Jun 2025, Jeremy Drake via Cygwin-patches wrote:
> On Thu, 19 Jun 2025, Thirumalai Nagalingam wrote:
>
> > Hi Jeremy,
> >
> > Thanks again for the quick follow-up. `ldr` is the correct choice here,
> > it's a nice idea for reducing loads.
> > I've updated the patch to use it for loading stackaddr and stackbase.
> > Also added the Signed-off-by line to the commit message as requested.
> >
> > Patch is In-lined below and attached.
> >
> > In-lined patch:
> >
> > From 609cc27fa50700ab135dff421f08473c29dcb533 Mon Sep 17 00:00:00 2001
> > From: Thirumalai Nagalingam <[email protected]>
> > Date: Fri, 20 Jun 2025 02:12:51 +0530
> > Subject: [PATCH] Aarch64: Add inline assembly pthread wrapper
> >
> > This patch adds AArch64-specific inline assembly block for the pthread
> > wrapper used to bootstrap new threads. It sets up the thread stack,
> > adjusts for __CYGTLS_PADSIZE__, releases the original stack via
> > VirtualFree, and invokes the target thread function.
> >
> > Signed-off-by: Thirumalai Nagalingam
> > <[email protected]>
> > ---
> > winsup/cygwin/create_posix_thread.cc | 18 +++++++++++++++++-
> > 1 file changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/winsup/cygwin/create_posix_thread.cc
> > b/winsup/cygwin/create_posix_thread.cc
> > index 3fcd61707..592aaf1a5 100644
> > --- a/winsup/cygwin/create_posix_thread.cc
> > +++ b/winsup/cygwin/create_posix_thread.cc
> > @@ -75,7 +75,7 @@ pthread_wrapper (PVOID arg)
> > /* Initialize new _cygtls. */
> > _my_tls.init_thread (wrapper_arg.stackbase - __CYGTLS_PADSIZE__,
> > (DWORD (*)(void*, void*)) wrapper_arg.func);
> > -#ifdef __x86_64__
> > +#if defined(__x86_64__)
> > __asm__ ("\n\
> > leaq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\
> > movq (%%rbx), %%r12 # Load thread func into r12 \n\
> > @@ -99,6 +99,22 @@ pthread_wrapper (PVOID arg)
> > call *%%r12 # Call thread func \n"
> > : : [WRAPPER_ARG] "o" (wrapper_arg),
> > [CYGTLS] "i" (__CYGTLS_PADSIZE__));
> > +#elif defined(__aarch64__)
> > + /* Sets up a new thread stack, frees the original OS stack,
> > + * and calls the thread function with its arg using AArch64 ABI. */
> > + __asm__ __volatile__ ("\n\
> > + mov x19, %[WRAPPER_ARG] // x19 = &wrapper_arg \n\
> > + ldp x0, x10, [x19, #16] // x0 = stackaddr, x10 = stackbase \n\
> > + sub sp, x10, %[CYGTLS] // sp = stackbase - (CYGTLS) \n\
> > + mov fp, xzr // clear frame pointer (x29) \n\
> > + mov x1, xzr // x1 = 0 (dwSize) \n\
> > + mov x2, #0x8000 // x2 = MEM_RELEASE \n\
> > + bl VirtualFree // free original stack \n\
> > + ldp x19, x0, [x19] // x19 = func, x0 = arg \n\
> > + blr x19 // call thread function \n"
> > + : : [WRAPPER_ARG] "r" (&wrapper_arg),
> > + [CYGTLS] "r" (__CYGTLS_PADSIZE__)
> > + : "x0", "x1", "x2", "x10", "x19", "x29", "memory");
> > #else
> > #error unimplemented for this target
> > #endif
> >
>
> LGTM. I'll wait at least a day before pushing in case somebody else has
> any objections.
Pushed now, thanks
