The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=df8dd6025af88a99d34f549fa9591a9b8f9b75b1

commit df8dd6025af88a99d34f549fa9591a9b8f9b75b1
Author:     Konstantin Belousov <k...@freebsd.org>
AuthorDate: 2021-09-13 21:05:47 +0000
Commit:     Konstantin Belousov <k...@freebsd.org>
CommitDate: 2021-09-21 17:20:15 +0000

    amd64: stop using top of the thread's kernel stack for FPU user save area
    
    Instead, do one more allocation at thread creation time.  This frees
    a lot of space on the stack.
    
    Also do not use alloca() for temporary storage in the signal delivery
    function sendsig() and the signal return syscall sys_sigreturn().  This
    saves an equal amount of space, again at the cost of one more allocation
    at thread creation time.
    
    A useful experiment now would be to reduce KSTACK_PAGES.
    
    Reviewed by:    jhb, markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D31954
---
 sys/amd64/amd64/exec_machdep.c |  4 ++--
 sys/amd64/amd64/fpu.c          |  2 ++
 sys/amd64/amd64/machdep.c      | 14 --------------
 sys/amd64/amd64/vm_machdep.c   | 22 +++++++++++++---------
 sys/amd64/ia32/ia32_signal.c   |  6 +++---
 sys/amd64/include/proc.h       |  2 ++
 sys/kern/kern_thread.c         |  2 +-
 7 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c
index 1297117638d6..48bda05f9685 100644
--- a/sys/amd64/amd64/exec_machdep.c
+++ b/sys/amd64/amd64/exec_machdep.c
@@ -135,7 +135,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 
        if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
                xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
-               xfpusave = __builtin_alloca(xfpusave_len);
+               xfpusave = (char *)td->td_md.md_fpu_scratch;
        } else {
                xfpusave_len = 0;
                xfpusave = NULL;
@@ -674,7 +674,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
                if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
                    sizeof(struct savefpu))
                        return (EINVAL);
-               xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+               xfpustate = (char *)td->td_md.md_fpu_scratch;
                ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
                    mcp->mc_xfpustate_len);
                if (ret != 0)
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index d7936b3b1922..24986958d4ca 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -448,6 +448,8 @@ fpuinitstate(void *arg __unused)
                    xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
        }
 
+       cpu_thread_alloc(&thread0);
+
        saveintr = intr_disable();
        stop_emulating();
 
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index d4e2356a9ae1..5c9b64526609 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1258,7 +1258,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
        caddr_t kmdp;
        int gsel_tss, x;
        struct pcpu *pc;
-       struct xstate_hdr *xhdr;
        uint64_t cr3, rsp0;
        pml4_entry_t *pml4e;
        pdp_entry_t *pdpe;
@@ -1564,19 +1563,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
        msgbufinit(msgbufp, msgbufsize);
        fpuinit();
 
-       /*
-        * Reinitialize thread0's stack base now that the xsave area size is
-        * known.  Set up thread0's pcb save area after fpuinit calculated fpu
-        * save area size.  Zero out the extended state header in fpu save area.
-        */
-       set_top_of_stack_td(&thread0);
-       thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
-       bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size);
-       if (use_xsave) {
-               xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
-                   1);
-               xhdr->xstate_bv = xsave_mask;
-       }
        /* make an initial tss so cpu can get interrupt stack on syscall! */
        rsp0 = thread0.td_md.md_stack_base;
        /* Ensure the stack is aligned to 16 bytes */
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 4567e6e0eb5d..e42d16d61b3a 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -90,19 +90,17 @@ void
 set_top_of_stack_td(struct thread *td)
 {
        td->td_md.md_stack_base = td->td_kstack +
-           td->td_kstack_pages * PAGE_SIZE -
-           roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
+           td->td_kstack_pages * PAGE_SIZE;
 }
 
 struct savefpu *
 get_pcb_user_save_td(struct thread *td)
 {
-       vm_offset_t p;
-
-       p = td->td_md.md_stack_base;
-       KASSERT((p % XSAVE_AREA_ALIGN) == 0,
-           ("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
-       return ((struct savefpu *)p);
+       KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save %
+           XSAVE_AREA_ALIGN) == 0,
+           ("Unaligned pcb_user_save area ptr %p td %p",
+           td->td_md.md_usr_fpu_save, td));
+       return (td->td_md.md_usr_fpu_save);
 }
 
 struct pcb *
@@ -393,6 +391,8 @@ cpu_thread_alloc(struct thread *td)
        set_top_of_stack_td(td);
        td->td_pcb = pcb = get_pcb_td(td);
        td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
+       td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
+       td->td_md.md_fpu_scratch = fpu_save_area_alloc();
        pcb->pcb_save = get_pcb_user_save_pcb(pcb);
        if (use_xsave) {
                xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
@@ -404,8 +404,12 @@ cpu_thread_alloc(struct thread *td)
 void
 cpu_thread_free(struct thread *td)
 {
-
        cpu_thread_clean(td);
+
+       fpu_save_area_free(td->td_md.md_usr_fpu_save);
+       td->td_md.md_usr_fpu_save = NULL;
+       fpu_save_area_free(td->td_md.md_fpu_scratch);
+       td->td_md.md_fpu_scratch = NULL;
 }
 
 bool
diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
index 49b5797d68fd..9b67c7001a87 100644
--- a/sys/amd64/ia32/ia32_signal.c
+++ b/sys/amd64/ia32/ia32_signal.c
@@ -210,7 +210,7 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp)
                if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
                    sizeof(struct savefpu))
                        return (EINVAL);
-               xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+               xfpustate = (char *)td->td_md.md_fpu_scratch;
                ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
                    mcp->mc_xfpustate_len);
                if (ret != 0)
@@ -579,7 +579,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 
        if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
                xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
-               xfpusave = __builtin_alloca(xfpusave_len);
+               xfpusave = (char *)td->td_md.md_fpu_scratch;
        } else {
                xfpusave_len = 0;
                xfpusave = NULL;
@@ -882,7 +882,7 @@ freebsd32_sigreturn(td, uap)
                            td->td_proc->p_pid, td->td_name, xfpustate_len);
                        return (EINVAL);
                }
-               xfpustate = __builtin_alloca(xfpustate_len);
+               xfpustate = (char *)td->td_md.md_fpu_scratch;
                error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
                    xfpustate, xfpustate_len);
                if (error != 0) {
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
index 0f8cf50e326d..bd07f70f8d44 100644
--- a/sys/amd64/include/proc.h
+++ b/sys/amd64/include/proc.h
@@ -75,6 +75,8 @@ struct mdthread {
        int     md_efirt_dis_pf;        /* (k) */
        struct pcb md_pcb;
        vm_offset_t md_stack_base;
+       struct savefpu *md_usr_fpu_save;
+       struct savefpu *md_fpu_scratch;
 };
 
 struct mdproc {
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 65c5cc65c87e..62f939406374 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110,
     "struct thread KBI td_pflags");
 _Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
     "struct proc KBI p_flag");
_______________________________________________
dev-commits-src-main@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"

Reply via email to