On Sat, Aug 30, 2014 at 05:48:38PM +0000, John Baldwin wrote: > Author: jhb > Date: Sat Aug 30 17:48:38 2014 > New Revision: 270850 > URL: http://svnweb.freebsd.org/changeset/base/270850 > > Log: > Save and restore FPU state across suspend and resume. In earlier revisions > of this patch, resumectx() called npxresume() directly, but that doesn't > work because resumectx() runs with a non-standard %cs selector. Instead, > all of the FPU suspend/resume handling is done in C. > > MFC after: 1 week > > Modified: > head/sys/i386/i386/mp_machdep.c > head/sys/i386/i386/swtch.s > head/sys/i386/include/npx.h > head/sys/i386/include/pcb.h > head/sys/i386/isa/npx.c > head/sys/x86/acpica/acpi_wakeup.c > > Modified: head/sys/i386/i386/mp_machdep.c > ============================================================================== > --- head/sys/i386/i386/mp_machdep.c Sat Aug 30 17:39:28 2014 > (r270849) > +++ head/sys/i386/i386/mp_machdep.c Sat Aug 30 17:48:38 2014 > (r270850) > @@ -1522,9 +1522,15 @@ cpususpend_handler(void) > > cpu = PCPU_GET(cpuid); > if (savectx(susppcbs[cpu])) { > +#ifdef DEV_NPX > + npxsuspend(&suspcbs[cpu]->pcb_fpususpend); > +#endif > wbinvd(); > CPU_SET_ATOMIC(cpu, &suspended_cpus); > } else { > +#ifdef DEV_NPX > + npxresume(&suspcbs[cpu]->pcb_fpususpend); > +#endif > pmap_init_pat(); > PCPU_SET(switchtime, 0); > PCPU_SET(switchticks, ticks); > > Modified: head/sys/i386/i386/swtch.s > ============================================================================== > --- head/sys/i386/i386/swtch.s Sat Aug 30 17:39:28 2014 > (r270849) > +++ head/sys/i386/i386/swtch.s Sat Aug 30 17:48:38 2014 > (r270850) > @@ -416,45 +416,6 @@ ENTRY(savectx) > sldt PCB_LDT(%ecx) > str PCB_TR(%ecx) > > -#ifdef DEV_NPX > - /* > - * If fpcurthread == NULL, then the npx h/w state is irrelevant and the > - * state had better already be in the pcb. This is true for forks > - * but not for dumps (the old book-keeping with FP flags in the pcb > - * always lost for dumps because the dump pcb has 0 flags). > - * > - * If fpcurthread != NULL, then we have to save the npx h/w state to > - * fpcurthread's pcb and copy it to the requested pcb, or save to the > - * requested pcb and reload. Copying is easier because we would > - * have to handle h/w bugs for reloading. We used to lose the > - * parent's npx state for forks by forgetting to reload. > - */ > - pushfl > - CLI > - movl PCPU(FPCURTHREAD),%eax > - testl %eax,%eax > - je 1f > - > - pushl %ecx > - movl TD_PCB(%eax),%eax > - movl PCB_SAVEFPU(%eax),%eax > - pushl %eax > - pushl %eax > - call npxsave > - addl $4,%esp > - popl %eax > - popl %ecx > - > - pushl $PCB_SAVEFPU_SIZE > - leal PCB_USERFPU(%ecx),%ecx > - pushl %ecx > - pushl %eax > - call bcopy > - addl $12,%esp > -1: > - popfl > -#endif /* DEV_NPX */ > - > movl $1,%eax > ret > END(savectx) > @@ -519,10 +480,6 @@ ENTRY(resumectx) > movl PCB_DR7(%ecx),%eax > movl %eax,%dr7 > > -#ifdef DEV_NPX > - /* XXX FIX ME */ > -#endif > - > /* Restore other registers */ > movl PCB_EDI(%ecx),%edi > movl PCB_ESI(%ecx),%esi > > Modified: head/sys/i386/include/npx.h > ============================================================================== > --- head/sys/i386/include/npx.h Sat Aug 30 17:39:28 2014 > (r270849) > +++ head/sys/i386/include/npx.h Sat Aug 30 17:48:38 2014 > (r270850) > @@ -53,8 +53,10 @@ void npxexit(struct thread *td); > int npxformat(void); > int npxgetregs(struct thread *td); > void npxinit(void); > +void npxresume(union savefpu *addr); > void npxsave(union savefpu *addr); > void npxsetregs(struct thread *td, union savefpu *addr); > +void npxsuspend(union savefpu *addr); > int npxtrap_x87(void); > int npxtrap_sse(void); > void npxuserinited(struct thread *); > > Modified: head/sys/i386/include/pcb.h > ============================================================================== > --- head/sys/i386/include/pcb.h Sat Aug 30 17:39:28 2014 > (r270849) > +++ head/sys/i386/include/pcb.h Sat Aug 30 17:48:38 2014 > (r270850) > @@ -90,6 +90,8 @@ struct pcb { > struct region_descriptor pcb_idt; > uint16_t pcb_ldt; > uint16_t pcb_tr; > + > + union savefpu pcb_fpususpend; > }; Now pcb consumes 512 bytes from each thread' kernel stack, which mostly stay unused.
Amd64 only stores the pointer to the fpususpend context in pcb, and acpu_wakeup() allocates the memory as needed. Even this is a waste of 8 bytes which are not needed for normal kernel operations. Suspend FPU context, as well as amd64 MSRs should go out of pcb into some per-cpu suspend data block.
pgp2cvbEvK5RX.pgp
Description: PGP signature