Author: jhb
Date: Sun Nov  2 22:58:30 2014
New Revision: 273995
URL: https://svnweb.freebsd.org/changeset/base/273995

Log:
  MFamd64: Add support for extended FPU states on i386.  This includes
  support for AVX on i386.
  - Similar to amd64, move the FPU save area out of the PCB and instead
    store saved FPU state in a variable-sized buffer after the PCB on the
    stack.
  - To support the variable PCB location, alter the locore code to only use
    the bottom-most page of proc0stack for init386().  init386() returns
    the correct stack pointer to locore which adjusts the stack for thread0
    before calling mi_startup().
  - Don't bother setting cr3 in thread0's pcb in locore before calling
    init386().  It wasn't used (init386() overwrote it at the end) and
    it doesn't work with the variable-sized FPU save area.
  - Remove the new-bus attachment from npx.  This was only ever useful for
    external co-processors using IRQ13, but those have not been supported
    for several years.  npxinit() is now called much earlier during boot
    (init386()) similar to amd64.
  - Implement PT_{GET,SET}XSTATE and I386_GET_XFPUSTATE.
  - npxsave() is now only called from context switch contexts so it can
    use XSAVEOPT.
  
  Differential Revision:        https://reviews.freebsd.org/D1058
  Reviewed by:  kib
  Tested on:    FreeBSD/i386 VM under bhyve on Intel i5-2520

Modified:
  head/sys/amd64/amd64/genassym.c
  head/sys/amd64/amd64/sys_machdep.c
  head/sys/amd64/amd64/vm_machdep.c
  head/sys/i386/i386/genassym.c
  head/sys/i386/i386/initcpu.c
  head/sys/i386/i386/locore.s
  head/sys/i386/i386/machdep.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/i386/ptrace_machdep.c
  head/sys/i386/i386/sys_machdep.c
  head/sys/i386/i386/trap.c
  head/sys/i386/i386/vm_machdep.c
  head/sys/i386/include/cpufunc.h
  head/sys/i386/include/md_var.h
  head/sys/i386/include/npx.h
  head/sys/i386/include/pcb.h
  head/sys/i386/isa/npx.c
  head/sys/i386/linux/linux_ptrace.c
  head/sys/x86/acpica/acpi_wakeup.c

Modified: head/sys/amd64/amd64/genassym.c
==============================================================================
--- head/sys/amd64/amd64/genassym.c     Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/amd64/amd64/genassym.c     Sun Nov  2 22:58:30 2014        (r273995)
@@ -156,8 +156,6 @@ ASSYM(PCB_ONFAULT, offsetof(struct pcb, 
 ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd));
 ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
 ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
-ASSYM(PCB_USERFPU, sizeof(struct pcb));
 ASSYM(PCB_EFER, offsetof(struct pcb, pcb_efer));
 ASSYM(PCB_STAR, offsetof(struct pcb, pcb_star));
 ASSYM(PCB_LSTAR, offsetof(struct pcb, pcb_lstar));

Modified: head/sys/amd64/amd64/sys_machdep.c
==============================================================================
--- head/sys/amd64/amd64/sys_machdep.c  Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/amd64/amd64/sys_machdep.c  Sun Nov  2 22:58:30 2014        (r273995)
@@ -319,7 +319,7 @@ sysarch(td, uap)
                fpugetregs(td);
                error = copyout((char *)(get_pcb_user_save_td(td) + 1),
                    a64xfpu.addr, a64xfpu.len);
-               return (error);
+               break;
 
        default:
                error = EINVAL;

Modified: head/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- head/sys/amd64/amd64/vm_machdep.c   Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/amd64/amd64/vm_machdep.c   Sun Nov  2 22:58:30 2014        (r273995)
@@ -127,7 +127,7 @@ get_pcb_td(struct thread *td)
 void *
 alloc_fpusave(int flags)
 {
-       struct pcb *res;
+       void *res;
        struct savefpu_ymm *sf;
 
        res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);

Modified: head/sys/i386/i386/genassym.c
==============================================================================
--- head/sys/i386/i386/genassym.c       Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/genassym.c       Sun Nov  2 22:58:30 2014        (r273995)
@@ -144,7 +144,6 @@ ASSYM(PCB_DR2, offsetof(struct pcb, pcb_
 ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
-ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save));
 ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
@@ -154,7 +153,6 @@ ASSYM(PCB_GSD, offsetof(struct pcb, pcb_
 ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
 ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 
 ASSYM(PCB_SIZE, sizeof(struct pcb));

Modified: head/sys/i386/i386/initcpu.c
==============================================================================
--- head/sys/i386/i386/initcpu.c        Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/initcpu.c        Sun Nov  2 22:58:30 2014        (r273995)
@@ -102,6 +102,7 @@ u_int       cpu_mxcsr_mask;         /* Valid bits in 
 #endif
 u_int  cpu_clflush_line_size = 32;
 u_int  cpu_stdext_feature;
+u_int  cpu_max_ext_state_size;
 u_int  cpu_mon_mwait_flags;    /* MONITOR/MWAIT flags (CPUID.05H.ECX) */
 u_int  cpu_mon_min_size;       /* MONITOR minimum range size, bytes */
 u_int  cpu_mon_max_size;       /* MONITOR minimum range size, bytes */

Modified: head/sys/i386/i386/locore.s
==============================================================================
--- head/sys/i386/i386/locore.s Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/locore.s Sun Nov  2 22:58:30 2014        (r273995)
@@ -302,17 +302,14 @@ NON_GPROF_ENTRY(btext)
 begin:
        /* set up bootstrap stack */
        movl    proc0kstack,%eax        /* location of in-kernel stack */
-                       /* bootstrap stack end location */
-       leal    (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
 
-       xorl    %ebp,%ebp               /* mark end of frames */
+       /*
+        * Only use bottom page for init386().  init386() calculates the
+        * PCB + FPU save area size and returns the true top of stack.
+        */
+       leal    PAGE_SIZE(%eax),%esp
 
-#ifdef PAE
-       movl    IdlePDPT,%esi
-#else
-       movl    IdlePTD,%esi
-#endif
-       movl    %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
+       xorl    %ebp,%ebp               /* mark end of frames */
 
        pushl   physfree                /* value of first for init386(first) */
        call    init386                 /* wire 386 chip for unix operation */
@@ -324,6 +321,9 @@ begin:
         */
        addl    $4,%esp
 
+       /* Switch to true top of stack. */
+       movl    %eax,%esp
+
        call    mi_startup              /* autoconfiguration, mountroot etc */
        /* NOTREACHED */
        addl    $0,%esp                 /* for db_numargs() again */

Modified: head/sys/i386/i386/machdep.c
==============================================================================
--- head/sys/i386/i386/machdep.c        Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/machdep.c        Sun Nov  2 22:58:30 2014        (r273995)
@@ -181,7 +181,7 @@ extern unsigned long physfree;
 /* Sanity check for __curthread() */
 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
 
-extern void init386(int first);
+extern register_t init386(int first);
 extern void dblfault_handler(void);
 
 #define        CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
@@ -193,8 +193,10 @@ extern void dblfault_handler(void);
 
 static void cpu_startup(void *);
 static void fpstate_drop(struct thread *td);
-static void get_fpcontext(struct thread *td, mcontext_t *mcp);
-static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp,
+    char *xfpusave, size_t xfpusave_len);
+static int  set_fpcontext(struct thread *td, const mcontext_t *mcp,
+    char *xfpustate, size_t xfpustate_len);
 #ifdef CPU_ENABLE_SSE
 static void set_fpregs_xmm(struct save87 *, struct savexmm *);
 static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
@@ -363,7 +365,7 @@ cpu_startup(dummy)
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
- * at top to call routine, followed by kcall
+ * at top to call routine, followed by call
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
@@ -642,6 +644,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
        char *sp;
        struct trapframe *regs;
        struct segment_descriptor *sdp;
+       char *xfpusave;
+       size_t xfpusave_len;
        int sig;
        int oonstack;
 
@@ -666,6 +670,14 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
        regs = td->td_frame;
        oonstack = sigonstack(regs->tf_esp);
 
+       if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
+               xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
+               xfpusave = __builtin_alloca(xfpusave_len);
+       } else {
+               xfpusave_len = 0;
+               xfpusave = NULL;
+       }
+
        /* Save user context. */
        bzero(&sf, sizeof(sf));
        sf.sf_uc.uc_sigmask = *mask;
@@ -676,7 +688,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
        sf.sf_uc.uc_mcontext.mc_gs = rgs();
        bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
        sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
-       get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+       get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
        fpstate_drop(td);
        /*
         * Unconditionally fill the fsbase and gsbase into the mcontext.
@@ -687,7 +699,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
        sdp = &td->td_pcb->pcb_gsd;
        sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
            sdp->sd_lobase;
-       sf.sf_uc.uc_mcontext.mc_flags = 0;
        bzero(sf.sf_uc.uc_mcontext.mc_spare2,
            sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
        bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
@@ -695,13 +706,19 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
        /* Allocate space for the signal handler context. */
        if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
-               sp = td->td_sigstk.ss_sp +
-                   td->td_sigstk.ss_size - sizeof(struct sigframe);
+               sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 #if defined(COMPAT_43)
                td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
        } else
-               sp = (char *)regs->tf_esp - sizeof(struct sigframe);
+               sp = (char *)regs->tf_esp - 128;
+       if (xfpusave != NULL) {
+               sp -= xfpusave_len;
+               sp = (char *)((unsigned int)sp & ~0x3F);
+               sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+       }
+       sp -= sizeof(struct sigframe);
+
        /* Align to 16 bytes. */
        sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
 
@@ -762,7 +779,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
        /*
         * Copy the sigframe out to the user's stack.
         */
-       if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+       if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+           (xfpusave != NULL && copyout(xfpusave,
+           (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
+           != 0)) {
 #ifdef DEBUG
                printf("process %ld has trashed its stack\n", (long)p->p_pid);
 #endif
@@ -1022,11 +1042,16 @@ sys_sigreturn(td, uap)
        } */ *uap;
 {
        ucontext_t uc;
+       struct proc *p;
        struct trapframe *regs;
        ucontext_t *ucp;
+       char *xfpustate;
+       size_t xfpustate_len;
        int cs, eflags, error, ret;
        ksiginfo_t ksi;
 
+       p = td->td_proc;
+
        error = copyin(uap->sigcntxp, &uc, sizeof(uc));
        if (error != 0)
                return (error);
@@ -1101,7 +1126,30 @@ sys_sigreturn(td, uap)
                        return (EINVAL);
                }
 
-               ret = set_fpcontext(td, &ucp->uc_mcontext);
+               if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+                       xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+                       if (xfpustate_len > cpu_max_ext_state_size -
+                           sizeof(union savefpu)) {
+                               uprintf(
+                           "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
+                                   p->p_pid, td->td_name, xfpustate_len);
+                               return (EINVAL);
+                       }
+                       xfpustate = __builtin_alloca(xfpustate_len);
+                       error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
+                           xfpustate, xfpustate_len);
+                       if (error != 0) {
+                               uprintf(
+       "pid %d (%s): sigreturn copying xfpustate failed\n",
+                                   p->p_pid, td->td_name);
+                               return (error);
+                       }
+               } else {
+                       xfpustate = NULL;
+                       xfpustate_len = 0;
+               }
+               ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
+                   xfpustate_len);
                if (ret != 0)
                        return (ret);
                bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
@@ -1599,7 +1647,7 @@ exec_setregs(struct thread *td, struct i
                         */
                        reset_dbregs();
                 }
-                pcb->pcb_flags &= ~PCB_DBREGS;
+               pcb->pcb_flags &= ~PCB_DBREGS;
         }
 
        pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
@@ -2853,14 +2901,14 @@ do_next:
 #ifdef XEN
 #define MTOPSIZE (1<<(14 + PAGE_SHIFT))
 
-void
+register_t
 init386(first)
        int first;
 {
        unsigned long gdtmachpfn;
        int error, gsel_tss, metadata_missing, x, pa;
-       size_t kstack0_sz;
        struct pcpu *pc;
+       struct xstate_hdr *xhdr;
        struct callback_register event = {
                .type = CALLBACKTYPE_event,
                .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
@@ -2872,8 +2920,6 @@ init386(first)
 
        thread0.td_kstack = proc0kstack;
        thread0.td_kstack_pages = KSTACK_PAGES;
-       kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
-       thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
 
        /*
         * This may be done better later if it gets more high level
@@ -2953,7 +2999,6 @@ init386(first)
 
        PCPU_SET(prvspace, pc);
        PCPU_SET(curthread, &thread0);
-       PCPU_SET(curpcb, thread0.td_pcb);
 
        /*
         * Initialize mutexes.
@@ -3035,15 +3080,6 @@ init386(first)
        initializecpu();        /* Initialize CPU registers */
        initializecpucache();
 
-       /* make an initial tss so cpu can get interrupt stack on syscall! */
-       /* Note: -16 is so we can grow the trapframe if we came from vm86 */
-       PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
-           kstack0_sz - sizeof(struct pcb) - 16);
-       PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
-       gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-       HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
-           PCPU_GET(common_tss.tss_esp0));
-       
        /* pointer to selector slot for %fs/%gs */
        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
 
@@ -3071,6 +3107,30 @@ init386(first)
        /* now running on new page tables, configured,and u/iom is accessible */
 
        msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+       npxinit(true);
+#endif
+       /*
+        * Set up thread0 pcb after npxinit calculated pcb + fpu save
+        * area size.  Zero out the extended state header in fpu save
+        * area.
+        */
+       thread0.td_pcb = get_pcb_td(&thread0);
+       bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+       if (use_xsave) {
+               xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+                   1);
+               xhdr->xstate_bv = xsave_mask;
+       }
+       PCPU_SET(curpcb, thread0.td_pcb);
+       /* make an initial tss so cpu can get interrupt stack on syscall! */
+       /* Note: -16 is so we can grow the trapframe if we came from vm86 */
+       PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
+       PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+       gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+       HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
+           PCPU_GET(common_tss.tss_esp0));
+       
        /* transfer to user mode */
 
        _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
@@ -3089,22 +3149,23 @@ init386(first)
        thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
 
        cpu_probe_amdc1e();
+
+       /* Location of kernel stack for locore */
+       return ((register_t)thread0.td_pcb);
 }
 
 #else
-void
+register_t
 init386(first)
        int first;
 {
        struct gate_descriptor *gdp;
        int gsel_tss, metadata_missing, x, pa;
-       size_t kstack0_sz;
        struct pcpu *pc;
+       struct xstate_hdr *xhdr;
 
        thread0.td_kstack = proc0kstack;
        thread0.td_kstack_pages = KSTACK_PAGES;
-       kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
-       thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
 
        /*
         * This may be done better later if it gets more high level
@@ -3165,7 +3226,6 @@ init386(first)
        first += DPCPU_SIZE;
        PCPU_SET(prvspace, pc);
        PCPU_SET(curthread, &thread0);
-       PCPU_SET(curpcb, thread0.td_pcb);
 
        /*
         * Initialize mutexes.
@@ -3320,17 +3380,6 @@ init386(first)
        initializecpu();        /* Initialize CPU registers */
        initializecpucache();
 
-       /* make an initial tss so cpu can get interrupt stack on syscall! */
-       /* Note: -16 is so we can grow the trapframe if we came from vm86 */
-       PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
-           kstack0_sz - sizeof(struct pcb) - 16);
-       PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
-       gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-       PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
-       PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
-       PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
-       ltr(gsel_tss);
-
        /* pointer to selector slot for %fs/%gs */
        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
 
@@ -3358,6 +3407,31 @@ init386(first)
        /* now running on new page tables, configured,and u/iom is accessible */
 
        msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+       npxinit(true);
+#endif
+       /*
+        * Set up thread0 pcb after npxinit calculated pcb + fpu save
+        * area size.  Zero out the extended state header in fpu save
+        * area.
+        */
+       thread0.td_pcb = get_pcb_td(&thread0);
+       bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+       if (use_xsave) {
+               xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+                   1);
+               xhdr->xstate_bv = xsave_mask;
+       }
+       PCPU_SET(curpcb, thread0.td_pcb);
+       /* make an initial tss so cpu can get interrupt stack on syscall! */
+       /* Note: -16 is so we can grow the trapframe if we came from vm86 */
+       PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
+       PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+       gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+       PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
+       PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
+       PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+       ltr(gsel_tss);
 
        /* make a call gate to reenter kernel with */
        gdp = &ldt[LSYS5CALLS_SEL].gd;
@@ -3396,6 +3470,9 @@ init386(first)
 #ifdef FDT
        x86_init_fdt();
 #endif
+
+       /* Location of kernel stack for locore */
+       return ((register_t)thread0.td_pcb);
 }
 #endif
 
@@ -3678,11 +3755,11 @@ fill_fpregs(struct thread *td, struct fp
 #endif
 #ifdef CPU_ENABLE_SSE
        if (cpu_fxsr)
-               fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm,
+               fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
                    (struct save87 *)fpregs);
        else
 #endif /* CPU_ENABLE_SSE */
-               bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs,
+               bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
                    sizeof(*fpregs));
        return (0);
 }
@@ -3694,10 +3771,10 @@ set_fpregs(struct thread *td, struct fpr
 #ifdef CPU_ENABLE_SSE
        if (cpu_fxsr)
                set_fpregs_xmm((struct save87 *)fpregs,
-                   &td->td_pcb->pcb_user_save.sv_xmm);
+                   &get_pcb_user_save_td(td)->sv_xmm);
        else
 #endif /* CPU_ENABLE_SSE */
-               bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87,
+               bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
                    sizeof(*fpregs));
 #ifdef DEV_NPX
        npxuserinited(td);
@@ -3743,12 +3820,14 @@ get_mcontext(struct thread *td, mcontext
        mcp->mc_esp = tp->tf_esp;
        mcp->mc_ss = tp->tf_ss;
        mcp->mc_len = sizeof(*mcp);
-       get_fpcontext(td, mcp);
+       get_fpcontext(td, mcp, NULL, 0);
        sdp = &td->td_pcb->pcb_fsd;
        mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
        sdp = &td->td_pcb->pcb_gsd;
        mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
        mcp->mc_flags = 0;
+       mcp->mc_xfpustate = 0;
+       mcp->mc_xfpustate_len = 0;
        bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
        return (0);
 }
@@ -3763,6 +3842,7 @@ int
 set_mcontext(struct thread *td, const mcontext_t *mcp)
 {
        struct trapframe *tp;
+       char *xfpustate;
        int eflags, ret;
 
        tp = td->td_frame;
@@ -3770,30 +3850,43 @@ set_mcontext(struct thread *td, const mc
                return (EINVAL);
        eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
            (tp->tf_eflags & ~PSL_USERCHANGE);
-       if ((ret = set_fpcontext(td, mcp)) == 0) {
-               tp->tf_fs = mcp->mc_fs;
-               tp->tf_es = mcp->mc_es;
-               tp->tf_ds = mcp->mc_ds;
-               tp->tf_edi = mcp->mc_edi;
-               tp->tf_esi = mcp->mc_esi;
-               tp->tf_ebp = mcp->mc_ebp;
-               tp->tf_ebx = mcp->mc_ebx;
-               tp->tf_edx = mcp->mc_edx;
-               tp->tf_ecx = mcp->mc_ecx;
-               tp->tf_eax = mcp->mc_eax;
-               tp->tf_eip = mcp->mc_eip;
-               tp->tf_eflags = eflags;
-               tp->tf_esp = mcp->mc_esp;
-               tp->tf_ss = mcp->mc_ss;
-               td->td_pcb->pcb_gs = mcp->mc_gs;
-               ret = 0;
-       }
-       return (ret);
+       if (mcp->mc_flags & _MC_HASFPXSTATE) {
+               if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
+                   sizeof(union savefpu))
+                       return (EINVAL);
+               xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+               ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
+                   mcp->mc_xfpustate_len);
+               if (ret != 0)
+                       return (ret);
+       } else
+               xfpustate = NULL;
+       ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
+       if (ret != 0)
+               return (ret);
+       tp->tf_fs = mcp->mc_fs;
+       tp->tf_es = mcp->mc_es;
+       tp->tf_ds = mcp->mc_ds;
+       tp->tf_edi = mcp->mc_edi;
+       tp->tf_esi = mcp->mc_esi;
+       tp->tf_ebp = mcp->mc_ebp;
+       tp->tf_ebx = mcp->mc_ebx;
+       tp->tf_edx = mcp->mc_edx;
+       tp->tf_ecx = mcp->mc_ecx;
+       tp->tf_eax = mcp->mc_eax;
+       tp->tf_eip = mcp->mc_eip;
+       tp->tf_eflags = eflags;
+       tp->tf_esp = mcp->mc_esp;
+       tp->tf_ss = mcp->mc_ss;
+       td->td_pcb->pcb_gs = mcp->mc_gs;
+       return (0);
 }
 
 static void
-get_fpcontext(struct thread *td, mcontext_t *mcp)
+get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
+    size_t xfpusave_len)
 {
+       size_t max_len, len;
 
 #ifndef DEV_NPX
        mcp->mc_fpformat = _MC_FPFMT_NODEV;
@@ -3801,37 +3894,54 @@ get_fpcontext(struct thread *td, mcontex
        bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
 #else
        mcp->mc_ownedfp = npxgetregs(td);
-       bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate[0],
+       bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
            sizeof(mcp->mc_fpstate));
        mcp->mc_fpformat = npxformat();
+       if (!use_xsave || xfpusave_len == 0)
+               return;
+       max_len = cpu_max_ext_state_size - sizeof(union savefpu);
+       len = xfpusave_len;
+       if (len > max_len) {
+               len = max_len;
+               bzero(xfpusave + max_len, len - max_len);
+       }
+       mcp->mc_flags |= _MC_HASFPXSTATE;
+       mcp->mc_xfpustate_len = len;
+       bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 #endif
 }
 
 static int
-set_fpcontext(struct thread *td, const mcontext_t *mcp)
+set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate,
+    size_t xfpustate_len)
 {
+       union savefpu *fpstate;
+       int error;
 
        if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
                return (0);
        else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
            mcp->mc_fpformat != _MC_FPFMT_XMM)
                return (EINVAL);
-       else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+       else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
                /* We don't care what state is left in the FPU or PCB. */
                fpstate_drop(td);
-       else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+               error = 0;
+       } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
            mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 #ifdef DEV_NPX
+               fpstate = (union savefpu *)&mcp->mc_fpstate;
 #ifdef CPU_ENABLE_SSE
                if (cpu_fxsr)
-                       ((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env.
-                           en_mxcsr &= cpu_mxcsr_mask;
+                       fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
 #endif
-               npxsetregs(td, (union savefpu *)&mcp->mc_fpstate);
+               error = npxsetregs(td, fpstate, xfpustate, xfpustate_len);
+#else
+               error = EINVAL;
 #endif
        } else
                return (EINVAL);
-       return (0);
+       return (error);
 }
 
 static void

Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c     Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/mp_machdep.c     Sun Nov  2 22:58:30 2014        (r273995)
@@ -749,7 +749,7 @@ init_secondary(void)
        initializecpu();
 
        /* set up FPU state on the AP */
-       npxinit();
+       npxinit(false);
 
        if (cpu_ops.cpu_init)
                cpu_ops.cpu_init();
@@ -1512,11 +1512,11 @@ cpususpend_handler(void)
 
        cpu = PCPU_GET(cpuid);
        if (savectx(&susppcbs[cpu]->sp_pcb)) {
-               npxsuspend(&susppcbs[cpu]->sp_fpususpend);
+               npxsuspend(susppcbs[cpu]->sp_fpususpend);
                wbinvd();
                CPU_SET_ATOMIC(cpu, &suspended_cpus);
        } else {
-               npxresume(&susppcbs[cpu]->sp_fpususpend);
+               npxresume(susppcbs[cpu]->sp_fpususpend);
                pmap_init_pat();
                initializecpu();
                PCPU_SET(switchtime, 0);

Modified: head/sys/i386/i386/ptrace_machdep.c
==============================================================================
--- head/sys/i386/i386/ptrace_machdep.c Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/ptrace_machdep.c Sun Nov  2 22:58:30 2014        (r273995)
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <machine/md_var.h>
@@ -41,6 +42,47 @@ __FBSDID("$FreeBSD$");
 #define CPU_ENABLE_SSE
 #endif
 
+#ifdef CPU_ENABLE_SSE
+static int
+cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
+{
+       char *savefpu;
+       int error;
+
+       if (!use_xsave)
+               return (EOPNOTSUPP);
+
+       switch (req) {
+       case PT_GETXSTATE:
+               npxgetregs(td);
+               savefpu = (char *)(get_pcb_user_save_td(td) + 1);
+               error = copyout(savefpu, addr,
+                   cpu_max_ext_state_size - sizeof(union savefpu));
+               break;
+
+       case PT_SETXSTATE:
+               if (data > cpu_max_ext_state_size - sizeof(union savefpu)) {
+                       error = EINVAL;
+                       break;
+               }
+               savefpu = malloc(data, M_TEMP, M_WAITOK);
+               error = copyin(addr, savefpu, data);
+               if (error == 0) {
+                       npxgetregs(td);
+                       error = npxsetxstate(td, savefpu, data);
+               }
+               free(savefpu, M_TEMP);
+               break;
+
+       default:
+               error = EINVAL;
+               break;
+       }
+
+       return (error);
+}
+#endif
+
 int
 cpu_ptrace(struct thread *td, int req, void *addr, int data)
 {
@@ -51,7 +93,7 @@ cpu_ptrace(struct thread *td, int req, v
        if (!cpu_fxsr)
                return (EINVAL);
 
-       fpstate = &td->td_pcb->pcb_user_save.sv_xmm;
+       fpstate = &get_pcb_user_save_td(td)->sv_xmm;
        switch (req) {
        case PT_GETXMMREGS:
                npxgetregs(td);
@@ -64,6 +106,11 @@ cpu_ptrace(struct thread *td, int req, v
                fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
                break;
 
+       case PT_GETXSTATE:
+       case PT_SETXSTATE:
+               error = cpu_ptrace_xstate(td, req, addr, data);
+               break;
+
        default:
                return (EINVAL);
        }

Modified: head/sys/i386/i386/sys_machdep.c
==============================================================================
--- head/sys/i386/i386/sys_machdep.c    Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/sys_machdep.c    Sun Nov  2 22:58:30 2014        (r273995)
@@ -105,6 +105,7 @@ sysarch(td, uap)
        union {
                struct i386_ldt_args largs;
                struct i386_ioperm_args iargs;
+               struct i386_get_xfpustate xfpu;
        } kargs;
        uint32_t base;
        struct segment_descriptor sd, *sdp;
@@ -126,6 +127,7 @@ sysarch(td, uap)
                case I386_SET_FSBASE:
                case I386_GET_GSBASE:
                case I386_SET_GSBASE:
+               case I386_GET_XFPUSTATE:
                        break;
 
                case I386_SET_IOPERM:
@@ -154,6 +156,11 @@ sysarch(td, uap)
                if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
                        return (EINVAL);
                break;
+       case I386_GET_XFPUSTATE:
+               if ((error = copyin(uap->parms, &kargs.xfpu,
+                   sizeof(struct i386_get_xfpustate))) != 0)
+                       return (error);
+               break;
        default:
                break;
        }
@@ -270,6 +277,14 @@ sysarch(td, uap)
                        load_gs(GSEL(GUGS_SEL, SEL_UPL));
                }
                break;
+       case I386_GET_XFPUSTATE:
+               if (kargs.xfpu.len > cpu_max_ext_state_size -
+                   sizeof(union savefpu))
+                       return (EINVAL);
+               npxgetregs(td);
+               error = copyout((char *)(get_pcb_user_save_td(td) + 1),
+                   kargs.xfpu.addr, kargs.xfpu.len);
+               break;
        default:
                error = EINVAL;
                break;

Modified: head/sys/i386/i386/trap.c
==============================================================================
--- head/sys/i386/i386/trap.c   Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/trap.c   Sun Nov  2 22:58:30 2014        (r273995)
@@ -1157,7 +1157,7 @@ syscall(struct trapframe *frame)
        KASSERT(PCB_USER_FPU(td->td_pcb),
            ("System call %s returning with kernel FPU ctx leaked",
             syscallname(td->td_proc, sa.code)));
-       KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
+       KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
            ("System call %s returning with mangled pcb_save",
             syscallname(td->td_proc, sa.code)));
 

Modified: head/sys/i386/i386/vm_machdep.c
==============================================================================
--- head/sys/i386/i386/vm_machdep.c     Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/i386/vm_machdep.c     Sun Nov  2 22:58:30 2014        (r273995)
@@ -118,7 +118,50 @@ static u_int       cpu_reset_proxyid;
 static volatile u_int  cpu_reset_proxy_active;
 #endif
 
+union savefpu *
+get_pcb_user_save_td(struct thread *td)
+{
+       vm_offset_t p;
+
+       p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+           cpu_max_ext_state_size;
+       KASSERT((p % 64) == 0, ("Unaligned pcb_user_save area"));
+       return ((union savefpu *)p);
+}
+
+union savefpu *
+get_pcb_user_save_pcb(struct pcb *pcb)
+{
+       vm_offset_t p;
+
+       p = (vm_offset_t)(pcb + 1);
+       return ((union savefpu *)p);
+}
+
+struct pcb *
+get_pcb_td(struct thread *td)
+{
+       vm_offset_t p;
 
+       p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+           cpu_max_ext_state_size - sizeof(struct pcb);
+       return ((struct pcb *)p);
+}
+
+void *
+alloc_fpusave(int flags)
+{
+       void *res;
+       struct savefpu_ymm *sf;
+
+       res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
+       if (use_xsave) {
+               sf = (struct savefpu_ymm *)res;
+               bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
+               sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
+       }
+       return (res);
+}
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
@@ -168,15 +211,16 @@ cpu_fork(td1, p2, td2, flags)
 #endif
 
        /* Point the pcb to the top of the stack */
-       pcb2 = (struct pcb *)(td2->td_kstack +
-           td2->td_kstack_pages * PAGE_SIZE) - 1;
+       pcb2 = get_pcb_td(td2);
        td2->td_pcb = pcb2;
 
        /* Copy td1's pcb */
        bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 
        /* Properly initialize pcb_save */
-       pcb2->pcb_save = &pcb2->pcb_user_save;
+       pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+       bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
+           cpu_max_ext_state_size);
 
        /* Point mdproc and then copy over td1's contents */
        mdp2 = &p2->p_md;
@@ -353,12 +397,18 @@ cpu_thread_swapout(struct thread *td)
 void
 cpu_thread_alloc(struct thread *td)
 {
+       struct pcb *pcb;
+       struct xstate_hdr *xhdr;
 
-       td->td_pcb = (struct pcb *)(td->td_kstack +
-           td->td_kstack_pages * PAGE_SIZE) - 1;
-       td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
-       td->td_pcb->pcb_ext = NULL; 
-       td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save;
+       td->td_pcb = pcb = get_pcb_td(td);
+       td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
+       pcb->pcb_ext = NULL; 
+       pcb->pcb_save = get_pcb_user_save_pcb(pcb);
+       if (use_xsave) {
+               xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
+               bzero(xhdr, sizeof(*xhdr));
+               xhdr->xstate_bv = xsave_mask;
+       }
 }
 
 void
@@ -426,7 +476,9 @@ cpu_set_upcall(struct thread *td, struct
        bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
        pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE |
            PCB_KERNNPX);
-       pcb2->pcb_save = &pcb2->pcb_user_save;
+       pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+       bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
+           cpu_max_ext_state_size);
 
        /*
         * Create a new fresh stack for the new thread.

Modified: head/sys/i386/include/cpufunc.h
==============================================================================
--- head/sys/i386/include/cpufunc.h     Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/include/cpufunc.h     Sun Nov  2 22:58:30 2014        (r273995)
@@ -457,6 +457,25 @@ rcr4(void)
        return (data);
 }
 
+static __inline uint64_t
+rxcr(u_int reg)
+{
+       u_int low, high;
+
+       __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg));
+       return (low | ((uint64_t)high << 32));
+}
+
+static __inline void
+load_xcr(u_int reg, uint64_t val)
+{
+       u_int low, high;
+
+       low = val;
+       high = val >> 32;
+       __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high));
+}
+
 /*
  * Global TLB flush (except for thise for pages marked PG_G)
  */

Modified: head/sys/i386/include/md_var.h
==============================================================================
--- head/sys/i386/include/md_var.h      Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/include/md_var.h      Sun Nov  2 22:58:30 2014        (r273995)
@@ -52,6 +52,7 @@ extern        u_int   cpu_stdext_feature;
 extern u_int   cpu_fxsr;
 extern u_int   cpu_high;
 extern u_int   cpu_id;
+extern u_int   cpu_max_ext_state_size;
 extern u_int   cpu_mxcsr_mask;
 extern u_int   cpu_procinfo;
 extern u_int   cpu_procinfo2;
@@ -80,14 +81,19 @@ extern      int     vm_page_dump_size;
 extern int     workaround_erratum383;
 extern int     _udatasel;
 extern int     _ucodesel;
+extern int     use_xsave;
+extern uint64_t xsave_mask;
 
 typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
+struct pcb;
+union  savefpu;
 struct thread;
 struct reg;
 struct fpreg;
 struct  dbreg;
 struct dumperinfo;
 
+void   *alloc_fpusave(int flags);
 void   bcopyb(const void *from, void *to, size_t len);
 void   busdma_swi(void);
 void   cpu_setregs(void);
@@ -118,5 +124,8 @@ void        printcpuinfo(void);
 void   setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec);
 int     user_dbreg_trap(void);
 int    minidumpsys(struct dumperinfo *);
+union savefpu *get_pcb_user_save_td(struct thread *td);
+union savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
+struct pcb *get_pcb_td(struct thread *td);
 
 #endif /* !_MACHINE_MD_VAR_H_ */

Modified: head/sys/i386/include/npx.h
==============================================================================
--- head/sys/i386/include/npx.h Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/include/npx.h Sun Nov  2 22:58:30 2014        (r273995)
@@ -45,17 +45,24 @@
 
 #ifdef _KERNEL
 
+struct fpu_kern_ctx;
+
 #define        PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0)
 
+#define        XSAVE_AREA_ALIGN        64
+
 int    npxdna(void);
 void   npxdrop(void);
 void   npxexit(struct thread *td);
 int    npxformat(void);
 int    npxgetregs(struct thread *td);
-void   npxinit(void);
+void   npxinit(bool bsp);
 void   npxresume(union savefpu *addr);
 void   npxsave(union savefpu *addr);
-void   npxsetregs(struct thread *td, union savefpu *addr);
+int    npxsetregs(struct thread *td, union savefpu *addr,
+           char *xfpustate, size_t xfpustate_size);
+int    npxsetxstate(struct thread *td, char *xfpustate,
+           size_t xfpustate_size);
 void   npxsuspend(union savefpu *addr);
 int    npxtrap_x87(void);
 int    npxtrap_sse(void);
@@ -68,8 +75,12 @@ int  fpu_kern_leave(struct thread *td, st
 int    fpu_kern_thread(u_int flags);
 int    is_fpu_kern_thread(u_int flags);
 
+union savefpu  *fpu_save_area_alloc(void);
+void   fpu_save_area_free(union savefpu *fsa);
+void   fpu_save_area_reset(union savefpu *fsa);
+
 /*
- * Flags for fpu_kern_enter() and fpu_kern_thread().
+ * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread().
  */
 #define        FPU_KERN_NORMAL 0x0000
 #define        FPU_KERN_NOWAIT 0x0001

Modified: head/sys/i386/include/pcb.h
==============================================================================
--- head/sys/i386/include/pcb.h Sun Nov  2 22:42:19 2014        (r273994)
+++ head/sys/i386/include/pcb.h Sun Nov  2 22:58:30 2014        (r273995)
@@ -45,17 +45,23 @@

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to