On 7/29/18, Konstantin Belousov <k...@freebsd.org> wrote:
> Author: kib
> Date: Sun Jul 29 20:47:00 2018
> New Revision: 336876
> URL: https://svnweb.freebsd.org/changeset/base/336876
>
> Log:
>   Use SMAP on amd64.
>
>   Ifuncs selectors dispatch copyin(9) family to the suitable variant, to
>   set rflags.AC around userspace access.  Rflags.AC bit is cleared in
>   all kernel entry points unconditionally even on machines not
>   supporting SMAP.
>
>   Reviewed by:        jhb
>   Sponsored by:       The FreeBSD Foundation
>   Differential revision:      https://reviews.freebsd.org/D13838
>
> Added:
>   head/sys/amd64/amd64/copyout.c   (contents, props changed)
> Modified:
>   head/sys/amd64/amd64/exception.S
>   head/sys/amd64/amd64/initcpu.c
>   head/sys/amd64/amd64/machdep.c
>   head/sys/amd64/amd64/pmap.c
>   head/sys/amd64/amd64/support.S
>   head/sys/amd64/amd64/trap.c
>   head/sys/amd64/ia32/ia32_exception.S
>   head/sys/amd64/include/asmacros.h
>   head/sys/conf/files.amd64
>   head/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
>
> Added: head/sys/amd64/amd64/copyout.c
> ==============================================================================
> --- /dev/null 00:00:00 1970   (empty, because file is newly added)
> +++ head/sys/amd64/amd64/copyout.c    Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -0,0 +1,178 @@
> +/*-
> + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
> + *
> + * Copyright (c) 2018 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by Konstantin Belousov <k...@freebsd.org>
> + * under sponsorship from the FreeBSD Foundation.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
> STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
> WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +
> +#include <machine/md_var.h>
> +#include <machine/specialreg.h>
> +#include <x86/ifunc.h>
> +
> +int fubyte_nosmap(volatile const void *base);
> +int fubyte_smap(volatile const void *base);
> +DEFINE_IFUNC(, int, fubyte, (volatile const void *), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         fubyte_smap : fubyte_nosmap);
> +}
> +
> +int fuword16_nosmap(volatile const void *base);
> +int fuword16_smap(volatile const void *base);
> +DEFINE_IFUNC(, int, fuword16, (volatile const void *), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         fuword16_smap : fuword16_nosmap);
> +}
> +
> +int fueword_nosmap(volatile const void *base, long *val);
> +int fueword_smap(volatile const void *base, long *val);
> +DEFINE_IFUNC(, int, fueword, (volatile const void *, long *), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         fueword_smap : fueword_nosmap);
> +}
> +DEFINE_IFUNC(, int, fueword64, (volatile const void *, int64_t *), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         fueword_smap : fueword_nosmap);
> +}
> +
> +int  fueword32_nosmap(volatile const void *base, int32_t *val);
> +int  fueword32_smap(volatile const void *base, int32_t *val);
> +DEFINE_IFUNC(, int, fueword32, (volatile const void *, int32_t *), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         fueword32_smap : fueword32_nosmap);
> +}
> +
> +int  subyte_nosmap(volatile void *base, int byte);
> +int  subyte_smap(volatile void *base, int byte);
> +DEFINE_IFUNC(, int, subyte, (volatile void *, int), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         subyte_smap : subyte_nosmap);
> +}
> +
> +int  suword16_nosmap(volatile void *base, int word);
> +int  suword16_smap(volatile void *base, int word);
> +DEFINE_IFUNC(, int, suword16, (volatile void *, int), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         suword16_smap : suword16_nosmap);
> +}
> +
> +int  suword32_nosmap(volatile void *base, int32_t word);
> +int  suword32_smap(volatile void *base, int32_t word);
> +DEFINE_IFUNC(, int, suword32, (volatile void *, int32_t), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         suword32_smap : suword32_nosmap);
> +}
> +
> +int  suword_nosmap(volatile void *base, long word);
> +int  suword_smap(volatile void *base, long word);
> +DEFINE_IFUNC(, int, suword, (volatile void *, long), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         suword_smap : suword_nosmap);
> +}
> +DEFINE_IFUNC(, int, suword64, (volatile void *, int64_t), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         suword_smap : suword_nosmap);
> +}
> +
> +int  casueword32_nosmap(volatile uint32_t *base, uint32_t oldval,
> +         uint32_t *oldvalp, uint32_t newval);
> +int  casueword32_smap(volatile uint32_t *base, uint32_t oldval,
> +         uint32_t *oldvalp, uint32_t newval);
> +DEFINE_IFUNC(, int, casueword32, (volatile uint32_t *, uint32_t, uint32_t
> *,
> +    uint32_t), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         casueword32_smap : casueword32_nosmap);
> +}
> +
> +int  casueword_nosmap(volatile u_long *p, u_long oldval, u_long *oldvalp,
> +         u_long newval);
> +int  casueword_smap(volatile u_long *p, u_long oldval, u_long *oldvalp,
> +         u_long newval);
> +DEFINE_IFUNC(, int, casueword, (volatile u_long *, u_long, u_long *,
> u_long),
> +    static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         casueword_smap : casueword_nosmap);
> +}
> +
> +int  copyinstr_nosmap(const void *udaddr, void *kaddr, size_t len,
> +         size_t *lencopied);
> +int  copyinstr_smap(const void *udaddr, void *kaddr, size_t len,
> +         size_t *lencopied);
> +DEFINE_IFUNC(, int, copyinstr, (const void *, void *, size_t, size_t *),
> +    static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         copyinstr_smap : copyinstr_nosmap);
> +}
> +
> +int  copyin_nosmap(const void *udaddr, void *kaddr, size_t len);
> +int  copyin_smap(const void *udaddr, void *kaddr, size_t len);
> +DEFINE_IFUNC(, int, copyin, (const void *, void *, size_t), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         copyin_smap : copyin_nosmap);
> +}
> +
> +int  copyout_nosmap(const void *kaddr, void *udaddr, size_t len);
> +int  copyout_smap(const void *kaddr, void *udaddr, size_t len);
> +DEFINE_IFUNC(, int, copyout, (const void *, void *, size_t), static)
> +{
> +
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +         copyout_smap : copyout_nosmap);
> +}
>
> Modified: head/sys/amd64/amd64/exception.S
> ==============================================================================
> --- head/sys/amd64/amd64/exception.S  Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/amd64/exception.S  Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -43,8 +43,8 @@
>
>  #include "assym.inc"
>
> -#include <machine/asmacros.h>
>  #include <machine/psl.h>
> +#include <machine/asmacros.h>
>  #include <machine/trap.h>
>  #include <machine/specialreg.h>
>
> @@ -196,7 +196,9 @@ alltraps_pushregs_no_rax:
>       movq    %r14,TF_R14(%rsp)
>       movq    %r15,TF_R15(%rsp)
>       movl    $TF_HASSEGS,TF_FLAGS(%rsp)
> -     cld
> +     pushfq
> +     andq    $~(PSL_D | PSL_AC),(%rsp)
> +     popfq
>       FAKE_MCOUNT(TF_RIP(%rsp))
>  #ifdef KDTRACE_HOOKS
>       /*
> @@ -277,7 +279,9 @@ IDTVEC(dblfault)
>       movq    %r15,TF_R15(%rsp)
>       SAVE_SEGS
>       movl    $TF_HASSEGS,TF_FLAGS(%rsp)
> -     cld
> +     pushfq
> +     andq    $~(PSL_D | PSL_AC),(%rsp)
> +     popfq
>       testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
>       jz      1f                      /* already running with kernel GS.base 
> */
>       swapgs
> @@ -571,7 +575,9 @@ IDTVEC(dbg)
>       movq    %r15,TF_R15(%rsp)
>       SAVE_SEGS
>       movl    $TF_HASSEGS,TF_FLAGS(%rsp)
> -     cld
> +     pushfq
> +     andq    $~(PSL_D | PSL_AC),(%rsp)
> +     popfq
>       testb   $SEL_RPL_MASK,TF_CS(%rsp)
>       jnz     dbg_fromuserspace
>       /*
> @@ -704,7 +710,9 @@ IDTVEC(nmi)
>       movq    %r15,TF_R15(%rsp)
>       SAVE_SEGS
>       movl    $TF_HASSEGS,TF_FLAGS(%rsp)
> -     cld
> +     pushfq
> +     andq    $~(PSL_D | PSL_AC),(%rsp)
> +     popfq
>       xorl    %ebx,%ebx
>       testb   $SEL_RPL_MASK,TF_CS(%rsp)
>       jnz     nmi_fromuserspace
> @@ -793,7 +801,9 @@ nmi_calltrap:
>       subq    %rcx,%rdx
>       movq    %rdx,%rdi       /* destination stack pointer */
>       shrq    $3,%rcx         /* trap frame size in long words */
> -     cld
> +     pushfq
> +     andq    $~(PSL_D | PSL_AC),(%rsp)
> +     popfq
>       rep
>       movsq                   /* copy trapframe */
>       movq    %rdx,%rsp       /* we are on the regular kstack */
> @@ -902,7 +912,9 @@ IDTVEC(mchk)
>       movq    %r15,TF_R15(%rsp)
>       SAVE_SEGS
>       movl    $TF_HASSEGS,TF_FLAGS(%rsp)
> -     cld
> +     pushfq
> +     andq    $~(PSL_D | PSL_AC),(%rsp)
> +     popfq
>       xorl    %ebx,%ebx
>       testb   $SEL_RPL_MASK,TF_CS(%rsp)
>       jnz     mchk_fromuserspace
>
> Modified: head/sys/amd64/amd64/initcpu.c
> ==============================================================================
> --- head/sys/amd64/amd64/initcpu.c    Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/amd64/initcpu.c    Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -239,8 +239,12 @@ initializecpu(void)
>        * to the kernel tables.  The boot loader enables the U bit in
>        * its tables.
>        */
> -     if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
> -             cr4 |= CR4_SMEP;
> +     if (!IS_BSP()) {
> +             if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
> +                     cr4 |= CR4_SMEP;
> +             if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
> +                     cr4 |= CR4_SMAP;
> +     }
>       load_cr4(cr4);
>       if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
>               msr = rdmsr(MSR_EFER) | EFER_NXE;
>
> Modified: head/sys/amd64/amd64/machdep.c
> ==============================================================================
> --- head/sys/amd64/amd64/machdep.c    Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/amd64/machdep.c    Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -1548,7 +1548,7 @@ amd64_conf_fast_syscall(void)
>       msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
>           ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
>       wrmsr(MSR_STAR, msr);
> -     wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D);
> +     wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
>  }
>
>  u_int64_t
>
> Modified: head/sys/amd64/amd64/pmap.c
> ==============================================================================
> --- head/sys/amd64/amd64/pmap.c       Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/amd64/pmap.c       Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -1092,6 +1092,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
>  {
>       vm_offset_t va;
>       pt_entry_t *pte;
> +     uint64_t cr4;
>       int i;
>
>       KERNend = *firstaddr;
> @@ -1118,11 +1119,21 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
>       virtual_end = VM_MAX_KERNEL_ADDRESS;
>
>
> -     /* XXX do %cr0 as well */
> -     load_cr4(rcr4() | CR4_PGE);
> +     /*
> +      * Enable PG_G global pages, then switch to the kernel page
> +      * table from the bootstrap page table.  After the switch, it
> +      * is possible to enable SMEP and SMAP since PG_U bits are
> +      * correct now.
> +      */
> +     cr4 = rcr4();
> +     cr4 |= CR4_PGE;
> +     load_cr4(cr4);
>       load_cr3(KPML4phys);
>       if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
> -             load_cr4(rcr4() | CR4_SMEP);
> +             cr4 |= CR4_SMEP;
> +     if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
> +             cr4 |= CR4_SMAP;
> +     load_cr4(cr4);
>
>       /*
>        * Initialize the kernel pmap (which is statically allocated).
>
> Modified: head/sys/amd64/amd64/support.S
> ==============================================================================
> --- head/sys/amd64/amd64/support.S    Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/amd64/support.S    Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -226,7 +226,7 @@ END(fillw)
>   * copyout(from_kernel, to_user, len)
>   *         %rdi,        %rsi,    %rdx
>   */
> -ENTRY(copyout)
> +ENTRY(copyout_nosmap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rax
>       movq    $copyout_fault,PCB_ONFAULT(%rax)
> @@ -268,6 +268,55 @@ ENTRY(copyout)
>       rep
>       movsb
>
> +     jmp     done_copyout
> +END(copyout_nosmap)
> +
> +ENTRY(copyout_smap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rax
> +     /* Trap entry clears PSL.AC */
> +     movq    $copyout_fault,PCB_ONFAULT(%rax)
> +     testq   %rdx,%rdx                       /* anything to do? */
> +     jz      done_copyout
> +
> +     /*
> +      * Check explicitly for non-user addresses.  If 486 write protection
> +      * is being used, this check is essential because we are in kernel
> +      * mode so the h/w does not provide any protection against writing
> +      * kernel addresses.
> +      */
> +
> +     /*
> +      * First, prevent address wrapping.
> +      */
> +     movq    %rsi,%rax
> +     addq    %rdx,%rax
> +     jc      copyout_fault
> +/*
> + * XXX STOP USING VM_MAXUSER_ADDRESS.
> + * It is an end address, not a max, so every time it is used correctly it
> + * looks like there is an off by one error, and of course it caused an off
> + * by one error in several places.
> + */
> +     movq    $VM_MAXUSER_ADDRESS,%rcx
> +     cmpq    %rcx,%rax
> +     ja      copyout_fault
> +
> +     xchgq   %rdi,%rsi
> +     /* bcopy(%rsi, %rdi, %rdx) */
> +     movq    %rdx,%rcx
> +
> +     shrq    $3,%rcx
> +     cld
> +     stac
> +     rep
> +     movsq
> +     movb    %dl,%cl
> +     andb    $7,%cl
> +     rep
> +     movsb
> +     clac
> +
>  done_copyout:
>       xorl    %eax,%eax
>       movq    PCPU(CURPCB),%rdx
> @@ -288,7 +337,7 @@ END(copyout)
>   * copyin(from_user, to_kernel, len)
>   *        %rdi,      %rsi,      %rdx
>   */
> -ENTRY(copyin)
> +ENTRY(copyin_nosmap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rax
>       movq    $copyin_fault,PCB_ONFAULT(%rax)
> @@ -309,13 +358,47 @@ ENTRY(copyin)
>       movq    %rdx,%rcx
>       movb    %cl,%al
>       shrq    $3,%rcx                         /* copy longword-wise */
> +     cld
>       rep
>       movsq
>       movb    %al,%cl
>       andb    $7,%cl                          /* copy remaining bytes */
> +     rep
> +     movsb
> +
> +     jmp     done_copyin
> +END(copyin_nosmap)
> +
> +ENTRY(copyin_smap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rax
> +     movq    $copyin_fault,PCB_ONFAULT(%rax)
> +     testq   %rdx,%rdx                       /* anything to do? */
> +     jz      done_copyin
> +
> +     /*
> +      * make sure address is valid
> +      */
> +     movq    %rdi,%rax
> +     addq    %rdx,%rax
> +     jc      copyin_fault
> +     movq    $VM_MAXUSER_ADDRESS,%rcx
> +     cmpq    %rcx,%rax
> +     ja      copyin_fault
> +
> +     xchgq   %rdi,%rsi
> +     movq    %rdx,%rcx
> +     movb    %cl,%al
> +     shrq    $3,%rcx                         /* copy longword-wise */

The cld is missing here: copyin_nosmap above issues cld before rep movsq,
but copyin_smap does not.

> +     stac
> +     rep
> +     movsq
> +     movb    %al,%cl
> +     andb    $7,%cl                          /* copy remaining bytes */
>       je      done_copyin
>       rep
>       movsb
> +     clac
>
>  done_copyin:
>       xorl    %eax,%eax
> @@ -323,6 +406,7 @@ done_copyin:
>       movq    %rax,PCB_ONFAULT(%rdx)
>       POP_FRAME_POINTER
>       ret
> +END(copyin_smap)
>
>       ALIGN_TEXT
>  copyin_fault:
> @@ -331,14 +415,13 @@ copyin_fault:
>       movq    $EFAULT,%rax
>       POP_FRAME_POINTER
>       ret
> -END(copyin)
>
>  /*
>   * casueword32.  Compare and set user integer.  Returns -1 on fault,
>   *        0 if access was successful.  Old value is written to *oldp.
>   *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
>   */
> -ENTRY(casueword32)
> +ENTRY(casueword32_nosmap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%r8
>       movq    $fusufault,PCB_ONFAULT(%r8)
> @@ -370,14 +453,50 @@ ENTRY(casueword32)
>       movl    %esi,(%rdx)                     /* oldp = %rdx */
>       POP_FRAME_POINTER
>       ret
> -END(casueword32)
> +END(casueword32_nosmap)
>
> +ENTRY(casueword32_smap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%r8
> +     movq    $fusufault,PCB_ONFAULT(%r8)
> +
> +     movq    $VM_MAXUSER_ADDRESS-4,%rax
> +     cmpq    %rax,%rdi                       /* verify address is valid */
> +     ja      fusufault
> +
> +     movl    %esi,%eax                       /* old */
> +     stac
> +#ifdef SMP
> +     lock
> +#endif
> +     cmpxchgl %ecx,(%rdi)                    /* new = %ecx */
> +     clac
> +
> +     /*
> +      * The old value is in %eax.  If the store succeeded it will be the
> +      * value we expected (old) from before the store, otherwise it will
> +      * be the current value.  Save %eax into %esi to prepare the return
> +      * value.
> +      */
> +     movl    %eax,%esi
> +     xorl    %eax,%eax
> +     movq    %rax,PCB_ONFAULT(%r8)
> +
> +     /*
> +      * Access the oldp after the pcb_onfault is cleared, to correctly
> +      * catch corrupted pointer.
> +      */
> +     movl    %esi,(%rdx)                     /* oldp = %rdx */
> +     POP_FRAME_POINTER
> +     ret
> +END(casueword32_smap)
> +
>  /*
>   * casueword.  Compare and set user long.  Returns -1 on fault,
>   *        0 if access was successful.  Old value is written to *oldp.
>   *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
>   */
> -ENTRY(casueword)
> +ENTRY(casueword_nosmap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%r8
>       movq    $fusufault,PCB_ONFAULT(%r8)
> @@ -403,16 +522,45 @@ ENTRY(casueword)
>       movq    %rsi,(%rdx)
>       POP_FRAME_POINTER
>       ret
> -END(casueword)
> +END(casueword_nosmap)
>
> +ENTRY(casueword_smap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%r8
> +     movq    $fusufault,PCB_ONFAULT(%r8)
> +
> +     movq    $VM_MAXUSER_ADDRESS-4,%rax
> +     cmpq    %rax,%rdi                       /* verify address is valid */
> +     ja      fusufault
> +
> +     movq    %rsi,%rax                       /* old */
> +     stac
> +#ifdef SMP
> +     lock
> +#endif
> +     cmpxchgq %rcx,(%rdi)                    /* new = %rcx */
> +     clac
> +
> +     /*
> +      * The old value is in %rax.  If the store succeeded it will be the
> +      * value we expected (old) from before the store, otherwise it will
> +      * be the current value.
> +      */
> +     movq    %rax,%rsi
> +     xorl    %eax,%eax
> +     movq    %rax,PCB_ONFAULT(%r8)
> +     movq    %rsi,(%rdx)
> +     POP_FRAME_POINTER
> +     ret
> +END(casueword_smap)
> +
>  /*
>   * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
>   * byte from user memory.
>   * addr = %rdi, valp = %rsi
>   */
>
> -ALTENTRY(fueword64)
> -ENTRY(fueword)
> +ENTRY(fueword_nosmap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
> @@ -427,14 +575,32 @@ ENTRY(fueword)
>       movq    %r11,(%rsi)
>       POP_FRAME_POINTER
>       ret
> -END(fueword64)
> -END(fueword)
> +END(fueword64_nosmap)
>
> -ENTRY(fueword32)
> +ENTRY(fueword_smap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
>
> +     movq    $VM_MAXUSER_ADDRESS-8,%rax
> +     cmpq    %rax,%rdi                       /* verify address is valid */
> +     ja      fusufault
> +
> +     xorl    %eax,%eax
> +     stac
> +     movq    (%rdi),%r11
> +     clac
> +     movq    %rax,PCB_ONFAULT(%rcx)
> +     movq    %r11,(%rsi)
> +     POP_FRAME_POINTER
> +     ret
> +END(fueword64_smap)
> +
> +ENTRY(fueword32_nosmap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
>       movq    $VM_MAXUSER_ADDRESS-4,%rax
>       cmpq    %rax,%rdi                       /* verify address is valid */
>       ja      fusufault
> @@ -445,13 +611,32 @@ ENTRY(fueword32)
>       movl    %r11d,(%rsi)
>       POP_FRAME_POINTER
>       ret
> -END(fueword32)
> +END(fueword32_nosmap)
>
> -ENTRY(fuword16)
> +ENTRY(fueword32_smap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
>
> +     movq    $VM_MAXUSER_ADDRESS-4,%rax
> +     cmpq    %rax,%rdi                       /* verify address is valid */
> +     ja      fusufault
> +
> +     xorl    %eax,%eax
> +     stac
> +     movl    (%rdi),%r11d
> +     clac
> +     movq    %rax,PCB_ONFAULT(%rcx)
> +     movl    %r11d,(%rsi)
> +     POP_FRAME_POINTER
> +     ret
> +END(fueword32_smap)
> +
> +ENTRY(fuword16_nosmap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
>       movq    $VM_MAXUSER_ADDRESS-2,%rax
>       cmpq    %rax,%rdi
>       ja      fusufault
> @@ -460,13 +645,30 @@ ENTRY(fuword16)
>       movq    $0,PCB_ONFAULT(%rcx)
>       POP_FRAME_POINTER
>       ret
> -END(fuword16)
> +END(fuword16_nosmap)
>
> -ENTRY(fubyte)
> +ENTRY(fuword16_smap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
>
> +     movq    $VM_MAXUSER_ADDRESS-2,%rax
> +     cmpq    %rax,%rdi
> +     ja      fusufault
> +
> +     stac
> +     movzwl  (%rdi),%eax
> +     clac
> +     movq    $0,PCB_ONFAULT(%rcx)
> +     POP_FRAME_POINTER
> +     ret
> +END(fuword16_smap)
> +
> +ENTRY(fubyte_nosmap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
>       movq    $VM_MAXUSER_ADDRESS-1,%rax
>       cmpq    %rax,%rdi
>       ja      fusufault
> @@ -475,9 +677,27 @@ ENTRY(fubyte)
>       movq    $0,PCB_ONFAULT(%rcx)
>       POP_FRAME_POINTER
>       ret
> -END(fubyte)
> +END(fubyte_nosmap)
>
> +ENTRY(fubyte_smap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
> +     movq    $VM_MAXUSER_ADDRESS-1,%rax
> +     cmpq    %rax,%rdi
> +     ja      fusufault
> +
> +     stac
> +     movzbl  (%rdi),%eax
> +     clac
> +     movq    $0,PCB_ONFAULT(%rcx)
> +     POP_FRAME_POINTER
> +     ret
> +END(fubyte_smap)
> +
>       ALIGN_TEXT
> +     /* Fault entry clears PSL.AC */
>  fusufault:
>       movq    PCPU(CURPCB),%rcx
>       xorl    %eax,%eax
> @@ -491,8 +711,7 @@ fusufault:
>   * user memory.
>   * addr = %rdi, value = %rsi
>   */
> -ALTENTRY(suword64)
> -ENTRY(suword)
> +ENTRY(suword_nosmap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
> @@ -507,14 +726,32 @@ ENTRY(suword)
>       movq    %rax,PCB_ONFAULT(%rcx)
>       POP_FRAME_POINTER
>       ret
> -END(suword64)
> -END(suword)
> +END(suword_nosmap)
>
> -ENTRY(suword32)
> +ENTRY(suword_smap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
>
> +     movq    $VM_MAXUSER_ADDRESS-8,%rax
> +     cmpq    %rax,%rdi                       /* verify address validity */
> +     ja      fusufault
> +
> +     stac
> +     movq    %rsi,(%rdi)
> +     clac
> +     xorl    %eax,%eax
> +     movq    PCPU(CURPCB),%rcx
> +     movq    %rax,PCB_ONFAULT(%rcx)
> +     POP_FRAME_POINTER
> +     ret
> +END(suword_smap)
> +
> +ENTRY(suword32_nosmap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
>       movq    $VM_MAXUSER_ADDRESS-4,%rax
>       cmpq    %rax,%rdi                       /* verify address validity */
>       ja      fusufault
> @@ -525,13 +762,32 @@ ENTRY(suword32)
>       movq    %rax,PCB_ONFAULT(%rcx)
>       POP_FRAME_POINTER
>       ret
> -END(suword32)
> +END(suword32_nosmap)
>
> -ENTRY(suword16)
> +ENTRY(suword32_smap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
>
> +     movq    $VM_MAXUSER_ADDRESS-4,%rax
> +     cmpq    %rax,%rdi                       /* verify address validity */
> +     ja      fusufault
> +
> +     stac
> +     movl    %esi,(%rdi)
> +     clac
> +     xorl    %eax,%eax
> +     movq    PCPU(CURPCB),%rcx
> +     movq    %rax,PCB_ONFAULT(%rcx)
> +     POP_FRAME_POINTER
> +     ret
> +END(suword32_smap)
> +
> +ENTRY(suword16_nosmap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
>       movq    $VM_MAXUSER_ADDRESS-2,%rax
>       cmpq    %rax,%rdi                       /* verify address validity */
>       ja      fusufault
> @@ -542,13 +798,32 @@ ENTRY(suword16)
>       movq    %rax,PCB_ONFAULT(%rcx)
>       POP_FRAME_POINTER
>       ret
> -END(suword16)
> +END(suword16_nosmap)
>
> -ENTRY(subyte)
> +ENTRY(suword16_smap)
>       PUSH_FRAME_POINTER
>       movq    PCPU(CURPCB),%rcx
>       movq    $fusufault,PCB_ONFAULT(%rcx)
>
> +     movq    $VM_MAXUSER_ADDRESS-2,%rax
> +     cmpq    %rax,%rdi                       /* verify address validity */
> +     ja      fusufault
> +
> +     stac
> +     movw    %si,(%rdi)
> +     clac
> +     xorl    %eax,%eax
> +     movq    PCPU(CURPCB),%rcx               /* restore trashed register */
> +     movq    %rax,PCB_ONFAULT(%rcx)
> +     POP_FRAME_POINTER
> +     ret
> +END(suword16_smap)
> +
> +ENTRY(subyte_nosmap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
>       movq    $VM_MAXUSER_ADDRESS-1,%rax
>       cmpq    %rax,%rdi                       /* verify address validity */
>       ja      fusufault
> @@ -560,8 +835,28 @@ ENTRY(subyte)
>       movq    %rax,PCB_ONFAULT(%rcx)
>       POP_FRAME_POINTER
>       ret
> -END(subyte)
> +END(subyte_nosmap)
>
> +ENTRY(subyte_smap)
> +     PUSH_FRAME_POINTER
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $fusufault,PCB_ONFAULT(%rcx)
> +
> +     movq    $VM_MAXUSER_ADDRESS-1,%rax
> +     cmpq    %rax,%rdi                       /* verify address validity */
> +     ja      fusufault
> +
> +     movl    %esi,%eax
> +     stac
> +     movb    %al,(%rdi)
> +     clac
> +     xorl    %eax,%eax
> +     movq    PCPU(CURPCB),%rcx               /* restore trashed register */
> +     movq    %rax,PCB_ONFAULT(%rcx)
> +     POP_FRAME_POINTER
> +     ret
> +END(subyte_smap)
> +
>  /*
>   * copyinstr(from, to, maxlen, int *lencopied)
>   *           %rdi, %rsi, %rdx, %rcx
> @@ -571,7 +866,7 @@ END(subyte)
>   *   EFAULT on protection violations. If lencopied is non-zero,
>   *   return the actual length in *lencopied.
>   */
> -ENTRY(copyinstr)
> +ENTRY(copyinstr_nosmap)
>       PUSH_FRAME_POINTER
>       movq    %rdx,%r8                        /* %r8 = maxlen */
>       movq    %rcx,%r9                        /* %r9 = *len */
> @@ -592,29 +887,67 @@ ENTRY(copyinstr)
>       movq    %rax,%r8
>  1:
>       incq    %rdx
> +     cld
>
>  2:
>       decq    %rdx
> -     jz      3f
> +     jz      copyinstr_toolong
>
>       lodsb
>       stosb
>       orb     %al,%al
>       jnz     2b
>
> +     jmp     copyinstr_succ
> +END(copyinstr_nosmap)
> +
> +ENTRY(copyinstr_smap)
> +     PUSH_FRAME_POINTER
> +     movq    %rdx,%r8                        /* %r8 = maxlen */
> +     movq    %rcx,%r9                        /* %r9 = *len */
> +     xchgq   %rdi,%rsi                       /* %rdi = from, %rsi = to */
> +     movq    PCPU(CURPCB),%rcx
> +     movq    $cpystrflt,PCB_ONFAULT(%rcx)
> +
> +     movq    $VM_MAXUSER_ADDRESS,%rax
> +
> +     /* make sure 'from' is within bounds */
> +     subq    %rsi,%rax
> +     jbe     cpystrflt
> +
> +     /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
> +     cmpq    %rdx,%rax
> +     jae     1f
> +     movq    %rax,%rdx
> +     movq    %rax,%r8
> +1:
> +     incq    %rdx

The cld is missing here: copyinstr_nosmap issues cld before its lodsb/stosb
loop, but copyinstr_smap does not.

> +
> +2:
> +     decq    %rdx
> +     jz      copyinstr_succ
> +
> +     stac
> +     lodsb
> +     stosb
> +     clac
> +     orb     %al,%al
> +     jnz     2b
> +
> +copyinstr_succ:
>       /* Success -- 0 byte reached */
>       decq    %rdx
>       xorl    %eax,%eax
>       jmp     cpystrflt_x
> -3:
> +copyinstr_toolong:
>       /* rdx is zero - return ENAMETOOLONG or EFAULT */
>       movq    $VM_MAXUSER_ADDRESS,%rax
>       cmpq    %rax,%rsi
>       jae     cpystrflt
> -4:
>       movq    $ENAMETOOLONG,%rax
>       jmp     cpystrflt_x
>
> +     /* Fault entry clears PSL.AC */
>  cpystrflt:
>       movq    $EFAULT,%rax
>
> @@ -630,7 +963,7 @@ cpystrflt_x:

cpystrflt_x:
        /* set *lencopied and return %eax */
        movq    PCPU(CURPCB),%rcx
        movq    $0,PCB_ONFAULT(%rcx)

        testq   %r9,%r9
        jz      1f
        subq    %rdx,%r8
        movq    %r8,(%r9) << Here you access user space with RFLAGS.AC
already cleared by the fault handler.
1:
        POP_FRAME_POINTER
        ret


>  1:
>       POP_FRAME_POINTER
>       ret
> -END(copyinstr)
> +END(copyinstr_smap)
>
>  /*
>   * copystr(from, to, maxlen, int *lencopied)
>
> Modified: head/sys/amd64/amd64/trap.c
> ==============================================================================
> --- head/sys/amd64/amd64/trap.c       Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/amd64/trap.c       Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -673,6 +673,24 @@ trap_check(struct trapframe *frame)
>       trap(frame);
>  }
>
> +static bool
> +trap_is_smap(struct trapframe *frame)
> +{
> +
> +     /*
> +      * A page fault on a userspace address is classified as
> +      * SMAP-induced if:
> +      * - SMAP is supported;
> +      * - kernel mode accessed present data page;
> +      * - rflags.AC was cleared.
> +      * Kernel must never access user space with rflags.AC cleared
> +      * if SMAP is enabled.
> +      */
> +     return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 &&
> +         (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) ==
> +         PGEX_P && (frame->tf_rflags & PSL_AC) == 0);
> +}
> +
>  static int
>  trap_pfault(struct trapframe *frame, int usermode)
>  {
> @@ -750,9 +768,13 @@ trap_pfault(struct trapframe *frame, int usermode)
>                * handling routine.  Since accessing the address
>                * without the handler is a bug, do not try to handle
>                * it normally, and panic immediately.
> +              *
> +              * If SMAP is enabled, filter SMAP faults also,
> +              * because illegal access might occur to the mapped
> +              * user address, causing infinite loop.
>                */
>               if (!usermode && (td->td_intr_nesting_level != 0 ||
> -                 curpcb->pcb_onfault == NULL)) {
> +                 trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) {
>                       trap_fatal(frame, eva);
>                       return (-1);
>               }
>
> Modified: head/sys/amd64/ia32/ia32_exception.S
> ==============================================================================
> --- head/sys/amd64/ia32/ia32_exception.S      Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/ia32/ia32_exception.S      Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -70,7 +70,9 @@ int0x80_syscall_common:
>       movq    %r14,TF_R14(%rsp)
>       movq    %r15,TF_R15(%rsp)
>       movl    $TF_HASSEGS,TF_FLAGS(%rsp)
> -     cld
> +     pushfq
> +     andq    $~(PSL_D | PSL_AC),(%rsp)
> +     popfq
>       FAKE_MCOUNT(TF_RIP(%rsp))
>       movq    %rsp, %rdi
>       call    ia32_syscall
>
> Modified: head/sys/amd64/include/asmacros.h
> ==============================================================================
> --- head/sys/amd64/include/asmacros.h Sun Jul 29 20:34:44 2018        
> (r336875)
> +++ head/sys/amd64/include/asmacros.h Sun Jul 29 20:47:00 2018        
> (r336876)
> @@ -255,7 +255,9 @@ X\vec_name:
>       movq    %r15,TF_R15(%rsp)
>       SAVE_SEGS
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
> _______________________________________________
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
>
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to