Author: kib
Date: Fri Apr 13 20:30:49 2018
New Revision: 332489
URL: https://svnweb.freebsd.org/changeset/base/332489

Log:
  i386 4/4G split.
  
  The change makes the user and kernel address spaces on i386
  independent, giving each almost the full 4G of usable virtual addresses
  except for one PDE at top used for trampoline and per-CPU trampoline
  stacks, and system structures that must be always mapped, namely IDT,
  GDT, common TSS and LDT, and process-private TSS and LDT if allocated.
  
  By using 1:1 mapping for the kernel text and data, it turned out to be
  possible to eliminate the assembler part of locore.S which bootstraps
  the initial page table and KPTmap.  The code is rewritten in C and moved
  into pmap_cold(). The comment in vmparam.h explains the KVA
  layout.
  
  There is no PCID mechanism available in protected mode, so each
  kernel/user switch back and forth completely flushes the TLB, except
  for the trampoline PTD region. TLB invalidation for userspace
  becomes trivial, because IPI handlers switch page tables. On the other
  hand, context switches no longer need to reload %cr3.
  
  copyout(9) was rewritten to use vm_fault_quick_hold().  An issue for
  the new copyout(9) is compatibility with wiring user buffers around
  sysctl handlers. This explains the two kinds of locks for copyout ptes
  and the accounting of the vslock() calls.  The vm_fault_quick_hold()
  path, AKA the slow path, is only tried after the 'fast path' has failed;
  the fast path temporarily changes the mapping to userspace and copies
  the data to/from a small per-CPU buffer in the trampoline.  If a page
  fault occurs during the copy, it is short-circuited by exception.s so
  that it does not even reach C code.
  
  The change was motivated by the need to implement the Meltdown
  mitigation, but instead of KPTI the full split is done.  The i386
  architecture already shows sizing problems; in particular, it is
  impossible to link clang and lld with debugging.  I expect that the
  issues due to the virtual address space limits will only get worse,
  and the split gives more life to the platform.
  
  Tested by: pho
  Discussed with:       bde
  Sponsored by: The FreeBSD Foundation
  MFC after:    1 month
  Differential revision:        https://reviews.freebsd.org/D14633

Added:
  head/sys/i386/i386/copyout.c   (contents, props changed)
  head/sys/i386/i386/copyout_fast.s
     - copied, changed from r332488, head/sys/i386/i386/support.s
Modified:
  head/gnu/usr.bin/gdb/kgdb/trgt_i386.c
  head/sys/conf/files.i386
  head/sys/conf/ldscript.i386
  head/sys/dev/dcons/dcons_crom.c
  head/sys/dev/dcons/dcons_os.c
  head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
  head/sys/dev/ppc/ppc.c
  head/sys/dev/syscons/syscons.c
  head/sys/i386/conf/NOTES
  head/sys/i386/i386/apic_vector.s
  head/sys/i386/i386/atpic_vector.s
  head/sys/i386/i386/bios.c
  head/sys/i386/i386/db_interface.c
  head/sys/i386/i386/db_trace.c
  head/sys/i386/i386/elf_machdep.c
  head/sys/i386/i386/exception.s
  head/sys/i386/i386/genassym.c
  head/sys/i386/i386/locore.s
  head/sys/i386/i386/machdep.c
  head/sys/i386/i386/mem.c
  head/sys/i386/i386/minidump_machdep.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/i386/mpboot.s
  head/sys/i386/i386/pmap.c
  head/sys/i386/i386/sigtramp.s
  head/sys/i386/i386/support.s
  head/sys/i386/i386/swtch.s
  head/sys/i386/i386/sys_machdep.c
  head/sys/i386/i386/trap.c
  head/sys/i386/i386/vm86.c
  head/sys/i386/i386/vm86bios.s
  head/sys/i386/i386/vm_machdep.c
  head/sys/i386/include/asmacros.h
  head/sys/i386/include/frame.h
  head/sys/i386/include/md_var.h
  head/sys/i386/include/param.h
  head/sys/i386/include/pc/bios.h
  head/sys/i386/include/pcpu.h
  head/sys/i386/include/pmap.h
  head/sys/i386/include/segments.h
  head/sys/i386/include/vmparam.h
  head/sys/kern/imgact_aout.c
  head/sys/kern/subr_witness.c
  head/sys/x86/acpica/acpi_wakeup.c
  head/sys/x86/x86/local_apic.c
  head/sys/x86/x86/mp_x86.c
  head/sys/x86/x86/mptable.c

Modified: head/gnu/usr.bin/gdb/kgdb/trgt_i386.c
==============================================================================
--- head/gnu/usr.bin/gdb/kgdb/trgt_i386.c       Fri Apr 13 19:43:23 2018        (r332488)
+++ head/gnu/usr.bin/gdb/kgdb/trgt_i386.c       Fri Apr 13 20:30:49 2018        (r332489)
@@ -29,6 +29,8 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/proc.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
 #include <machine/pcb.h>
 #include <machine/frame.h>
 #include <machine/segments.h>
@@ -279,12 +281,26 @@ kgdb_trgt_frame_cache(struct frame_info *next_frame, v
        char buf[MAX_REGISTER_SIZE];
        struct kgdb_frame_cache *cache;
        char *pname;
+       CORE_ADDR pcx;
+       uintptr_t addr, setidt_disp;
 
        cache = *this_cache;
        if (cache == NULL) {
                cache = FRAME_OBSTACK_ZALLOC(struct kgdb_frame_cache);
                *this_cache = cache;
-               cache->pc = frame_func_unwind(next_frame);
+               pcx = frame_pc_unwind(next_frame);
+               if (pcx >= PMAP_TRM_MIN_ADDRESS) {
+                       addr = kgdb_lookup("setidt_disp");
+                       if (addr != 0) {
+                               if (kvm_read(kvm, addr, &setidt_disp,
+                                   sizeof(setidt_disp)) !=
+                                   sizeof(setidt_disp))
+                                       warnx("kvm_read: %s", kvm_geterr(kvm));
+                               else
+                                       pcx -= setidt_disp;
+                       }
+               }
+               cache->pc = pcx;
                find_pc_partial_function(cache->pc, &pname, NULL, NULL);
                if (pname[0] != 'X')
                        cache->frame_type = FT_NORMAL;
@@ -373,6 +389,8 @@ kgdb_trgt_trapframe_sniffer(struct frame_info *next_fr
        CORE_ADDR pc;
 
        pc = frame_pc_unwind(next_frame);
+       if (pc >= PMAP_TRM_MIN_ADDRESS)
+               return (&kgdb_trgt_trapframe_unwind);
        pname = NULL;
        find_pc_partial_function(pc, &pname, NULL, NULL);
        if (pname == NULL)

Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386    Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/conf/files.i386    Fri Apr 13 20:30:49 2018        (r332489)
@@ -483,6 +483,7 @@ i386/i386/atomic.c          standard                \
 i386/i386/bios.c               standard
 i386/i386/bioscall.s           standard
 i386/i386/bpf_jit_machdep.c    optional bpf_jitter
+i386/i386/copyout.c            standard
 i386/i386/db_disasm.c          optional ddb
 i386/i386/db_interface.c       optional ddb
 i386/i386/db_trace.c           optional ddb

Modified: head/sys/conf/ldscript.i386
==============================================================================
--- head/sys/conf/ldscript.i386 Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/conf/ldscript.i386 Fri Apr 13 20:30:49 2018        (r332489)
@@ -6,7 +6,7 @@ SEARCH_DIR(/usr/lib);
 SECTIONS
 {
   /* Read-only sections, merged into text segment: */
-  . = kernbase + kernload + SIZEOF_HEADERS;
+  . = kernbase + SIZEOF_HEADERS;
   .interp         : { *(.interp) }
   .hash           : { *(.hash) }
   .gnu.hash       : { *(.gnu.hash) }

Modified: head/sys/dev/dcons/dcons_crom.c
==============================================================================
--- head/sys/dev/dcons/dcons_crom.c     Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/dev/dcons/dcons_crom.c     Fri Apr 13 20:30:49 2018        (r332489)
@@ -109,7 +109,11 @@ dcons_crom_expose_idt(struct dcons_crom_softc *sc)
        static off_t idt_paddr;
 
        /* XXX */
+#ifdef __amd64__
        idt_paddr = (char *)idt - (char *)KERNBASE;
+#else /* __i386__ */
+       idt_paddr = (off_t)pmap_kextract((vm_offset_t)idt);
+#endif
 
        crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_HI, ADDR_HI(idt_paddr));
        crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_LO, ADDR_LO(idt_paddr));

Modified: head/sys/dev/dcons/dcons_os.c
==============================================================================
--- head/sys/dev/dcons/dcons_os.c       Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/dev/dcons/dcons_os.c       Fri Apr 13 20:30:49 2018        (r332489)
@@ -309,11 +309,16 @@ dcons_drv_init(int stage)
                 * Allow read/write access to dcons buffer.
                 */
                for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE)
-                       *vtopte(KERNBASE + pa) |= PG_RW;
+                       *vtopte(PMAP_MAP_LOW + pa) |= PG_RW;
                invltlb();
 #endif
                /* XXX P to V */
+#ifdef __amd64__
                dg.buf = (struct dcons_buf *)(vm_offset_t)(KERNBASE + addr);
+#else /* __i386__ */
+               dg.buf = (struct dcons_buf *)((vm_offset_t)PMAP_MAP_LOW +
+                   addr);
+#endif
                dg.size = size;
                if (dcons_load_buffer(dg.buf, dg.size, sc) < 0)
                        dg.buf = NULL;

Modified: head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
==============================================================================
--- head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S       Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S       Fri Apr 13 20:30:49 2018        (r332489)
@@ -26,11 +26,12 @@
  * $FreeBSD$
  */
 
+#include "assym.inc"
+
+#include <machine/psl.h>
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 
-#include "assym.inc"
-
 /*
  * This is the Hyper-V vmbus channel direct callback interrupt.
  * Only used when it is running on Hyper-V.
@@ -42,6 +43,7 @@ IDTVEC(vmbus_isr)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
+       KENTER
        FAKE_MCOUNT(TF_EIP(%esp))
        pushl   %esp
        call    vmbus_handle_intr

Modified: head/sys/dev/ppc/ppc.c
==============================================================================
--- head/sys/dev/ppc/ppc.c      Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/dev/ppc/ppc.c      Fri Apr 13 20:30:49 2018        (r332489)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <machine/vmparam.h>
+#include <machine/pc/bios.h>
 #endif
 
 #include <dev/ppbus/ppbconf.h>
@@ -121,7 +122,7 @@ static char *ppc_epp_protocol[] = { " (EPP 1.9)", " (E
  * BIOS printer list - used by BIOS probe.
  */
 #define        BIOS_PPC_PORTS  0x408
-#define        BIOS_PORTS      (short *)(KERNBASE+BIOS_PPC_PORTS)
+#define        BIOS_PORTS      ((short *)BIOS_PADDRTOVADDR(BIOS_PPC_PORTS))
 #define        BIOS_MAX_PPC    4
 #endif
 

Modified: head/sys/dev/syscons/syscons.c
==============================================================================
--- head/sys/dev/syscons/syscons.c      Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/dev/syscons/syscons.c      Fri Apr 13 20:30:49 2018        (r332489)
@@ -288,7 +288,11 @@ ec_putc(int c)
                 * This is enough for ec_putc() to work very early on x86
                 * if the kernel starts in normal color text mode.
                 */
+#ifdef __amd64__
                fb = KERNBASE + 0xb8000;
+#else /* __i386__ */
+               fb = PMAP_MAP_LOW + 0xb8000;
+#endif
                xsize = 80;
                ysize = 25;
 #endif

Modified: head/sys/i386/conf/NOTES
==============================================================================
--- head/sys/i386/conf/NOTES    Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/i386/conf/NOTES    Fri Apr 13 20:30:49 2018        (r332489)
@@ -895,19 +895,6 @@ options    ENABLE_ALART            # Control alarm on Intel intpm 
 options        PMAP_SHPGPERPROC=201
 
 #
-# Change the size of the kernel virtual address space.  Due to
-# constraints in loader(8) on i386, this must be a multiple of 4.
-# 256 = 1 GB of kernel address space.  Increasing this also causes
-# a reduction of the address space in user processes.  512 splits
-# the 4GB cpu address space in half (2GB user, 2GB kernel).  For PAE
-# kernels, the value will need to be double non-PAE.  A value of 1024
-# for PAE kernels is necessary to split the address space in half.
-# This will likely need to be increased to handle memory sizes >4GB.
-# PAE kernels default to a value of 512.
-#
-options        KVA_PAGES=260
-
-#
 # Number of initial kernel page table pages used for early bootstrap.
 # This number should include enough pages to map the kernel, any
 # modules or other data loaded with the kernel by the loader, and data
@@ -950,22 +937,6 @@ device             ndis
 
 #####################################################################
 # VM OPTIONS
-
-# Disable the 4 MByte page PSE CPU feature.  The PSE feature allows the
-# kernel to use 4 MByte pages to map the kernel instead of 4k pages.
-# This saves on the amount of memory needed for page tables needed to
-# map the kernel.  You should only disable this feature as a temporary
-# workaround if you are having problems with it enabled.
-#
-#options       DISABLE_PSE
-
-# Disable the global pages PGE CPU feature.  The PGE feature allows pages
-# to be marked with the PG_G bit.  TLB entries for these pages are not
-# flushed from the cache when %cr3 is reloaded.  This can make context
-# switches less expensive.  You should only disable this feature as a
-# temporary workaround if you are having problems with it enabled.
-#
-#options       DISABLE_PG_G
 
 # KSTACK_PAGES is the number of memory pages to assign to the kernel
 # stack of each thread.

Modified: head/sys/i386/i386/apic_vector.s
==============================================================================
--- head/sys/i386/i386/apic_vector.s    Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/i386/i386/apic_vector.s    Fri Apr 13 20:30:49 2018        (r332489)
@@ -39,6 +39,7 @@
 #include "opt_smp.h"
 
 #include <machine/asmacros.h>
+#include <machine/psl.h>
 #include <machine/specialreg.h>
 #include <x86/apicreg.h>
 
@@ -67,34 +68,39 @@ as_lapic_eoi:
  * translates that into a vector, and passes the vector to the
  * lapic_handle_intr() function.
  */
-#define        ISR_VEC(index, vec_name)                                        \
-       .text ;                                                         \
-       SUPERALIGN_TEXT ;                                               \
-IDTVEC(vec_name ## _pti) ;                                             \
-IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
-       SET_KERNEL_SREGS ;                                              \
-       cld ;                                                           \
-       FAKE_MCOUNT(TF_EIP(%esp)) ;                                     \
-       cmpl    $0,x2apic_mode ;                                        \
-       je      1f ;                                                    \
-       movl    $(MSR_APIC_ISR0 + index),%ecx ;                         \
-       rdmsr ;                                                         \
-       jmp     2f ;                                                    \
-1: ;                                                                   \
-       movl    lapic_map, %edx ;/* pointer to local APIC */            \
-       movl    LA_ISR + 16 * (index)(%edx), %eax ;     /* load ISR */  \
-2: ;                                                                   \
-       bsrl    %eax, %eax ;    /* index of highest set bit in ISR */   \
-       jz      3f ;                                                    \
-       addl    $(32 * index),%eax ;                                    \
-       pushl   %esp            ;                                       \
-       pushl   %eax ;          /* pass the IRQ */                      \
-       call    lapic_handle_intr ;                                     \
-       addl    $8, %esp ;      /* discard parameter */                 \
-3: ;                                                                   \
-       MEXITCOUNT ;                                                    \
+       .macro  ISR_VEC index, vec_name
+       .text
+       SUPERALIGN_TEXT
+       .globl  X\()\vec_name\()_pti, X\()\vec_name
+
+X\()\vec_name\()_pti:
+X\()\vec_name:
+       PUSH_FRAME
+       SET_KERNEL_SREGS
+       cld
+       KENTER
+       FAKE_MCOUNT(TF_EIP(%esp))
+       cmpl    $0,x2apic_mode
+       je      2f
+       movl    $(MSR_APIC_ISR0 + \index),%ecx
+       rdmsr
+       jmp     3f
+2:
+       movl    lapic_map, %edx         /* pointer to local APIC */
+       movl    LA_ISR + 16 * \index(%edx), %eax        /* load ISR */
+3:
+       bsrl    %eax, %eax      /* index of highest set bit in ISR */
+       jz      4f
+       addl    $(32 * \index),%eax
+       pushl   %esp
+       pushl   %eax            /* pass the IRQ */
+       movl    $lapic_handle_intr, %eax
+       call    *%eax
+       addl    $8, %esp        /* discard parameter */
+4:
+       MEXITCOUNT
        jmp     doreti
+       .endm
 
 /*
  * Handle "spurious INTerrupts".
@@ -111,13 +117,13 @@ IDTVEC(spuriousint)
 
        iret
 
-       ISR_VEC(1, apic_isr1)
-       ISR_VEC(2, apic_isr2)
-       ISR_VEC(3, apic_isr3)
-       ISR_VEC(4, apic_isr4)
-       ISR_VEC(5, apic_isr5)
-       ISR_VEC(6, apic_isr6)
-       ISR_VEC(7, apic_isr7)
+       ISR_VEC 1, apic_isr1
+       ISR_VEC 2, apic_isr2
+       ISR_VEC 3, apic_isr3
+       ISR_VEC 4, apic_isr4
+       ISR_VEC 5, apic_isr5
+       ISR_VEC 6, apic_isr6
+       ISR_VEC 7, apic_isr7
 
 /*
  * Local APIC periodic timer handler.
@@ -129,9 +135,11 @@ IDTVEC(timerint)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
+       KENTER
        FAKE_MCOUNT(TF_EIP(%esp))
        pushl   %esp
-       call    lapic_handle_timer
+       movl    $lapic_handle_timer, %eax
+       call    *%eax
        add     $4, %esp
        MEXITCOUNT
        jmp     doreti
@@ -146,8 +154,10 @@ IDTVEC(cmcint)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
+       KENTER
        FAKE_MCOUNT(TF_EIP(%esp))
-       call    lapic_handle_cmc
+       movl    $lapic_handle_cmc, %eax
+       call    *%eax
        MEXITCOUNT
        jmp     doreti
 
@@ -161,8 +171,10 @@ IDTVEC(errorint)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
+       KENTER
        FAKE_MCOUNT(TF_EIP(%esp))
-       call    lapic_handle_error
+       movl    $lapic_handle_error, %eax
+       call    *%eax
        MEXITCOUNT
        jmp     doreti
 
@@ -177,9 +189,11 @@ IDTVEC(xen_intr_upcall)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
+       KENTER
        FAKE_MCOUNT(TF_EIP(%esp))
        pushl   %esp
-       call    xen_intr_handle_upcall
+       movl    $xen_intr_handle_upcall, %eax
+       call    *%eax
        add     $4, %esp
        MEXITCOUNT
        jmp     doreti
@@ -200,9 +214,9 @@ IDTVEC(invltlb)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
-       call    invltlb_handler
-
+       KENTER
+       movl    $invltlb_handler, %eax
+       call    *%eax
        jmp     invltlb_ret
 
 /*
@@ -214,9 +228,9 @@ IDTVEC(invlpg)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
-       call    invlpg_handler
-
+       KENTER
+       movl    $invlpg_handler, %eax
+       call    *%eax
        jmp     invltlb_ret
 
 /*
@@ -228,9 +242,9 @@ IDTVEC(invlrng)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
-       call    invlrng_handler
-
+       KENTER
+       movl    $invlrng_handler, %eax
+       call    *%eax
        jmp     invltlb_ret
 
 /*
@@ -242,9 +256,9 @@ IDTVEC(invlcache)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
-       call    invlcache_handler
-
+       KENTER
+       movl    $invlcache_handler, %eax
+       call    *%eax
        jmp     invltlb_ret
 
 /*
@@ -256,12 +270,11 @@ IDTVEC(ipi_intr_bitmap_handler)   
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
+       KENTER
        call    as_lapic_eoi
-       
        FAKE_MCOUNT(TF_EIP(%esp))
-
-       call    ipi_bitmap_handler
+       movl    $ipi_bitmap_handler, %eax
+       call    *%eax
        MEXITCOUNT
        jmp     doreti
 
@@ -274,9 +287,10 @@ IDTVEC(cpustop)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
+       KENTER
        call    as_lapic_eoi
-       call    cpustop_handler
+       movl    $cpustop_handler, %eax
+       call    *%eax
        jmp     doreti
 
 /*
@@ -288,9 +302,10 @@ IDTVEC(cpususpend)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
+       KENTER
        call    as_lapic_eoi
-       call    cpususpend_handler
+       movl    $cpususpend_handler, %eax
+       call    *%eax
        jmp     doreti
 
 /*
@@ -304,14 +319,14 @@ IDTVEC(rendezvous)
        PUSH_FRAME
        SET_KERNEL_SREGS
        cld
-
+       KENTER
 #ifdef COUNT_IPIS
        movl    PCPU(CPUID), %eax
        movl    ipi_rendezvous_counts(,%eax,4), %eax
        incl    (%eax)
 #endif
-       call    smp_rendezvous_action
-
+       movl    $smp_rendezvous_action, %eax
+       call    *%eax
        call    as_lapic_eoi
        jmp     doreti
        

Modified: head/sys/i386/i386/atpic_vector.s
==============================================================================
--- head/sys/i386/i386/atpic_vector.s   Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/i386/i386/atpic_vector.s   Fri Apr 13 20:30:49 2018        (r332489)
@@ -36,6 +36,7 @@
  * master and slave interrupt controllers.
  */
 
+#include <machine/psl.h>
 #include <machine/asmacros.h>
 
 #include "assym.inc"
@@ -43,37 +44,41 @@
 /*
  * Macros for interrupt entry, call to handler, and exit.
  */
-#define        INTR(irq_num, vec_name) \
-       .text ;                                                         \
-       SUPERALIGN_TEXT ;                                               \
-IDTVEC(vec_name ##_pti) ;                                              \
-IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
-       SET_KERNEL_SREGS ;                                              \
-       cld ;                                                           \
-;                                                                      \
-       FAKE_MCOUNT(TF_EIP(%esp)) ;                                     \
-       pushl   %esp            ;                                       \
-       pushl   $irq_num;       /* pass the IRQ */                      \
-       call    atpic_handle_intr ;                                     \
-       addl    $8, %esp ;      /* discard the parameters */            \
-;                                                                      \
-       MEXITCOUNT ;                                                    \
+       .macro  INTR    irq_num, vec_name
+       .text
+       SUPERALIGN_TEXT
+       .globl  X\()\vec_name\()_pti, X\()\vec_name
+
+X\()\vec_name\()_pti:
+X\()\vec_name:
+       PUSH_FRAME
+       SET_KERNEL_SREGS
+       cld
+       KENTER
+       FAKE_MCOUNT(TF_EIP(%esp))
+       pushl   %esp
+       pushl   $\irq_num       /* pass the IRQ */
+       movl    $atpic_handle_intr, %eax
+       call    *%eax
+       addl    $8, %esp        /* discard the parameters */
+
+       MEXITCOUNT
        jmp     doreti
+       .endm
 
-       INTR(0, atpic_intr0)
-       INTR(1, atpic_intr1)
-       INTR(2, atpic_intr2)
-       INTR(3, atpic_intr3)
-       INTR(4, atpic_intr4)
-       INTR(5, atpic_intr5)
-       INTR(6, atpic_intr6)
-       INTR(7, atpic_intr7)
-       INTR(8, atpic_intr8)
-       INTR(9, atpic_intr9)
-       INTR(10, atpic_intr10)
-       INTR(11, atpic_intr11)
-       INTR(12, atpic_intr12)
-       INTR(13, atpic_intr13)
-       INTR(14, atpic_intr14)
-       INTR(15, atpic_intr15)
+       INTR    0, atpic_intr0
+       INTR    1, atpic_intr1
+       INTR    2, atpic_intr2
+       INTR    3, atpic_intr3
+       INTR    4, atpic_intr4
+       INTR    5, atpic_intr5
+       INTR    6, atpic_intr6
+       INTR    7, atpic_intr7
+       INTR    8, atpic_intr8
+       INTR    9, atpic_intr9
+       INTR    10, atpic_intr10
+       INTR    11, atpic_intr11
+       INTR    12, atpic_intr12
+       INTR    13, atpic_intr13
+       INTR    14, atpic_intr14
+       INTR    15, atpic_intr15

Modified: head/sys/i386/i386/bios.c
==============================================================================
--- head/sys/i386/i386/bios.c   Fri Apr 13 19:43:23 2018        (r332488)
+++ head/sys/i386/i386/bios.c   Fri Apr 13 20:30:49 2018        (r332489)
@@ -305,6 +305,7 @@ set_bios_selectors(struct bios_segments *seg, int flag
 }
 
 extern int vm86pa;
+extern u_long vm86phystk;
 extern void bios16_jmp(void);
 
 /*
@@ -329,7 +330,7 @@ bios16(struct bios_args *args, char *fmt, ...)
     int        flags = BIOSCODE_FLAG | BIOSDATA_FLAG;
     u_int      i, arg_start, arg_end;
     pt_entry_t *pte;
-    pd_entry_t *ptd;
+    pd_entry_t *ptd, orig_ptd;
 
     arg_start = 0xffffffff;
     arg_end = 0;
@@ -390,27 +391,14 @@ bios16(struct bios_args *args, char *fmt, ...)
     args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME;
     args->seg.code32.limit = 0xffff;   
 
-    ptd = (pd_entry_t *)rcr3();
-#if defined(PAE) || defined(PAE_TABLES)
-    if (ptd == IdlePDPT)
-#else
-    if (ptd == IdlePTD)
-#endif
-    {
-       /*
-        * no page table, so create one and install it.
-        */
-       pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
-       ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE);
-       *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
-       *ptd = vtophys(pte) | PG_RW | PG_V;
-    } else {
-       /*
-        * this is a user-level page table 
-        */
-       pte = PTmap;
-       *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
-    }
+    /*
+     * no page table, so create one and install it.
+     */
+    pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
+    ptd = IdlePTD;
+    *pte = vm86phystk | PG_RW | PG_V;
+    orig_ptd = *ptd;
+    *ptd = vtophys(pte) | PG_RW | PG_V;
     pmap_invalidate_all(kernel_pmap);  /* XXX insurance for now */
 
     stack_top = stack;
@@ -464,20 +452,12 @@ bios16(struct bios_args *args, char *fmt, ...)
 
     i = bios16_call(&args->r, stack_top);
 
-    if (pte == PTmap) {
-       *pte = 0;                       /* remove entry */
-       /*
-        * XXX only needs to be invlpg(0) but that doesn't work on the 386 
-        */
-       pmap_invalidate_all(kernel_pmap);
-    } else {
-       *ptd = 0;                       /* remove page table */
-       /*
-        * XXX only needs to be invlpg(0) but that doesn't work on the 386 
-        */
-       pmap_invalidate_all(kernel_pmap);
-       free(pte, M_TEMP);              /* ... and free it */
-    }
+    *ptd = orig_ptd;           /* remove page table */
+    /*
+     * XXX only needs to be invlpg(0) but that doesn't work on the 386
+     */
+    pmap_invalidate_all(kernel_pmap);
+    free(pte, M_TEMP);         /* ... and free it */
     return (i);
 }
 

Added: head/sys/i386/i386/copyout.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/i386/i386/copyout.c        Fri Apr 13 20:30:49 2018        (r332489)
@@ -0,0 +1,489 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <k...@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+
+/*
+ * Value passed as the trailing kcr3 argument of every *_fast routine:
+ * IdlePDPT when the kernel is built with PAE or PAE_TABLES, IdlePTD
+ * otherwise, cast to u_int.  NOTE(review): presumably the routines load
+ * it into %cr3 to return to the kernel page tables; their implementation
+ * is not in this file -- confirm.
+ */
+#if defined(PAE) || defined(PAE_TABLES)
+#define        KCR3    ((u_int)IdlePDPT)
+#else
+#define        KCR3    ((u_int)IdlePTD)
+#endif
+
+/*
+ * Fast-path primitives, implemented outside this file.  The code here
+ * calls them only through the matching *_fast_tramp pointers, which are
+ * filled in by copyout_init_tramp().
+ */
+int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int kcr3);
+static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int);
+int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int kcr3);
+static int (*copyout_fast_tramp)(const void *, void *, size_t, u_int);
+int fubyte_fast(volatile const void *base, u_int kcr3);
+static int (*fubyte_fast_tramp)(volatile const void *, u_int);
+int fuword16_fast(volatile const void *base, u_int kcr3);
+static int (*fuword16_fast_tramp)(volatile const void *, u_int);
+int fueword_fast(volatile const void *base, long *val, u_int kcr3);
+static int (*fueword_fast_tramp)(volatile const void *, long *, u_int);
+int subyte_fast(volatile void *base, int val, u_int kcr3);
+static int (*subyte_fast_tramp)(volatile void *, int, u_int);
+int suword16_fast(volatile void *base, int val, u_int kcr3);
+static int (*suword16_fast_tramp)(volatile void *, int, u_int);
+int suword_fast(volatile void *base, long val, u_int kcr3);
+static int (*suword_fast_tramp)(volatile void *, long, u_int);
+
+/*
+ * machdep.fast_copyout: when non-zero the *_fast_tramp routines are tried
+ * before the cp_slow0() path; when zero only the slow path is used.
+ */
+static int fast_copyout = 1;
+SYSCTL_INT(_machdep, OID_AUTO, fast_copyout, CTLFLAG_RWTUN,
+    &fast_copyout, 0,
+    "Try the trampoline fast path before the slow copyin/copyout path");
+/*
+ * Initialize the eight *_fast_tramp function pointers used by the fast
+ * paths in this file.  Each pointer is the address of the matching *_fast
+ * routine offset by setidt_disp, cast back to that routine's prototype.
+ * The pointers are static and therefore NULL until this function has run.
+ * NOTE(review): setidt_disp is defined elsewhere; presumably it is the
+ * displacement of the trampoline mapping of the code -- confirm.
+ */
+void
+copyout_init_tramp(void)
+{
+
+       copyin_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
+           (uintptr_t)copyin_fast + setidt_disp);
+       copyout_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
+           (uintptr_t)copyout_fast + setidt_disp);
+       fubyte_fast_tramp = (int (*)(volatile const void *, u_int))(
+           (uintptr_t)fubyte_fast + setidt_disp);
+       fuword16_fast_tramp = (int (*)(volatile const void *, u_int))(
+           (uintptr_t)fuword16_fast + setidt_disp);
+       fueword_fast_tramp = (int (*)(volatile const void *, long *, u_int))(
+           (uintptr_t)fueword_fast + setidt_disp);
+       subyte_fast_tramp = (int (*)(volatile void *, int, u_int))(
+           (uintptr_t)subyte_fast + setidt_disp);
+       suword16_fast_tramp = (int (*)(volatile void *, int, u_int))(
+           (uintptr_t)suword16_fast + setidt_disp);
+       suword_fast_tramp = (int (*)(volatile void *, long, u_int))(
+           (uintptr_t)suword_fast + setidt_disp);
+}
+/*
+ * Slow path shared by the user-access routines in this file: run the
+ * callback f on a temporary kernel mapping of the user range
+ * [uva, uva + len).
+ *
+ * uva   - user virtual address of the first byte.
+ * len   - length in bytes; the range may touch at most nitems(m) == 2
+ *         pages (asserted with MPASS).
+ * write - true requests VM_PROT_WRITE on the held pages, false requests
+ *         VM_PROT_READ.
+ * f     - callback invoked as f(kva, arg), where kva is the kernel
+ *         address that corresponds to uva.
+ * arg   - opaque pointer handed through to f.
+ *
+ * The pages are held with vm_fault_quick_hold_pages(), mapped read/write
+ * at a per-CPU kernel address, and unheld again after f returns.  The
+ * mutex-protected address is used when the thread cannot sleep, has
+ * td_vslock_sz > 0, or has TDP_NOFAULTING set; otherwise the sx-protected
+ * address is used.
+ *
+ * Returns EFAULT if the expected number of pages could not be held, and
+ * 0 otherwise; f has no way to report an error.
+ */
+static int
+cp_slow0(vm_offset_t uva, size_t len, bool write,
+    void (*f)(vm_offset_t, void *), void *arg)
+{
+       struct pcpu *pc;
+       vm_page_t m[2];
+       pt_entry_t *pte;
+       vm_offset_t kaddr;
+       int error, i, plen;
+       bool sleepable;
+
+       plen = howmany(uva - trunc_page(uva) + len, PAGE_SIZE);
+       MPASS(plen <= nitems(m));
+       error = 0;
+       i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, uva, len,
+           (write ? VM_PROT_WRITE : VM_PROT_READ) | VM_PROT_QUICK_NOFAULT,
+           m, nitems(m));
+       if (i != plen)
+               return (EFAULT);
+       sched_pin();            /* pc_copyout_* below are per-CPU */
+       pc = get_pcpu();
+       if (!THREAD_CAN_SLEEP() || curthread->td_vslock_sz > 0 ||
+           (curthread->td_pflags & TDP_NOFAULTING) != 0) {
+               sleepable = false;      /* mutex-protected window */
+               mtx_lock(&pc->pc_copyout_mlock);
+               kaddr = pc->pc_copyout_maddr;
+       } else {
+               sleepable = true;       /* sx-protected window */
+               sx_xlock(&pc->pc_copyout_slock);
+               kaddr = pc->pc_copyout_saddr;
+       }
+       for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) {
+               *pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) |
+                   pmap_cache_bits(pmap_page_get_memattr(m[i]), FALSE);
+               invlpg(kaddr + ptoa(i));        /* drop any stale TLB entry */
+       }
+       kaddr += uva - trunc_page(uva); /* offset within the first page */
+       f(kaddr, arg);
+       sched_unpin();
+       if (sleepable)
+               sx_xunlock(&pc->pc_copyout_slock);
+       else
+               mtx_unlock(&pc->pc_copyout_mlock);
+       for (i = 0; i < plen; i++) {
+               vm_page_lock(m[i]);
+               vm_page_unhold(m[i]);
+               vm_page_unlock(m[i]);
+       }
+       return (error);         /* error is never changed from 0 */
+}
+/*
+ * State shared between copyinstr() and its cp_slow0() callback
+ * copyinstr_slow0() while one chunk of the string is copied.
+ */
+struct copyinstr_arg0 {
+       vm_offset_t kc;         /* kernel address of the next byte to store */
+       size_t len;             /* in: most bytes to take from this chunk */
+       size_t alen;            /* out: bytes copied from this chunk */
+       bool end;               /* set once a NUL byte has been copied */
+};
+/*
+ * cp_slow0() callback for copyinstr().  Copies bytes one at a time from
+ * the kernel mapping at kva to ca->kc until ca->len bytes have been
+ * copied or a NUL byte has been copied (the NUL is stored and counted).
+ * On return ca->alen is the number of bytes copied, ca->kc has advanced
+ * by that amount, and ca->end is true if the NUL was seen.  arg must
+ * point to a struct copyinstr_arg0 with alen == 0, len > 0 and end false.
+ */
+static void
+copyinstr_slow0(vm_offset_t kva, void *arg)
+{
+       struct copyinstr_arg0 *ca;
+       char c;
+
+       ca = arg;
+       MPASS(ca->alen == 0 && ca->len > 0 && !ca->end);
+       while (ca->alen < ca->len && !ca->end) {
+               c = *(char *)(kva + ca->alen);
+               *(char *)ca->kc = c;
+               ca->alen++;
+               ca->kc++;
+               if (c == '\0')
+                       ca->end = true;
+       }
+}
+/*
+ * Copy a NUL-terminated string from the user address udaddr to the
+ * kernel buffer kaddr, storing at most maxlen bytes including the NUL.
+ *
+ * The string is processed through cp_slow0() in chunks that end at the
+ * next user page boundary or at maxlen, whichever comes first; no fast
+ * path is attempted here.
+ *
+ * Returns 0 when the terminating NUL was copied, EFAULT when cp_slow0()
+ * fails for a chunk, and ENAMETOOLONG when maxlen bytes were copied
+ * without finding a NUL (this includes maxlen == 0).  If lencopied is not
+ * NULL it receives the number of bytes copied by the completed chunks,
+ * counting the NUL when it was copied.
+ */
+int
+copyinstr(const void *udaddr, void *kaddr, size_t maxlen, size_t *lencopied)
+{
+       struct copyinstr_arg0 ca;
+       vm_offset_t uc;
+       size_t plen;
+       int error;
+
+       error = 0;
+       ca.end = false;
+       for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+           plen < maxlen && !ca.end; uc += ca.alen, plen += ca.alen) {
+               ca.len = round_page(uc) - uc;   /* bytes left in this page */
+               if (ca.len == 0)                /* uc is page aligned */
+                       ca.len = PAGE_SIZE;
+               if (plen + ca.len > maxlen)
+                       ca.len = maxlen - plen;
+               ca.alen = 0;
+               if (cp_slow0(uc, ca.len, false, copyinstr_slow0, &ca) != 0) {
+                       error = EFAULT;
+                       break;
+               }
+       }
+       if (!ca.end && plen == maxlen && error == 0)
+               error = ENAMETOOLONG;
+       if (lencopied != NULL)
+               *lencopied = plen;
+       return (error);
+}
+/*
+ * Argument block for the copyin_slow0() and copyout_slow0() callbacks;
+ * describes the kernel side of one chunk handed to cp_slow0().
+ */
+struct copyin_arg0 {
+       vm_offset_t kc;         /* kernel-side address for this chunk */
+       size_t len;             /* number of bytes in this chunk */
+};
+/*
+ * cp_slow0() callback for copyin(): copy ca->len bytes from the kernel
+ * mapping of the user pages at kva to the kernel buffer at ca->kc.  arg
+ * must point to a struct copyin_arg0.
+ */
+static void
+copyin_slow0(vm_offset_t kva, void *arg)
+{
+       struct copyin_arg0 *ca;
+
+       ca = arg;
+       bcopy((void *)kva, (void *)ca->kc, ca->len);
+}
+
+/*
+ * Copy len bytes from the user address udaddr to the kernel buffer kaddr.
+ *
+ * The user range is rejected if udaddr + len wraps around or extends past
+ * VM_MAXUSER_ADDRESS.  A zero-length copy succeeds immediately.  Requests
+ * of at most TRAMP_COPYOUT_SZ bytes first try copyin_fast_tramp() when
+ * fast_copyout is set; if the fast path is skipped or fails, the copy is
+ * done through cp_slow0() in chunks that never cross a user page boundary.
+ *
+ * Returns 0 on success and EFAULT on any failure, as documented by
+ * copy(9).  The range check used to return -1, which is not an errno
+ * value and disagreed with the slow-path failure return below.
+ */
+int
+copyin(const void *udaddr, void *kaddr, size_t len)
+{
+       struct copyin_arg0 ca;
+       vm_offset_t uc;
+       size_t plen;
+
+       if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
+           (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
+               return (EFAULT);
+       if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
+           copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0))
+               return (0);
+       for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+           plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
+               ca.len = round_page(uc) - uc;   /* bytes left in this page */
+               if (ca.len == 0)                /* uc is page aligned */
+                       ca.len = PAGE_SIZE;
+               if (plen + ca.len > len)
+                       ca.len = len - plen;
+               if (cp_slow0(uc, ca.len, false, copyin_slow0, &ca) != 0)
+                       return (EFAULT);
+       }
+       return (0);
+}
+/*
+ * cp_slow0() callback for copyout(): copy ca->len bytes from the kernel
+ * buffer at ca->kc to the kernel mapping of the user pages at kva.  arg
+ * must point to a struct copyin_arg0, which copyout() reuses.
+ */
+static void
+copyout_slow0(vm_offset_t kva, void *arg)
+{
+       struct copyin_arg0 *ca;
+
+       ca = arg;
+       bcopy((void *)ca->kc, (void *)kva, ca->len);
+}
+
+/*
+ * Copy len bytes from the kernel buffer kaddr to the user address udaddr.
+ *
+ * The user range is rejected if udaddr + len wraps around or extends past
+ * VM_MAXUSER_ADDRESS.  A zero-length copy succeeds immediately.  Requests
+ * of at most TRAMP_COPYOUT_SZ bytes first try copyout_fast_tramp() when
+ * fast_copyout is set; if the fast path is skipped or fails, the copy is
+ * done through cp_slow0() (with write access requested) in chunks that
+ * never cross a user page boundary.
+ *
+ * Returns 0 on success and EFAULT on any failure, as documented by
+ * copy(9).  The range check used to return -1, which is not an errno
+ * value and disagreed with the slow-path failure return below.
+ */
+int
+copyout(const void *kaddr, void *udaddr, size_t len)
+{
+       struct copyin_arg0 ca;
+       vm_offset_t uc;
+       size_t plen;
+
+       if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
+           (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
+               return (EFAULT);
+       if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
+           copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0))
+               return (0);
+       for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+           plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
+               ca.len = round_page(uc) - uc;   /* bytes left in this page */
+               if (ca.len == 0)                /* uc is page aligned */
+                       ca.len = PAGE_SIZE;
+               if (plen + ca.len > len)
+                       ca.len = len - plen;
+               if (cp_slow0(uc, ca.len, true, copyout_slow0, &ca) != 0)
+                       return (EFAULT);
+       }
+       return (0);
+}
+
+/*
+ * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
+ * memory.
+ */
+/*
+ * cp_slow0() callback for fubyte(): read one unsigned byte at kva and
+ * store it, zero-extended, into the int that arg points to.
+ */
+static void
+fubyte_slow0(vm_offset_t kva, void *arg)
+{
+
+       *(int *)arg = *(u_char *)kva;
+}
+/*
+ * Fetch one byte from the user address base.
+ *
+ * Returns the byte as a non-negative int, or -1 if the address range
+ * wraps or extends past VM_MAXUSER_ADDRESS, or if the slow path fails.
+ * When fast_copyout is set, fubyte_fast_tramp() is tried first; a result
+ * of -1 from it is treated as a failure and the access is retried through
+ * cp_slow0().
+ */
+int
+fubyte(volatile const void *base)
+{
+       int res;
+
+       if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base ||
+           (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
+               return (-1);
+       if (fast_copyout) {
+               res = fubyte_fast_tramp(base, KCR3);
+               if (res != -1)
+                       return (res);
+       }
+       if (cp_slow0((vm_offset_t)base, sizeof(char), false, fubyte_slow0,
+           &res) != 0)
+               return (-1);
+       return (res);
+}
+/*
+ * cp_slow0() callback for fuword16(): read one uint16_t at kva and store
+ * it, zero-extended, into the int that arg points to.
+ */
+static void
+fuword16_slow0(vm_offset_t kva, void *arg)
+{
+
+       *(int *)arg = *(uint16_t *)kva;
+}
+/*
+ * Fetch one 16-bit word from the user address base.
+ *
+ * Returns the word as a non-negative int, or -1 if the address range
+ * wraps or extends past VM_MAXUSER_ADDRESS, or if the slow path fails.
+ * When fast_copyout is set, fuword16_fast_tramp() is tried first; a
+ * result of -1 from it is treated as a failure and the access is retried
+ * through cp_slow0().
+ */
+int
+fuword16(volatile const void *base)
+{
+       int res;
+
+       if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base ||
+           (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
+               return (-1);
+       if (fast_copyout) {
+               res = fuword16_fast_tramp(base, KCR3);
+               if (res != -1)
+                       return (res);
+       }
+       if (cp_slow0((vm_offset_t)base, sizeof(uint16_t), false,
+           fuword16_slow0, &res) != 0)
+               return (-1);
+       return (res);
+}
+/*
+ * cp_slow0() callback for fueword(): copy one uint32_t from kva into the
+ * uint32_t that arg points to.
+ */
+static void
+fueword_slow0(vm_offset_t kva, void *arg)
+{
+
+       *(uint32_t *)arg = *(uint32_t *)kva;
+}
+/*
+ * Fetch one long-sized word from the user address base into *val.
+ *
+ * Returns 0 on success and -1 if the address range wraps or extends past
+ * VM_MAXUSER_ADDRESS, or if the slow path fails.  Unlike fubyte() and
+ * fuword16(), the value travels through *val, so the return value is
+ * only a status.  When fast_copyout is set, fueword_fast_tramp() is tried
+ * first and the cp_slow0() path is used only if it does not return 0.
+ * The slow path transfers exactly 32 bits through a uint32_t temporary.
+ */
+int
+fueword(volatile const void *base, long *val)
+{
+       uint32_t res;
+
+       if ((uintptr_t)base + sizeof(*val) < (uintptr_t)base ||
+           (uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS)
+               return (-1);
+       if (fast_copyout) {
+               if (fueword_fast_tramp(base, val, KCR3) == 0)
+                       return (0);
+       }
+       if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0,
+           &res) != 0)
+               return (-1);
+       *val = res;
+       return (0);
+}
+
+int
+fueword32(volatile const void *base, int32_t *val)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to