Author: jeff Date: Tue Jun 23 22:42:39 2009 New Revision: 194784 URL: http://svn.freebsd.org/changeset/base/194784
Log: Implement a facility for dynamic per-cpu variables. - Modules and kernel code alike may use DPCPU_DEFINE(), DPCPU_GET(), DPCPU_SET(), etc. akin to the statically defined PCPU_*. Requires only one more instruction than PCPU_* and is virtually the same as __thread for builtin and much faster for shared objects. DPCPU variables can be initialized when defined. - Modules are supported by relocating the module's per-cpu linker set over space reserved in the kernel. Modules may fail to load if there is insufficient space available. - Track space available for modules with a one-off extent allocator. Freeing space may block while memory is allocated for a new extent. Reviewed by: jhb, rwatson, kan, sam, grehan, marius, marcel, stas Modified: head/sys/amd64/amd64/machdep.c head/sys/amd64/amd64/mp_machdep.c head/sys/arm/arm/elf_machdep.c head/sys/arm/at91/at91_machdep.c head/sys/arm/mv/mv_machdep.c head/sys/arm/sa11x0/assabet_machdep.c head/sys/arm/xscale/i80321/ep80219_machdep.c head/sys/arm/xscale/i80321/iq31244_machdep.c head/sys/arm/xscale/i8134x/crb_machdep.c head/sys/arm/xscale/ixp425/avila_machdep.c head/sys/arm/xscale/pxa/pxa_machdep.c head/sys/i386/i386/elf_machdep.c head/sys/i386/i386/machdep.c head/sys/i386/i386/mp_machdep.c head/sys/i386/xen/mp_machdep.c head/sys/ia64/ia64/elf_machdep.c head/sys/ia64/ia64/machdep.c head/sys/ia64/ia64/mp_machdep.c head/sys/kern/link_elf.c head/sys/kern/link_elf_obj.c head/sys/kern/subr_pcpu.c head/sys/mips/mips/elf_machdep.c head/sys/mips/mips/mp_machdep.c head/sys/mips/mips/pmap.c head/sys/pc98/pc98/machdep.c head/sys/powerpc/aim/mmu_oea.c head/sys/powerpc/aim/mmu_oea64.c head/sys/powerpc/booke/pmap.c head/sys/powerpc/powerpc/elf_machdep.c head/sys/powerpc/powerpc/mp_machdep.c head/sys/sparc64/include/pcpu.h head/sys/sparc64/sparc64/elf_machdep.c head/sys/sparc64/sparc64/machdep.c head/sys/sparc64/sparc64/mp_machdep.c head/sys/sparc64/sparc64/pmap.c head/sys/sun4v/include/pcpu.h head/sys/sun4v/sun4v/machdep.c 
head/sys/sun4v/sun4v/mp_machdep.c head/sys/sun4v/sun4v/pmap.c head/sys/sys/linker.h head/sys/sys/pcpu.h head/sys/sys/sysctl.h Modified: head/sys/amd64/amd64/machdep.c ============================================================================== --- head/sys/amd64/amd64/machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/amd64/amd64/machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -1501,6 +1501,8 @@ hammer_time(u_int64_t modulep, u_int64_t wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); + dpcpu_init((void *)(physfree + KERNBASE), 0); + physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); Modified: head/sys/amd64/amd64/mp_machdep.c ============================================================================== --- head/sys/amd64/amd64/mp_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/amd64/amd64/mp_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -93,9 +93,10 @@ static int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; -/* Temporary holder for double fault stack */ +/* Temporary variables for init_secondary() */ char *doublefault_stack; char *nmi_stack; +void *dpcpu; /* Hotwire a 0->4MB V==P mapping */ extern pt_entry_t *KPTphys; @@ -590,6 +591,7 @@ init_secondary(void) /* prime data page for it to use */ pcpu_init(pc, cpu, sizeof(struct pcpu)); + dpcpu_init(dpcpu, cpu); pc->pc_apic_id = cpu_apic_ids[cpu]; pc->pc_prvspace = pc; pc->pc_curthread = 0; @@ -885,6 +887,7 @@ start_all_aps(void) bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); + dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; Modified: head/sys/arm/arm/elf_machdep.c ============================================================================== 
--- head/sys/arm/arm/elf_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/arm/elf_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -149,7 +149,7 @@ elf_reloc_internal(linker_file_t lf, Elf if (local) { if (rtype == R_ARM_RELATIVE) { /* A + B */ - addr = relocbase + addend; + addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } Modified: head/sys/arm/at91/at91_machdep.c ============================================================================== --- head/sys/arm/at91/at91_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/at91/at91_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -215,6 +215,7 @@ void * initarm(void *arg, void *arg2) { struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; int loop, i; u_int l1pagetable; vm_offset_t freemempos; @@ -264,6 +265,10 @@ initarm(void *arg, void *arg2) */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. */ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/arm/mv/mv_machdep.c ============================================================================== --- head/sys/arm/mv/mv_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/mv/mv_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -358,6 +358,7 @@ void * initarm(void *mdp, void *unused __unused) { struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; vm_offset_t freemempos, l2_start, lastaddr; uint32_t memsize, l2size; struct bi_mem_region *mr; @@ -479,6 +480,10 @@ initarm(void *mdp, void *unused __unused */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. 
*/ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/arm/sa11x0/assabet_machdep.c ============================================================================== --- head/sys/arm/sa11x0/assabet_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/sa11x0/assabet_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -209,6 +209,7 @@ initarm(void *arg, void *arg2) struct pv_addr kernel_l1pt; struct pv_addr md_addr; struct pv_addr md_bla; + struct pv_addr dpcpu; int loop; u_int l1pagetable; vm_offset_t freemempos; @@ -268,6 +269,10 @@ initarm(void *arg, void *arg2) */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. */ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/arm/xscale/i80321/ep80219_machdep.c ============================================================================== --- head/sys/arm/xscale/i80321/ep80219_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/xscale/i80321/ep80219_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -186,6 +186,7 @@ void * initarm(void *arg, void *arg2) { struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; int loop, i; u_int l1pagetable; vm_offset_t freemempos; @@ -236,6 +237,10 @@ initarm(void *arg, void *arg2) */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. 
*/ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/arm/xscale/i80321/iq31244_machdep.c ============================================================================== --- head/sys/arm/xscale/i80321/iq31244_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/xscale/i80321/iq31244_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -187,6 +187,7 @@ void * initarm(void *arg, void *arg2) { struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; int loop, i; u_int l1pagetable; vm_offset_t freemempos; @@ -236,6 +237,10 @@ initarm(void *arg, void *arg2) */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. */ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/arm/xscale/i8134x/crb_machdep.c ============================================================================== --- head/sys/arm/xscale/i8134x/crb_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/xscale/i8134x/crb_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -183,6 +183,7 @@ void * initarm(void *arg, void *arg2) { struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; int loop, i; u_int l1pagetable; vm_offset_t freemempos; @@ -232,6 +233,10 @@ initarm(void *arg, void *arg2) */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. 
*/ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/arm/xscale/ixp425/avila_machdep.c ============================================================================== --- head/sys/arm/xscale/ixp425/avila_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/xscale/ixp425/avila_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -232,6 +232,7 @@ initarm(void *arg, void *arg2) #define next_chunk2(a,b) (((a) + (b)) &~ ((b)-1)) #define next_page(a) next_chunk2(a,PAGE_SIZE) struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; int loop, i; u_int l1pagetable; vm_offset_t freemempos; @@ -303,6 +304,10 @@ initarm(void *arg, void *arg2) */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. */ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/arm/xscale/pxa/pxa_machdep.c ============================================================================== --- head/sys/arm/xscale/pxa/pxa_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/arm/xscale/pxa/pxa_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -166,6 +166,7 @@ void * initarm(void *arg, void *arg2) { struct pv_addr kernel_l1pt; + struct pv_addr dpcpu; int loop; u_int l1pagetable; vm_offset_t freemempos; @@ -218,6 +219,10 @@ initarm(void *arg, void *arg2) */ valloc_pages(systempage, 1); + /* Allocate dynamic per-cpu area. 
*/ + valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu.pv_va, 0); + /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); Modified: head/sys/i386/i386/elf_machdep.c ============================================================================== --- head/sys/i386/i386/elf_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/i386/i386/elf_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -149,7 +149,7 @@ elf_reloc_internal(linker_file_t lf, Elf if (local) { if (rtype == R_386_RELATIVE) { /* A + B */ - addr = relocbase + addend; + addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } Modified: head/sys/i386/i386/machdep.c ============================================================================== --- head/sys/i386/i386/machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/i386/i386/machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -2448,7 +2448,7 @@ init386(first) int first; { unsigned long gdtmachpfn; - int error, gsel_tss, metadata_missing, x; + int error, gsel_tss, metadata_missing, x, pa; struct pcpu *pc; struct callback_register event = { .type = CALLBACKTYPE_event, @@ -2532,6 +2532,11 @@ init386(first) GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); #endif pcpu_init(pc, 0, sizeof(struct pcpu)); + for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) + pmap_kenter(pa + KERNBASE, pa); + dpcpu_init((void *)(first + KERNBASE), 0); + first += DPCPU_SIZE; + PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); @@ -2665,7 +2670,7 @@ init386(first) int first; { struct gate_descriptor *gdp; - int gsel_tss, metadata_missing, x; + int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; thread0.td_kstack = proc0kstack; @@ -2718,6 +2723,10 @@ init386(first) lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); + for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) + pmap_kenter(pa + KERNBASE, pa); + 
dpcpu_init((void *)(first + KERNBASE), 0); + first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); Modified: head/sys/i386/i386/mp_machdep.c ============================================================================== --- head/sys/i386/i386/mp_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/i386/i386/mp_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -143,6 +143,7 @@ static int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; +static void *dpcpu; /* Hotwire a 0->4MB V==P mapping */ extern pt_entry_t *KPTphys; @@ -610,6 +611,7 @@ init_secondary(void) /* prime data page for it to use */ pcpu_init(pc, myid, sizeof(struct pcpu)); + dpcpu_init(dpcpu, myid); pc->pc_apic_id = cpu_apic_ids[myid]; pc->pc_prvspace = pc; pc->pc_curthread = 0; @@ -897,8 +899,9 @@ start_all_aps(void) apic_id = cpu_apic_ids[cpu]; /* allocate and set up a boot stack data page */ - bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - + bootstacks[cpu] = + (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); + dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); Modified: head/sys/i386/xen/mp_machdep.c ============================================================================== --- head/sys/i386/xen/mp_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/i386/xen/mp_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -744,6 +744,7 @@ start_all_aps(void) /* Get per-cpu data */ pc = &__pcpu[bootAP]; pcpu_init(pc, bootAP, sizeof(struct pcpu)); + dpcpu_init((void *)kmem_alloc(kernel_map, DPCPU_SIZE), bootAP); pc->pc_apic_id = cpu_apic_ids[bootAP]; pc->pc_prvspace = pc; pc->pc_curthread = 0; Modified: head/sys/ia64/ia64/elf_machdep.c ============================================================================== --- 
head/sys/ia64/ia64/elf_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/ia64/ia64/elf_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -211,7 +211,7 @@ elf_reloc_internal(linker_file_t lf, Elf if (local) { if (rtype == R_IA_64_REL64LSB) - *where = relocbase + addend; + *where = elf_relocaddr(lf, relocbase + addend); return (0); } Modified: head/sys/ia64/ia64/machdep.c ============================================================================== --- head/sys/ia64/ia64/machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/ia64/ia64/machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -647,6 +647,21 @@ ia64_init(void) bootverbose = 1; /* + * Find the beginning and end of the kernel. + */ + kernstart = trunc_page(kernel_text); +#ifdef DDB + ksym_start = bootinfo.bi_symtab; + ksym_end = bootinfo.bi_esymtab; + kernend = (vm_offset_t)round_page(ksym_end); +#else + kernend = (vm_offset_t)round_page(_end); +#endif + /* But if the bootstrap tells us otherwise, believe it! */ + if (bootinfo.bi_kernend) + kernend = round_page(bootinfo.bi_kernend); + + /* * Setup the PCPU data for the bootstrap processor. It is needed * by printf(). Also, since printf() has critical sections, we * need to initialize at least pc_curthread. @@ -654,6 +669,8 @@ ia64_init(void) pcpup = &pcpu0; ia64_set_k4((u_int64_t)pcpup); pcpu_init(pcpup, 0, sizeof(pcpu0)); + dpcpu_init((void *)kernend, 0); + kernend += DPCPU_SIZE; PCPU_SET(curthread, &thread0); /* @@ -682,21 +699,6 @@ ia64_init(void) ia64_sal_init(); calculate_frequencies(); - /* - * Find the beginning and end of the kernel. - */ - kernstart = trunc_page(kernel_text); -#ifdef DDB - ksym_start = bootinfo.bi_symtab; - ksym_end = bootinfo.bi_esymtab; - kernend = (vm_offset_t)round_page(ksym_end); -#else - kernend = (vm_offset_t)round_page(_end); -#endif - - /* But if the bootstrap tells us otherwise, believe it! 
*/ - if (bootinfo.bi_kernend) - kernend = round_page(bootinfo.bi_kernend); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); Modified: head/sys/ia64/ia64/mp_machdep.c ============================================================================== --- head/sys/ia64/ia64/mp_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/ia64/ia64/mp_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -207,6 +207,7 @@ cpu_mp_add(u_int acpiid, u_int apicid, u { struct pcpu *pc; u_int64_t lid; + void *dpcpu; /* Ignore any processor numbers outside our range */ if (acpiid > mp_maxid) @@ -224,7 +225,9 @@ cpu_mp_add(u_int acpiid, u_int apicid, u if (acpiid != 0) { pc = (struct pcpu *)malloc(sizeof(*pc), M_SMP, M_WAITOK); + dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); pcpu_init(pc, acpiid, sizeof(*pc)); + dpcpu_init(dpcpu, acpiid); } else pc = pcpup; Modified: head/sys/kern/link_elf.c ============================================================================== --- head/sys/kern/link_elf.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/kern/link_elf.c Tue Jun 23 22:42:39 2009 (r194784) @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/mount.h> +#include <sys/pcpu.h> #include <sys/proc.h> #include <sys/namei.h> #include <sys/fcntl.h> @@ -107,6 +108,9 @@ typedef struct elf_file { caddr_t ctfoff; /* CTF offset table */ caddr_t typoff; /* Type offset table */ long typlen; /* Number of type entries. */ + Elf_Addr pcpu_start; /* Pre-relocation pcpu set start. */ + Elf_Addr pcpu_stop; /* Pre-relocation pcpu set stop. */ + Elf_Addr pcpu_base; /* Relocated pcpu set address. 
*/ #ifdef GDB struct link_map gdb; /* hooks for gdb */ #endif @@ -475,6 +479,34 @@ parse_dynamic(elf_file_t ef) } static int +parse_dpcpu(elf_file_t ef) +{ + int count; + int error; + + ef->pcpu_start = 0; + ef->pcpu_stop = 0; + error = link_elf_lookup_set(&ef->lf, "pcpu", (void ***)&ef->pcpu_start, + (void ***)&ef->pcpu_stop, &count); + /* Error just means there is no pcpu set to relocate. */ + if (error) + return (0); + count *= sizeof(void *); + /* + * Allocate space in the primary pcpu area. Copy in our initialization + * from the data section and then initialize all per-cpu storage from + * that. + */ + ef->pcpu_base = (Elf_Addr)(uintptr_t)dpcpu_alloc(count); + if (ef->pcpu_base == (Elf_Addr)NULL) + return (ENOSPC); + memcpy((void *)ef->pcpu_base, (void *)ef->pcpu_start, count); + dpcpu_copy((void *)ef->pcpu_base, count); + + return (0); +} + +static int link_elf_link_preload(linker_class_t cls, const char* filename, linker_file_t *result) { @@ -519,6 +551,8 @@ link_elf_link_preload(linker_class_t cls lf->size = *(size_t *)sizeptr; error = parse_dynamic(ef); + if (error == 0) + error = parse_dpcpu(ef); if (error) { linker_file_unload(lf, LINKER_UNLOAD_FORCE); return error; @@ -801,6 +835,9 @@ link_elf_load_file(linker_class_t cls, c error = parse_dynamic(ef); if (error) goto out; + error = parse_dpcpu(ef); + if (error) + goto out; link_elf_reloc_local(lf); VOP_UNLOCK(nd.ni_vp, 0); @@ -897,11 +934,26 @@ out: return error; } +Elf_Addr +elf_relocaddr(linker_file_t lf, Elf_Addr x) +{ + elf_file_t ef; + + ef = (elf_file_t)lf; + if (x >= ef->pcpu_start && x < ef->pcpu_stop) + return ((x - ef->pcpu_start) + ef->pcpu_base); + return (x); +} + + static void link_elf_unload_file(linker_file_t file) { elf_file_t ef = (elf_file_t) file; + if (ef->pcpu_base) { + dpcpu_free((void *)ef->pcpu_base, ef->pcpu_stop - ef->pcpu_start); + } #ifdef GDB if (ef->gdb.l_ld) { GDB_STATE(RT_DELETE); Modified: head/sys/kern/link_elf_obj.c 
============================================================================== --- head/sys/kern/link_elf_obj.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/kern/link_elf_obj.c Tue Jun 23 22:42:39 2009 (r194784) @@ -333,6 +333,20 @@ link_elf_link_preload(linker_class_t cls if (ef->shstrtab && shdr[i].sh_name != 0) ef->progtab[pb].name = ef->shstrtab + shdr[i].sh_name; + if (ef->progtab[pb].name != NULL && + !strcmp(ef->progtab[pb].name, "set_pcpu")) { + void *dpcpu; + + dpcpu = dpcpu_alloc(shdr[i].sh_size); + if (dpcpu == NULL) { + error = ENOSPC; + goto out; + } + memcpy(dpcpu, ef->progtab[pb].addr, + ef->progtab[pb].size); + dpcpu_copy(dpcpu, shdr[i].sh_size); + ef->progtab[pb].addr = dpcpu; + } /* Update all symbol values with the offset. */ for (j = 0; j < ef->ddbsymcnt; j++) { @@ -712,9 +726,27 @@ link_elf_load_file(linker_class_t cls, c alignmask = shdr[i].sh_addralign - 1; mapbase += alignmask; mapbase &= ~alignmask; - ef->progtab[pb].addr = (void *)(uintptr_t)mapbase; - if (shdr[i].sh_type == SHT_PROGBITS) { + if (ef->shstrtab && shdr[i].sh_name != 0) + ef->progtab[pb].name = + ef->shstrtab + shdr[i].sh_name; + else if (shdr[i].sh_type == SHT_PROGBITS) ef->progtab[pb].name = "<<PROGBITS>>"; + else + ef->progtab[pb].name = "<<NOBITS>>"; + if (ef->progtab[pb].name != NULL && + !strcmp(ef->progtab[pb].name, "set_pcpu")) + ef->progtab[pb].addr = + dpcpu_alloc(shdr[i].sh_size); + else + ef->progtab[pb].addr = + (void *)(uintptr_t)mapbase; + if (ef->progtab[pb].addr == NULL) { + error = ENOSPC; + goto out; + } + ef->progtab[pb].size = shdr[i].sh_size; + ef->progtab[pb].sec = i; + if (shdr[i].sh_type == SHT_PROGBITS) { error = vn_rdwr(UIO_READ, nd.ni_vp, ef->progtab[pb].addr, shdr[i].sh_size, shdr[i].sh_offset, @@ -726,15 +758,12 @@ link_elf_load_file(linker_class_t cls, c error = EINVAL; goto out; } - } else { - ef->progtab[pb].name = "<<NOBITS>>"; + /* Initialize the per-cpu area. 
*/ + if (ef->progtab[pb].addr != (void *)mapbase) + dpcpu_copy(ef->progtab[pb].addr, + shdr[i].sh_size); + } else bzero(ef->progtab[pb].addr, shdr[i].sh_size); - } - ef->progtab[pb].size = shdr[i].sh_size; - ef->progtab[pb].sec = i; - if (ef->shstrtab && shdr[i].sh_name != 0) - ef->progtab[pb].name = - ef->shstrtab + shdr[i].sh_name; /* Update all symbol values with the offset. */ for (j = 0; j < ef->ddbsymcnt; j++) { @@ -839,6 +868,17 @@ link_elf_unload_file(linker_file_t file) /* Notify MD code that a module is being unloaded. */ elf_cpu_unload_file(file); + if (ef->progtab) { + for (i = 0; i < ef->nprogtab; i++) { + if (ef->progtab[i].size == 0) + continue; + if (ef->progtab[i].name == NULL) + continue; + if (!strcmp(ef->progtab[i].name, "set_pcpu")) + dpcpu_free(ef->progtab[i].addr, + ef->progtab[i].size); + } + } if (ef->preloaded) { if (ef->reltab) free(ef->reltab, M_LINKER); Modified: head/sys/kern/subr_pcpu.c ============================================================================== --- head/sys/kern/subr_pcpu.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/kern/subr_pcpu.c Tue Jun 23 22:42:39 2009 (r194784) @@ -3,6 +3,9 @@ * All rights reserved. * Written by: John Baldwin <j...@freebsd.org> * + * Copyright (c) 2009 Jeffrey Roberson <j...@freebsd.org> + * All rights reserved. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -49,13 +52,28 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/sysctl.h> #include <sys/linker_set.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/pcpu.h> #include <sys/proc.h> #include <sys/smp.h> +#include <sys/sx.h> #include <ddb/ddb.h> +MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting."); + +struct dpcpu_free { + uintptr_t df_start; + int df_len; + TAILQ_ENTRY(dpcpu_free) df_link; +}; + +static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]); +static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head); +static struct sx dpcpu_lock; +uintptr_t dpcpu_off[MAXCPU]; struct pcpu *cpuid_to_pcpu[MAXCPU]; struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead); @@ -79,7 +97,146 @@ pcpu_init(struct pcpu *pcpu, int cpuid, #ifdef KTR snprintf(pcpu->pc_name, sizeof(pcpu->pc_name), "CPU %d", cpuid); #endif +} + +void +dpcpu_init(void *dpcpu, int cpuid) +{ + struct pcpu *pcpu; + + pcpu = pcpu_find(cpuid); + pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START; + + /* + * Initialize defaults from our linker section. + */ + memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES); + + /* + * Place it in the global pcpu offset array. + */ + dpcpu_off[cpuid] = pcpu->pc_dynamic; +} + +static void +dpcpu_startup(void *dummy __unused) +{ + struct dpcpu_free *df; + + df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO); + df->df_start = (uintptr_t)&DPCPU_NAME(modspace); + df->df_len = DPCPU_MODSIZE; + TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link); + sx_init(&dpcpu_lock, "dpcpu alloc lock"); +} +SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0); + +/* + * First-fit extent based allocator for allocating space in the per-cpu + * region reserved for modules. This is only intended for use by the + * kernel linkers to place module linker sets. 
+ */ +void * +dpcpu_alloc(int size) +{ + struct dpcpu_free *df; + void *s; + + s = NULL; + size = roundup2(size, sizeof(void *)); + sx_xlock(&dpcpu_lock); + TAILQ_FOREACH(df, &dpcpu_head, df_link) { + if (df->df_len < size) + continue; + if (df->df_len == size) { + s = (void *)df->df_start; + TAILQ_REMOVE(&dpcpu_head, df, df_link); + free(df, M_PCPU); + break; + } + s = (void *)df->df_start; + df->df_len -= size; + df->df_start = df->df_start + size; + break; + } + sx_xunlock(&dpcpu_lock); + + return (s); +} + +/* + * Free dynamic per-cpu space at module unload time. + */ +void +dpcpu_free(void *s, int size) +{ + struct dpcpu_free *df; + struct dpcpu_free *dn; + uintptr_t start; + uintptr_t end; + + size = roundup2(size, sizeof(void *)); + start = (uintptr_t)s; + end = start + size; + /* + * Free a region of space and merge it with as many neighbors as + * possible. Keeping the list sorted simplifies this operation. + */ + sx_xlock(&dpcpu_lock); + TAILQ_FOREACH(df, &dpcpu_head, df_link) { + if (df->df_start > end) + break; + /* + * If we expand at the end of an entry we may have to + * merge it with the one following it as well. + */ + if (df->df_start + df->df_len == start) { + df->df_len += size; + dn = TAILQ_NEXT(df, df_link); + if (df->df_start + df->df_len == dn->df_start) { + df->df_len += dn->df_len; + TAILQ_REMOVE(&dpcpu_head, dn, df_link); + free(dn, M_PCPU); + } + sx_xunlock(&dpcpu_lock); + return; + } + if (df->df_start == end) { + df->df_start = start; + df->df_len += size; + sx_xunlock(&dpcpu_lock); + return; + } + } + dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO); + dn->df_start = start; + dn->df_len = size; + if (df) + TAILQ_INSERT_BEFORE(df, dn, df_link); + else + TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link); + sx_xunlock(&dpcpu_lock); +} +/* + * Initialize the per-cpu storage from an updated linker-set region. 
+ */ +void +dpcpu_copy(void *s, int size) +{ +#ifdef SMP + uintptr_t dpcpu; + int i; + + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + memcpy((void *)(dpcpu + (uintptr_t)s), s, size); + } +#else + memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size); +#endif } /* @@ -91,6 +248,7 @@ pcpu_destroy(struct pcpu *pcpu) SLIST_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu); cpuid_to_pcpu[pcpu->pc_cpuid] = NULL; + dpcpu_off[pcpu->pc_cpuid] = 0; } /* @@ -103,6 +261,48 @@ pcpu_find(u_int cpuid) return (cpuid_to_pcpu[cpuid]); } +int +sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS) +{ + int64_t count; +#ifdef SMP + uintptr_t dpcpu; + int i; + + count = 0; + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + count += *(int64_t *)(dpcpu + (uintptr_t)arg1); + } +#else + count = *(int64_t *)(dpcpu_off[0] + (uintptr_t)arg1); +#endif + return (SYSCTL_OUT(req, &count, sizeof(count))); +} + +int +sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS) +{ + int count; +#ifdef SMP + uintptr_t dpcpu; + int i; + + count = 0; + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + count += *(int *)(dpcpu + (uintptr_t)arg1); + } +#else + count = *(int *)(dpcpu_off[0] + (uintptr_t)arg1); +#endif + return (SYSCTL_OUT(req, &count, sizeof(count))); +} + #ifdef DDB static void @@ -111,6 +311,7 @@ show_pcpu(struct pcpu *pc) struct thread *td; db_printf("cpuid = %d\n", pc->pc_cpuid); + db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic); db_printf("curthread = "); td = pc->pc_curthread; if (td != NULL) Modified: head/sys/mips/mips/elf_machdep.c ============================================================================== --- head/sys/mips/mips/elf_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/mips/mips/elf_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -134,7 +134,7 @@ elf_reloc_internal(linker_file_t lf, Elf if (local) { #if 0 /* TBD */ if (rtype == R_386_RELATIVE) { /* A + B */ - addr = 
relocbase + addend; + addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } Modified: head/sys/mips/mips/mp_machdep.c ============================================================================== --- head/sys/mips/mips/mp_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/mips/mips/mp_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -224,12 +224,15 @@ static int smp_start_secondary(int cpuid) { struct pcpu *pcpu; + void *dpcpu; int i; if (bootverbose) printf("smp_start_secondary: starting cpu %d\n", cpuid); + dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); pcpu_init(&__pcpu[cpuid], cpuid, sizeof(struct pcpu)); + dpcpu_init(dpcpu, cpuid); if (bootverbose) printf("smp_start_secondary: cpu %d started\n", cpuid); Modified: head/sys/mips/mips/pmap.c ============================================================================== --- head/sys/mips/mips/pmap.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/mips/mips/pmap.c Tue Jun 23 22:42:39 2009 (r194784) @@ -331,6 +331,9 @@ again: msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE); msgbufinit(msgbufp, MSGBUF_SIZE); + /* Steal memory for the dynamic per-cpu area. */ + dpcpu_init((void *)pmap_steal_memory(DPCPU_SIZE), 0); + /* * Steal thread0 kstack. 
*/ Modified: head/sys/pc98/pc98/machdep.c ============================================================================== --- head/sys/pc98/pc98/machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/pc98/pc98/machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -1954,6 +1954,7 @@ init386(first) struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x; struct pcpu *pc; + int pa; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) @@ -2010,6 +2011,11 @@ init386(first) lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); + for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) + pmap_kenter(pa + KERNBASE, pa); + dpcpu_init((void *)(first + KERNBASE), 0); + first += DPCPU_SIZE; + PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); Modified: head/sys/powerpc/aim/mmu_oea.c ============================================================================== --- head/sys/powerpc/aim/mmu_oea.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/powerpc/aim/mmu_oea.c Tue Jun 23 22:42:39 2009 (r194784) @@ -669,6 +669,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t k int ofw_mappings; vm_size_t size, physsz, hwphyssz; vm_offset_t pa, va, off; + void *dpcpu; /* * Set up BAT0 to map the lowest 256 MB area @@ -938,6 +939,20 @@ moea_bootstrap(mmu_t mmup, vm_offset_t k pa += PAGE_SIZE; va += PAGE_SIZE; } + + /* + * Allocate virtual address space for the dynamic percpu area. 
+ */ + pa = moea_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); + dpcpu = (void *)virtual_avail; + va = virtual_avail; + virtual_avail += DPCPU_SIZE; + while (va < virtual_avail) { + moea_kenter(mmup, va, pa);; + pa += PAGE_SIZE; + va += PAGE_SIZE; + } + dpcpu_init(dpcpu, 0); } /* Modified: head/sys/powerpc/aim/mmu_oea64.c ============================================================================== --- head/sys/powerpc/aim/mmu_oea64.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/powerpc/aim/mmu_oea64.c Tue Jun 23 22:42:39 2009 (r194784) @@ -726,6 +726,7 @@ moea64_bridge_bootstrap(mmu_t mmup, vm_o vm_size_t size, physsz, hwphyssz; vm_offset_t pa, va, off; uint32_t msr; + void *dpcpu; /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; @@ -1027,6 +1028,20 @@ moea64_bridge_bootstrap(mmu_t mmup, vm_o pa += PAGE_SIZE; va += PAGE_SIZE; } + + /* + * Allocate virtual address space for the dynamic percpu area. + */ + pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); + dpcpu = (void *)virtual_avail; + va = virtual_avail; + virtual_avail += DPCPU_SIZE; + while (va < virtual_avail) { + moea64_kenter(mmup, va, pa);; + pa += PAGE_SIZE; + va += PAGE_SIZE; + } + dpcpu_init(dpcpu, 0); } /* Modified: head/sys/powerpc/booke/pmap.c ============================================================================== --- head/sys/powerpc/booke/pmap.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/powerpc/booke/pmap.c Tue Jun 23 22:42:39 2009 (r194784) @@ -963,6 +963,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset vm_size_t physsz, hwphyssz, kstack0_sz; vm_offset_t kernel_pdir, kstack0, va; vm_paddr_t kstack0_phys; + void *dpcpu; pte_t *pte; debugf("mmu_booke_bootstrap: entered\n"); @@ -988,6 +989,11 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset data_end = round_page(data_end); + /* Allocate the dynamic per-cpu area. */ + dpcpu = (void *)data_end; + data_end += DPCPU_SIZE; + dpcpu_init(dpcpu, 0); + /* Allocate space for ptbl_bufs. 
*/ ptbl_bufs = (struct ptbl_buf *)data_end; data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; Modified: head/sys/powerpc/powerpc/elf_machdep.c ============================================================================== --- head/sys/powerpc/powerpc/elf_machdep.c Tue Jun 23 22:28:44 2009 (r194783) +++ head/sys/powerpc/powerpc/elf_machdep.c Tue Jun 23 22:42:39 2009 (r194784) @@ -194,7 +194,7 @@ elf_reloc_internal(linker_file_t lf, Elf break; case R_PPC_RELATIVE: /* word32 B + A */ - *where = relocbase + addend; *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"