On Wed, Apr 20, 2016 at 04:22:32AM +0300, Artturi Alm wrote: > Hi, > > would anyone care to look at diffs aiming to make diffs like below possible > for armv7 too? doesn't take 'much' fixing beyond removing cruft, and can > be done mostly w/o touching the files/headers shared with armish/zaurus, if > wanted. > > -Artturi >
Ok, so there was zero interest. I did get to fix the regression i was using this branch to help me fix, so it got purged. I'll leave some hints and tips in case anyone able eventually decides to start cleaning up the bootstrap/pmap/MD code for armv7: - special casing vectorpage everywhere is nothing but legacy cruft making things way more complex than needed. armv7 has VBAR accessible w/the now-in-tree sysreg.h CP15_VBAR()-macro.. besides setting the vector address and removing the obvious cruft(hundreds of lines i guess), iirc. this is enough in .S containing vectors(i.e. moving armv7 to using vectorsv7.S or something would make it easier to get rid of the completely unnecessary levels of indirection in current code for vecs also..): vectors.S: .text + .p2align 5 /* VBAR(=vector base address) has to be 32-byte aligned */ - as an added bonus removing the special casing for vector page does allow removing all the references to arm mmu domains after setting DACR to domain_client for domain 0(as i see you've been adopting fbsd stuff on this front; they've done all that too.). unrelated 'tip' for simplifying the code would be something i wonder if i even dare to suggest.. oh well, using ld.script for l1pagedir and l2pagetables, will make related bootstrap code _a lot_ easier to understand(by removing most of it:P). does get rid of all the code to align and malign addresses and so on. something like: in locore.S or _start.S or what have you: .section .pts.l1 .global l1pd l1pd: .space L1TBL_SIZE .section .pts.l2 .global l2pts l2pts: .space L2TBL_SIZE * _NPTS ld.script(don't need head&tail): /* $OpenBSD: ld.script,v 1.2 2013/11/03 09:42:55 miod Exp $ */ /* * Copyright (c) 2012 Miodrag Vallat. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. 
* * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ OUTPUT_FORMAT("elf32-littlearm") OUTPUT_ARCH(arm) ENTRY(arm88k_boot) SECTIONS { .text (0xc0000000) : { /* .text (0xc0200000) : { */ PROVIDE(__text = ABSOLUTE(.)); *(.text) PROVIDE(etext = ABSOLUTE(.)); } PROVIDE(_text_sizeof = ABSOLUTE(.) - __text); . = ALIGN(0x80000); .rodata : { *(.rodata*) PROVIDE(erodata = ABSOLUTE(.)); } . = ALIGN(0x80000); .data : { *(.data) PROVIDE(edata = ABSOLUTE(.)); } PROVIDE(_edata = ABSOLUTE(.)); .bss : { PROVIDE(__bss = ABSOLUTE(.)); *(.bss) } /* Page dir + L2 page tables */ .pts ALIGN(16K) : { PROVIDE(__l1pd = ABSOLUTE(.)); *(.pts.l1) /* .l2 aligned by being after .l1 */ PROVIDE(__l2pts = ABSOLUTE(.)); *(.pts.l2) } PROVIDE(_pts_sizeof = ABSOLUTE(.) - __l1pd); PROVIDE(_bss_sizeof = ABSOLUTE(.) - __bss); PROVIDE(_kernel_sizeof = ABSOLUTE(.) - __text); PROVIDE(end = ABSOLUTE(.)); PROVIDE(_end = ABSOLUTE(.)); /DISCARD/ : { *(.comment) } } bored? yes. 
-Artturi > > sys/arch/arm/arm/cpuswitch7.S | 224 > +++++++----------------------------------- > 1 file changed, 38 insertions(+), 186 deletions(-) > > diff --git a/sys/arch/arm/arm/cpuswitch7.S b/sys/arch/arm/arm/cpuswitch7.S > index 43bf7da..a5cc631 100644 > --- a/sys/arch/arm/arm/cpuswitch7.S > +++ b/sys/arch/arm/arm/cpuswitch7.S > @@ -83,6 +83,7 @@ > #include <machine/frame.h> > #include <machine/intr.h> > #include <machine/asm.h> > +#include <machine/sysreg.h> > > /* LINTSTUB: include <sys/param.h> */ > > @@ -157,27 +158,25 @@ ENTRY(cpu_idle_leave) > */ > > ENTRY(cpu_switchto) > - stmfd sp!, {r4-r7, lr} > + push {r4-r7, lr} > + sub sp, sp, #4 > > - /* Get curcpu from TPIDRPRW. */ > - mrc p15, 0, r3, c13, c0, 4 > + /* Process is now on a processor. */ > + mov r2, #SONPROC /* p->p_stat = SONPROC */ > + strb r2, [r1, #P_STAT] > + > + mrc CP15_TPIDRPRW(r3) /* r3 = curcpu */ > #ifdef MULTIPROCESSOR > - str r3, [r1, #(P_CPU)] > + str r3, [r1, #P_CPU] > #else > /* p->p_cpu initialized in fork1() for single-processor */ > #endif > - > - /* Process is now on a processor. */ > - mov r2, #SONPROC /* p->p_stat = SONPROC */ > - strb r2, [r1, #(P_STAT)] > - > - /* We have a new curproc now so make a note it */ > - str r1, [r3, #(CI_CURPROC)] > + str r1, [r3, #CI_CURPROC] /* set new curproc */ > + ldr r6, [r3, #CI_CURPCB] /* r6 = old PCB */ > > /* Hook in a new pcb */ > - ldr r6, [r3, #(CI_CURPCB)] /* Remember the old PCB */ > - ldr r2, [r1, #(P_ADDR)] > - str r2, [r3, #(CI_CURPCB)] > + ldr r2, [r1, #P_ADDR] /* r2 = new PCB */ > + str r2, [r3, #CI_CURPCB] > > /* > * If the old proc on entry to cpu_switch was zero then the > @@ -186,192 +185,45 @@ ENTRY(cpu_switchto) > * to clear the cache and TLB). 
> */ > teq r0, #0x00000000 > - beq .Lswitch_exited > - > - /* Stage two: Save old context */ > - > - /* Save all the registers in the old proc's pcb */ > - add r7, r6, #(PCB_R8) > - stmia r7, {r8-r13} > - > -.Lswitch_exited: > - /* > - * NOTE: We can now use r8-r13 until it is time to restore > - * them for the new process. > - */ > - > - /* Remember the old PCB. */ > - mov r8, r6 > - > - /* Save new proc in r6 now. */ > - mov r6, r1 > - > - /* Get the user structure for the new process in r9 */ > - ldr r9, [r6, #(P_ADDR)] > - > - /* > - * This can be optimised... We know we want to go from SVC32 > - * mode to UND32 mode > - */ > - mrs r3, cpsr > - bic r2, r3, #(PSR_MODE) > - orr r2, r2, #(PSR_UND32_MODE | PSR_I) > - msr cpsr_c, r2 > - > -#ifdef notworthit > - teq r0, #0x00000000 > - strne sp, [r8, #(PCB_UND_SP)] > -#else > - str sp, [r8, #(PCB_UND_SP)] > -#endif > - > - msr cpsr_c, r3 /* Restore the old mode */ > - > - /* rem: r0 = old proc */ > - /* rem: r1 = r6 = new process */ > - /* rem: r8 = old PCB */ > - /* rem: r9 = new PCB */ > - > - /* What else needs to be saved Only FPA stuff when that is supported */ > - > - /* Third phase: restore saved context */ > + beq .switch_exited__skip_save > > + add r7, r6, #PCB_R8 /* save registers in */ > + stmia r7, {r8-r13} /* the old proc's pcb */ > +.switch_exited__skip_save: > /* > - * Get the new L1 table pointer into r11. If we're switching to > - * an LWP with the same address space as the outgoing one, we can > - * skip the cache purge and the TTB load. > - * > - * To avoid data dep stalls that would happen anyway, we try > - * and get some useful work done in the mean time. > + * If we're switching to the same address space as the outgoing > + * one, we can skip the cache purge and the TTB load. 
> */ > - ldr r10, [r8, #(PCB_PAGEDIR)] /* r10 = old L1 */ > - ldr r11, [r9, #(PCB_PAGEDIR)] /* r11 = new L1 */ > - > - ldr r0, [r8, #(PCB_DACR)] /* r0 = old DACR */ > - ldr r1, [r9, #(PCB_DACR)] /* r1 = new DACR */ > + ldr r10, [r6, #PCB_PAGEDIR] /* r10 = old L1 */ > + ldr r9, [r1, #P_ADDR] /* r9 = new PCB */ > + ldr r11, [r9, #PCB_PAGEDIR] /* r11 = new L1 */ > > teq r10, r11 /* Same L1? */ > - cmpeq r0, r1 /* Same DACR? */ > - beq .Lcs_context_switched /* yes! */ > - > - mov r2, #DOMAIN_CLIENT > - cmp r1, r2, lsl #(PMAP_DOMAIN_KERNEL * 2) /* Sw to kernel thread? */ > - beq .Lcs_cache_purge_skipped /* Yup. Don't flush cache */ > - > - stmfd sp!, {r0-r3} > - ldr r1, .Lcpufuncs > - mov lr, pc > - ldr pc, [r1, #CF_ICACHE_SYNC_ALL] > - ldmfd sp!, {r0-r3} > - > -.Lcs_cache_purge_skipped: > - /* rem: r1 = new DACR */ > - /* rem: r6 = new proc */ > - /* rem: r9 = new PCB */ > - /* rem: r10 = old L1 */ > - /* rem: r11 = new L1 */ > - > - ldr r7, [r9, #(PCB_PL1VEC)] > + beq .context_switched__skip_flush /* yes! */ > > /* > - * At this point we need to kill IRQ's again. > - * > - * XXXSCW: Don't need to block FIQs if vectors have been relocated > + * Do a full context switch = full TLB flush. > */ > - IRQdisableALL > - > - /* > - * Ensure the vector table is accessible by fixing up the L1 > - */ > - cmp r7, #0 /* No need to fixup vector table? */ > - ldrne r2, [r7] /* But if yes, fetch current value */ > - ldrne r0, [r9, #(PCB_L1VEC)] /* Fetch new vector_page value */ > - mcr p15, 0, r1, c3, c0, 0 /* Update DACR for new context */ > - cmpne r2, r0 /* Stuffing the same value? */ > -#ifndef PMAP_INCLUDE_PTE_SYNC > - strne r0, [r7] /* Nope, update it */ > -#else > - beq .Lcs_same_vector > - str r0, [r7] /* Otherwise, update it */ > - > - /* > - * Need to sync the cache to make sure that last store is > - * visible to the MMU. 
> - */ > - ldr r2, .Lcpufuncs > - mov r0, r7 > - mov r1, #4 > - mov lr, pc > - ldr pc, [r2, #CF_DCACHE_WB_RANGE] > - > -.Lcs_same_vector: > -#endif /* PMAP_INCLUDE_PTE_SYNC */ > - > - cmp r10, r11 /* Switching to the same L1? */ > - ldr r10, .Lcpufuncs > - beq .Lcs_same_l1 /* Yup. */ > - > - /* > - * Do a full context switch, including full TLB flush. > - */ > - mov r0, r11 > - mov lr, pc > - ldr pc, [r10, #CF_CONTEXT_SWITCH] > - > - b .Lcs_context_switched > - > - /* > - * We're switching to a different process in the same L1. > - * In this situation, we only need to flush the TLB for the > - * vector_page mapping, and even then only if r7 is non-NULL. > - */ > -.Lcs_same_l1: > - cmp r7, #0 > - movne r0, #0 /* We *know* vector_page's VA is 0x0 */ > - movne lr, pc > - ldrne pc, [r10, #CF_TLB_FLUSHID_SE] > - > -.Lcs_context_switched: > - > - /* XXXSCW: Safe to re-enable FIQs here */ > - > - /* rem: r6 = new proc */ > - /* rem: r9 = new PCB */ > - > - /* > - * This can be optimised... We know we want to go from SVC32 > - * mode to UND32 mode > - */ > - mrs r3, cpsr > - bic r2, r3, #(PSR_MODE) > - orr r2, r2, #(PSR_UND32_MODE) > - msr cpsr_c, r2 > - > - ldr sp, [r9, #(PCB_UND_SP)] > - > - msr cpsr_c, r3 /* Restore the old mode */ > - > - /* Restore all the save registers */ > - add r7, r9, #PCB_R8 > - ldmia r7, {r8-r13} > - > + mcr CP15_ICIALLU > + mcr CP15_BPIALL > + dsb sy > + isb sy > + mcr CP15_TTBR0(r11) > + mcr CP15_TLBIALL > + dsb sy > + isb sy > + > +.context_switched__skip_flush: > + add r7, r9, #PCB_R8 /* restore registers saved */ > + ldmia r7, {r8-r13} /* in the new proc's pcb */ > sub r7, r7, #PCB_R8 /* restore PCB pointer */ > > - /* rem: r6 = new proc */ > - /* rem: r7 = new pcb */ > - > - /* We can enable interrupts again */ > - IRQenableALL > - > - /* rem: r6 = new proc */ > - /* rem: r7 = new PCB */ > - > -.Lswitch_return: > /* > * Pull the registers that got pushed when either savectx() or > * cpu_switch() was called and return. 
> */ > - ldmfd sp!, {r4-r7, pc} > + add sp, sp, #4 > + pop {r4-r7, pc} > > /* LINTSTUB: Func: void savectx(struct pcb *pcb) */ > ENTRY(savectx)