On Wed, Apr 20, 2016 at 04:22:32AM +0300, Artturi Alm wrote:
> Hi,
> 
> would anyone care to look at diffs aiming to make diffs like below possible
> for armv7 too? doesn't take 'much' fixing beyond removing cruft, and can
> be done mostly w/o touching the files/headers shared with armish/zaurus, if
> wanted.
> 
> -Artturi
> 

Ok, so there was zero interest. I did get to fix the regression I was using
this branch to help me fix, so it got purged.
I'll leave some hints and tips in case anyone able eventually decides to start
cleaning up the bootstrap/pmap/MD code for armv7:
- special-casing the vector page everywhere is nothing but legacy cruft making
  things way more complex than needed. armv7 has VBAR, accessible with the
  now-in-tree sysreg.h CP15_VBAR() macro. Besides setting the vector address
  and removing the obvious cruft (hundreds of lines, I guess), IIRC this is
  all that is needed in the .S file containing the vectors (moving armv7 to
  its own vectorsv7.S or something similar would also make it easier to get
  rid of the completely unnecessary levels of indirection in the current
  vector code):
vectors.S:
        .text
+       .p2align 5 /* VBAR (vector base address) has to be 32-byte aligned; .p2align 5 = 2^5 bytes */

- as an added bonus, removing the special casing for the vector page allows
  removing all the references to ARM MMU domains once DACR has been set to
  domain_client for domain 0 (I see you've been adopting FreeBSD work on this
  front; they've done all that too).

An unrelated 'tip' for simplifying the code is something I wonder if I even
dare to suggest... oh well: using ld.script for the L1 page directory and the
L2 page tables will make the related bootstrap code _a lot_ easier to
understand (by removing most of it :P). It gets rid of all the code to align
and malign addresses and so on.
Something like this,
in locore.S or _start.S or what have you:

        /*
         * Statically reserved bootstrap page tables.  Each table lives in
         * its own input section so that ld.script can collect both into the
         * 16K-aligned .pts output section (.pts.l1 first, then .pts.l2).
         *
         * The section flags are spelled out on purpose: GNU as gives a
         * section with an unrecognised name and no flags none of the
         * alloc/write/exec attributes, so the tables would not be part of
         * the loaded image.  "aw" makes them allocated and writable, and
         * %nobits keeps the fill out of the file (ARM syntax uses '%'
         * here because '@' starts a comment).
         */
        .section .pts.l1, "aw", %nobits
        .global l1pd
l1pd:                                   /* L1 page directory */
        .space  L1TBL_SIZE

        .section .pts.l2, "aw", %nobits
        .global l2pts
l2pts:                                  /* _NPTS L2 page tables, back to back */
        .space  L2TBL_SIZE * _NPTS


ld.script(don't need head&tail):
/*      $OpenBSD: ld.script,v 1.2 2013/11/03 09:42:55 miod Exp $        */

/*
 * Copyright (c) 2012 Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Kernel link layout: .text, .rodata, .data, .bss, then the statically
 * reserved page tables (.pts).  Every symbol is defined with PROVIDE(), so
 * it is only created if something references it and nothing else defines it.
 */
OUTPUT_FORMAT("elf32-littlearm")
OUTPUT_ARCH(arm)
/* NOTE(review): confirm arm88k_boot is the real boot entry symbol. */
ENTRY(arm88k_boot)
SECTIONS
{
        /* Text is linked at 0xc0000000; the alternative base is kept below. */
        .text (0xc0000000) : {
/*      .text (0xc0200000) : {  */
                PROVIDE(__text = ABSOLUTE(.));
                *(.text)
                PROVIDE(etext = ABSOLUTE(.));
        }
        PROVIDE(_text_sizeof = ABSOLUTE(.) - __text);
        /* .rodata starts on a 0x80000 (512 KiB) boundary. */
        . = ALIGN(0x80000);
        .rodata : {
                *(.rodata*)
                PROVIDE(erodata = ABSOLUTE(.));
        }
        /* .data starts on a 0x80000 (512 KiB) boundary as well. */
        . = ALIGN(0x80000);
        .data : {
                *(.data)
                PROVIDE(edata = ABSOLUTE(.));
        }
        PROVIDE(_edata = ABSOLUTE(.));
        .bss : {
                PROVIDE(__bss = ABSOLUTE(.));
                *(.bss)
        }
        /* Page dir + L2 page tables */
        /* The output section itself is placed on a 16 KiB boundary. */
        .pts ALIGN(16K) : {
                PROVIDE(__l1pd = ABSOLUTE(.));
                *(.pts.l1)
                /* .l2 aligned by being after .l1 */
                PROVIDE(__l2pts = ABSOLUTE(.));
                *(.pts.l2)
        }
        /* Size of the page-table area only (from __l1pd to here). */
        PROVIDE(_pts_sizeof = ABSOLUTE(.) - __l1pd);
        /* Measured from __bss to here, so it covers .bss AND .pts. */
        PROVIDE(_bss_sizeof = ABSOLUTE(.) - __bss);
        /* Whole image, from __text to the end of .pts. */
        PROVIDE(_kernel_sizeof = ABSOLUTE(.) - __text);

        /* Both spellings of the end-of-image symbol are provided. */
        PROVIDE(end = ABSOLUTE(.));
        PROVIDE(_end = ABSOLUTE(.));
        /* Drop .comment input sections from the output. */
        /DISCARD/ : {
                *(.comment)
        }
}

bored? yes.
-Artturi

> 
>  sys/arch/arm/arm/cpuswitch7.S | 224 
> +++++++-----------------------------------
>  1 file changed, 38 insertions(+), 186 deletions(-)
> 
> diff --git a/sys/arch/arm/arm/cpuswitch7.S b/sys/arch/arm/arm/cpuswitch7.S
> index 43bf7da..a5cc631 100644
> --- a/sys/arch/arm/arm/cpuswitch7.S
> +++ b/sys/arch/arm/arm/cpuswitch7.S
> @@ -83,6 +83,7 @@
>  #include <machine/frame.h>
>  #include <machine/intr.h>
>  #include <machine/asm.h>
> +#include <machine/sysreg.h>
>  
>  /* LINTSTUB: include <sys/param.h> */
>       
> @@ -157,27 +158,25 @@ ENTRY(cpu_idle_leave)
>   */
>  
>  ENTRY(cpu_switchto)
> -     stmfd   sp!, {r4-r7, lr}
> +     push    {r4-r7, lr}
> +     sub     sp, sp, #4
>  
> -     /* Get curcpu from TPIDRPRW. */
> -     mrc     p15, 0, r3, c13, c0, 4
> +     /* Process is now on a processor. */
> +     mov     r2, #SONPROC                    /* p->p_stat = SONPROC */
> +     strb    r2, [r1, #P_STAT]
> +
> +     mrc     CP15_TPIDRPRW(r3)               /* r3 = curcpu */
>  #ifdef MULTIPROCESSOR
> -     str     r3, [r1, #(P_CPU)]
> +     str     r3, [r1, #P_CPU]
>  #else
>       /* p->p_cpu initialized in fork1() for single-processor */
>  #endif
> -
> -     /* Process is now on a processor. */
> -     mov     r2, #SONPROC                    /* p->p_stat = SONPROC */
> -     strb    r2, [r1, #(P_STAT)]
> -
> -     /* We have a new curproc now so make a note it */
> -     str     r1, [r3, #(CI_CURPROC)]
> +     str     r1, [r3, #CI_CURPROC]           /* set new curproc */
> +     ldr     r6, [r3, #CI_CURPCB]            /* r6 = old PCB */
>  
>       /* Hook in a new pcb */
> -     ldr     r6, [r3, #(CI_CURPCB)]          /* Remember the old PCB */
> -     ldr     r2, [r1, #(P_ADDR)]
> -     str     r2, [r3, #(CI_CURPCB)]
> +     ldr     r2, [r1, #P_ADDR]               /* r2 = new PCB */
> +     str     r2, [r3, #CI_CURPCB]
>  
>       /*
>        * If the old proc on entry to cpu_switch was zero then the
> @@ -186,192 +185,45 @@ ENTRY(cpu_switchto)
>        * to clear the cache and TLB).
>        */
>       teq     r0, #0x00000000
> -     beq     .Lswitch_exited
> -
> -     /* Stage two: Save old context */
> -
> -     /* Save all the registers in the old proc's pcb */
> -     add     r7, r6, #(PCB_R8)
> -     stmia   r7, {r8-r13}
> -
> -.Lswitch_exited:
> -     /*
> -      * NOTE: We can now use r8-r13 until it is time to restore
> -      * them for the new process.
> -      */
> -
> -     /* Remember the old PCB. */
> -     mov     r8, r6
> -
> -     /* Save new proc in r6 now. */
> -     mov     r6, r1
> -
> -     /* Get the user structure for the new process in r9 */
> -     ldr     r9, [r6, #(P_ADDR)]
> -
> -     /*
> -      * This can be optimised... We know we want to go from SVC32
> -      * mode to UND32 mode
> -      */
> -        mrs  r3, cpsr
> -     bic     r2, r3, #(PSR_MODE)
> -     orr     r2, r2, #(PSR_UND32_MODE | PSR_I)
> -        msr  cpsr_c, r2
> -
> -#ifdef notworthit
> -     teq     r0, #0x00000000
> -     strne   sp, [r8, #(PCB_UND_SP)]
> -#else
> -     str     sp, [r8, #(PCB_UND_SP)]
> -#endif
> -
> -        msr  cpsr_c, r3              /* Restore the old mode */
> -
> -     /* rem: r0 = old proc */
> -     /* rem: r1 = r6 = new process */
> -     /* rem: r8 = old PCB */
> -     /* rem: r9 = new PCB */
> -
> -     /* What else needs to be saved  Only FPA stuff when that is supported */
> -
> -     /* Third phase: restore saved context */
> +     beq     .switch_exited__skip_save
>  
> +     add     r7, r6, #PCB_R8                 /* save registers in */
> +     stmia   r7, {r8-r13}                    /* the old proc's pcb */
> +.switch_exited__skip_save:
>       /*
> -      * Get the new L1 table pointer into r11.  If we're switching to
> -      * an LWP with the same address space as the outgoing one, we can
> -      * skip the cache purge and the TTB load.
> -      *
> -      * To avoid data dep stalls that would happen anyway, we try
> -      * and get some useful work done in the mean time.
> +      * If we're switching to the same address space as the outgoing
> +      * one, we can skip the cache purge and the TTB load.
>        */
> -     ldr     r10, [r8, #(PCB_PAGEDIR)]       /* r10 = old L1 */
> -     ldr     r11, [r9, #(PCB_PAGEDIR)]       /* r11 = new L1 */
> -
> -     ldr     r0, [r8, #(PCB_DACR)]           /* r0 = old DACR */
> -     ldr     r1, [r9, #(PCB_DACR)]           /* r1 = new DACR */
> +     ldr     r10, [r6, #PCB_PAGEDIR]         /* r10 = old L1 */
> +     ldr     r9, [r1, #P_ADDR]               /* r9 = new PCB */
> +     ldr     r11, [r9, #PCB_PAGEDIR]         /* r11 = new L1 */
>  
>       teq     r10, r11                        /* Same L1? */
> -     cmpeq   r0, r1                          /* Same DACR? */
> -     beq     .Lcs_context_switched           /* yes! */
> -
> -     mov     r2, #DOMAIN_CLIENT
> -     cmp     r1, r2, lsl #(PMAP_DOMAIN_KERNEL * 2) /* Sw to kernel thread? */
> -     beq     .Lcs_cache_purge_skipped        /* Yup. Don't flush cache */
> -
> -     stmfd   sp!, {r0-r3}
> -     ldr     r1, .Lcpufuncs
> -     mov     lr, pc
> -     ldr     pc, [r1, #CF_ICACHE_SYNC_ALL]
> -     ldmfd   sp!, {r0-r3}
> -
> -.Lcs_cache_purge_skipped:
> -     /* rem: r1 = new DACR */
> -     /* rem: r6 = new proc */
> -     /* rem: r9 = new PCB */
> -     /* rem: r10 = old L1 */
> -     /* rem: r11 = new L1 */
> -
> -     ldr     r7, [r9, #(PCB_PL1VEC)]
> +     beq     .context_switched__skip_flush   /* yes! */
>  
>       /*
> -      * At this point we need to kill IRQ's again.
> -      *
> -      * XXXSCW: Don't need to block FIQs if vectors have been relocated
> +      * Do a full context switch = full TLB flush.
>        */
> -     IRQdisableALL
> -
> -     /*
> -      * Ensure the vector table is accessible by fixing up the L1
> -      */
> -     cmp     r7, #0                  /* No need to fixup vector table? */
> -     ldrne   r2, [r7]                /* But if yes, fetch current value */
> -     ldrne   r0, [r9, #(PCB_L1VEC)]  /* Fetch new vector_page value */
> -     mcr     p15, 0, r1, c3, c0, 0   /* Update DACR for new context */
> -     cmpne   r2, r0                  /* Stuffing the same value? */
> -#ifndef PMAP_INCLUDE_PTE_SYNC
> -     strne   r0, [r7]                /* Nope, update it */
> -#else
> -     beq     .Lcs_same_vector
> -     str     r0, [r7]                /* Otherwise, update it */
> -
> -     /*
> -      * Need to sync the cache to make sure that last store is
> -      * visible to the MMU.
> -      */
> -     ldr     r2, .Lcpufuncs
> -     mov     r0, r7
> -     mov     r1, #4
> -     mov     lr, pc
> -     ldr     pc, [r2, #CF_DCACHE_WB_RANGE]
> -
> -.Lcs_same_vector:
> -#endif /* PMAP_INCLUDE_PTE_SYNC */
> -
> -     cmp     r10, r11                /* Switching to the same L1? */
> -     ldr     r10, .Lcpufuncs
> -     beq     .Lcs_same_l1            /* Yup. */
> -
> -     /*
> -      * Do a full context switch, including full TLB flush.
> -      */
> -     mov     r0, r11
> -     mov     lr, pc
> -     ldr     pc, [r10, #CF_CONTEXT_SWITCH]
> -
> -     b       .Lcs_context_switched
> -
> -     /*
> -      * We're switching to a different process in the same L1.
> -      * In this situation, we only need to flush the TLB for the
> -      * vector_page mapping, and even then only if r7 is non-NULL.
> -      */
> -.Lcs_same_l1:
> -     cmp     r7, #0
> -     movne   r0, #0                  /* We *know* vector_page's VA is 0x0 */
> -     movne   lr, pc
> -     ldrne   pc, [r10, #CF_TLB_FLUSHID_SE]
> -
> -.Lcs_context_switched:
> -
> -     /* XXXSCW: Safe to re-enable FIQs here */
> -
> -     /* rem: r6 = new proc */
> -     /* rem: r9 = new PCB */
> -
> -     /*
> -      * This can be optimised... We know we want to go from SVC32
> -      * mode to UND32 mode
> -      */
> -        mrs  r3, cpsr
> -     bic     r2, r3, #(PSR_MODE)
> -     orr     r2, r2, #(PSR_UND32_MODE)
> -        msr  cpsr_c, r2
> -
> -     ldr     sp, [r9, #(PCB_UND_SP)]
> -
> -        msr  cpsr_c, r3              /* Restore the old mode */
> -
> -     /* Restore all the save registers */
> -     add     r7, r9, #PCB_R8
> -     ldmia   r7, {r8-r13}
> -
> +     mcr     CP15_ICIALLU
> +     mcr     CP15_BPIALL
> +     dsb     sy
> +     isb     sy
> +     mcr     CP15_TTBR0(r11)
> +     mcr     CP15_TLBIALL
> +     dsb     sy
> +     isb     sy
> +
> +.context_switched__skip_flush:
> +     add     r7, r9, #PCB_R8         /* restore registers saved */
> +     ldmia   r7, {r8-r13}            /* in the new proc's pcb */
>       sub     r7, r7, #PCB_R8         /* restore PCB pointer */
>  
> -     /* rem: r6 = new proc */
> -     /* rem: r7 = new pcb */
> -
> -     /* We can enable interrupts again */
> -     IRQenableALL
> -
> -     /* rem: r6 = new proc */
> -     /* rem: r7 = new PCB */
> -
> -.Lswitch_return:
>       /*
>        * Pull the registers that got pushed when either savectx() or
>        * cpu_switch() was called and return.
>        */
> -     ldmfd   sp!, {r4-r7, pc}
> +     add     sp, sp, #4
> +     pop     {r4-r7, pc}
>  
>  /* LINTSTUB: Func: void savectx(struct pcb *pcb) */
>  ENTRY(savectx)

Reply via email to