Hi Leonid,

On Wed, Mar 18, 2015 at 12:23:32PM -0700, Leonid Yegoshin wrote:
> MIPS R5, MIPS R6 and MSA HW specs allow a broad range of address exception
> behavior on unaligned MSA load/store operations - from no unaligned
> support up to full support in HW. In practice, it is expected that HW can
> occasionally trigger AdE for non-aligned data access (misalignment). It is
> usually expected on page boundaries because HW handling of two TLBs in a
> single data access operation may be complicated and expensive.
> 
> So, this patch handles MSA LD.df and ST.df Address Error exceptions.
> 
> It handles two cases separately - MSA owned by the thread, and MSA registers
> saved in current->thread.fpu. If the thread still owns the MSA unit then it
> loads and stores directly with the MSA unit and only one MSA register. Saving
> and restoring the full MSA context (512 bytes) on each misalignment exception
> is expensive! Preemption is disabled, of course.
> 
> Signed-off-by: Leonid Yegoshin <leonid.yegos...@imgtec.com>
> ---
> V2:
>     - added a missing assignment in the double-word case of the BIG ENDIAN
>       conversion
>     - added a missing initial alignment in the block of assembler
>       mini-functions to get/put an MSA register.
>     - added a missing preempt_disable() in ST.D unaligned-access processing

I think you forgot to either answer or address some of my smaller
comments.

Cheers
James

> ---
>  arch/mips/include/asm/processor.h |    2 +
>  arch/mips/include/uapi/asm/inst.h |   21 +++++
>  arch/mips/kernel/r4k_fpu.S        |  109 ++++++++++++++++++++++++++++
>  arch/mips/kernel/unaligned.c      |  146 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 278 insertions(+)
> 
> diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
> index f1df4cb4a286..af2675060244 100644
> --- a/arch/mips/include/asm/processor.h
> +++ b/arch/mips/include/asm/processor.h
> @@ -104,6 +104,8 @@ extern unsigned int vced_count, vcei_count;
>  #endif
>  
>  union fpureg {
> +     __u8    val8[FPU_REG_WIDTH / 8];
> +     __u16   val16[FPU_REG_WIDTH / 16];
>       __u32   val32[FPU_REG_WIDTH / 32];
>       __u64   val64[FPU_REG_WIDTH / 64];
>  };
> diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
> index 89c22433b1c6..7ab6987cb7d5 100644
> --- a/arch/mips/include/uapi/asm/inst.h
> +++ b/arch/mips/include/uapi/asm/inst.h
> @@ -58,6 +58,7 @@ enum spec_op {
>       dsll_op, spec7_unused_op, dsrl_op, dsra_op,
>       dsll32_op, spec8_unused_op, dsrl32_op, dsra32_op
>  };
> +#define msa_op  mdmx_op
>  
>  /*
>   * func field of spec2 opcode.
> @@ -217,6 +218,14 @@ enum bshfl_func {
>  };
>  
>  /*
> + * func field for MSA MI10 format
> + */
> +enum msa_mi10_func {
> +     msa_ld_op = 8,
> +     msa_st_op = 9,
> +};
> +
> +/*
>   * (microMIPS) Major opcodes.
>   */
>  enum mm_major_op {
> @@ -616,6 +625,17 @@ struct spec3_format {   /* SPEC3 */
>       ;)))))
>  };
>  
> +struct msa_mi10_format {        /* MSA */
> +     __BITFIELD_FIELD(unsigned int opcode : 6,
> +     __BITFIELD_FIELD(signed int s10 : 10,
> +     __BITFIELD_FIELD(unsigned int rs : 5,
> +     __BITFIELD_FIELD(unsigned int wd : 5,
> +     __BITFIELD_FIELD(unsigned int func : 4,
> +     __BITFIELD_FIELD(unsigned int df : 2,
> +     ;))))))
> +};
> +
> +
>  /*
>   * microMIPS instruction formats (32-bit length)
>   *
> @@ -884,6 +904,7 @@ union mips_instruction {
>       struct p_format p_format;
>       struct f_format f_format;
>       struct ma_format ma_format;
> +     struct msa_mi10_format msa_mi10_format;
>       struct b_format b_format;
>       struct ps_format ps_format;
>       struct v_format v_format;
> diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
> index 6c160c67984c..a2f9a0420f54 100644
> --- a/arch/mips/kernel/r4k_fpu.S
> +++ b/arch/mips/kernel/r4k_fpu.S
> @@ -13,6 +13,7 @@
>   * Copyright (C) 1999, 2001 Silicon Graphics, Inc.
>   */
>  #include <asm/asm.h>
> +#include <asm/asmmacro.h>
>  #include <asm/errno.h>
>  #include <asm/fpregdef.h>
>  #include <asm/mipsregs.h>
> @@ -268,6 +269,114 @@ LEAF(_restore_fp_context32)
>       END(_restore_fp_context32)
>  #endif
>  
> +#ifdef CONFIG_CPU_HAS_MSA
> +
> +     .macro  msa_ld_d    wd, base
> +     ld_d    \wd, 0, \base
> +     jalr    $0, $31
> +       nop
> +     .align  4
> +     .endm
> +
> +     .macro  msa_st_d    wd, base
> +     st_d    \wd, 0, \base
> +     jalr    $0, $31
> +       nop
> +     .align  4
> +     .endm
> +
> +LEAF(msa_to_wd)
> +     .set    push
> +     .set    noreorder
> +     sll         t0, a0, 4
> +     PTR_LA      t1, Lmsa_to
> +     PTR_ADDU    t0, t0, t1
> +     jalr        $0, t0
> +       nop
> +     .align  4
> +Lmsa_to:
> +     msa_ld_d    0, a1
> +     msa_ld_d    1, a1
> +     msa_ld_d    2, a1
> +     msa_ld_d    3, a1
> +     msa_ld_d    4, a1
> +     msa_ld_d    5, a1
> +     msa_ld_d    6, a1
> +     msa_ld_d    7, a1
> +     msa_ld_d    8, a1
> +     msa_ld_d    9, a1
> +     msa_ld_d    10, a1
> +     msa_ld_d    11, a1
> +     msa_ld_d    12, a1
> +     msa_ld_d    13, a1
> +     msa_ld_d    14, a1
> +     msa_ld_d    15, a1
> +     msa_ld_d    16, a1
> +     msa_ld_d    17, a1
> +     msa_ld_d    18, a1
> +     msa_ld_d    19, a1
> +     msa_ld_d    20, a1
> +     msa_ld_d    21, a1
> +     msa_ld_d    22, a1
> +     msa_ld_d    23, a1
> +     msa_ld_d    24, a1
> +     msa_ld_d    25, a1
> +     msa_ld_d    26, a1
> +     msa_ld_d    27, a1
> +     msa_ld_d    28, a1
> +     msa_ld_d    29, a1
> +     msa_ld_d    30, a1
> +     msa_ld_d    31, a1
> +     .set    pop
> +     END(msa_to_wd)
> +
> +LEAF(msa_from_wd)
> +     .set    push
> +     .set    noreorder
> +     sll         t0, a0, 4
> +     PTR_LA      t1, Lmsa_from
> +     PTR_ADDU    t0, t0, t1
> +     jalr        $0, t0
> +       nop
> +     .align  4
> +Lmsa_from:
> +     msa_st_d    0, a1
> +     msa_st_d    1, a1
> +     msa_st_d    2, a1
> +     msa_st_d    3, a1
> +     msa_st_d    4, a1
> +     msa_st_d    5, a1
> +     msa_st_d    6, a1
> +     msa_st_d    7, a1
> +     msa_st_d    8, a1
> +     msa_st_d    9, a1
> +     msa_st_d    10, a1
> +     msa_st_d    11, a1
> +     msa_st_d    12, a1
> +     msa_st_d    13, a1
> +     msa_st_d    14, a1
> +     msa_st_d    15, a1
> +     msa_st_d    16, a1
> +     msa_st_d    17, a1
> +     msa_st_d    18, a1
> +     msa_st_d    19, a1
> +     msa_st_d    20, a1
> +     msa_st_d    21, a1
> +     msa_st_d    22, a1
> +     msa_st_d    23, a1
> +     msa_st_d    24, a1
> +     msa_st_d    25, a1
> +     msa_st_d    26, a1
> +     msa_st_d    27, a1
> +     msa_st_d    28, a1
> +     msa_st_d    29, a1
> +     msa_st_d    30, a1
> +     msa_st_d    31, a1
> +     .set    pop
> +     END(msa_from_wd)
> +
> +#endif /* CONFIG_CPU_HAS_MSA */
> +
>       .set    reorder
>  
>       .type   fault@function
> diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
> index e11906dff885..bf6a0c63d3de 100644
> --- a/arch/mips/kernel/unaligned.c
> +++ b/arch/mips/kernel/unaligned.c
> @@ -108,6 +108,11 @@ static u32 unaligned_action;
>  #endif
>  extern void show_registers(struct pt_regs *regs);
>  
> +#ifdef CONFIG_CPU_HAS_MSA
> +void msa_to_wd(unsigned int wd, union fpureg *from);
> +void msa_from_wd(unsigned int wd, union fpureg *to);
> +#endif
> +
>  #ifdef __BIG_ENDIAN
>  #define     LoadHW(addr, value, res)  \
>               __asm__ __volatile__ (".set\tnoat\n"        \
> @@ -422,6 +427,66 @@ extern void show_registers(struct pt_regs *regs);
>               : "r" (value), "r" (addr), "i" (-EFAULT));
>  #endif
>  
> +#ifdef CONFIG_CPU_HAS_MSA
> +#ifdef __BIG_ENDIAN
> +/*
> + * MSA data format conversion.
> + * Only for BIG ENDIAN - LITTLE ENDIAN has register format which matches
> + * memory layout contiguously.
> + *
> + * Conversion is done between two Double words and other formats (W/H/B)
> + * because kernel uses LD.D and ST.D to load/store MSA registers and keeps
> + * MSA registers in this format in current->thread.fpu.fpr
> + */
> +static void msa_convert(union fpureg *to, union fpureg *from, int fmt)
> +{
> +     switch (fmt) {
> +     case 0: /* byte */
> +             to->val8[0] = from->val8[7];
> +             to->val8[1] = from->val8[6];
> +             to->val8[2] = from->val8[5];
> +             to->val8[3] = from->val8[4];
> +             to->val8[4] = from->val8[3];
> +             to->val8[5] = from->val8[2];
> +             to->val8[6] = from->val8[1];
> +             to->val8[7] = from->val8[0];
> +             to->val8[8] = from->val8[15];
> +             to->val8[9] = from->val8[14];
> +             to->val8[10] = from->val8[13];
> +             to->val8[11] = from->val8[12];
> +             to->val8[12] = from->val8[11];
> +             to->val8[13] = from->val8[10];
> +             to->val8[14] = from->val8[9];
> +             to->val8[15] = from->val8[8];
> +             break;
> +
> +     case 1: /* halfword */
> +             to->val16[0] = from->val16[3];
> +             to->val16[1] = from->val16[2];
> +             to->val16[2] = from->val16[1];
> +             to->val16[3] = from->val16[0];
> +             to->val16[4] = from->val16[7];
> +             to->val16[5] = from->val16[6];
> +             to->val16[6] = from->val16[5];
> +             to->val16[7] = from->val16[4];
> +             break;
> +
> +     case 2: /* word */
> +             to->val32[0] = from->val32[1];
> +             to->val32[1] = from->val32[0];
> +             to->val32[2] = from->val32[3];
> +             to->val32[3] = from->val32[2];
> +             break;
> +
> +     case 3: /* doubleword, no conversion */
> +             to->val64[0] = from->val64[0];
> +             to->val64[1] = from->val64[1];
> +             break;
> +     }
> +}
> +#endif
> +#endif
> +
>  static void emulate_load_store_insn(struct pt_regs *regs,
>       void __user *addr, unsigned int __user *pc)
>  {
> @@ -434,6 +499,10 @@ static void emulate_load_store_insn(struct pt_regs *regs,
>  #ifdef       CONFIG_EVA
>       mm_segment_t seg;
>  #endif
> +#ifdef CONFIG_CPU_HAS_MSA
> +     union fpureg msadatabase[2], *msadata;
> +     unsigned int func, df, rs, wd;
> +#endif
>       origpc = (unsigned long)pc;
>       orig31 = regs->regs[31];
>  
> @@ -703,6 +772,83 @@ static void emulate_load_store_insn(struct pt_regs *regs,
>                       break;
>               return;
>  
> +#ifdef CONFIG_CPU_HAS_MSA
> +     case msa_op:
> +             if (cpu_has_mdmx)
> +                     goto sigill;
> +
> +             func = insn.msa_mi10_format.func;
> +             switch (func) {
> +             default:
> +                     goto sigbus;
> +
> +             case msa_ld_op:
> +             case msa_st_op:
> +                     ;
> +             }
> +
> +             if (!thread_msa_context_live())
> +                     goto sigbus;
> +
> +             df = insn.msa_mi10_format.df;
> +             rs = insn.msa_mi10_format.rs;
> +             wd = insn.msa_mi10_format.wd;
> +             addr = (unsigned long *)(regs->regs[rs] + 
> (insn.msa_mi10_format.s10 << df));
> +             /* align a working space in stack... */
> +             msadata = (union fpureg *)(((unsigned long)msadatabase + 15) & 
> ~(unsigned long)0xf);
> +             if (func == msa_ld_op) {
> +                     if (!access_ok(VERIFY_READ, addr, 16))
> +                             goto sigbus;
> +                     compute_return_epc(regs);
> +                     res = __copy_from_user_inatomic(msadata, addr, 16);
> +                     if (res)
> +                             goto fault;
> +                     preempt_disable();
> +                     if (test_thread_flag(TIF_USEDMSA)) {
> +#ifdef __BIG_ENDIAN
> +                             msa_convert(&current->thread.fpu.fpr[wd], 
> msadata, df);
> +                             msa_to_wd(wd, &current->thread.fpu.fpr[wd]);
> +#else
> +                             msa_to_wd(wd, msadata);
> +#endif
> +                             preempt_enable();
> +                     } else {
> +                             preempt_enable();
> +#ifdef __BIG_ENDIAN
> +                             msa_convert(&current->thread.fpu.fpr[wd], 
> msadata, df);
> +#else
> +                             current->thread.fpu.fpr[wd] = *msadata;
> +#endif
> +                     }
> +             } else {
> +                     if (!access_ok(VERIFY_WRITE, addr, 16))
> +                             goto sigbus;
> +                     compute_return_epc(regs);
> +                     preempt_disable();
> +                     if (test_thread_flag(TIF_USEDMSA)) {
> +#ifdef __BIG_ENDIAN
> +                             msa_from_wd(wd, &current->thread.fpu.fpr[wd]);
> +                             msa_convert(msadata, 
> &current->thread.fpu.fpr[wd], df);
> +#else
> +                             msa_from_wd(wd, msadata);
> +#endif
> +                             preempt_enable();
> +                     } else {
> +                             preempt_enable();
> +#ifdef __BIG_ENDIAN
> +                             msa_convert(msadata, 
> &current->thread.fpu.fpr[wd], df);
> +#else
> +                             *msadata = current->thread.fpu.fpr[wd];
> +#endif
> +                     }
> +                     res = __copy_to_user_inatomic(addr, msadata, 16);
> +                     if (res)
> +                             goto fault;
> +             }
> +
> +             break;
> +#endif /* CONFIG_CPU_HAS_MSA */
> +
>       /*
>        * COP2 is available to implementor for application specific use.
>        * It's up to applications to register a notifier chain and do
> 
> 

Attachment: signature.asc
Description: Digital signature

Reply via email to