add emulate_step() changes to support VSX vector paired storage
access instructions that provide octword operand loads/stores
between storage and the set of 64 Vector Scalar Registers (VSRs).

Suggested-by: Ravi Bangoria <ravi.bango...@linux.ibm.com>
Suggested-by: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com>
Signed-off-by: Balamuruhan S <bal...@linux.ibm.com>
---
 arch/powerpc/lib/sstep.c | 77 +++++++++++++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 22147257d74d..01e1a3adc406 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -280,6 +280,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
                up[1] = tmp;
                break;
        }
+       case 32: {
+               unsigned long *up = (unsigned long *)ptr;
+               unsigned long tmp;
+
+               tmp = byterev_8(up[0]);
+               up[0] = byterev_8(up[3]);
+               up[3] = tmp;
+               tmp = byterev_8(up[2]);
+               up[2] = byterev_8(up[1]);
+               up[1] = tmp;
+               break;
+       }
+
 #endif
        default:
                WARN_ON_ONCE(1);
@@ -710,6 +723,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
        reg->d[0] = reg->d[1] = 0;
 
        switch (op->element_size) {
+       case 32:
+               /* [p]lxvp[x] */
        case 16:
                /* whole vector; lxv[x] or lxvl[l] */
                if (size == 0)
@@ -718,7 +733,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
                if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
                        rev = !rev;
                if (rev)
-                       do_byte_reverse(reg, 16);
+                       do_byte_reverse(reg, size);
                break;
        case 8:
                /* scalar loads, lxvd2x, lxvdsx */
@@ -794,6 +809,20 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
        size = GETSIZE(op->type);
 
        switch (op->element_size) {
+       case 32:
+               /* [p]stxvp[x] */
+               if (size == 0)
+                       break;
+               if (rev) {
+                       /* reverse 32 bytes */
+                       buf.d[0] = byterev_8(reg->d[3]);
+                       buf.d[1] = byterev_8(reg->d[2]);
+                       buf.d[2] = byterev_8(reg->d[1]);
+                       buf.d[3] = byterev_8(reg->d[0]);
+                       reg = &buf;
+               }
+               memcpy(mem, reg, size);
+               break;
        case 16:
                /* stxv, stxvx, stxvl, stxvll */
                if (size == 0)
@@ -862,28 +891,35 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
                                       bool cross_endian)
 {
        int reg = op->reg;
-       u8 mem[16];
-       union vsx_reg buf;
+       int i, nr_vsx_regs;
+       u8 mem[32];
+       union vsx_reg buf[2];
        int size = GETSIZE(op->type);
 
        if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
                return -EFAULT;
 
-       emulate_vsx_load(op, &buf, mem, cross_endian);
+       nr_vsx_regs = size / sizeof(__vector128);
+       emulate_vsx_load(op, buf, mem, cross_endian);
        preempt_disable();
        if (reg < 32) {
                /* FP regs + extensions */
                if (regs->msr & MSR_FP) {
-                       load_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               load_vsrn(reg + i, &buf[i].v);
                } else {
-                       current->thread.fp_state.fpr[reg][0] = buf.d[0];
-                       current->thread.fp_state.fpr[reg][1] = buf.d[1];
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               current->thread.fp_state.fpr[reg + i][0] = buf[i].d[0];
+                               current->thread.fp_state.fpr[reg + i][1] = buf[i].d[1];
+                       }
                }
        } else {
                if (regs->msr & MSR_VEC)
-                       load_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               load_vsrn(reg + i, &buf[i].v);
                else
-                       current->thread.vr_state.vr[reg - 32] = buf.v;
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               current->thread.vr_state.vr[reg - 32 + i] = buf[i].v;
        }
        preempt_enable();
        return 0;
@@ -894,30 +930,37 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
                                        bool cross_endian)
 {
        int reg = op->reg;
-       u8 mem[16];
-       union vsx_reg buf;
+       int i, nr_vsx_regs;
+       u8 mem[32];
+       union vsx_reg buf[2];
        int size = GETSIZE(op->type);
 
        if (!address_ok(regs, ea, size))
                return -EFAULT;
 
+       nr_vsx_regs = size / sizeof(__vector128);
        preempt_disable();
        if (reg < 32) {
                /* FP regs + extensions */
                if (regs->msr & MSR_FP) {
-                       store_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               store_vsrn(reg + i, &buf[i].v);
                } else {
-                       buf.d[0] = current->thread.fp_state.fpr[reg][0];
-                       buf.d[1] = current->thread.fp_state.fpr[reg][1];
+                       for (i = 0; i < nr_vsx_regs; i++) {
+                               buf[i].d[0] = current->thread.fp_state.fpr[reg + i][0];
+                               buf[i].d[1] = current->thread.fp_state.fpr[reg + i][1];
+                       }
                }
        } else {
                if (regs->msr & MSR_VEC)
-                       store_vsrn(reg, &buf);
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               store_vsrn(reg + i, &buf[i].v);
                else
-                       buf.v = current->thread.vr_state.vr[reg - 32];
+                       for (i = 0; i < nr_vsx_regs; i++)
+                               buf[i].v = current->thread.vr_state.vr[reg - 32 + i];
        }
        preempt_enable();
-       emulate_vsx_store(op, &buf, mem, cross_endian);
+       emulate_vsx_store(op, buf, mem, cross_endian);
        return  copy_mem_out(mem, ea, size, regs);
 }
 #endif /* CONFIG_VSX */
-- 
2.24.1

Reply via email to