From: Chao-ying Fu <c...@mips.com>

This patch adds software emulation for atomic memory operation (AMO)
instructions that may not be supported in hardware.

The `emu-amo.S` file provides assembly implementations of these
operations. Matching handler logic is added to the illegal-instruction
trap path so that unsupported AMO* instructions are caught and emulated
at runtime.

Signed-off-by: Chao-ying Fu <c...@mips.com>
Signed-off-by: Uros Stajic <uros.sta...@htecgroup.com>
---
 arch/riscv/cpu/p8700/Makefile  |   1 +
 arch/riscv/cpu/p8700/emu-amo.S | 254 ++++++++++++++++++++++++++++
 arch/riscv/lib/interrupts.c    | 299 +++++++++++++++++++++++++++++++++
 include/interrupt.h            |  19 +++
 4 files changed, 573 insertions(+)
 create mode 100644 arch/riscv/cpu/p8700/emu-amo.S

diff --git a/arch/riscv/cpu/p8700/Makefile b/arch/riscv/cpu/p8700/Makefile
index ecdd232da6f..f9fcd20e2ab 100644
--- a/arch/riscv/cpu/p8700/Makefile
+++ b/arch/riscv/cpu/p8700/Makefile
@@ -5,3 +5,4 @@
 obj-y += dram.o
 obj-y += cpu.o
 obj-y += cache.o
+obj-y += emu-amo.o
diff --git a/arch/riscv/cpu/p8700/emu-amo.S b/arch/riscv/cpu/p8700/emu-amo.S
new file mode 100644
index 00000000000..b7005339939
--- /dev/null
+++ b/arch/riscv/cpu/p8700/emu-amo.S
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2021, Chao-ying Fu <c...@mips.com>
+ */
+
+        .text
+
+       .p2align 3
+       .globl  atomic_swap_w
+atomic_swap_w:                  /* a0 = word address, a1 = new value */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word */
+.Lswap_w_guess: mv a0, a5      /* a0 = value we expect to replace */
+.Lswap_w_lr:   lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Lswap_w_guess  /* it changed: adopt the new guess */
+       sc.w    a6, a1, (a4)    /* try to store a1; a6 = 0 on success */
+       bnez    a6, .Lswap_w_lr /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word, as loaded by lw/lr.w */
+
+       .p2align 3
+       .globl  atomic_swap_d
+atomic_swap_d:                  /* a0 = doubleword address, a1 = new value */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Lswap_d_guess: mv a0, a5      /* a0 = value we expect to replace */
+.Lswap_d_lr:   lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Lswap_d_guess  /* it changed: adopt the new guess */
+       sc.d    a6, a1, (a4)    /* try to store a1; a6 = 0 on success */
+       bnez    a6, .Lswap_d_lr /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_add_w
+atomic_add_w:                   /* a0 = word address, a1 = addend */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word */
+.Ladd_w_guess: mv a0, a5       /* a0 = value the sum is based on */
+       addw    a3, a5, a1      /* a3 = 32-bit sum to store */
+.Ladd_w_lr:    lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Ladd_w_guess   /* it changed: recompute the sum */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Ladd_w_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_add_d
+atomic_add_d:                   /* a0 = doubleword address, a1 = addend */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Ladd_d_guess: mv a0, a5       /* a0 = value the sum is based on */
+       add     a3, a5, a1      /* a3 = sum to store */
+.Ladd_d_lr:    lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Ladd_d_guess   /* it changed: recompute the sum */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Ladd_d_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_and_w
+atomic_and_w:                   /* a0 = word address, a1 = mask */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word */
+.Land_w_guess: mv a0, a5       /* a0 = value the result is based on */
+       and     a3, a5, a1      /* a3 = value to store */
+.Land_w_lr:    lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Land_w_guess   /* it changed: recompute */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Land_w_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_and_d
+atomic_and_d:                   /* a0 = doubleword address, a1 = mask */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Land_d_guess: mv a0, a5       /* a0 = value the result is based on */
+       and     a3, a5, a1      /* a3 = value to store */
+.Land_d_lr:    lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Land_d_guess   /* it changed: recompute */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Land_d_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_or_w
+atomic_or_w:                    /* a0 = word address, a1 = bits to set */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word */
+.Lor_w_guess:  mv a0, a5       /* a0 = value the result is based on */
+       or      a3, a5, a1      /* a3 = value to store */
+.Lor_w_lr:     lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Lor_w_guess    /* it changed: recompute */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lor_w_lr   /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_or_d
+atomic_or_d:                    /* a0 = doubleword address, a1 = bits to set */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Lor_d_guess:  mv a0, a5       /* a0 = value the result is based on */
+       or      a3, a5, a1      /* a3 = value to store */
+.Lor_d_lr:     lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Lor_d_guess    /* it changed: recompute */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lor_d_lr   /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_xor_w
+atomic_xor_w:                   /* a0 = word address, a1 = bits to toggle */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word */
+.Lxor_w_guess: mv a0, a5       /* a0 = value the result is based on */
+       xor     a3, a5, a1      /* a3 = value to store */
+.Lxor_w_lr:    lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Lxor_w_guess   /* it changed: recompute */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lxor_w_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_xor_d
+atomic_xor_d:                   /* a0 = doubleword address, a1 = bits to toggle */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Lxor_d_guess: mv a0, a5       /* a0 = value the result is based on */
+       xor     a3, a5, a1      /* a3 = value to store */
+.Lxor_d_lr:    lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Lxor_d_guess   /* it changed: recompute */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lxor_d_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_max_w
+atomic_max_w:                   /* a0 = word address, a1 = operand (low 32 bits used) */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word (sign-extended) */
+.Lmax_w_guess: mv a0, a5       /* a0 = value the comparison is based on */
+       sext.w  a3, a1          /* a 32-bit AMO ignores the upper half of the operand */
+       blt     a5, a3, .Lmax_w_lr      /* signed: memory < operand, store operand */
+       mv      a3, a5          /* otherwise memory already holds the maximum */
+.Lmax_w_lr:    lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Lmax_w_guess   /* it changed: compare again */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lmax_w_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_max_d
+atomic_max_d:                   /* a0 = doubleword address, a1 = operand */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Lmax_d_guess: mv a0, a5       /* a0 = value the comparison is based on */
+       mv      a3, a5          /* assume memory already holds the maximum */
+       bge     a5, a1, .Lmax_d_lr      /* signed: memory >= operand */
+       mv      a3, a1          /* otherwise the operand is stored */
+.Lmax_d_lr:    lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Lmax_d_guess   /* it changed: compare again */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lmax_d_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_maxu_w
+atomic_maxu_w:                  /* a0 = word address, a1 = operand (low 32 bits used) */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word (sign-extended) */
+.Lmaxu_w_guess: mv a0, a5      /* a0 = value the comparison is based on */
+       sext.w  a3, a1          /* same extension as a5 keeps 32-bit unsigned order */
+       bltu    a5, a3, .Lmaxu_w_lr     /* unsigned: memory < operand, store operand */
+       mv      a3, a5          /* otherwise memory already holds the maximum */
+.Lmaxu_w_lr:   lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Lmaxu_w_guess  /* it changed: compare again */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lmaxu_w_lr /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_maxu_d
+atomic_maxu_d:                  /* a0 = doubleword address, a1 = operand */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Lmaxu_d_guess: mv a0, a5      /* a0 = value the comparison is based on */
+       mv      a3, a5          /* assume memory already holds the maximum */
+       bgeu    a5, a1, .Lmaxu_d_lr     /* unsigned: memory >= operand */
+       mv      a3, a1          /* otherwise the operand is stored */
+.Lmaxu_d_lr:   lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Lmaxu_d_guess  /* it changed: compare again */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lmaxu_d_lr /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_min_w
+atomic_min_w:                   /* a0 = word address, a1 = operand (low 32 bits used) */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word (sign-extended) */
+.Lmin_w_guess: mv a0, a5       /* a0 = value the comparison is based on */
+       sext.w  a3, a1          /* a 32-bit AMO ignores the upper half of the operand */
+       blt     a3, a5, .Lmin_w_lr      /* signed: operand < memory, store operand */
+       mv      a3, a5          /* otherwise memory already holds the minimum */
+.Lmin_w_lr:    lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Lmin_w_guess   /* it changed: compare again */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lmin_w_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_min_d
+atomic_min_d:                   /* a0 = doubleword address, a1 = operand */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Lmin_d_guess: mv a0, a5       /* a0 = value the comparison is based on */
+       mv      a3, a5          /* assume memory already holds the minimum */
+       bge     a1, a5, .Lmin_d_lr      /* signed: operand >= memory */
+       mv      a3, a1          /* otherwise the operand is stored */
+.Lmin_d_lr:    lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Lmin_d_guess   /* it changed: compare again */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lmin_d_lr  /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
+
+       .p2align 3
+       .globl  atomic_minu_w
+atomic_minu_w:                  /* a0 = word address, a1 = operand (low 32 bits used) */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       lw      a5, 0(a4)       /* first guess at the current word (sign-extended) */
+.Lminu_w_guess: mv a0, a5      /* a0 = value the comparison is based on */
+       sext.w  a3, a1          /* same extension as a5 keeps 32-bit unsigned order */
+       bltu    a3, a5, .Lminu_w_lr     /* unsigned: operand < memory, store operand */
+       mv      a3, a5          /* otherwise memory already holds the minimum */
+.Lminu_w_lr:   lr.w a5, (a4)   /* reserve the word and re-read it */
+       bne     a5, a0, .Lminu_w_guess  /* it changed: compare again */
+       sc.w    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lminu_w_lr /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous word */
+
+       .p2align 3
+       .globl  atomic_minu_d
+atomic_minu_d:                  /* a0 = doubleword address, a1 = operand */
+       mv      a4, a0          /* a4 = address; a0 will carry the result */
+       ld      a5, 0(a4)       /* first guess at the current doubleword */
+.Lminu_d_guess: mv a0, a5      /* a0 = value the comparison is based on */
+       mv      a3, a5          /* assume memory already holds the minimum */
+       bgeu    a1, a5, .Lminu_d_lr     /* unsigned: operand >= memory */
+       mv      a3, a1          /* otherwise the operand is stored */
+.Lminu_d_lr:   lr.d a5, (a4)   /* reserve the doubleword and re-read it */
+       bne     a5, a0, .Lminu_d_guess  /* it changed: compare again */
+       sc.d    a6, a3, (a4)    /* a6 = 0 on success */
+       bnez    a6, .Lminu_d_lr /* store failed: redo the lr/sc pair */
+       ret                     /* a0 = previous doubleword */
diff --git a/arch/riscv/lib/interrupts.c b/arch/riscv/lib/interrupts.c
index ef1056eeb6f..906916c762f 100644
--- a/arch/riscv/lib/interrupts.c
+++ b/arch/riscv/lib/interrupts.c
@@ -22,6 +22,36 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
+#define ILLEGAL_INSTRUCTION 2  /* exception code that triggers emulation */
+#define AMO_MASK   0xf800707f  /* bits 31:27, 14:12 and 6:0 select the AMO */
+#define AQRL_MASK  0x06000000  /* bits 26:25 */
+#define AQRL_SHIFT 25
+#define RS2_MASK   0x01f00000  /* bits 24:20, register holding the operand */
+#define RS2_SHIFT  20
+#define RS1_MASK   0x000f8000  /* bits 19:15, register holding the address */
+#define RS1_SHIFT  15
+#define RD_MASK    0x00000f80  /* bits 11:7, register receiving the result */
+#define RD_SHIFT   7
+
+#define AMOADD_W_MATCH  0x0000202f     /* (opcode & AMO_MASK) patterns ... */
+#define AMOADD_D_MATCH  0x0000302f     /* ... sorted by their top five bits */
+#define AMOSWAP_W_MATCH 0x0800202f
+#define AMOSWAP_D_MATCH 0x0800302f
+#define AMOXOR_W_MATCH  0x2000202f
+#define AMOXOR_D_MATCH  0x2000302f
+#define AMOOR_W_MATCH   0x4000202f
+#define AMOOR_D_MATCH   0x4000302f
+#define AMOAND_W_MATCH  0x6000202f
+#define AMOAND_D_MATCH  0x6000302f
+#define AMOMIN_W_MATCH  0x8000202f
+#define AMOMIN_D_MATCH  0x8000302f
+#define AMOMAX_W_MATCH  0xa000202f
+#define AMOMAX_D_MATCH  0xa000302f
+#define AMOMINU_W_MATCH 0xc000202f
+#define AMOMINU_D_MATCH 0xc000302f
+#define AMOMAXU_W_MATCH 0xe000202f
+#define AMOMAXU_D_MATCH 0xe000302f
+
 void set_resume(struct resume_data *data)
 {
        gd->arch.resume = data;
@@ -115,6 +145,184 @@ static void show_code(ulong epc)
                printf("%04x%s", pos[i], i + 1 == len ? ")\n" : " ");
 }
 
+static ulong get_reg(struct pt_regs *regs, int reg_num) /* read x<reg_num> from *regs */
+{
+       switch (reg_num) {      /* map the register number to its pt_regs field */
+       case 0:                 /* register 0 always reads as zero */
+               return 0;
+       case 1:
+               return regs->ra;
+       case 2:
+               return regs->sp;
+       case 3:
+               return regs->gp;
+       case 4:
+               return regs->tp;
+       case 5:
+               return regs->t0;
+       case 6:
+               return regs->t1;
+       case 7:
+               return regs->t2;
+       case 8:
+               return regs->s0;
+       case 9:
+               return regs->s1;
+       case 10:
+               return regs->a0;
+       case 11:
+               return regs->a1;
+       case 12:
+               return regs->a2;
+       case 13:
+               return regs->a3;
+       case 14:
+               return regs->a4;
+       case 15:
+               return regs->a5;
+       case 16:
+               return regs->a6;
+       case 17:
+               return regs->a7;
+       case 18:
+               return regs->s2;
+       case 19:
+               return regs->s3;
+       case 20:
+               return regs->s4;
+       case 21:
+               return regs->s5;
+       case 22:
+               return regs->s6;
+       case 23:
+               return regs->s7;
+       case 24:
+               return regs->s8;
+       case 25:
+               return regs->s9;
+       case 26:
+               return regs->s10;
+       case 27:
+               return regs->s11;
+       case 28:
+               return regs->t3;
+       case 29:
+               return regs->t4;
+       case 30:
+               return regs->t5;
+       case 31:
+               return regs->t6;
+       default:                /* anything outside 0..31 is reported, then reads as 0 */
+               printf("Error reg_num=%d for %s\n", reg_num, __func__);
+               break;
+       }
+       return 0;               /* only reached from the default case */
+}
+
+static void set_reg(struct pt_regs *regs, int reg_num, ulong reg_value) /* write x<reg_num> in *regs */
+{
+       switch (reg_num) {      /* map the register number to its pt_regs field */
+       case 0:                 /* writes to register 0 are discarded */
+               break;
+       case 1:
+               regs->ra = reg_value;
+               break;
+       case 2:
+               regs->sp = reg_value;
+               break;
+       case 3:
+               regs->gp = reg_value;
+               break;
+       case 4:
+               regs->tp = reg_value;
+               break;
+       case 5:
+               regs->t0 = reg_value;
+               break;
+       case 6:
+               regs->t1 = reg_value;
+               break;
+       case 7:
+               regs->t2 = reg_value;
+               break;
+       case 8:
+               regs->s0 = reg_value;
+               break;
+       case 9:
+               regs->s1 = reg_value;
+               break;
+       case 10:
+               regs->a0 = reg_value;
+               break;
+       case 11:
+               regs->a1 = reg_value;
+               break;
+       case 12:
+               regs->a2 = reg_value;
+               break;
+       case 13:
+               regs->a3 = reg_value;
+               break;
+       case 14:
+               regs->a4 = reg_value;
+               break;
+       case 15:
+               regs->a5 = reg_value;
+               break;
+       case 16:
+               regs->a6 = reg_value;
+               break;
+       case 17:
+               regs->a7 = reg_value;
+               break;
+       case 18:
+               regs->s2 = reg_value;
+               break;
+       case 19:
+               regs->s3 = reg_value;
+               break;
+       case 20:
+               regs->s4 = reg_value;
+               break;
+       case 21:
+               regs->s5 = reg_value;
+               break;
+       case 22:
+               regs->s6 = reg_value;
+               break;
+       case 23:
+               regs->s7 = reg_value;
+               break;
+       case 24:
+               regs->s8 = reg_value;
+               break;
+       case 25:
+               regs->s9 = reg_value;
+               break;
+       case 26:
+               regs->s10 = reg_value;
+               break;
+       case 27:
+               regs->s11 = reg_value;
+               break;
+       case 28:
+               regs->t3 = reg_value;
+               break;
+       case 29:
+               regs->t4 = reg_value;
+               break;
+       case 30:
+               regs->t5 = reg_value;
+               break;
+       case 31:
+               regs->t6 = reg_value;
+               break;
+       default:                /* anything outside 0..31 is reported and nothing is written */
+               printf("Error reg_num=%d for %s\n", reg_num, __func__);
+               break;
+       }
+}
+
 static void _exit_trap(ulong code, ulong epc, ulong tval, struct pt_regs *regs)
 {
        static const char * const exception_code[] = {
@@ -140,6 +348,97 @@ static void _exit_trap(ulong code, ulong epc, ulong tval, 
struct pt_regs *regs)
                gd->arch.resume->code = code;
                longjmp(gd->arch.resume->jump, 1);
        }
+       if (IS_ENABLED(CONFIG_P8700_RISCV) && code == ILLEGAL_INSTRUCTION) {
+               // Fetch one 16-bit op at a time to deal with 16-bit alignment.
+               // FIXME! For the big-endian mode, we need to swap bytes.
+               unsigned short op0 = *(unsigned short *)epc;
+               unsigned short op1 = *((unsigned short *)epc + 1);
+               // Widen op1 first: shifting a promoted int would hit the sign bit.
+               unsigned int opcode = ((unsigned int)op1 << 16) | op0;
+               int rs2 = (opcode & RS2_MASK) >> RS2_SHIFT;
+               int rs1 = (opcode & RS1_MASK) >> RS1_SHIFT;
+               int rd = (opcode & RD_MASK) >> RD_SHIFT;
+               ulong rs2_value = get_reg(regs, rs2);   /* operand */
+               ulong rs1_value = get_reg(regs, rs1);   /* memory address */
+               ulong rd_value = 0;
+
+               switch (opcode & AMO_MASK) {    /* aq/rl bits are not decoded */
+               case AMOADD_D_MATCH:
+                       rd_value = atomic_add_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;  /* exactly one helper per trapped instruction */
+               case AMOADD_W_MATCH:
+                       rd_value = atomic_add_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOAND_D_MATCH:
+                       rd_value = atomic_and_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOAND_W_MATCH:
+                       rd_value = atomic_and_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMAX_D_MATCH:
+                       rd_value = atomic_max_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMAX_W_MATCH:
+                       rd_value = atomic_max_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMAXU_D_MATCH:
+                       rd_value = atomic_maxu_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMAXU_W_MATCH:
+                       rd_value = atomic_maxu_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMIN_D_MATCH:
+                       rd_value = atomic_min_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMIN_W_MATCH:
+                       rd_value = atomic_min_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMINU_D_MATCH:
+                       rd_value = atomic_minu_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOMINU_W_MATCH:
+                       rd_value = atomic_minu_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOOR_D_MATCH:
+                       rd_value = atomic_or_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOOR_W_MATCH:
+                       rd_value = atomic_or_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOSWAP_D_MATCH:
+                       rd_value = atomic_swap_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOSWAP_W_MATCH:
+                       rd_value = atomic_swap_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOXOR_D_MATCH:
+                       rd_value = atomic_xor_d(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               case AMOXOR_W_MATCH:
+                       rd_value = atomic_xor_w(rs1_value, rs2_value);
+                       set_reg(regs, rd, rd_value);
+                       break;
+               default:        /* not an AMO handled here */
+                       break;
+               }
+       } /* NOTE(review): handled AMOs still reach the report below; confirm resume path */
 
        if (code < ARRAY_SIZE(exception_code))
                printf("Unhandled exception: %s\n", exception_code[code]);
diff --git a/include/interrupt.h b/include/interrupt.h
index 6ea28b54a56..5fc983afccb 100644
--- a/include/interrupt.h
+++ b/include/interrupt.h
@@ -43,3 +43,22 @@ struct resume_data {
  * Return:     0 before an exception, 1 after an exception occurred
  */
 void set_resume(struct resume_data *data);
+/* AMO emulation: update the word (_w) or doubleword (_d) at addr, return its old value. */
+ulong atomic_swap_w(ulong addr, ulong val);
+ulong atomic_swap_d(ulong addr, ulong val);
+ulong atomic_add_w(ulong addr, ulong val);
+ulong atomic_add_d(ulong addr, ulong val);
+ulong atomic_and_w(ulong addr, ulong val);
+ulong atomic_and_d(ulong addr, ulong val);
+ulong atomic_or_w(ulong addr, ulong val);
+ulong atomic_or_d(ulong addr, ulong val);
+ulong atomic_xor_w(ulong addr, ulong val);
+ulong atomic_xor_d(ulong addr, ulong val);
+ulong atomic_max_w(ulong addr, ulong val);
+ulong atomic_max_d(ulong addr, ulong val);
+ulong atomic_maxu_w(ulong addr, ulong val);
+ulong atomic_maxu_d(ulong addr, ulong val);
+ulong atomic_min_w(ulong addr, ulong val);
+ulong atomic_min_d(ulong addr, ulong val);
+ulong atomic_minu_w(ulong addr, ulong val);
+ulong atomic_minu_d(ulong addr, ulong val);
-- 
2.34.1

Reply via email to