From: Chao-ying Fu <c...@mips.com>

This patch adds software emulation for atomic memory operation (AMO)
instructions that may not be supported in hardware.
The `emu-amo.S` file provides assembly implementations of the
aforementioned operations. Corresponding handler logic is integrated
into the illegal instruction trap to catch and emulate unsupported
AMO* instructions at runtime.

Signed-off-by: Chao-ying Fu <c...@mips.com>
Signed-off-by: Uros Stajic <uros.sta...@htecgroup.com>
---
 arch/riscv/cpu/p8700/Makefile  |   1 +
 arch/riscv/cpu/p8700/emu-amo.S | 262 ++++++++++++++++++++++++++++
 arch/riscv/lib/interrupts.c    | 316 +++++++++++++++++++++++++++++++++
 include/interrupt.h            |  24 +++
 4 files changed, 603 insertions(+)
 create mode 100644 arch/riscv/cpu/p8700/emu-amo.S

diff --git a/arch/riscv/cpu/p8700/Makefile b/arch/riscv/cpu/p8700/Makefile
index ecdd232da6f..f9fcd20e2ab 100644
--- a/arch/riscv/cpu/p8700/Makefile
+++ b/arch/riscv/cpu/p8700/Makefile
@@ -5,3 +5,4 @@
 obj-y += dram.o
 obj-y += cpu.o
 obj-y += cache.o
+obj-y += emu-amo.o
diff --git a/arch/riscv/cpu/p8700/emu-amo.S b/arch/riscv/cpu/p8700/emu-amo.S
new file mode 100644
index 00000000000..b7005339939
--- /dev/null
+++ b/arch/riscv/cpu/p8700/emu-amo.S
@@ -0,0 +1,262 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2021, Chao-ying Fu <c...@mips.com>
+ */
+
+/*
+ * LR/SC based replacements for the AMO* instructions.
+ *
+ * Each routine expects the target address in a0 and the operand in a1. It
+ * returns, in a0, the value that was in memory before the update and may
+ * clobber a3-a6.
+ */
+
+	.text
+
+	.align	3
+	.globl	atomic_swap_w
+atomic_swap_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a1,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_swap_d
+atomic_swap_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a1,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_add_w
+atomic_add_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	addw	a3,a5,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_add_d
+atomic_add_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	add	a3,a5,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_and_w
+atomic_and_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	and	a3,a5,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_and_d
+atomic_and_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	and	a3,a5,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_or_w
+atomic_or_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	or	a3,a5,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_or_d
+atomic_or_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	or	a3,a5,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_xor_w
+atomic_xor_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	xor	a3,a5,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_xor_d
+atomic_xor_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	xor	a3,a5,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_max_w
+atomic_max_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bge	a5,a1,1f
+	mv	a3,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_max_d
+atomic_max_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bge	a5,a1,1f
+	mv	a3,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_maxu_w
+atomic_maxu_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bgeu	a5,a1,1f
+	mv	a3,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_maxu_d
+atomic_maxu_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bgeu	a5,a1,1f
+	mv	a3,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_min_w
+atomic_min_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bge	a1,a5,1f
+	mv	a3,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_min_d
+atomic_min_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bge	a1,a5,1f
+	mv	a3,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_minu_w
+atomic_minu_w:
+	lw	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bgeu	a1,a5,1f
+	mv	a3,a1
+1:	lr.w	a5,(a4)
+	bne	a5,a0,2b
+	sc.w	a6,a3,(a4)
+	bnez	a6,1b
+	ret
+
+	.align	3
+	.globl	atomic_minu_d
+atomic_minu_d:
+	ld	a5,0(a0)
+	mv	a4,a0
+2:	mv	a0,a5
+	mv	a3,a5
+	bgeu	a1,a5,1f
+	mv	a3,a1
+1:	lr.d	a5,(a4)
+	bne	a5,a0,2b
+	sc.d	a6,a3,(a4)
+	bnez	a6,1b
+	ret
diff --git a/arch/riscv/lib/interrupts.c b/arch/riscv/lib/interrupts.c
index ef1056eeb6f..906916c762f 100644
--- a/arch/riscv/lib/interrupts.c
+++ b/arch/riscv/lib/interrupts.c
@@ -22,6 +22,36 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
+#define ILLEGAL_INSTRUCTION 2
+#define AMO_MASK 0xf800707f
+#define AQRL_MASK 0x06000000
+#define AQRL_SHIFT 25
+#define RS2_MASK 0x01f00000
+#define RS2_SHIFT 20
+#define RS1_MASK 0x000f8000
+#define RS1_SHIFT 15
+#define RD_MASK 0x00000f80
+#define RD_SHIFT 7
+
+#define AMOADD_D_MATCH 0x0000302f
+#define AMOADD_W_MATCH 0x0000202f
+#define AMOAND_D_MATCH 0x6000302f
+#define AMOAND_W_MATCH 0x6000202f
+#define AMOMAX_D_MATCH 0xa000302f
+#define AMOMAX_W_MATCH 0xa000202f
+#define AMOMAXU_D_MATCH 0xe000302f
+#define AMOMAXU_W_MATCH 0xe000202f
+#define AMOMIN_D_MATCH 0x8000302f
+#define AMOMIN_W_MATCH 0x8000202f
+#define AMOMINU_D_MATCH 0xc000302f
+#define AMOMINU_W_MATCH 0xc000202f
+#define AMOOR_D_MATCH 0x4000302f
+#define AMOOR_W_MATCH 0x4000202f
+#define AMOSWAP_D_MATCH 0x0800302f
+#define AMOSWAP_W_MATCH 0x0800202f
+#define AMOXOR_D_MATCH 0x2000302f
+#define AMOXOR_W_MATCH 0x2000202f
+
 void set_resume(struct resume_data *data)
 {
 	gd->arch.resume = data;
@@ -115,6 +145,199 @@ static void show_code(ulong epc)
 		printf("%04x%s", pos[i], i + 1 == len ? ")\n" : " ");
 }
 
+/**
+ * get_reg() - read a general-purpose register from a register snapshot
+ * @regs:	register snapshot handed to the trap handler
+ * @reg_num:	register number, 0 (zero) to 31 (t6)
+ *
+ * Return: the stored register value; 0 for x0 or for an invalid @reg_num
+ */
+static ulong get_reg(struct pt_regs *regs, int reg_num)
+{
+	switch (reg_num) {
+	case 0:
+		return 0;
+	case 1:
+		return regs->ra;
+	case 2:
+		return regs->sp;
+	case 3:
+		return regs->gp;
+	case 4:
+		return regs->tp;
+	case 5:
+		return regs->t0;
+	case 6:
+		return regs->t1;
+	case 7:
+		return regs->t2;
+	case 8:
+		return regs->s0;
+	case 9:
+		return regs->s1;
+	case 10:
+		return regs->a0;
+	case 11:
+		return regs->a1;
+	case 12:
+		return regs->a2;
+	case 13:
+		return regs->a3;
+	case 14:
+		return regs->a4;
+	case 15:
+		return regs->a5;
+	case 16:
+		return regs->a6;
+	case 17:
+		return regs->a7;
+	case 18:
+		return regs->s2;
+	case 19:
+		return regs->s3;
+	case 20:
+		return regs->s4;
+	case 21:
+		return regs->s5;
+	case 22:
+		return regs->s6;
+	case 23:
+		return regs->s7;
+	case 24:
+		return regs->s8;
+	case 25:
+		return regs->s9;
+	case 26:
+		return regs->s10;
+	case 27:
+		return regs->s11;
+	case 28:
+		return regs->t3;
+	case 29:
+		return regs->t4;
+	case 30:
+		return regs->t5;
+	case 31:
+		return regs->t6;
+	default:
+		printf("Error reg_num=%d for %s\n", reg_num, __func__);
+		break;
+	}
+	return 0;
+}
+
+/**
+ * set_reg() - write a general-purpose register in a register snapshot
+ * @regs:	register snapshot handed to the trap handler
+ * @reg_num:	register number, 0 (zero) to 31 (t6)
+ * @reg_value:	value to store
+ *
+ * Writes to x0 are discarded; an invalid @reg_num is reported and ignored.
+ */
+static void set_reg(struct pt_regs *regs, int reg_num, ulong reg_value)
+{
+	switch (reg_num) {
+	case 0:
+		break;
+	case 1:
+		regs->ra = reg_value;
+		break;
+	case 2:
+		regs->sp = reg_value;
+		break;
+	case 3:
+		regs->gp = reg_value;
+		break;
+	case 4:
+		regs->tp = reg_value;
+		break;
+	case 5:
+		regs->t0 = reg_value;
+		break;
+	case 6:
+		regs->t1 = reg_value;
+		break;
+	case 7:
+		regs->t2 = reg_value;
+		break;
+	case 8:
+		regs->s0 = reg_value;
+		break;
+	case 9:
+		regs->s1 = reg_value;
+		break;
+	case 10:
+		regs->a0 = reg_value;
+		break;
+	case 11:
+		regs->a1 = reg_value;
+		break;
+	case 12:
+		regs->a2 = reg_value;
+		break;
+	case 13:
+		regs->a3 = reg_value;
+		break;
+	case 14:
+		regs->a4 = reg_value;
+		break;
+	case 15:
+		regs->a5 = reg_value;
+		break;
+	case 16:
+		regs->a6 = reg_value;
+		break;
+	case 17:
+		regs->a7 = reg_value;
+		break;
+	case 18:
+		regs->s2 = reg_value;
+		break;
+	case 19:
+		regs->s3 = reg_value;
+		break;
+	case 20:
+		regs->s4 = reg_value;
+		break;
+	case 21:
+		regs->s5 = reg_value;
+		break;
+	case 22:
+		regs->s6 = reg_value;
+		break;
+	case 23:
+		regs->s7 = reg_value;
+		break;
+	case 24:
+		regs->s8 = reg_value;
+		break;
+	case 25:
+		regs->s9 = reg_value;
+		break;
+	case 26:
+		regs->s10 = reg_value;
+		break;
+	case 27:
+		regs->s11 = reg_value;
+		break;
+	case 28:
+		regs->t3 = reg_value;
+		break;
+	case 29:
+		regs->t4 = reg_value;
+		break;
+	case 30:
+		regs->t5 = reg_value;
+		break;
+	case 31:
+		regs->t6 = reg_value;
+		break;
+	default:
+		printf("Error reg_num=%d for %s\n", reg_num, __func__);
+		break;
+	}
+}
+
 static void _exit_trap(ulong code, ulong epc, ulong tval, struct pt_regs *regs)
 {
 	static const char * const exception_code[] = {
@@ -140,6 +363,99 @@ static void _exit_trap(ulong code, ulong epc, ulong tval, struct pt_regs *regs)
 		gd->arch.resume->code = code;
 		longjmp(gd->arch.resume->jump, 1);
 	}
+	if (IS_ENABLED(CONFIG_P8700_RISCV) && code == ILLEGAL_INSTRUCTION) {
+		/* Fetch 16 bits at a time: epc may only be 2-byte aligned. */
+		/* FIXME: big-endian needs a byte swap; aq/rl bits are ignored. */
+		unsigned short op0 = *(unsigned short *)epc;
+		unsigned short op1 = *((unsigned short *)epc + 1);
+		/* Cast first: op1 promotes to int and op1 << 16 could overflow. */
+		unsigned int opcode = ((unsigned int)op1 << 16) | op0;
+		int rs2 = (opcode & RS2_MASK) >> RS2_SHIFT;
+		int rs1 = (opcode & RS1_MASK) >> RS1_SHIFT;
+		int rd = (opcode & RD_MASK) >> RD_SHIFT;
+		ulong rs2_value = get_reg(regs, rs2);
+		ulong rs1_value = get_reg(regs, rs1);
+		ulong rd_value = 0;
+
+		switch (opcode & AMO_MASK) {
+		case AMOADD_D_MATCH:
+			rd_value = atomic_add_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOADD_W_MATCH:
+			rd_value = atomic_add_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOAND_D_MATCH:
+			rd_value = atomic_and_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOAND_W_MATCH:
+			rd_value = atomic_and_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMAX_D_MATCH:
+			rd_value = atomic_max_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMAX_W_MATCH:
+			rd_value = atomic_max_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMAXU_D_MATCH:
+			rd_value = atomic_maxu_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMAXU_W_MATCH:
+			rd_value = atomic_maxu_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMIN_D_MATCH:
+			rd_value = atomic_min_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMIN_W_MATCH:
+			rd_value = atomic_min_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMINU_D_MATCH:
+			rd_value = atomic_minu_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOMINU_W_MATCH:
+			rd_value = atomic_minu_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOOR_D_MATCH:
+			rd_value = atomic_or_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOOR_W_MATCH:
+			rd_value = atomic_or_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOSWAP_D_MATCH:
+			rd_value = atomic_swap_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOSWAP_W_MATCH:
+			rd_value = atomic_swap_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOXOR_D_MATCH:
+			rd_value = atomic_xor_d(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		case AMOXOR_W_MATCH:
+			rd_value = atomic_xor_w(rs1_value, rs2_value);
+			set_reg(regs, rd, rd_value);
+			break;
+		default:
+			break;
+		}
+		/* NOTE(review): control still reaches the report below; */
+		/* confirm how an emulated instruction is meant to resume. */
+	}
 
 	if (code < ARRAY_SIZE(exception_code))
 		printf("Unhandled exception: %s\n", exception_code[code]);
diff --git a/include/interrupt.h b/include/interrupt.h
index 6ea28b54a56..5fc983afccb 100644
--- a/include/interrupt.h
+++ b/include/interrupt.h
@@ -43,3 +43,27 @@ struct resume_data {
  * Return: 0 before an exception, 1 after an exception occurred
  */
 void set_resume(struct resume_data *data);
+
+/*
+ * AMO* emulation helpers implemented in arch/riscv/cpu/p8700/emu-amo.S.
+ * Each one applies its operation to the memory at @addr with operand @val
+ * and returns the value that memory held before the update.
+ */
+ulong atomic_swap_w(ulong addr, ulong val);
+ulong atomic_swap_d(ulong addr, ulong val);
+ulong atomic_add_w(ulong addr, ulong val);
+ulong atomic_add_d(ulong addr, ulong val);
+ulong atomic_and_w(ulong addr, ulong val);
+ulong atomic_and_d(ulong addr, ulong val);
+ulong atomic_or_w(ulong addr, ulong val);
+ulong atomic_or_d(ulong addr, ulong val);
+ulong atomic_xor_w(ulong addr, ulong val);
+ulong atomic_xor_d(ulong addr, ulong val);
+ulong atomic_max_w(ulong addr, ulong val);
+ulong atomic_max_d(ulong addr, ulong val);
+ulong atomic_maxu_w(ulong addr, ulong val);
+ulong atomic_maxu_d(ulong addr, ulong val);
+ulong atomic_min_w(ulong addr, ulong val);
+ulong atomic_min_d(ulong addr, ulong val);
+ulong atomic_minu_w(ulong addr, ulong val);
+ulong atomic_minu_d(ulong addr, ulong val);
-- 
2.34.1