The middle-end support for this has already been merged:
https://github.com/gcc-mirror/gcc/commit/0d4dd7e07a879d6c07a33edb2799710faa95651e
With this patch, we can handle operations that may trap on elements outside the loop.
The following two cases are addressed by this patch:
1. integer division:
#define TEST_TYPE(TYPE) \
__attribute__((noipa)) \
void vrem_##TYPE (TYPE * __restrict dst, TYPE * __restrict a, TYPE * __restrict b, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = a[i] % b[i]; \
}
#define TEST_ALL() \
TEST_TYPE(int8_t)
TEST_ALL()
Before this patch:
vrem_int8_t:
ble a3,zero,.L14
csrr t4,vlenb
addiw a5,a3,-1
addiw a4,t4,-1
sext.w t5,a3
bltu a5,a4,.L10
csrr t3,vlenb
subw t3,t5,t3
li a5,0
vsetvli t6,zero,e8,m1,ta,ma
.L4:
add a6,a2,a5
add a7,a0,a5
add t1,a1,a5
mv a4,a5
add a5,a5,t4
vl1re8.v v2,0(a6)
vl1re8.v v1,0(t1)
sext.w a6,a5
vrem.vv v1,v1,v2
vs1r.v v1,0(a7)
bleu a6,t3,.L4
csrr a5,vlenb
addw a4,a4,a5
sext.w a5,a4
beq t5,a4,.L16
.L3:
csrr a6,vlenb
subw t5,t5,a4
srli a6,a6,1
addiw t1,t5,-1
addiw a7,a6,-1
bltu t1,a7,.L9
slli a4,a4,32
srli a4,a4,32
add t0,a1,a4
add t6,a2,a4
add a4,a0,a4
vsetvli a7,zero,e8,mf2,ta,ma
sext.w t3,a6
vle8.v v1,0(t0)
vle8.v v2,0(t6)
subw t4,t5,a6
vrem.vv v1,v1,v2
vse8.v v1,0(a4)
mv t1,t3
bltu t4,t3,.L7
csrr t1,vlenb
add a4,a4,a6
add t0,t0,a6
add t6,t6,a6
sext.w t1,t1
vle8.v v1,0(t0)
vle8.v v2,0(t6)
vrem.vv v1,v1,v2
vse8.v v1,0(a4)
.L7:
addw a5,t1,a5
beq t5,t1,.L14
.L9:
add a4,a1,a5
add a6,a2,a5
lb a6,0(a6)
lb a4,0(a4)
add a7,a0,a5
addi a5,a5,1
remw a4,a4,a6
sext.w a6,a5
sb a4,0(a7)
bgt a3,a6,.L9
.L14:
ret
.L10:
li a4,0
li a5,0
j .L3
.L16:
ret
After this patch:
vrem_int8_t:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e8,m1,tu,ma
vle8.v v1,0(a1)
vle8.v v2,0(a2)
sub a3,a3,a5
vrem.vv v1,v1,v2
vse8.v v1,0(a0)
add a1,a1,a5
add a2,a2,a5
add a0,a0,a5
bne a3,zero,.L3
.L5:
ret
2. Floating-point operation **WITHOUT** -ffast-math:
#define TEST_TYPE(TYPE) \
__attribute__((noipa)) \
void vadd_##TYPE (TYPE * __restrict dst, TYPE *__restrict a, TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = a[i] + b[i]; \
}
#define TEST_ALL() \
TEST_TYPE(float)
TEST_ALL()
Before this patch:
vadd_float:
ble a3,zero,.L10
csrr a4,vlenb
srli t3,a4,2
addiw a5,a3,-1
addiw a6,t3,-1
sext.w t6,a3
bltu a5,a6,.L7
subw t5,t6,t3
mv t1,a1
mv a7,a2
mv a6,a0
li a5,0
vsetvli t4,zero,e32,m1,ta,ma
.L4:
vl1re32.v v1,0(t1)
vl1re32.v v2,0(a7)
addw a5,a5,t3
vfadd.vv v1,v1,v2
vs1r.v v1,0(a6)
add t1,t1,a4
add a7,a7,a4
add a6,a6,a4
bgeu t5,a5,.L4
beq t6,a5,.L10
sext.w a5,a5
.L3:
slli a4,a5,2
.L6:
add a6,a1,a4
add a7,a2,a4
flw fa4,0(a6)
flw fa5,0(a7)
add a6,a0,a4
addiw a5,a5,1
fadd.s fa5,fa5,fa4
addi a4,a4,4
fsw fa5,0(a6)
bgt a3,a5,.L6
.L10:
ret
.L7:
li a5,0
j .L3
After this patch:
vadd_float:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,tu,ma
slli a4,a5,2
vle32.v v1,0(a1)
vle32.v v2,0(a2)
sub a3,a3,a5
vfadd.vv v1,v1,v2
vse32.v v1,0(a0)
add a1,a1,a4
add a2,a2,a4
add a0,a0,a4
bne a3,zero,.L3
.L5:
ret
gcc/ChangeLog:
* config/riscv/autovec.md (cond_len_<optab><mode>): New pattern.
* config/riscv/riscv-protos.h (enum insn_type): New enum.
(expand_cond_len_binop): New function.
* config/riscv/riscv-v.cc (emit_nonvlmax_tu_insn): Ditto.
(emit_nonvlmax_fp_tu_insn): Ditto.
(need_fp_rounding_p): Ditto.
(expand_cond_len_binop): Ditto.
* config/riscv/riscv.cc (riscv_preferred_else_value): Ditto.
(TARGET_PREFERRED_ELSE_VALUE): New target hook.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv32gcv.c: Adapt testcase.
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vadd-run-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vadd-rv32gcv-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vadd-rv64gcv-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vdiv-run-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv32gcv-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv64gcv-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vmul-run-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vsub-run-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vsub-rv32gcv-nofm.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vsub-rv64gcv-nofm.c: New test.