在 2025/2/22 下午3:34, Xi Ruoyao 写道:
Now for __builtin_popcountl we are getting things like
vrepli.b $vr0,0
vinsgr2vr.d $vr0,$r4,0
vpcnt.d $vr0,$vr0
vpickve2gr.du $r4,$vr0,0
slli.w $r4,$r4,0
jr $r1
The "vrepli.b" instruction is introduced by the init-regs pass (see
PR61810 and all the issues it references). To work around it, we can
use a post-reload split instead of define_expand: the "f" constraint
makes the compiler automatically move the scalar between GPR and FPR,
and reload runs much later than init-regs, so init-regs won't get in
our way.
Now the code looks like:
movgr2fr.d $f0,$r4
vpcnt.d $vr0,$vr0
movfr2gr.d $r4,$f0
jr $r1
gcc/ChangeLog:
* config/loongarch/loongarch.md (cntmap): Change to uppercase.
(popcount<GPR:mode>2): Modify to a post reload split.
---
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
I am currently working on an optimization aligned with the code of
r15-7684. Should I submit that optimization patch for GCC 16 stage 1?
gcc/config/loongarch/loongarch.md | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 6f507c3c7f6..478f859051c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1732,21 +1732,23 @@ (define_insn "truncdfsf2"
;; This attribute used for get connection of scalar mode and corresponding
;; vector mode.
-(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
+(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")])
-(define_expand "popcount<mode>2"
- [(set (match_operand:GPR 0 "register_operand")
- (popcount:GPR (match_operand:GPR 1 "register_operand")))]
+(define_insn_and_split "popcount<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=f")
+ (popcount:GPR (match_operand:GPR 1 "register_operand" "f")))]
"ISA_HAS_LSX"
+ "#"
+ ;; Do the split very late (after reload) to work around the unneeded
+ ;; zero-initialization from init-regs. See PR61810 and all the
+ ;; referenced issues.
+ "&& reload_completed"
+ [(set (match_operand:<cntmap> 0 "register_operand" "=f")
+ (popcount:<cntmap>
+ (match_operand:<cntmap> 1 "register_operand" "f")))]
{
- rtx in = operands[1];
- rtx out = operands[0];
- rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
- gen_reg_rtx (V2DImode);
- emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
- emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
- emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
- DONE;
+ operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0]));
+ operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1]));
})
;;