在 2025/2/22 下午3:34, Xi Ruoyao 写道:
Now for __builtin_popcountl we are getting things like

        vrepli.b        $vr0,0
        vinsgr2vr.d     $vr0,$r4,0
        vpcnt.d $vr0,$vr0
        vpickve2gr.du   $r4,$vr0,0
        slli.w  $r4,$r4,0
        jr  $r1

The "vrepli.b" instruction is introduced by the init-regs pass (see
PR61810 and all the issues it references).  To work around it, we can
use a post-reload split instead of define_expand: the "f" constraint will
make the compiler automatically move the scalar between GPR and FPR, and
reload runs much later than init-regs, so init-regs won't get in our way.

Now the code looks like:

        movgr2fr.d      $f0,$r4
        vpcnt.d $vr0,$vr0
        movfr2gr.d      $r4,$f0
        jr  $r1

gcc/ChangeLog:

        * config/loongarch/loongarch.md (cntmap): Change to uppercase.
        (popcount<GPR:mode>2): Modify to a post reload split.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

I am currently optimizing the alignment with the code of r15-7684, so
should I submit the optimization patch for GCC 16 stage 1?


  gcc/config/loongarch/loongarch.md | 26 ++++++++++++++------------
  1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 6f507c3c7f6..478f859051c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1732,21 +1732,23 @@ (define_insn "truncdfsf2"
;; This attribute used for get connection of scalar mode and corresponding
  ;; vector mode.
-(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
+(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")])
-(define_expand "popcount<mode>2"
-  [(set (match_operand:GPR 0 "register_operand")
-       (popcount:GPR (match_operand:GPR 1 "register_operand")))]
+(define_insn_and_split "popcount<mode>2"
+  [(set (match_operand:GPR 0 "register_operand" "=f")
+       (popcount:GPR (match_operand:GPR 1 "register_operand" "f")))]
    "ISA_HAS_LSX"
+  "#"
+  ;; Do the split very lately to work around init-regs unneeded zero-
+  ;; initialization from init-regs.  See PR61810 and all the referenced
+  ;; issues.
+  "&& reload_completed"
+  [(set (match_operand:<cntmap> 0 "register_operand" "=f")
+       (popcount:<cntmap>
+         (match_operand:<cntmap> 1 "register_operand" "f")))]
  {
-  rtx in = operands[1];
-  rtx out = operands[0];
-  rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
-                                   gen_reg_rtx (V2DImode);
-  emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
-  emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
-  emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
-  DONE;
+  operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0]));
+  operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1]));
  })
;;

Reply via email to