https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381

            Bug ID: 111381
           Summary: RISC-V: missed autovec MULH for signed * unsigned
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: lehua.ding at rivai dot ai
  Target Milestone: ---

For signed * signed or unsigned * unsigned, the multiplication can be converted
to .MULH, but for signed * unsigned the conversion fails. If the target supports
signed * unsigned, I think it can be converted to .MULH and expanded to a
sumul<mode>3_highpart pattern.

https://godbolt.org/z/exrfYWdW9

C code:
#include <riscv_vector.h>

/* Reproducer, unsigned * unsigned case.
   For each i in [0, n): when pred[i] is non-zero, a[i] is replaced by the
   upper 32 bits of the 64-bit product of a[i] and b[i] (both operands are
   widened to uint64_t before multiplying); otherwise a[i] is stored back
   unchanged.  In the optimized dump of this report the multiply/shift is
   recognized as a single .MULH call.  */
void foo6 (uint32_t* restrict a, uint32_t* restrict b, int* restrict pred, int
n)
{
    for (int i = 0; i < n; i += 1)
      a[i] = pred[i] ? (uint32_t)(((uint64_t)a[i] * (uint64_t)b[i]) >> 32) :
a[i];
}

/* Reproducer, signed * unsigned case.
   For each i in [0, n): when pred[i] is non-zero, a[i] (signed) is widened
   to int64_t and b[i] (unsigned) to uint64_t, the two are multiplied, the
   product is shifted right by 32 and the result is cast to int32_t;
   otherwise a[i] is stored back unchanged.
   Note: by the usual arithmetic conversions the int64_t operand is
   converted to uint64_t, so the multiplication itself is done in uint64_t.
   In the optimized dump of this report the loop keeps the widening
   conversions, the 64-bit multiply and the shift instead of using .MULH.  */
void foo7 (int* restrict a, uint32_t* restrict b, int* restrict pred, int n)
{
    for (int i = 0; i < n; i += 1)
      a[i] = pred[i] ? (int32_t)(((int64_t)a[i] * (uint64_t)b[i]) >> 32) :
a[i];
}

Optimized dump:

;; Function foo6 (foo6, funcdef_no=0, decl_uid=56325, cgraph_uid=1,
symbol_order=0)

Removing basic block 6
Removing basic block 7
Removing basic block 8
void foo6 (uint32_t * restrict a, uint32_t * restrict b, int * restrict pred,
int n)
{
  vector([4,4]) unsigned int * vectp_a.23;
  vector([4,4]) unsigned int vect_iftmp.22;
  vector([4,4]) unsigned int vect_patt_37.20;
  vector([4,4]) unsigned int vect__9.19;
  vector([4,4]) unsigned int * vectp_b.17;
  vector([4,4]) <signed-boolean:1> mask__38.16;
  vector([4,4]) unsigned int vect_pretmp_41.15;
  vector([4,4]) unsigned int * vectp_a.13;
  vector([4,4]) int vect__4.12;
  vector([4,4]) int * vectp_pred.10;
  unsigned long ivtmp_62;
  unsigned long _83;
  unsigned long ivtmp_84;
  unsigned long ivtmp_85;
  unsigned long _86;

  <bb 2> [local count: 118111600]:
  if (n_19(D) > 0)
    goto <bb 4>; [89.00%]
  else
    goto <bb 3>; [11.00%]

  <bb 3> [local count: 118111600]:
  return;

  <bb 4> [local count: 105119324]:
  _83 = (unsigned long) n_19(D);

  <bb 5> [local count: 955630224]:
  # vectp_pred.10_63 = PHI <vectp_pred.10_64(5), pred_20(D)(4)>
  # vectp_a.13_67 = PHI <vectp_a.13_68(5), a_21(D)(4)>
  # vectp_b.17_73 = PHI <vectp_b.17_74(5), b_23(D)(4)>
  # vectp_a.23_80 = PHI <vectp_a.23_81(5), a_21(D)(4)>
  # ivtmp_84 = PHI <ivtmp_85(5), _83(4)>
  _86 = .SELECT_VL (ivtmp_84, POLY_INT_CST [4, 4]);
  ivtmp_62 = _86 * 4;
  vect__4.12_65 = .MASK_LEN_LOAD (vectp_pred.10_63, 32B, { -1, ... }, _86, 0);
  vect_pretmp_41.15_69 = .MASK_LEN_LOAD (vectp_a.13_67, 32B, { -1, ... }, _86,
0);
  mask__38.16_71 = vect__4.12_65 != { 0, ... };
  vect__9.19_75 = .MASK_LEN_LOAD (vectp_b.17_73, 32B, mask__38.16_71, _86, 0);
  vect_patt_37.20_76 = .MULH (vect_pretmp_41.15_69, vect__9.19_75);
  vect_iftmp.22_78 = .VCOND_MASK (mask__38.16_71, vect_patt_37.20_76,
vect_pretmp_41.15_69);
  .MASK_LEN_STORE (vectp_a.23_80, 32B, { -1, ... }, _86, 0, vect_iftmp.22_78);
  vectp_pred.10_64 = vectp_pred.10_63 + ivtmp_62;
  vectp_a.13_68 = vectp_a.13_67 + ivtmp_62;
  vectp_b.17_74 = vectp_b.17_73 + ivtmp_62;
  vectp_a.23_81 = vectp_a.23_80 + ivtmp_62;
  ivtmp_85 = ivtmp_84 - _86;
  if (ivtmp_85 != 0)
    goto <bb 5>; [89.00%]
  else
    goto <bb 3>; [11.00%]

}



;; Function foo7 (foo7, funcdef_no=1, decl_uid=56336, cgraph_uid=2,
symbol_order=1)

Removing basic block 6
Removing basic block 7
Removing basic block 8
void foo7 (int * restrict a, uint32_t * restrict b, int * restrict pred, int n)
{
  vector([2,2]) int * vectp_a.49;
  vector([2,2]) int vect_iftmp.48;
  vector([2,2]) int vect_iftmp.47;
  vector([2,2]) long unsigned int vect__12.46;
  vector([2,2]) long unsigned int vect__11.45;
  vector([2,2]) long unsigned int vect__10.44;
  vector([2,2]) unsigned int vect__9.43;
  vector([2,2]) unsigned int * vectp_b.41;
  vector([2,2]) long unsigned int vect__7.40;
  vector([2,2]) <signed-boolean:1> mask__38.39;
  vector([2,2]) int vect_pretmp_41.38;
  vector([2,2]) int * vectp_a.36;
  vector([2,2]) int vect__4.35;
  vector([2,2]) int * vectp_pred.33;
  unsigned long ivtmp_56;
  unsigned long _80;
  unsigned long ivtmp_81;
  unsigned long ivtmp_82;
  unsigned long _83;

  <bb 2> [local count: 118111600]:
  if (n_19(D) > 0)
    goto <bb 4>; [89.00%]
  else
    goto <bb 3>; [11.00%]

  <bb 3> [local count: 118111600]:
  return;

  <bb 4> [local count: 105119324]:
  _80 = (unsigned long) n_19(D);

  <bb 5> [local count: 955630224]:
  # vectp_pred.33_57 = PHI <vectp_pred.33_58(5), pred_20(D)(4)>
  # vectp_a.36_61 = PHI <vectp_a.36_62(5), a_21(D)(4)>
  # vectp_b.41_68 = PHI <vectp_b.41_69(5), b_23(D)(4)>
  # vectp_a.49_77 = PHI <vectp_a.49_78(5), a_21(D)(4)>
  # ivtmp_81 = PHI <ivtmp_82(5), _80(4)>
  _83 = .SELECT_VL (ivtmp_81, POLY_INT_CST [2, 2]);
  ivtmp_56 = _83 * 4;
  vect__4.35_59 = .MASK_LEN_LOAD (vectp_pred.33_57, 32B, { -1, ... }, _83, 0);
  vect_pretmp_41.38_63 = .MASK_LEN_LOAD (vectp_a.36_61, 32B, { -1, ... }, _83,
0);
  mask__38.39_65 = vect__4.35_59 != { 0, ... };
  vect__7.40_66 = (vector([2,2]) long unsigned int) vect_pretmp_41.38_63;
  vect__9.43_70 = .MASK_LEN_LOAD (vectp_b.41_68, 32B, mask__38.39_65, _83, 0);
  vect__10.44_71 = (vector([2,2]) long unsigned int) vect__9.43_70;
  vect__11.45_72 = vect__7.40_66 * vect__10.44_71;
  vect__12.46_73 = vect__11.45_72 >> 32;
  vect_iftmp.47_74 = (vector([2,2]) int) vect__12.46_73;
  vect_iftmp.48_75 = .VCOND_MASK (mask__38.39_65, vect_iftmp.47_74,
vect_pretmp_41.38_63);
  .MASK_LEN_STORE (vectp_a.49_77, 32B, { -1, ... }, _83, 0, vect_iftmp.48_75);
  vectp_pred.33_58 = vectp_pred.33_57 + ivtmp_56;
  vectp_a.36_62 = vectp_a.36_61 + ivtmp_56;
  vectp_b.41_69 = vectp_b.41_68 + ivtmp_56;
  vectp_a.49_78 = vectp_a.49_77 + ivtmp_56;
  ivtmp_82 = ivtmp_81 - _83;
  if (ivtmp_82 != 0)
    goto <bb 5>; [89.00%]
  else
    goto <bb 3>; [11.00%]

}

Reply via email to