https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381
Bug ID: 111381 Summary: RISC-V: missed autovec MULH for signed * unsigned Product: gcc Version: 14.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: lehua.ding at rivai dot ai Target Milestone: --- For signed * signed or unsigned * unsigned, the multiplication can be converted to .MULH, but for signed * unsigned, the conversion fails. If the target supports signed * unsigned, I think it can be converted to .MULH and expanded to a sumul<mode>3_highpart pattern. https://godbolt.org/z/exrfYWdW9 C code: #include <riscv_vector.h> void foo6 (uint32_t* restrict a, uint32_t* restrict b, int* restrict pred, int n) { for (int i = 0; i < n; i += 1) a[i] = pred[i] ? (uint32_t)(((uint64_t)a[i] * (uint64_t)b[i]) >> 32) : a[i]; } void foo7 (int* restrict a, uint32_t* restrict b, int* restrict pred, int n) { for (int i = 0; i < n; i += 1) a[i] = pred[i] ? (int32_t)(((int64_t)a[i] * (uint64_t)b[i]) >> 32) : a[i]; } Optimized dump: ;; Function foo6 (foo6, funcdef_no=0, decl_uid=56325, cgraph_uid=1, symbol_order=0) Removing basic block 6 Removing basic block 7 Removing basic block 8 void foo6 (uint32_t * restrict a, uint32_t * restrict b, int * restrict pred, int n) { vector([4,4]) unsigned int * vectp_a.23; vector([4,4]) unsigned int vect_iftmp.22; vector([4,4]) unsigned int vect_patt_37.20; vector([4,4]) unsigned int vect__9.19; vector([4,4]) unsigned int * vectp_b.17; vector([4,4]) <signed-boolean:1> mask__38.16; vector([4,4]) unsigned int vect_pretmp_41.15; vector([4,4]) unsigned int * vectp_a.13; vector([4,4]) int vect__4.12; vector([4,4]) int * vectp_pred.10; unsigned long ivtmp_62; unsigned long _83; unsigned long ivtmp_84; unsigned long ivtmp_85; unsigned long _86; <bb 2> [local count: 118111600]: if (n_19(D) > 0) goto <bb 4>; [89.00%] else goto <bb 3>; [11.00%] <bb 3> [local count: 118111600]: return; <bb 4> [local count: 105119324]: _83 = (unsigned long) n_19(D); <bb 5> [local count: 955630224]: # vectp_pred.10_63 = PHI 
<vectp_pred.10_64(5), pred_20(D)(4)> # vectp_a.13_67 = PHI <vectp_a.13_68(5), a_21(D)(4)> # vectp_b.17_73 = PHI <vectp_b.17_74(5), b_23(D)(4)> # vectp_a.23_80 = PHI <vectp_a.23_81(5), a_21(D)(4)> # ivtmp_84 = PHI <ivtmp_85(5), _83(4)> _86 = .SELECT_VL (ivtmp_84, POLY_INT_CST [4, 4]); ivtmp_62 = _86 * 4; vect__4.12_65 = .MASK_LEN_LOAD (vectp_pred.10_63, 32B, { -1, ... }, _86, 0); vect_pretmp_41.15_69 = .MASK_LEN_LOAD (vectp_a.13_67, 32B, { -1, ... }, _86, 0); mask__38.16_71 = vect__4.12_65 != { 0, ... }; vect__9.19_75 = .MASK_LEN_LOAD (vectp_b.17_73, 32B, mask__38.16_71, _86, 0); vect_patt_37.20_76 = .MULH (vect_pretmp_41.15_69, vect__9.19_75); vect_iftmp.22_78 = .VCOND_MASK (mask__38.16_71, vect_patt_37.20_76, vect_pretmp_41.15_69); .MASK_LEN_STORE (vectp_a.23_80, 32B, { -1, ... }, _86, 0, vect_iftmp.22_78); vectp_pred.10_64 = vectp_pred.10_63 + ivtmp_62; vectp_a.13_68 = vectp_a.13_67 + ivtmp_62; vectp_b.17_74 = vectp_b.17_73 + ivtmp_62; vectp_a.23_81 = vectp_a.23_80 + ivtmp_62; ivtmp_85 = ivtmp_84 - _86; if (ivtmp_85 != 0) goto <bb 5>; [89.00%] else goto <bb 3>; [11.00%] } ;; Function foo7 (foo7, funcdef_no=1, decl_uid=56336, cgraph_uid=2, symbol_order=1) Removing basic block 6 Removing basic block 7 Removing basic block 8 void foo7 (int * restrict a, uint32_t * restrict b, int * restrict pred, int n) { vector([2,2]) int * vectp_a.49; vector([2,2]) int vect_iftmp.48; vector([2,2]) int vect_iftmp.47; vector([2,2]) long unsigned int vect__12.46; vector([2,2]) long unsigned int vect__11.45; vector([2,2]) long unsigned int vect__10.44; vector([2,2]) unsigned int vect__9.43; vector([2,2]) unsigned int * vectp_b.41; vector([2,2]) long unsigned int vect__7.40; vector([2,2]) <signed-boolean:1> mask__38.39; vector([2,2]) int vect_pretmp_41.38; vector([2,2]) int * vectp_a.36; vector([2,2]) int vect__4.35; vector([2,2]) int * vectp_pred.33; unsigned long ivtmp_56; unsigned long _80; unsigned long ivtmp_81; unsigned long ivtmp_82; unsigned long _83; <bb 2> [local count: 
118111600]: if (n_19(D) > 0) goto <bb 4>; [89.00%] else goto <bb 3>; [11.00%] <bb 3> [local count: 118111600]: return; <bb 4> [local count: 105119324]: _80 = (unsigned long) n_19(D); <bb 5> [local count: 955630224]: # vectp_pred.33_57 = PHI <vectp_pred.33_58(5), pred_20(D)(4)> # vectp_a.36_61 = PHI <vectp_a.36_62(5), a_21(D)(4)> # vectp_b.41_68 = PHI <vectp_b.41_69(5), b_23(D)(4)> # vectp_a.49_77 = PHI <vectp_a.49_78(5), a_21(D)(4)> # ivtmp_81 = PHI <ivtmp_82(5), _80(4)> _83 = .SELECT_VL (ivtmp_81, POLY_INT_CST [2, 2]); ivtmp_56 = _83 * 4; vect__4.35_59 = .MASK_LEN_LOAD (vectp_pred.33_57, 32B, { -1, ... }, _83, 0); vect_pretmp_41.38_63 = .MASK_LEN_LOAD (vectp_a.36_61, 32B, { -1, ... }, _83, 0); mask__38.39_65 = vect__4.35_59 != { 0, ... }; vect__7.40_66 = (vector([2,2]) long unsigned int) vect_pretmp_41.38_63; vect__9.43_70 = .MASK_LEN_LOAD (vectp_b.41_68, 32B, mask__38.39_65, _83, 0); vect__10.44_71 = (vector([2,2]) long unsigned int) vect__9.43_70; vect__11.45_72 = vect__7.40_66 * vect__10.44_71; vect__12.46_73 = vect__11.45_72 >> 32; vect_iftmp.47_74 = (vector([2,2]) int) vect__12.46_73; vect_iftmp.48_75 = .VCOND_MASK (mask__38.39_65, vect_iftmp.47_74, vect_pretmp_41.38_63); .MASK_LEN_STORE (vectp_a.49_77, 32B, { -1, ... }, _83, 0, vect_iftmp.48_75); vectp_pred.33_58 = vectp_pred.33_57 + ivtmp_56; vectp_a.36_62 = vectp_a.36_61 + ivtmp_56; vectp_b.41_69 = vectp_b.41_68 + ivtmp_56; vectp_a.49_78 = vectp_a.49_77 + ivtmp_56; ivtmp_82 = ivtmp_81 - _83; if (ivtmp_82 != 0) goto <bb 5>; [89.00%] else goto <bb 3>; [11.00%] }