Re: Re: [PATCH v2] RISC-V:Auto vect for vector-bfloat16

wangf...@eswincomputing.com Tue, 12 Nov 2024 22:03:08 -0800
On 2024-11-13 07:30  Edwin Lu <e...@rivosinc.com> wrote:
>
>I took a look at the CI errors today since I remember Jeff checking the
>CI output. I don't remember if the errors were the main things blocking
>the patch or if there just wasn't any follow up. 
Juzhe suggested that I add some run test cases for this patch, but the run cases 
depend on the qemu flag in the script.
So I will submit a change for "march-to-cpu-opt" first and then update this 
patch with the test cases.
At the same time, I will merge these cases into the vector test cases, so this 
issue will not happen again.
Thanks.
>
>I'll look into having the CI output some additional test log information
>to make understanding failures a lot more user friendly but for now, all
>of the (test for excess errors) are "error: 'for' loop initial
>declarations are only allowed in C99 or C11 mode".
>
>https://github.com/ewlu/gcc-precommit-ci/issues/2407#issuecomment-2421766160
>
>Edwin 
>
>On 10/18/2024 12:24 AM, Feng Wang wrote:
>> This patch add auto-vect patterns for vector-bfloat16 extension.
>> Similar to vector extensions, these patterns can use vector
>> BF16 instructions to optimize the automatic vectorization of for loops.
>> gcc/ChangeLog:
>>
>> * config/riscv/autovec-opt.md (*widen_bf16_fma<mode>):
>> Add vfwmacc auto-vect opt pattern for vector-bfloat16.
>> * config/riscv/vector-bfloat16.md (extend<v_fpwidetobf16_trunc><mode>2):
>> Add auto-vect pattern for Zvfbfmin extension.
>> (trunc<mode><v_fpwidetobf16_trunc>2): Ditto.
>> * config/riscv/vector-iterators.md:
>> Move vector-bfloat16 iterator definitions from vector-bfloat16.md.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c: New test.
>> * gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c: New test.
>> * gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c: New test.
>>
>> Signed-off-by: Feng Wang <wangf...@eswincomputing.com>
>> ---
>>   gcc/config/riscv/autovec-opt.md               |  23 ++++
>>   gcc/config/riscv/vector-bfloat16.md           | 116 +++++++++++++-----
>>   gcc/config/riscv/vector-iterators.md          |  32 +++++
>>   .../riscv/rvv/autovec/vfncvt-auto-vect.c      |  19 +++
>>   .../riscv/rvv/autovec/vfwcvt-auto-vect.c      |  19 +++
>>   .../riscv/rvv/autovec/vfwmacc-auto-vect.c     |  14 +++
>>   6 files changed, 195 insertions(+), 28 deletions(-)
>>   create mode 100644 
>>gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
>>   create mode 100644 
>>gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
>>   create mode 100644 
>>gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
>>
>> diff --git a/gcc/config/riscv/autovec-opt.md 
>> b/gcc/config/riscv/autovec-opt.md
>> index 4b33a145c17..0c6722601ff 100644
>> --- a/gcc/config/riscv/autovec-opt.md
>> +++ b/gcc/config/riscv/autovec-opt.md
>> @@ -1009,6 +1009,29 @@
>>     }
>>     [(set_attr "type" "vfwmuladd")])
>>  
>> +;; vfwmacc for vector_bfloat16
>> +(define_insn_and_split "*widen_bf16_fma<mode>"
>> +  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand")
>> +        (plus:VWEXTF_ZVFBF
>> +      (mult:VWEXTF_ZVFBF
>> +            (float_extend:VWEXTF_ZVFBF
>> +          (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "register_operand"))
>> +            (float_extend:VWEXTF_ZVFBF
>> +          (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand")))
>> +      (match_operand:VWEXTF_ZVFBF 1 "register_operand")))]
>> +  "TARGET_ZVFBFWMA && can_create_pseudo_p ()"
>> +  "#"
>> +  "&& 1"
>> +  [(const_int 0)]
>> +  {
>> +    rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
>> +    riscv_vector::emit_vlmax_insn (code_for_pred_widen_bf16_mul 
>> (<MODE>mode),
>> +       riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
>> +    DONE;
>> +  }
>> +  [(set_attr "type" "vfwmaccbf16")
>> +   (set_attr "mode" "<MODE>")])
>> +
>>   ;; This combine pattern does not correspond to an single instruction.
>>   ;; This is a temporary pattern produced by a combine pass and if there
>>   ;; is no further combine into widen pattern, then fall back to extend
>> diff --git a/gcc/config/riscv/vector-bfloat16.md 
>> b/gcc/config/riscv/vector-bfloat16.md
>> index 562aa8ee5ed..90b174be2e7 100644
>> --- a/gcc/config/riscv/vector-bfloat16.md
>> +++ b/gcc/config/riscv/vector-bfloat16.md
>> @@ -17,26 +17,11 @@
>>   ;; along with GCC; see the file COPYING3.  If not see
>> ;; <http://www.gnu.org/licenses/>.
>>  
>> -(define_mode_iterator VWEXTF_ZVFBF [
>> -  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> -  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> -  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> -  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> -  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && 
>> TARGET_MIN_VLEN > 32")
>> -])
>> -
>> -(define_mode_attr V_FP32TOBF16_TRUNC [
>> -  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF 
>> "RVVMF2BF") (RVVMF2SF "RVVMF4BF")
>> -])
>> -
>> -(define_mode_attr VF32_SUBEL [
>> -   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF 
>> "BF")])
>> -
>>   ;; Zvfbfmin extension
>>  
>>   (define_insn "@pred_trunc<mode>_to_bf16"
>> -  [(set (match_operand:<V_FP32TOBF16_TRUNC> 0 "register_operand"   "=vd, 
>> vd, vr, vr,  &vr,  &vr")
>> -     (if_then_else:<V_FP32TOBF16_TRUNC>
>> +  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand"   "=vd, 
>> vd, vr, vr,  &vr,  &vr")
>> +     (if_then_else:<V_FPWIDETOBF16_TRUNC>
>>          (unspec:<VM>
>>            [(match_operand:<VM> 1 "vector_mask_operand"              " vm, 
>>vm,Wc1,Wc1,vmWc1,vmWc1")
>>             (match_operand 4 "vector_length_operand"                 " rK, 
>>rK, rK, rK,   rK,   rK")
>> @@ -47,13 +32,13 @@
>>             (reg:SI VL_REGNUM)
>>             (reg:SI VTYPE_REGNUM)
>>             (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
>> -       (float_truncate:<V_FP32TOBF16_TRUNC>
>> +       (float_truncate:<V_FPWIDETOBF16_TRUNC>
>>             (match_operand:VWEXTF_ZVFBF 3 "register_operand"          "  0,  
>>0,  0,  0,   vr,   vr"))
>> -       (match_operand:<V_FP32TOBF16_TRUNC> 2 "vector_merge_operand" " vu,  
>> 0, vu,  0,   vu,    0")))]
>> +       (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "vector_merge_operand" " vu, 
>>  0, vu,  0,   vu,    0")))]
>>     "TARGET_ZVFBFMIN"
>>     "vfncvtbf16.f.f.w\t%0,%3%p1"
>>     [(set_attr "type" "vfncvtbf16")
>> -   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
>> +   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
>>      (set (attr "frm_mode")
>>   (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
>>  
>> @@ -69,12 +54,12 @@
>>            (reg:SI VL_REGNUM)
>>            (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>>         (float_extend:VWEXTF_ZVFBF
>> -         (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   vr,   
>> vr"))
>> +         (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   vr, 
>>   vr"))
>>         (match_operand:VWEXTF_ZVFBF 2 "vector_merge_operand"        "   vu,  
>>  0")))]
>>     "TARGET_ZVFBFMIN"
>>     "vfwcvtbf16.f.f.v\t%0,%3%p1"
>>     [(set_attr "type" "vfwcvtbf16")
>> -   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")])
>> +   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")])
>>  
>>  
>>   (define_insn "@pred_widen_bf16_mul_<mode>"
>> @@ -93,15 +78,15 @@
>>         (plus:VWEXTF_ZVFBF
>>           (mult:VWEXTF_ZVFBF
>>             (float_extend:VWEXTF_ZVFBF
>> -            (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   
>> vr"))
>> +            (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   
>> vr"))
>>             (float_extend:VWEXTF_ZVFBF
>> -            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   
>> vr")))
>> +            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   
>> vr")))
>>           (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    
>>0"))
>>         (match_dup 2)))]
>>     "TARGET_ZVFBFWMA"
>>     "vfwmaccbf16.vv\t%0,%3,%4%p1"
>>     [(set_attr "type" "vfwmaccbf16")
>> -   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
>> +   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
>>      (set (attr "frm_mode")
>>   (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
>>  
>> @@ -121,15 +106,90 @@
>>         (plus:VWEXTF_ZVFBF
>>           (mult:VWEXTF_ZVFBF
>>             (float_extend:VWEXTF_ZVFBF
>> -            (vec_duplicate:<V_FP32TOBF16_TRUNC>
>> +            (vec_duplicate:<V_FPWIDETOBF16_TRUNC>
>>                 (match_operand:<VF32_SUBEL> 3 "register_operand"       "    
>>f")))
>>             (float_extend:VWEXTF_ZVFBF
>> -            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   
>> vr")))
>> +            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   
>> vr")))
>>           (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    
>>0"))
>>         (match_dup 2)))]
>>     "TARGET_ZVFBFWMA"
>>     "vfwmaccbf16.vf\t%0,%3,%4%p1"
>>     [(set_attr "type" "vfwmaccbf16")
>> -   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
>> +   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
>>      (set (attr "frm_mode")
>>   (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
>> +
>> +;; Auto vect pattern
>> +
>> +;; -------------------------------------------------------------------------
>> +;; ---- [BF16] Widening.
>> +;; -------------------------------------------------------------------------
>> +;; - vfwcvtbf16.f.f.v
>> +;; -------------------------------------------------------------------------
>> +(define_insn_and_split "extend<v_fpwidetobf16_trunc><mode>2"
>> +  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand" "=&vr")
>> +    (float_extend:VWEXTF_ZVFBF
>> +     (match_operand:<V_FPWIDETOBF16_TRUNC>  1 "register_operand" "  vr")))]
>> +  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
>> +  "#"
>> +  "&& 1"
>> +  [(const_int 0)]
>> +{
>> +  insn_code icode = code_for_pred_extend_bf16_to (<MODE>mode);
>> +  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, operands);
>> +  DONE;
>> +}
>> +  [(set_attr "type" "vfwcvtbf16")
>> +   (set_attr "mode" "<MODE>")])
>> +
>> +(define_expand "extend<v_fpwidetobf16_trunc><mode>2"
>> +  [(set (match_operand:VDF 0 "register_operand")
>> +    (float_extend:VDF
>> +     (match_operand:<V_FPWIDETOBF16_TRUNC> 1 "register_operand")))]
>> +  "TARGET_ZVFBFMIN"
>> +{
>> +  rtx dblw = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
>> +  emit_insn (gen_extend<v_fpwidetobf16_trunc><v_double_trunc>2 (dblw, 
>> operands[1]));
>> +  emit_insn (gen_extend<v_double_trunc><mode>2 (operands[0], dblw));
>> +  DONE;
>> +})
>> +
>> +;; -------------------------------------------------------------------------
>> +;; ---- [BF16] Narrowing.
>> +;; -------------------------------------------------------------------------
>> +;; - vfncvtbf16.f.f.w
>> +;; -------------------------------------------------------------------------
>> +(define_insn_and_split "trunc<mode><v_fpwidetobf16_trunc>2"
>> +  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand" "=vr")
>> +    (float_truncate:<V_FPWIDETOBF16_TRUNC>
>> +     (match_operand:VSF 1 "register_operand"      " vr")))]
>> +  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
>> +  "#"
>> +  "&& 1"
>> +  [(const_int 0)]
>> +{
>> +  insn_code icode = code_for_pred_trunc_to_bf16 (<MODE>mode);
>> +  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_DYN, 
>> operands);
>> +  DONE;
>> +}
>> +  [(set_attr "type" "vfncvtbf16")
>> +   (set_attr "mode" "<MODE>")])
>> +
>> +(define_expand "trunc<mode><v_fpwidetobf16_trunc>2"
>> +  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand")
>> +    (float_truncate:<V_FPWIDETOBF16_TRUNC>
>> +     (match_operand:VDF 1 "register_operand")))]
>> +  "TARGET_ZVFBFMIN"
>> +{
>> +  rtx half = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
>> +  rtx opshalf[] = {half, operands[1]};
>> +
>> +  /* According to the RISC-V V Spec 13.19. we need to use
>> +     vfncvt.rod.f.f.w for all steps but the last.  */
>> +  insn_code icode = code_for_pred_rod_trunc (<MODE>mode);
>> +  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, opshalf);
>> +
>> +  emit_insn (gen_trunc<v_double_trunc><v_fpwidetobf16_trunc>2 (operands[0], 
>> half));
>> +  DONE;
>> +})
>> +
>> diff --git a/gcc/config/riscv/vector-iterators.md 
>> b/gcc/config/riscv/vector-iterators.md
>> index 43325d1ba87..a53c5233839 100644
>> --- a/gcc/config/riscv/vector-iterators.md
>> +++ b/gcc/config/riscv/vector-iterators.md
>> @@ -4512,3 +4512,35 @@
>>     (V256DF "v64df")
>>     (V512DF "v128df")
>>   ])
>> +
>> +;;vector bfloat16
>> +(define_mode_iterator VWEXTF_ZVFBF [
>> +  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> +  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> +  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> +  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>> +  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && 
>> TARGET_MIN_VLEN > 32")
>> +])
>> +
>> +(define_mode_iterator VSF [
>> +  (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") 
>> (RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
>> +  (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 
>> && TARGET_MIN_VLEN > 32")
>> +])
>> +
>> +(define_mode_iterator VDF [
>> +  (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64")
>> +  (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
>> +])
>> +
>> +(define_mode_attr V_FPWIDETOBF16_TRUNC [
>> +  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF 
>> "RVVMF2BF") (RVVMF2SF "RVVMF4BF")
>> +  (RVVM8DF "RVVM2BF") (RVVM4DF "RVVM1BF") (RVVM2DF "RVVMF2BF") (RVVM1DF 
>> "RVVMF4BF")
>> +])
>> +
>> +(define_mode_attr v_fpwidetobf16_trunc [
>> +  (RVVM8SF "rvvm4bf") (RVVM4SF "rvvm2bf") (RVVM2SF "rvvm1bf") (RVVM1SF 
>> "rvvmf2bf") (RVVMF2SF "rvvmf4bf")
>> +  (RVVM8DF "rvvm2bf") (RVVM4DF "rvvm1bf") (RVVM2DF "rvvmf2bf") (RVVM1DF 
>> "rvvmf4bf")
>> +])
>> +
>> +(define_mode_attr VF32_SUBEL [
>> +   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF 
>> "BF")])
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c 
>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
>> new file mode 100644
>> index 00000000000..7ba3615ccf1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
>> +
>> +__attribute__((noipa))
>> +void vfncvt_float_BFloat16 (__bf16 *dst, float *a, int n)
>> +{
>> +  for (int i = 0; i < n; i++)
>> +    dst[i] = (__bf16)a[i];
>> +}
>> +
>> +__attribute__((noipa))
>> +void vfncvt_double_BFloat16 (__bf16 *dst, double *a, int n)
>> +{
>> +  for (int i = 0; i < n; i++)
>> +    dst[i] = (__bf16)a[i];
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\tvfncvtbf16\.f\.f\.w} 2 } } */
>> +/* { dg-final { scan-assembler-times {\tvfncvt\.rod\.f\.f\.w} 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c 
>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
>> new file mode 100644
>> index 00000000000..6629dd909a0
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
>> +
>> +__attribute__((noipa))
>> +void vfwcvt__BFloat16float (float *dst, __bf16 *a, int n)
>> +{
>> +  for (int i = 0; i < n; i++)
>> +    dst[i] = (float)a[i];
>> +}
>> +
>> +__attribute__((noipa))
>> +void vfwcvt__BFloat16double (double *dst, __bf16 *a, int n)
>> +{
>> +  for (int i = 0; i < n; i++)
>> +    dst[i] = (double)a[i];
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\tvfwcvtbf16\.f\.f\.v} 2 } } */
>> +/* { dg-final { scan-assembler-times {\tvfwcvt\.f\.f\.v} 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c 
>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
>> new file mode 100644
>> index 00000000000..a767f2c8ef8
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-march=rv32gcv_zvfbfwma -mabi=ilp32d 
>> -ffast-math" } */
>> +
>> +__attribute__ ((noipa))
>> +void vwmacc_float_bf16 (float *__restrict dst,
>> +    __bf16 *__restrict a,
>> +    __bf16 *__restrict b,
>> +    int n)
>> +{
>> +  for (int i = 0; i < n; i++)
>> +    dst[i] += (float) (a[i] * b[i]);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\tvfwmaccbf16\.vv} 1 } } */
Re: Re: [PATCH v2] RISC-V:Auto vect for vector-bfloat16

Reply via email to