Re: [PATCH, RS6000 PR target/94954] Fix wrong codegen for vec_pack_to_short_fp32() builtin

David Edelsohn via Gcc-patches Fri, 12 Jun 2020 07:25:43 -0700

Hi, Will

On Fri, Jun 12, 2020 at 12:22 AM will schmidt <will_schm...@vnet.ibm.com> wrote:
>
>
> Hi,
>   Fix codegen implementation for the builtin vec_pack_to_short_fp32.
>
>   Regtests are underway against powerpc64 (power7be,power8le,power9le).
>   (this is a power9 builtin, so should be a noop for most of those).
>   OK for trunk and backports?
>
>   Thanks
>   -Will
>
>
>     [gcc]
>       target pr/94954
>
>         * config/rs6000/altivec.h (vec_pack_to_short_fp32):  Update.
>         * config/rs6000/altivec.md (UNSPEC_CONVERT_4F32_8F16):  New unspec.
>         (convert_4f32_8f16):  New define_expand.
>         * config/rs6000/rs6000-builtin.def (convert_4f32_8f16): New builtin 
> define
>         and overload.
>         * config/rs6000/rs6000-call.c (P9V_BUILTIN_VEC_CONVERT_4F32_8F16): New
>         overloaded builtin entry.
>         * config/rs6000/vsx.md (UNSPEC_VSX_XVCVSPHP): New unspec.
>         (vsx_xvcvsphp): New define_insn.
>
>     [testsuite]
>         * testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c: Update.
>
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index 0a7e8ab..ab10025 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -431,11 +431,11 @@
>  /* Vector additions added in ISA 3.0.  */
>  #define vec_first_match_index __builtin_vec_first_match_index
>  #define vec_first_match_or_eos_index __builtin_vec_first_match_or_eos_index
>  #define vec_first_mismatch_index __builtin_vec_first_mismatch_index
>  #define vec_first_mismatch_or_eos_index 
> __builtin_vec_first_mismatch_or_eos_index
> -#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8i16
> +#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8f16
>  #define vec_parity_lsbb __builtin_vec_vparity_lsbb
>  #define vec_vctz __builtin_vec_vctz
>  #define vec_cnttz __builtin_vec_vctz
>  #define vec_vctzb __builtin_vec_vctzb
>  #define vec_vctzd __builtin_vec_vctzd
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 159f24e..855e7cc 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -78,10 +78,11 @@
>     UNSPEC_VUNPACK_HI_SIGN_DIRECT
>     UNSPEC_VUNPACK_LO_SIGN_DIRECT
>     UNSPEC_VUPKHPX
>     UNSPEC_VUPKLPX
>     UNSPEC_CONVERT_4F32_8I16
> +   UNSPEC_CONVERT_4F32_8F16
>     UNSPEC_DST
>     UNSPEC_DSTT
>     UNSPEC_DSTST
>     UNSPEC_DSTSTT
>     UNSPEC_LVSL
> @@ -3215,10 +3216,32 @@
>    emit_insn (gen_altivec_vctuxs (rtx_tmp_lo, operands[2], const0_rtx));
>    emit_insn (gen_altivec_vpkswss (operands[0], rtx_tmp_hi, rtx_tmp_lo));
>    DONE;
>  })
>
> +
> +;; Convert two vector F32 to packed vector f16.


Just for consistency, the comment above probably should be F16
(capitalized) above.

Why does this pattern use V8HI mode instead of V8HF if this is for
8F16 in contrast to the existing 8I16 pattern?  Other than that
(fundamental) question, the rest looks good.

Thanks, David

> +(define_expand "convert_4f32_8f16"
> +  [(set (match_operand:V8HI 0 "register_operand" "=v")
> +       (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v")
> +                     (match_operand:V4SF 2 "register_operand" "v")]
> +                    UNSPEC_CONVERT_4F32_8F16))]
> +  "TARGET_P9_VECTOR"
> +{
> +  rtx rtx_tmp_hi = gen_reg_rtx (V4SImode);
> +  rtx rtx_tmp_lo = gen_reg_rtx (V4SImode);
> +
> +  emit_insn (gen_vsx_xvcvsphp (rtx_tmp_hi, operands[1] ));
> +  emit_insn (gen_vsx_xvcvsphp (rtx_tmp_lo, operands[2] ));
> +  if (BYTES_BIG_ENDIAN)
> +    emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_lo, rtx_tmp_hi));
> +  else
> +    emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_hi, rtx_tmp_lo));
> +  DONE;
> +})
> +
> +
>  ;; Generate
>  ;;    xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
>  ;;    vsubu?m SCRATCH2,SCRATCH1,%1
>  ;;    vmaxs? %0,%1,SCRATCH2"
>  (define_expand "abs<mode>2"
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index 8b1ddb0..47e9137 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2208,10 +2208,11 @@ BU_P8V_OVERLOAD_3 (VPERMXOR,   "vpermxor")
>
>  /* ISA 3.0 vector overloaded 2-argument functions. */
>  BU_P9V_AV_2 (VSLV,             "vslv",                 CONST, vslv)
>  BU_P9V_AV_2 (VSRV,             "vsrv",                 CONST, vsrv)
>  BU_P9V_AV_2 (CONVERT_4F32_8I16, "convert_4f32_8i16", CONST, 
> convert_4f32_8i16)
> +BU_P9V_AV_2 (CONVERT_4F32_8F16, "convert_4f32_8f16", CONST, 
> convert_4f32_8f16)
>
>  BU_P9V_AV_2 (VFIRSTMATCHINDEX_V16QI, "first_match_index_v16qi",
>              CONST, first_match_index_v16qi)
>  BU_P9V_AV_2 (VFIRSTMATCHINDEX_V8HI, "first_match_index_v8hi",
>              CONST, first_match_index_v8hi)
> @@ -2238,10 +2239,11 @@ BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V4SI, 
> "first_mismatch_or_eos_index_v4si",
>
>  /* ISA 3.0 vector overloaded 2-argument functions. */
>  BU_P9V_OVERLOAD_2 (VSLV,       "vslv")
>  BU_P9V_OVERLOAD_2 (VSRV,       "vsrv")
>  BU_P9V_OVERLOAD_2 (CONVERT_4F32_8I16, "convert_4f32_8i16")
> +BU_P9V_OVERLOAD_2 (CONVERT_4F32_8F16, "convert_4f32_8f16")
>
>  /* 2 argument vector functions added in ISA 3.0 (power9). */
>  BU_P9V_AV_2 (VADUB,            "vadub",                CONST,  vaduv16qi3)
>  BU_P9V_AV_2 (VADUH,            "vaduh",                CONST,  vaduv8hi3)
>  BU_P9V_AV_2 (VADUW,            "vaduw",                CONST,  vaduv4si3)
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index 0ac8054..9708d7e 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -1975,10 +1975,12 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>    { P8V_BUILTIN_VEC_NEG, P8V_BUILTIN_NEG_V2DF,
>      RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
>
>    { P9V_BUILTIN_VEC_CONVERT_4F32_8I16, P9V_BUILTIN_CONVERT_4F32_8I16,
>      RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
> +  { P9V_BUILTIN_VEC_CONVERT_4F32_8F16, P9V_BUILTIN_CONVERT_4F32_8F16,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
>
>    { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI,
>      RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
>    { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI,
>      RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 
> 0 },
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 2a28215..38908a5 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -295,10 +295,11 @@
>     UNSPEC_VSX_DIVSD
>     UNSPEC_VSX_DIVUD
>     UNSPEC_VSX_MULSD
>     UNSPEC_VSX_SIGN_EXTEND
>     UNSPEC_VSX_XVCVSPSXDS
> +   UNSPEC_VSX_XVCVSPHP
>     UNSPEC_VSX_VSLO
>     UNSPEC_VSX_EXTRACT
>     UNSPEC_VSX_SXEXPDP
>     UNSPEC_VSX_SXSIG
>     UNSPEC_VSX_SIEXPDP
> @@ -2177,10 +2178,19 @@
>                      UNSPEC_VSX_CVHPSP))]
>    "TARGET_P9_VECTOR"
>    "xvcvhpsp %x0,%x1"
>    [(set_attr "type" "vecfloat")])
>
> +;; Generate xvcvsphp
> +(define_insn "vsx_xvcvsphp"
> +  [(set (match_operand:V4SI 0 "register_operand" "=wa")
> +        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
> +                    UNSPEC_VSX_XVCVSPHP))]
> +  "TARGET_P9_VECTOR"
> +  "xvcvsphp %x0,%x1"
> +[(set_attr "type" "vecfloat")])
> +
>  ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
>  ;; format of scalars is actually DF.
>  (define_insn "vsx_xscvdpsp_scalar"
>    [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
>         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
> diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
> index 0e4ab48..038ba44 100644
> --- a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
> @@ -1,25 +1,37 @@
>  /* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } 
> */
>  /* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-O2 -mdejagnu-cpu=power9" } */
>
>  #include <altivec.h>
> +#include <stdio.h>
>
>  void abort (void);
>
>  int main() {
>    int i;
>    vector float vfa, vfb;
> -  vector unsigned short vur, vuexpt;
> +  vector unsigned short vur, vresult;
>
> -  vfa = (vector float){3.4, 5.0, 20.0, 50.9 };
> -  vfb = (vector float){10.0, 40.0, 70.0, 100.0 };
> -  vuexpt = (vector unsigned short){ 3, 5, 20, 50,
> -                                    10, 40, 70, 100};
> +  vfa = (vector float){0.4, 1.6, 20.0, 99.9 };
> +  vfb = (vector float){10.0, -2.0, 70.0, 999.0 };
>
> +  /* Results from converting those single-precion floats to half-precision 
> packed values. */
> +  vresult = (vector unsigned short) { 0x3666, 0x3e66, 0x4d00, 0x563e,
> +                                     0x4900, 0xc000, 0x5460, 0x63ce };
> +
> +  /* convert from single-precion float to unsigned short and pack */
>    vur = vec_pack_to_short_fp32 (vfa, vfb);
>
> +#ifdef DEBUG
> +  for(i = 0; i< 4; i++) { printf("i=[%d] %f  \n",i,vfa[i]); }
> +  for(i = 0; i< 4; i++) { printf("i=[%d] %f  \n",i+4,vfb[i]); }
> +  for(i = 0; i< 8; i++) { printf("i=[%d] %d  \n",i,vur[i]); }
> +#endif
> +
>    for(i = 0; i< 8; i++) {
> -    if (vur[i] != vuexpt[i])
> +    if (vur[i] != vresult[i]) {
> +           printf("i=[%d] 0x%x != 0x%x \n",i,vur[i],vresult[i]);
>        abort();
> +    }
>    }
>  }
>

Re: [PATCH, RS6000 PR target/94954] Fix wrong codegen for vec_pack_to_short_fp32() builtin

Reply via email to