Ping! Please review.
Thanks & Regards Jeevitha On 09/09/25 10:36 am, jeevitha wrote: > Ping! > > please review. > > Thanks & Regards > Jeevitha > > On 26/08/25 6:42 pm, jeevitha wrote: >> >> Hi All, >> >> The following patch has been bootstrapped and regtested on powerpc64le-linux. >> >> Previously, vec_slo/vec_sll always defaulted to V4SI, inserting unwanted >> VIEW_CONVERT_EXPR int casts. This caused widening of char/short vectors and >> constants exceeding the vspltisb/xxspltib range. >> >> For example: >> vui8_t vra, tmp; >> _2 = VIEW_CONVERT_EXPR<__vector signed int>(vra); >> _3 = VIEW_CONVERT_EXPR<__vector signed int>(tmp); >> _4 = __builtin_altivec_vslo(_2, _3); >> >> With this patch, vec_slo/vec_sll now select the correct vector type based on >> their arguments. For example: >> >> vui8_t vra, tmp; >> _2 = VIEW_CONVERT_EXPR<__vector signed char>(vra); >> _3 = VIEW_CONVERT_EXPR<__vector signed char>(tmp); >> _4 = __builtin_altivec_vslo_v16qi(_2, _3); >> >> This ensures proper handling across all supported modes (V16QI, V8HI, V4SI, >> V2DI, V1TI, V4SF). Mode-specific builtins for vsl and vslo were added to avoid >> unnecessary casting. >> >> 2025-08-26 Jeevitha Palanisamy <[email protected]> >> >> gcc/ >> PR target/118480 >> PR target/117818 >> * config/rs6000/altivec.md (altivec_vslo_<mode>): New define_insn. >> (altivec_vsl_<mode>): New define_insn. >> * config/rs6000/rs6000-builtins.def: Add builtins for vsl/vslo with >> mode-specific support. >> * config/rs6000/rs6000-overload.def: Update vec_sll/vec_slo overloads >> to use new mode-specific variants. >> >> gcc/testsuite/ >> PR target/118480 >> PR target/117818 >> * gcc.target/powerpc/pr118480-3.c: New test. >> * gcc.target/powerpc/pr117818-1.c: New test. 
>> >> >> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md >> index 7edc288a656..c11acd30870 100644 >> --- a/gcc/config/rs6000/altivec.md >> +++ b/gcc/config/rs6000/altivec.md >> @@ -54,6 +54,7 @@ >> UNSPEC_VPACK_UNS_UNS_MOD >> UNSPEC_VPACK_UNS_UNS_MOD_DIRECT >> UNSPEC_VREVEV >> + UNSPEC_VSL >> UNSPEC_VSLV4SI >> UNSPEC_VSLO >> UNSPEC_VSR >> @@ -2071,6 +2072,15 @@ >> "vrlqnm %0,%1,%2" >> [(set_attr "type" "veclogical")]) >> >> +(define_insn "altivec_vsl_<mode>" >> + [(set (match_operand:VSX_MM 0 "register_operand" "=v") >> + (unspec:VSX_MM [(match_operand:VSX_MM 1 "register_operand" "v") >> + (match_operand:V16QI 2 "register_operand" "v")] >> + UNSPEC_VSL))] >> + "TARGET_ALTIVEC" >> + "vsl %0,%1,%2" >> + [(set_attr "type" "vecperm")]) >> + >> (define_insn "altivec_vsl" >> [(set (match_operand:V4SI 0 "register_operand" "=v") >> (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") >> @@ -2080,11 +2090,11 @@ >> "vsl %0,%1,%2" >> [(set_attr "type" "vecperm")]) >> >> -(define_insn "altivec_vslo" >> - [(set (match_operand:V4SI 0 "register_operand" "=v") >> - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") >> - (match_operand:V4SI 2 "register_operand" "v")] >> - UNSPEC_VSLO))] >> +(define_insn "altivec_vslo_<mode>" >> + [(set (match_operand:VM 0 "register_operand" "=v") >> + (unspec:VM [(match_operand:VM 1 "register_operand" "v") >> + (match_operand:V16QI 2 "register_operand" "v")] >> + UNSPEC_VSLO))] >> "TARGET_ALTIVEC" >> "vslo %0,%1,%2" >> [(set_attr "type" "vecperm")]) >> diff --git a/gcc/config/rs6000/rs6000-builtins.def >> b/gcc/config/rs6000/rs6000-builtins.def >> index 555d7d58950..c7622f11816 100644 >> --- a/gcc/config/rs6000/rs6000-builtins.def >> +++ b/gcc/config/rs6000/rs6000-builtins.def >> @@ -948,6 +948,21 @@ >> const vsi __builtin_altivec_vsl (vsi, vsi); >> VSL altivec_vsl {} >> >> + const vsc __builtin_altivec_vsl_v16qi (vsc, vsc); >> + VSL_16QI altivec_vsl_v16qi {} >> + >> + const vss __builtin_altivec_vsl_v8hi 
(vss, vsc); >> + VSL_8HI altivec_vsl_v8hi {} >> + >> + const vsi __builtin_altivec_vsl_v4si (vsi, vsc); >> + VSL_4SI altivec_vsl_v4si {} >> + >> + const vsll __builtin_altivec_vsl_v2di (vsll, vsc); >> + VSL_2DI altivec_vsl_v2di {} >> + >> + const vsq __builtin_altivec_vsl_v1ti (vsq, vsc); >> + VSL_1TI altivec_vsl_v1ti {} >> + >> const vsc __builtin_altivec_vslb (vsc, vuc); >> VSLB vashlv16qi3 {} >> >> @@ -969,8 +984,23 @@ >> const vss __builtin_altivec_vslh (vss, vus); >> VSLH vashlv8hi3 {} >> >> - const vsi __builtin_altivec_vslo (vsi, vsi); >> - VSLO altivec_vslo {} >> + const vsc __builtin_altivec_vslo_v16qi (vsc, vsc); >> + VSLO_16QI altivec_vslo_v16qi {} >> + >> + const vss __builtin_altivec_vslo_v8hi (vss, vsc); >> + VSLO_8HI altivec_vslo_v8hi {} >> + >> + const vf __builtin_altivec_vslo_v4sf (vf, vsc); >> + VSLO_4SF altivec_vslo_v4sf {} >> + >> + const vsi __builtin_altivec_vslo_v4si (vsi, vsc); >> + VSLO_4SI altivec_vslo_v4si {} >> + >> + const vsll __builtin_altivec_vslo_v2di (vsll, vsc); >> + VSLO_2DI altivec_vslo_v2di {} >> + >> + const vsq __builtin_altivec_vslo_v1ti (vsq, vsc); >> + VSLO_1TI altivec_vslo_v1ti {} >> >> const vsi __builtin_altivec_vslw (vsi, vui); >> VSLW vashlv4si3 {} >> diff --git a/gcc/config/rs6000/rs6000-overload.def >> b/gcc/config/rs6000/rs6000-overload.def >> index b4266c54464..62a29b9ce03 100644 >> --- a/gcc/config/rs6000/rs6000-overload.def >> +++ b/gcc/config/rs6000/rs6000-overload.def >> @@ -3454,27 +3454,27 @@ >> >> [VEC_SLL, vec_sll, __builtin_vec_sll] >> vsc __builtin_vec_sll (vsc, vuc); >> - VSL VSL_VSC >> + VSL_16QI VSL_VSC >> vuc __builtin_vec_sll (vuc, vuc); >> - VSL VSL_VUC >> + VSL_16QI VSL_VUC >> vss __builtin_vec_sll (vss, vuc); >> - VSL VSL_VSS >> + VSL_8HI VSL_VSS >> vus __builtin_vec_sll (vus, vuc); >> - VSL VSL_VUS >> + VSL_8HI VSL_VUS >> vp __builtin_vec_sll (vp, vuc); >> - VSL VSL_VP >> + VSL_8HI VSL_VP >> vsi __builtin_vec_sll (vsi, vuc); >> - VSL VSL_VSI >> + VSL_4SI VSL_VSI >> vui __builtin_vec_sll (vui, 
vuc); >> - VSL VSL_VUI >> + VSL_4SI VSL_VUI >> vsll __builtin_vec_sll (vsll, vuc); >> - VSL VSL_VSLL >> + VSL_2DI VSL_VSLL >> vull __builtin_vec_sll (vull, vuc); >> - VSL VSL_VULL >> + VSL_2DI VSL_VULL >> vsq __builtin_vec_sll (vsq, vuc); >> - VSL VSL_VSQ >> + VSL_1TI VSL_VSQ >> vuq __builtin_vec_sll (vuq, vuc); >> - VSL VSL_VUQ >> + VSL_1TI VSL_VUQ >> ; The following variants are deprecated. >> vsc __builtin_vec_sll (vsc, vus); >> VSL VSL_VSC_VUS >> @@ -3531,53 +3531,53 @@ >> >> [VEC_SLO, vec_slo, __builtin_vec_slo] >> vsc __builtin_vec_slo (vsc, vsc); >> - VSLO VSLO_VSCS >> + VSLO_16QI VSLO_VSCS >> vsc __builtin_vec_slo (vsc, vuc); >> - VSLO VSLO_VSCU >> + VSLO_16QI VSLO_VSCU >> vuc __builtin_vec_slo (vuc, vsc); >> - VSLO VSLO_VUCS >> + VSLO_16QI VSLO_VUCS >> vuc __builtin_vec_slo (vuc, vuc); >> - VSLO VSLO_VUCU >> + VSLO_16QI VSLO_VUCU >> vss __builtin_vec_slo (vss, vsc); >> - VSLO VSLO_VSSS >> + VSLO_8HI VSLO_VSSS >> vss __builtin_vec_slo (vss, vuc); >> - VSLO VSLO_VSSU >> + VSLO_8HI VSLO_VSSU >> vus __builtin_vec_slo (vus, vsc); >> - VSLO VSLO_VUSS >> + VSLO_8HI VSLO_VUSS >> vus __builtin_vec_slo (vus, vuc); >> - VSLO VSLO_VUSU >> + VSLO_8HI VSLO_VUSU >> vp __builtin_vec_slo (vp, vsc); >> - VSLO VSLO_VPS >> + VSLO_8HI VSLO_VPS >> vp __builtin_vec_slo (vp, vuc); >> - VSLO VSLO_VPU >> + VSLO_8HI VSLO_VPU >> vsi __builtin_vec_slo (vsi, vsc); >> - VSLO VSLO_VSIS >> + VSLO_4SI VSLO_VSIS >> vsi __builtin_vec_slo (vsi, vuc); >> - VSLO VSLO_VSIU >> + VSLO_4SI VSLO_VSIU >> vui __builtin_vec_slo (vui, vsc); >> - VSLO VSLO_VUIS >> + VSLO_4SI VSLO_VUIS >> vui __builtin_vec_slo (vui, vuc); >> - VSLO VSLO_VUIU >> + VSLO_4SI VSLO_VUIU >> vsll __builtin_vec_slo (vsll, vsc); >> - VSLO VSLO_VSLLS >> + VSLO_2DI VSLO_VSLLS >> vsll __builtin_vec_slo (vsll, vuc); >> - VSLO VSLO_VSLLU >> + VSLO_2DI VSLO_VSLLU >> vull __builtin_vec_slo (vull, vsc); >> - VSLO VSLO_VULLS >> + VSLO_2DI VSLO_VULLS >> vull __builtin_vec_slo (vull, vuc); >> - VSLO VSLO_VULLU >> + VSLO_2DI VSLO_VULLU >> vf 
__builtin_vec_slo (vf, vsc); >> - VSLO VSLO_VFS >> + VSLO_4SF VSLO_VFS >> vf __builtin_vec_slo (vf, vuc); >> - VSLO VSLO_VFU >> + VSLO_4SF VSLO_VFU >> vsq __builtin_vec_slo (vsq, vsc); >> - VSLO VSLDO_VSQS >> + VSLO_1TI VSLDO_VSQS >> vsq __builtin_vec_slo (vsq, vuc); >> - VSLO VSLDO_VSQU >> + VSLO_1TI VSLDO_VSQU >> vuq __builtin_vec_slo (vuq, vsc); >> - VSLO VSLDO_VUQS >> + VSLO_1TI VSLDO_VUQS >> vuq __builtin_vec_slo (vuq, vuc); >> - VSLO VSLDO_VUQU >> + VSLO_1TI VSLDO_VUQU >> >> [VEC_SLV, vec_slv, __builtin_vec_vslv] >> vuc __builtin_vec_vslv (vuc, vuc); >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr117818-1.c >> b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c >> new file mode 100644 >> index 00000000000..e0e8b6701e4 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr117818-1.c >> @@ -0,0 +1,33 @@ >> +/* { dg-do compile { target lp64 } } */ >> +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */ >> + >> +#include <altivec.h> >> + >> +typedef vector unsigned char vui8_t; >> + >> +vui8_t >> +test_splat1 (vui8_t vra) >> +{ >> + vui8_t result; >> + vui8_t tmp = vec_splat_u8(-9); /* VSPLTISB */ >> + tmp = vec_add (tmp, tmp); /* VADDUBM */ >> + result = vec_slo ((vui8_t) vra, tmp); /* VSLO */ >> + return (vui8_t) vec_sll (result, tmp); /* VSL */ >> +} >> + >> +vui8_t >> +test_splat2 (vui8_t vra) >> +{ >> + vui8_t result; >> + vui8_t tmp = vec_splat_u8(9); /* VSPLTISB */ >> + tmp = vec_add (tmp, tmp); /* VADDUBM */ >> + result = vec_slo ((vui8_t) vra, tmp); /* VSLO */ >> + return (vui8_t) vec_sll (result, tmp); /* VSL */ >> +} >> + >> +/* { dg-final { scan-assembler-times {\mvspltisb\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mvaddubm\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mvslo\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mvsl\M} 2 } } */ >> +/* { dg-final { scan-assembler-not {\mlvx?\M} } } */ >> +/* { dg-final { scan-assembler-not {\mvadduwm\M} } } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr118480-3.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c >> new file mode 100644 >> index 00000000000..37388cf944a >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr118480-3.c >> @@ -0,0 +1,39 @@ >> +/* { dg-do compile { target lp64 } } */ >> +/* { dg-options "-mdejagnu-cpu=power9 -mvsx -O2" } */ >> + >> +#include <altivec.h> >> + >> +typedef vector unsigned char vui8_t; >> + >> +vui8_t >> +test_slqi_char_18_V3 (vui8_t vra) >> +{ >> + vui8_t result; >> + vui8_t tmp = vec_splats((unsigned char)18); /* XXSPLTIB */ >> + result = vec_vslo ((vui8_t) vra, tmp); /* VSLO */ >> + return vec_vsl (result, tmp); /* VSL */ >> +} >> + >> +vui8_t >> +test_slqi_char_116_V3 (vui8_t vra) >> +{ >> + vui8_t result; >> + vui8_t tmp = vec_splats((unsigned char)116); /* XXSPLTIB */ >> + result = vec_slo (vra, tmp); /* VSLO */ >> + return vec_sll (result, tmp); /* VSL */ >> +} >> + >> +vui8_t >> +test_slqi_char_116_V0 (vui8_t vra) >> +{ >> + vui8_t result; >> + vui8_t tmp = vec_splat_u8(-12); /* XXSPLTIB */ >> + result = vec_slo (vra, tmp); /* VSLO */ >> + return vec_sll (result, tmp); /* VSL */ >> +} >> + >> +/* { dg-final { scan-assembler-times {\mxxspltib\M} 3 } } */ >> +/* { dg-final { scan-assembler-times {\mvslo\M} 3 } } */ >> +/* { dg-final { scan-assembler-times {\mvsl\M} 3 } } */ >> +/* { dg-final { scan-assembler-not {\mlxv?\M} } } */ >> +/* { dg-final { scan-assembler-not {\mvspltisb\M} } } */ >
