> On 2 Sep 2025, at 12:57, Tamar Christina <tamar.christ...@arm.com> wrote:
> 
> This implements the new vector optabs vec_<su>addh_narrow<mode>
> adding support for in-vectorizer use for early break.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> * config/aarch64/aarch64-simd.md (vec_addh_narrow<mode>): New.
> * config/aarch64/iterators.md (UNSPEC_ADDHN): New.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.target/aarch64/vect-addhn_1.c: New test.
> 
> ---
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..af9154bec67c21d186463baaf7321c9559c1094b
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -949,6 +949,22 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
>   }
> )
> 
> +(define_expand "vec_addh_narrow<mode>"
> +  [(set (match_operand:<VNARROWQ> 0 "register_operand")
> + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
> +       (match_operand:VQN 2 "register_operand"))]
> +    UNSPEC_ADDHN))]
> +  "TARGET_SIMD"
> +  {
> +    rtx shft
> +      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
> + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
> +    emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
> +     operands[2], shft));
> +    DONE;
> +  }
> +)

Just use emit_insn (gen_aarch64_addhn<mode> (operands[0], operands[1], 
operands[2])) here.
The RTL template in the define_expand can just be bare (match_operand … ) 
constructs, no need for an unspec.


> +
> (define_insn "aarch64_<su>abal<mode>"
>   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> (plus:<VWIDE>
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 
> b15e57843fed78ffe7edd927cfd7acbf395414a4..45c3968a5c937938b07992ab2869b5ae4db193a8
>  100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -806,6 +806,7 @@ (define_c_enum "unspec"
>     UNSPEC_UHADD ; Used in aarch64-simd.md.
>     UNSPEC_SRHADD ; Used in aarch64-simd.md.
>     UNSPEC_URHADD ; Used in aarch64-simd.md.
> +    UNSPEC_ADDHN ; Used in aarch64-simd.md.

Then you wouldn’t need a new UNSPEC value.
Ok with those changes.
Thanks,
Kyrill


>     UNSPEC_SHSUB ; Used in aarch64-simd.md.
>     UNSPEC_UHSUB ; Used in aarch64-simd.md.
>     UNSPEC_SQDMULH ; Used in aarch64-simd.md.
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c 
> b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
> @@ -0,0 +1,88 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +
> +#include "tree-vect.h"
> +
> +#define N 1000
> +#define CHECK_ERROR(cond, fmt, ...) \
> +  do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } 
> while (0)
> +
> +// Generates all test components for a given type combo
> +#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT)                         
>          \
> +  A_TYPE a_##A_TYPE##_##C_TYPE[N];                                           
>          \
> +  A_TYPE b_##A_TYPE##_##C_TYPE[N];                                           
>          \
> +  C_TYPE c_##A_TYPE##_##C_TYPE[N];                                           
>          \
> +  C_TYPE ref_##A_TYPE##_##C_TYPE[N];                                         
>          \
> +                                                                             
>          \
> +  void init_##A_TYPE##_##C_TYPE() {                                          
>          \
> +    _Pragma ("GCC novector")      \
> +    for (int i = 0; i < N; i++) {                                            
>          \
> +      a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3);                            
>          \
> +      b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7);                            
>          \
> +    }                                                                        
>          \
> +  }                                                                          
>          \
> +                                                                             
>          \
> +  void foo_##A_TYPE##_##C_TYPE() {                                           
>          \
> +    for (int i = 0; i < N; i++)                                              
>          \
> +      c_##A_TYPE##_##C_TYPE[i] =                                             
>          \
> +        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                               
>          \
> +         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                      
>          \
> +  }                                                                          
>          \
> +                                                                             
>          \
> +  void ref_##A_TYPE##_##C_TYPE##_compute() {                                 
>          \
> +    _Pragma ("GCC novector")      \
> +    for (int i = 0; i < N; i++)                                              
>          \
> +      ref_##A_TYPE##_##C_TYPE[i] =                                           
>          \
> +        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                               
>          \
> +         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                      
>          \
> +  }                                                                          
>          \
> +                                                                             
>          \
> +  void validate_##A_TYPE##_##C_TYPE(const char* variant_name) {              
>          \
> +    _Pragma ("GCC novector")      \
> +    for (int i = 0; i < N; i++) {                                            
>          \
> +      if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) {          
>          \
> +        printf("FAIL [%s]: Index %d: got %lld, expected %lld\n",             
>          \
> +               variant_name, i,                                              
>          \
> +               (long long)c_##A_TYPE##_##C_TYPE[i],                          
>          \
> +               (long long)ref_##A_TYPE##_##C_TYPE[i]);                       
>          \
> +        __builtin_abort ();                                                  
>          \
> +      }                                                                      
>          \
> +    }                                                                        
>          \
> +  }
> +
> +// Runs the test for one combo with name output
> +#define RUN_COMBO(A_TYPE, C_TYPE)                          \
> +  do {                                                     \
> +    init_##A_TYPE##_##C_TYPE();                            \
> +    foo_##A_TYPE##_##C_TYPE();                             \
> +    ref_##A_TYPE##_##C_TYPE##_compute();                   \
> +    validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE);  \
> +  } while (0)
> +
> +// Instantiate all valid combinations
> +TEST_COMBO(int16_t, int8_t, int32_t, 8)
> +TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
> +TEST_COMBO(int32_t, int16_t, int64_t, 16)
> +TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
> +#if defined(__aarch64__)
> +TEST_COMBO(int64_t, int32_t, __int128_t, 32)
> +TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
> +#endif
> +
> +int main() {
> +  check_vect ();
> +
> +  RUN_COMBO(int16_t, int8_t);
> +  RUN_COMBO(uint16_t, uint8_t);
> +  RUN_COMBO(int32_t, int16_t);
> +  RUN_COMBO(uint32_t, uint16_t);
> +#if defined(__aarch64__)
> +  RUN_COMBO(int64_t, int32_t);
> +  RUN_COMBO(uint64_t, uint32_t);
> +#endif
> +
> +  return 0;
> +}
> +
> 
> 
> -- 
> <rb19744.patch>

Reply via email to