Pengxuan Zheng <quic_pzh...@quicinc.com> writes:
> Similar to the canonicalization done in combine, we canonicalize
> vec_merge with swap_commutative_operands_p in
> simplify_ternary_operation too.
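>
> For example, for a 4-element vector, this canonicalizes
> (vec_merge (const_vector [0 0 0 0]) (reg) (const_int 1)) into
> (vec_merge (reg) (const_vector [0 0 0 0]) (const_int 14)), since
> registers have higher precedence than constants.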
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-protos.h (aarch64_exact_log2_inverse): New.
>       * config/aarch64/aarch64-simd.md (aarch64_simd_vec_set_zero<mode>):
>       Update pattern accordingly.
>       * config/aarch64/aarch64.cc (aarch64_exact_log2_inverse): New.
>       * simplify-rtx.cc (simplify_context::simplify_ternary_operation):
>       Canonicalize vec_merge.

OK for GCC 16, thanks.  aarch64_exact_log2_inverse isn't really
target-specific, but I can't think of a target-independent set of
interfaces that it would naturally fit.
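
FWIW, a quick sketch of the helper's behaviour for a 4-element vector
(the constants here are only illustrative):

  aarch64_exact_log2_inverse (4, GEN_INT (14));  /* ~14 & 0xf == 1, so 0.  */
  aarch64_exact_log2_inverse (4, GEN_INT (5));   /* ~5 & 0xf == 10, so -1.  */

so a nonnegative result means that exactly one lane is taken from the
zero operand, which is what the "ins ... zr" pattern needs.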

Richard

>
> Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com>
> ---
>  gcc/config/aarch64/aarch64-protos.h |  1 +
>  gcc/config/aarch64/aarch64-simd.md  | 10 ++++++----
>  gcc/config/aarch64/aarch64.cc       | 10 ++++++++++
>  gcc/simplify-rtx.cc                 |  7 +++++++
>  4 files changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 4235f4a0ca5..2391b99cacd 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1051,6 +1051,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
>                                 rtx *, rtx *, rtx *);
>  void aarch64_expand_subvti (rtx, rtx, rtx,
>                           rtx, rtx, rtx, rtx, bool);
> +int aarch64_exact_log2_inverse (unsigned int, rtx);
>  
>  
>  /* Initialize builtins for SIMD intrinsics.  */
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index e2afe87e513..1099e742cbf 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -1193,12 +1193,14 @@ (define_insn "@aarch64_simd_vec_set<mode>"
>  (define_insn "aarch64_simd_vec_set_zero<mode>"
>    [(set (match_operand:VALL_F16 0 "register_operand" "=w")
>       (vec_merge:VALL_F16
> -         (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
> -         (match_operand:VALL_F16 3 "register_operand" "0")
> +         (match_operand:VALL_F16 1 "register_operand" "0")
> +         (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "")
>           (match_operand:SI 2 "immediate_operand" "i")))]
> -  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
> +  "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0"
>    {
> -    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
> +    int elt = ENDIAN_LANE_N (<nunits>,
> +                          aarch64_exact_log2_inverse (<nunits>,
> +                                                      operands[2]));
>      operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
>      return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
>    }
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index f5f23f6ff4b..103a00915e5 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23682,6 +23682,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands,
>    return true;
>  }
>  
> +/* Return the base 2 logarithm of the bitwise inverse of OP masked by the
> +   lowest NELTS bits, if that value is a power of 2.  Otherwise, return -1.  */
> +
> +int
> +aarch64_exact_log2_inverse (unsigned int nelts, rtx op)
> +{
> +  return exact_log2 ((~INTVAL (op))
> +                  & ((HOST_WIDE_INT_1U << nelts) - 1));
> +}
> +
>  /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
>     HIGH (exclusive).  */
>  void
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> index c478bd060fc..22002d1e1ab 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -7307,6 +7307,13 @@ simplify_context::simplify_ternary_operation (rtx_code code, machine_mode mode,
>             return gen_rtx_CONST_VECTOR (mode, v);
>           }
>  
> +       if (swap_commutative_operands_p (op0, op1)
> +           /* If the two operands have the same precedence, make the
> +              first bit of the mask select the first operand.  */
> +           || (!swap_commutative_operands_p (op1, op0) && !(sel & 1)))
> +         return simplify_gen_ternary (code, mode, mode, op1, op0,
> +                                      GEN_INT (~sel & mask));
> +
>         /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n)
>            if no element from a appears in the result.  */
>         if (GET_CODE (op0) == VEC_MERGE)
