On Wed, 27 Sep 2023, Tamar Christina wrote:

> Hi All,
> 
> When we have a vector conditional on a masked target which is doing a 
> selection
> on the result of a conditional operation where one of the operands of the
> conditional operation is the other operand of the select, then we can fold the
> vector conditional into the operation.
> 
> Concretely this transforms
> 
>   c = mask1 ? (masked_op mask2 a b) : b
> 
> into
> 
>   c = masked_op (mask1 & mask2) a b
> 
> The mask is then propagated upwards by the compiler.  In the SVE case we don't
> end up needing a mask AND here since `mask2` will end up in the instruction
> creating `mask` which gives us a natural &.
> 
> Such transformations are more common now in GCC 13+ as PRE has not started
> unsharing of common code in case it can make one branch fully independent.
> 
> e.g. in this case `b` becomes a loop invariant value after PRE.
> 
> This transformation removes the extra select for masked architectures but
> doesn't fix the general case.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/109154
>       * match.pd: Add new cond_op rule.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR tree-optimization/109154
>       * gcc.target/aarch64/sve/pre_cond_share_1.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> 8ebde06dcd4b26d694826cffad0fb17e1136600a..20b9ea211385d9cc3876a5002f771267533e8868
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -8827,6 +8827,30 @@ and,
>    (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
>     (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
>  
> +/* Detect simplification for vector condition folding where
> +
> +  c = mask1 ? (masked_op mask2 a b) : b
> +
> +  into
> +
> +  c = masked_op (mask1 & mask2) a b
> +
> +  where the operation can be partially applied to one operand. */
> +
> +(for cond_op (COND_BINARY)
> + (simplify
> +  (vec_cond @0
> +   (cond_op:s @1 @2 @3 @4) @3)
> +  (cond_op (BIT_AND_EXPR @1 @0) @2 @3 @4)))

(bit_and ..., not BIT_AND_EXPR please

> +
> +/* And same for ternary expressions.  */
> +
> +(for cond_op (COND_TERNARY)
> + (simplify
> +  (vec_cond @0
> +   (cond_op:s @1 @2 @3 @4 @5) @4)
> +  (cond_op (BIT_AND_EXPR @1 @0) @2 @3 @4 @5)))

likewise

OK with that change.

Thanks,
Richard.

> +
>  /* For pointers @0 and @2 and nonnegative constant offset @1, look for
>     expressions like:
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..b51d0f298ea1fcf556365fe4afc875ebcd67584b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c
> @@ -0,0 +1,132 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +#include <stddef.h>
> +#include <math.h>
> +#include <float.h>
> +
> +typedef struct __attribute__((__packed__)) _Atom {
> +    float x, y, z;
> +    int32_t type;
> +} Atom;
> +
> +typedef struct __attribute__((__packed__)) _FFParams {
> +    int32_t hbtype;
> +    float radius;
> +    float hphb;
> +    float elsc;
> +} FFParams;
> +
> +#ifndef PPWI
> +#define PPWI (64)
> +#endif
> +
> +#ifndef ITERS
> +#define ITERS 8
> +#endif
> +
> +#define DIFF_TOLERANCE_PCT 0.025f
> +
> +#define POSES_SIZE 393216
> +#define PROTEIN_SIZE 938
> +#define LIGAND_SIZE 26
> +#define FORCEFIELD_SIZE 34
> +
> +#define ZERO 0.0f
> +#define QUARTER 0.25f
> +#define HALF 0.5f
> +#define ONE 1.0f
> +#define TWO 2.0f
> +#define FOUR 4.0f
> +#define CNSTNT 45.0f
> +
> +// Energy evaluation parameters
> +#define HBTYPE_F 70
> +#define HBTYPE_E 69
> +#define HARDNESS 38.0f
> +#define NPNPDIST 5.5f
> +#define NPPDIST 1.0f
> +
> +void
> +fasten_main(size_t group, size_t ntypes, size_t nposes, size_t natlig, 
> size_t natpro,        //
> +            const Atom *protein, const Atom *ligand,                         
>                 //
> +            const float *transforms_0, const float *transforms_1, const 
> float *transforms_2, //
> +            const float *transforms_3, const float *transforms_4, const 
> float *transforms_5, //
> +            const FFParams *forcefield, float *energies                      
>                 //
> +) {
> +
> +    float etot[PPWI];
> +    float lpos_x[PPWI];
> +
> +    for (int l = 0; l < PPWI; l++) {
> +        etot[l] = 0.f;
> +        lpos_x[l] = 0.f;
> +    }
> +
> +    // Loop over ligand atoms
> +    for (int il = 0; il < natlig; il++) {
> +        // Load ligand atom data
> +        const Atom l_atom = ligand[il];
> +        const FFParams l_params = forcefield[l_atom.type];
> +        const int lhphb_ltz = l_params.hphb < 0.f;
> +        const int lhphb_gtz = l_params.hphb > 0.f;
> +
> +        // Transform ligand atom
> +
> +        // Loop over protein atoms
> +        for (int ip = 0; ip < natpro; ip++) {
> +            // Load protein atom data
> +            const Atom p_atom = protein[ip];
> +            const FFParams p_params = forcefield[p_atom.type];
> +
> +            const float radij = p_params.radius + l_params.radius;
> +            const float r_radij = ONE / radij;
> +
> +            const float elcdst = (p_params.hbtype == HBTYPE_F && 
> l_params.hbtype == HBTYPE_F) ? FOUR
> +                                                                             
>                  : TWO;
> +            const float elcdst1 = (p_params.hbtype == HBTYPE_F && 
> l_params.hbtype == HBTYPE_F)
> +                                  ? QUARTER : HALF;
> +            const int type_E = ((p_params.hbtype == HBTYPE_E || 
> l_params.hbtype == HBTYPE_E));
> +
> +            const int phphb_ltz = p_params.hphb < 0.f;
> +            const int phphb_gtz = p_params.hphb > 0.f;
> +            const int phphb_nz = p_params.hphb != 0.f;
> +            const float p_hphb = p_params.hphb * (phphb_ltz && lhphb_gtz ? 
> -ONE : ONE);
> +            const float l_hphb = l_params.hphb * (phphb_gtz && lhphb_ltz ? 
> -ONE : ONE);
> +            const float distdslv = (phphb_ltz ? (lhphb_ltz ? NPNPDIST : 
> NPPDIST) : (lhphb_ltz
> +                                                                             
>        ? NPPDIST
> +                                                                             
>        : -FLT_MAX));
> +            const float r_distdslv = ONE / distdslv;
> +
> +            const float chrg_init = l_params.elsc * p_params.elsc;
> +            const float dslv_init = p_hphb + l_hphb;
> +
> +            for (int l = 0; l < PPWI; l++) {
> +                // Calculate distance between atoms
> +                const float x = lpos_x[l] - p_atom.x;
> +                const float distij = (x * x);
> +
> +                // Calculate the sum of the sphere radii
> +                const float distbb = distij - radij;
> +
> +                const int zone1 = (distbb < ZERO);
> +
> +                // Calculate formal and dipole charge interactions
> +                float chrg_e = chrg_init * ((zone1 ? ONE : (ONE - distbb * 
> elcdst1)) *
> +                                            (distbb < elcdst ? ONE : ZERO));
> +                float neg_chrg_e = -fabsf(chrg_e);
> +                chrg_e = type_E ? neg_chrg_e : chrg_e;
> +                etot[l] += chrg_e * CNSTNT;
> +            }
> +        }
> +    }
> +
> +    // Write result
> +    for (int l = 0; l < PPWI; l++) {
> +        energies[group * PPWI + l] = etot[l] * HALF;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\.COND_MUL} 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" } } */
> 
> 
> 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to