On Wed, 27 Sep 2023, Tamar Christina wrote: > Hi All, > > When we have a vector conditional on a masked target which is doing a > selection > on the result of a conditional operation where one of the operands of the > conditional operation is the other operand of the select, then we can fold the > vector conditional into the operation. > > Concretely this transforms > > c = mask1 ? (masked_op mask2 a b) : b > > into > > c = masked_op (mask1 & mask2) a b > > The mask is then propagated upwards by the compiler. In the SVE case we don't > end up needing a mask AND here since `mask2` will end up in the instruction > creating `mask` which gives us a natural &. > > Such transformations are more common now in GCC 13+ as PRE has now started > unsharing of common code in case it can make one branch fully independent. > > e.g. in this case `b` becomes a loop invariant value after PRE. > > This transformation removes the extra select for masked architectures but > doesn't fix the general case. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/109154 > * match.pd: Add new cond_op rule. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/109154 > * gcc.target/aarch64/sve/pre_cond_share_1.c: New test. > > --- inline copy of patch -- > diff --git a/gcc/match.pd b/gcc/match.pd > index > 8ebde06dcd4b26d694826cffad0fb17e1136600a..20b9ea211385d9cc3876a5002f771267533e8868 > 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -8827,6 +8827,30 @@ and, > (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1) > (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1)) > > +/* Detect simplification for vector condition folding where > + > + c = mask1 ? (masked_op mask2 a b) : b > + > + into > + > + c = masked_op (mask1 & mask2) a b > + > + where the operation can be partially applied to one operand. 
*/ > + > +(for cond_op (COND_BINARY) > + (simplify > + (vec_cond @0 > + (cond_op:s @1 @2 @3 @4) @3) > + (cond_op (BIT_AND_EXPR @1 @0) @2 @3 @4)))
(bit_and ..., not BIT_AND_EXPR please > + > +/* And same for ternary expressions. */ > + > +(for cond_op (COND_TERNARY) > + (simplify > + (vec_cond @0 > + (cond_op:s @1 @2 @3 @4 @5) @4) > + (cond_op (BIT_AND_EXPR @1 @0) @2 @3 @4 @5))) likewise OK with that change. Thanks, Richard. > + > /* For pointers @0 and @2 and nonnegative constant offset @1, look for > expressions like: > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..b51d0f298ea1fcf556365fe4afc875ebcd67584b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > @@ -0,0 +1,132 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -fdump-tree-optimized" } */ > + > +#include <stdint.h> > +#include <stddef.h> > +#include <math.h> > +#include <float.h> > + > +typedef struct __attribute__((__packed__)) _Atom { > + float x, y, z; > + int32_t type; > +} Atom; > + > +typedef struct __attribute__((__packed__)) _FFParams { > + int32_t hbtype; > + float radius; > + float hphb; > + float elsc; > +} FFParams; > + > +#ifndef PPWI > +#define PPWI (64) > +#endif > + > +#ifndef ITERS > +#define ITERS 8 > +#endif > + > +#define DIFF_TOLERANCE_PCT 0.025f > + > +#define POSES_SIZE 393216 > +#define PROTEIN_SIZE 938 > +#define LIGAND_SIZE 26 > +#define FORCEFIELD_SIZE 34 > + > +#define ZERO 0.0f > +#define QUARTER 0.25f > +#define HALF 0.5f > +#define ONE 1.0f > +#define TWO 2.0f > +#define FOUR 4.0f > +#define CNSTNT 45.0f > + > +// Energy evaluation parameters > +#define HBTYPE_F 70 > +#define HBTYPE_E 69 > +#define HARDNESS 38.0f > +#define NPNPDIST 5.5f > +#define NPPDIST 1.0f > + > +void > +fasten_main(size_t group, size_t ntypes, size_t nposes, size_t natlig, > size_t natpro, // > + const Atom *protein, const Atom *ligand, > // > + const float *transforms_0, const float *transforms_1, const > float *transforms_2, // > + const float 
*transforms_3, const float *transforms_4, const > float *transforms_5, // > + const FFParams *forcefield, float *energies > // > +) { > + > + float etot[PPWI]; > + float lpos_x[PPWI]; > + > + for (int l = 0; l < PPWI; l++) { > + etot[l] = 0.f; > + lpos_x[l] = 0.f; > + } > + > + // Loop over ligand atoms > + for (int il = 0; il < natlig; il++) { > + // Load ligand atom data > + const Atom l_atom = ligand[il]; > + const FFParams l_params = forcefield[l_atom.type]; > + const int lhphb_ltz = l_params.hphb < 0.f; > + const int lhphb_gtz = l_params.hphb > 0.f; > + > + // Transform ligand atom > + > + // Loop over protein atoms > + for (int ip = 0; ip < natpro; ip++) { > + // Load protein atom data > + const Atom p_atom = protein[ip]; > + const FFParams p_params = forcefield[p_atom.type]; > + > + const float radij = p_params.radius + l_params.radius; > + const float r_radij = ONE / radij; > + > + const float elcdst = (p_params.hbtype == HBTYPE_F && > l_params.hbtype == HBTYPE_F) ? FOUR > + > : TWO; > + const float elcdst1 = (p_params.hbtype == HBTYPE_F && > l_params.hbtype == HBTYPE_F) > + ? QUARTER : HALF; > + const int type_E = ((p_params.hbtype == HBTYPE_E || > l_params.hbtype == HBTYPE_E)); > + > + const int phphb_ltz = p_params.hphb < 0.f; > + const int phphb_gtz = p_params.hphb > 0.f; > + const int phphb_nz = p_params.hphb != 0.f; > + const float p_hphb = p_params.hphb * (phphb_ltz && lhphb_gtz ? > -ONE : ONE); > + const float l_hphb = l_params.hphb * (phphb_gtz && lhphb_ltz ? > -ONE : ONE); > + const float distdslv = (phphb_ltz ? (lhphb_ltz ? NPNPDIST : > NPPDIST) : (lhphb_ltz > + > ? 
NPPDIST > + > : -FLT_MAX)); > + const float r_distdslv = ONE / distdslv; > + > + const float chrg_init = l_params.elsc * p_params.elsc; > + const float dslv_init = p_hphb + l_hphb; > + > + for (int l = 0; l < PPWI; l++) { > + // Calculate distance between atoms > + const float x = lpos_x[l] - p_atom.x; > + const float distij = (x * x); > + > + // Calculate the sum of the sphere radii > + const float distbb = distij - radij; > + > + const int zone1 = (distbb < ZERO); > + > + // Calculate formal and dipole charge interactions > + float chrg_e = chrg_init * ((zone1 ? ONE : (ONE - distbb * > elcdst1)) * > + (distbb < elcdst ? ONE : ZERO)); > + float neg_chrg_e = -fabsf(chrg_e); > + chrg_e = type_E ? neg_chrg_e : chrg_e; > + etot[l] += chrg_e * CNSTNT; > + } > + } > + } > + > + // Write result > + for (int l = 0; l < PPWI; l++) { > + energies[group * PPWI + l] = etot[l] * HALF; > + } > +} > + > +/* { dg-final { scan-tree-dump-times {\.COND_MUL} 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" } } */ > > > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)