Jakub Jelinek <ja...@redhat.com> wrote: >On Fri, Apr 05, 2013 at 12:46:48PM +0200, Richard Biener wrote: >> >BTW, the integer_all_onesp stuff is broken for this from what I can see, for complex numbers it returns true for -1 + 0i where all bytes aren't 0xff, so we need to rule out COMPLEX_CSTs (or do integer_all_onesp on each part instead). And TYPE_PRECISION on VECTOR_CSTs won't be what we are looking for. >> Hmm, indeed. Or remove the -1 special casing altogether. >Ok, zero/CONSTRUCTOR moved into the function, all_onesp handling removed (so only on the CHAR_BIT == 8 hosts and BITS_PER_UNIT == 8 targets it will be optimized). Ok for trunk?
Ok. Thanks, Richard. >> Marc is probably right with his note as well. > >I'll defer that to Marc ;) > >2013-04-05 Jakub Jelinek <ja...@redhat.com> > > * tree-loop-distribution.c (const_with_all_bytes_same): New function. > (generate_memset_builtin): Only handle integer_all_onesp as -1 val if > TYPE_PRECISION is equal to mode bitsize. Use >const_with_all_bytes_same > if possible to compute val. > (classify_partition): Verify CONSTRUCTOR doesn't have any elts. > For QImode integers don't require anything about precision. Use > const_with_all_bytes_same to find out if the constant doesn't have > repeated bytes in it. > > * gcc.dg/pr56837.c: New test. > >--- gcc/tree-loop-distribution.c.jj 2013-04-04 15:03:28.000000000 +0200 >+++ gcc/tree-loop-distribution.c 2013-04-05 15:21:10.641668895 +0200 >@@ -297,6 +297,36 @@ build_addr_arg_loc (location_t loc, data >return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), >addr_base); > } > >+/* If VAL memory representation contains the same value in all bytes, >+ return that value, otherwise return -1. >+ E.g. for 0x24242424 return 0x24, for IEEE double >+ 747708026454360457216.0 return 0x44, etc. */ >+ >+static int >+const_with_all_bytes_same (tree val) >+{ >+ unsigned char buf[64]; >+ int i, len; >+ >+ if (integer_zerop (val) >+ || real_zerop (val) >+ || (TREE_CODE (val) == CONSTRUCTOR >+ && !TREE_CLOBBER_P (val) >+ && CONSTRUCTOR_NELTS (val) == 0)) >+ return 0; >+ >+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) >+ return -1; >+ >+ len = native_encode_expr (val, buf, sizeof (buf)); >+ if (len == 0) >+ return -1; >+ for (i = 1; i < len; i++) >+ if (buf[i] != buf[0]) >+ return -1; >+ return buf[0]; >+} >+ > /* Generate a call to memset for PARTITION in LOOP. */ > > static void >@@ -327,24 +357,20 @@ generate_memset_builtin (struct loop *lo > >/* This exactly matches the pattern recognition in classify_partition. 
>*/ > val = gimple_assign_rhs1 (stmt); >- if (integer_zerop (val) >- || real_zerop (val) >- || TREE_CODE (val) == CONSTRUCTOR) >- val = integer_zero_node; >- else if (integer_all_onesp (val)) >- val = build_int_cst (integer_type_node, -1); >- else >- { >- if (TREE_CODE (val) == INTEGER_CST) >- val = fold_convert (integer_type_node, val); >- else if (!useless_type_conversion_p (integer_type_node, >TREE_TYPE (val))) >- { >- gimple cstmt; >- tree tem = make_ssa_name (integer_type_node, NULL); >- cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, >NULL_TREE); >- gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); >- val = tem; >- } >+ /* Handle constants like 0x15151515 and similarly >+ floating point constants etc. where all bytes are the same. */ >+ int bytev = const_with_all_bytes_same (val); >+ if (bytev != -1) >+ val = build_int_cst (integer_type_node, bytev); >+ else if (TREE_CODE (val) == INTEGER_CST) >+ val = fold_convert (integer_type_node, val); >+ else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE >(val))) >+ { >+ gimple cstmt; >+ tree tem = make_ssa_name (integer_type_node, NULL); >+ cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, >NULL_TREE); >+ gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); >+ val = tem; > } > > fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); >@@ -354,10 +380,8 @@ generate_memset_builtin (struct loop *lo > if (dump_file && (dump_flags & TDF_DETAILS)) > { > fprintf (dump_file, "generated memset"); >- if (integer_zerop (val)) >+ if (bytev == 0) > fprintf (dump_file, " zero\n"); >- else if (integer_all_onesp (val)) >- fprintf (dump_file, " minus one\n"); > else > fprintf (dump_file, "\n"); > } >@@ -941,18 +965,10 @@ classify_partition (loop_p loop, struct > { > gimple stmt = DR_STMT (single_store); > tree rhs = gimple_assign_rhs1 (stmt); >- if (!(integer_zerop (rhs) >- || real_zerop (rhs) >- || (TREE_CODE (rhs) == CONSTRUCTOR >- && !TREE_CLOBBER_P (rhs)) >- || ((integer_all_onesp 
(rhs) >- || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) >- && (TYPE_MODE (TREE_TYPE (rhs)) >- == TYPE_MODE (unsigned_char_type_node)))) >- /* For stores of a non-zero value require that the precision >- of the value matches its actual size. */ >- && (TYPE_PRECISION (TREE_TYPE (rhs)) >- == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs))))))) >+ if (const_with_all_bytes_same (rhs) == -1 >+ && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs)) >+ || (TYPE_MODE (TREE_TYPE (rhs)) >+ != TYPE_MODE (unsigned_char_type_node)))) > return; > if (TREE_CODE (rhs) == SSA_NAME > && !SSA_NAME_IS_DEFAULT_DEF (rhs) >--- gcc/testsuite/gcc.dg/pr56837.c.jj 2013-04-04 17:37:58.458675152 >+0200 >+++ gcc/testsuite/gcc.dg/pr56837.c 2013-04-04 17:36:40.000000000 +0200 >@@ -0,0 +1,67 @@ >+/* Limit this test to selected targets with IEEE double, 8-byte long >long, >+ supported 4x int vectors, 4-byte int. */ >+/* { dg-do compile { target { i?86-*-* x86_64-*-* powerpc*-*-* } } } >*/ >+/* { dg-options "-O3 -fdump-tree-optimized" } */ >+/* { dg-additional-options "-msse2" { target ia32 } } */ >+/* { dg-additional-options "-mvsx -maltivec" { target powerpc*-*-* } } >*/ >+ >+typedef int V __attribute__((__vector_size__ (16))); >+#define N 1024 >+double d[N]; >+long long int l[N]; >+_Bool b[N]; >+_Complex double c[N]; >+V v[N]; >+ >+void >+fd (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ d[i] = 747708026454360457216.0; >+} >+ >+void >+fl (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ l[i] = 0x7c7c7c7c7c7c7c7cULL; >+} >+ >+void >+fb (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ b[i] = 1; >+} >+ >+void >+fc (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ c[i] = 747708026454360457216.0 + 747708026454360457216.0i; >+} >+ >+void >+fv (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ v[i] = (V) { 0x12121212, 0x12121212, 0x12121212, 0x12121212 }; >+} >+ >+/* Look for >+ __builtin_memset (&d, 68, 8192); >+ __builtin_memset (&l, 124, 8192); >+ __builtin_memset (&b, 1, 1024); >+ __builtin_memset (&c, 68, 16384); >+ 
__builtin_memset (&v, 18, 16384); */ >+/* { dg-final { scan-tree-dump-times "memset ..d, 68, 8192.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..l, 124, 8192.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..b, 1, 1024.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..c, 68, 16384.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..v, 18, 16384.;" 1 >"optimized" } } */ >+/* { dg-final { cleanup-tree-dump "optimized" } } */ > > > Jakub