Hi!

As mentioned in the second PR, using table names like
crc_table_for_crc_8_polynomial_0x12
in the user namespace is wrong, user could have defined such variables
in their code and as can be seen on the last testcase, then it just
misbehaves.
At minimum such names should start with 2 underscores, moving it into
implementation namespace, and if possible have some dot or dollar in the
name if target supports it.
I think assemble_crc_table right now always emits tables a local variables,
I really don't see what would be setting TREE_PUBLIC flag on
IDENTIFIER_NODEs.
It might be nice to share the tables between TUs in the same binary or
shared library, but it in that case should have hidden visibility if
possible, so that it isn't exported from the libraries or binaries, we don't
want the optimization to affect set of exported symbols from libraries.
And, as can be seen in the first PR, building gen_rtx_SYMBOL_REF by hand
is certainly unexpected on some targets, e.g. those which use
-fsection-anchors, so we should instead use DECL_RTL of the VAR_DECL.
For that we'd need to look it up if we haven't emitted it already, while
IDENTIFIER_NODEs can be looked up easily, I guess for the VAR_DECLs we'd
need custom hash table.

Now, all of the above (except sharing between multiple TUs) is already
implemented in output_constant_def, so I think it is much better to just
use that function.

And, if we want to share it between multiple TUs, we could extend the
SHF_MERGE usage in gcc, currently we only use it for constant pool
entries with same size as alignment, from 1 to 32 bytes, using .rodata.cstN
sections.  We could just use say .rodata.cstM.N sections where M would be
alignment and N would be the entity size.  We could use that for all
constant pool entries say up to 2048 bytes.
Though, as the current code doesn't share between multiple TUs, I think it
can be done incrementally (either still for GCC 15, or GCC 16+).

Bootstrapped/regtested on {x86_64,i686,aarch64,powerpc64le,s390x}-linux, on
aarch64 it also fixes
-FAIL: crypto/rsa
-FAIL: hash
ok for trunk?

2025-01-12  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/117997
        PR middle-end/118415
        * expr.cc (assemble_crc_table): Make static, remove id argument,
        use output_constant_def.  Emit note if -fdump-rtl-expand-details
        about which table has been emitted.
        (generate_crc_table): Make static, adjust assemble_crc_table
        caller, call it always.
        (calculate_table_based_CRC): Make static.
        * internal-fn.cc (expand_crc_optab_fn): Emit note if
        -fdump-rtl-expand-details about using optab for crc.  Formatting fix.

        * gcc.dg/crc-builtin-target32.c: Add -fdump-rtl-expand-details
        as dg-additional-options.  Scan expand dump rather than assembly,
        adjust the regexps.
        * gcc.dg/crc-builtin-target64.c: Likewise.
        * gcc.dg/crc-builtin-rev-target32.c: Likewise.
        * gcc.dg/crc-builtin-rev-target64.c: Likewise.
        * gcc.dg/pr117997.c: New test.
        * gcc.dg/pr118415.c: New test.

--- gcc/expr.cc.jj      2025-01-08 10:05:24.498994763 +0100
+++ gcc/expr.cc 2025-01-12 18:59:59.209245917 +0100
@@ -14247,25 +14247,16 @@ calculate_crc (unsigned HOST_WIDE_INT cr
   return crc;
 }
 
-/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS with
-   given ID.
-   ID is the identifier of the table, the name of the table is unique,
-   contains CRC size and the polynomial.
+/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS.
    POLYNOM is the polynomial used to calculate the CRC table's elements.
    CRC_BITS is the size of CRC, may be 8, 16, ... . */
 
-rtx
-assemble_crc_table (tree id, unsigned HOST_WIDE_INT polynom,
-                   unsigned short crc_bits)
+static rtx
+assemble_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short crc_bits)
 {
   unsigned table_el_n = 0x100;
   tree ar = build_array_type (make_unsigned_type (crc_bits),
                              build_index_type (size_int (table_el_n - 1)));
-  tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, id, ar);
-  SET_DECL_ASSEMBLER_NAME (decl, id);
-  DECL_ARTIFICIAL (decl) = 1;
-  rtx tab = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
-  TREE_ASM_WRITTEN (decl) = 0;
 
   /* Initialize the table.  */
   vec<tree, va_gc> *initial_values;
@@ -14276,21 +14267,20 @@ assemble_crc_table (tree id, unsigned HO
       tree element = build_int_cstu (make_unsigned_type (crc_bits), crc);
       vec_safe_push (initial_values, element);
     }
-  DECL_INITIAL (decl) = build_constructor_from_vec (ar, initial_values);
-
-  TREE_READONLY (decl) = 1;
-  TREE_STATIC (decl) = 1;
-
-  if (TREE_PUBLIC (id))
+  tree ctor = build_constructor_from_vec (ar, initial_values);
+  rtx mem = output_constant_def (ctor, 1);
+  gcc_assert (MEM_P (mem));
+  if (dump_file && (dump_flags & TDF_DETAILS))
     {
-      TREE_PUBLIC (decl) = 1;
-      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+      fprintf (dump_file,
+              ";; emitting crc table crc_%u_polynomial_"
+              HOST_WIDE_INT_PRINT_HEX " ",
+              crc_bits, polynom);
+      print_rtl_single (dump_file, XEXP (mem, 0));
+      fprintf (dump_file, "\n");
     }
 
-  mark_decl_referenced (decl);
-  varpool_node::finalize_decl (decl);
-
-  return tab;
+  return XEXP (mem, 0);
 }
 
 /* Generate CRC lookup table by calculating CRC for all possible
@@ -14299,24 +14289,12 @@ assemble_crc_table (tree id, unsigned HO
    POLYNOM is the polynomial used to calculate the CRC table's elements.
    CRC_BITS is the size of CRC, may be 8, 16, ... .  */
 
-rtx
+static rtx
 generate_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short crc_bits)
 {
   gcc_assert (crc_bits <= 64);
 
-  /* Buf size - 24 letters + 6 '_'
-     + 20 numbers (2 for crc bit size + 2 for 0x + 16 for 64-bit polynomial)
-     + 1 for \0.  */
-  char buf[51];
-  sprintf (buf, "crc_table_for_crc_%u_polynomial_" HOST_WIDE_INT_PRINT_HEX,
-          crc_bits, polynom);
-
-  tree id = maybe_get_identifier (buf);
-  if (id)
-    return gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
-
-  id = get_identifier (buf);
-  return assemble_crc_table (id, polynom, crc_bits);
+  return assemble_crc_table (polynom, crc_bits);
 }
 
 /* Generate table-based CRC code for the given CRC, INPUT_DATA and the
@@ -14335,7 +14313,7 @@ generate_crc_table (unsigned HOST_WIDE_I
    If input data size is not 8, then first we extract upper 8 bits,
    then the other 8 bits, and so on.  */
 
-void
+static void
 calculate_table_based_CRC (rtx *crc, const rtx &input_data,
                           const rtx &polynomial,
                           machine_mode data_mode)
--- gcc/internal-fn.cc.jj       2025-01-02 11:23:02.466713008 +0100
+++ gcc/internal-fn.cc  2025-01-12 19:05:51.214271429 +0100
@@ -4026,7 +4026,7 @@ expand_crc_optab_fn (internal_fn fn, gca
   rtx data = expand_normal (rhs2);
   gcc_assert (TREE_CODE (rhs3) == INTEGER_CST);
   rtx polynomial = gen_rtx_CONST_INT (TYPE_MODE (result_type),
-  TREE_INT_CST_LOW (rhs3));
+                                     TREE_INT_CST_LOW (rhs3));
 
   /* Use target specific expansion if it exists.
      Otherwise, generate table-based CRC.  */
@@ -4034,6 +4034,16 @@ expand_crc_optab_fn (internal_fn fn, gca
                                      OPTIMIZE_FOR_SPEED))
     {
       class expand_operand ops[4];
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       {
+         fprintf (dump_file,
+                  ";; using optab for crc_%u_polynomial_"
+                  HOST_WIDE_INT_PRINT_HEX "\n",
+                  GET_MODE_BITSIZE (GET_MODE (dest)).to_constant (),
+                  TREE_INT_CST_LOW (rhs3));
+       }
+
       create_call_lhs_operand (&ops[0], dest, TYPE_MODE (result_type));
       create_input_operand (&ops[1], crc, TYPE_MODE (result_type));
       create_input_operand (&ops[2], data, TYPE_MODE (data_type));
--- gcc/testsuite/gcc.dg/crc-builtin-target32.c.jj      2024-11-28 
15:36:31.199828815 +0100
+++ gcc/testsuite/gcc.dg/crc-builtin-target32.c 2025-01-12 19:11:30.151481611 
+0100
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target int32plus } */
+/* { dg-additional-options "-fdump-rtl-expand-details" } */
 
 #include <stdint-gcc.h>
 
@@ -33,6 +34,6 @@ int32_t crc32_data32 ()
   return __builtin_crc32_data32 (0xffffffff, 0x123546ff, 0x4002123);
 }
 
-/* { dg-final { scan-assembler "crc_table_for_crc_8_polynomial_0x12|mul" } } */
-/* { dg-final { scan-assembler "crc_table_for_crc_16_polynomial_0x1021|mul"} } 
*/
-/* { dg-final { scan-assembler 
"crc_table_for_crc_32_polynomial_0x4002123|mul"} } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_8_polynomial_0x12" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_16_polynomial_0x1021" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_32_polynomial_0x4002123" "expand" } } */
--- gcc/testsuite/gcc.dg/crc-builtin-target64.c.jj      2024-11-28 
15:36:31.199828815 +0100
+++ gcc/testsuite/gcc.dg/crc-builtin-target64.c 2025-01-12 19:12:36.214548019 
+0100
@@ -1,5 +1,6 @@
 /* { dg-do compile { target lp64 } } */
 /* { dg-require-effective-target int32plus } */
+/* { dg-additional-options "-fdump-rtl-expand-details" } */
 
 #include <stdint-gcc.h>
 
@@ -55,7 +56,6 @@ int64_t crc64_data64 ()
                                 0x40021234002123);
 }
 
-/* { dg-final { scan-assembler "crc_table_for_crc_8_polynomial_0x12|mul" } } */
-/* { dg-final { scan-assembler "crc_table_for_crc_16_polynomial_0x1021|mul" } 
} */
-/* { dg-final { scan-assembler "crc_table_for_crc_32_polynomial_0x4002123|mul" 
} } */
-/* { dg-final { scan-assembler 
"crc_table_for_crc_64_polynomial_0x40021234002123|mul" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_16_polynomial_0x1021" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_32_polynomial_0x4002123" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_64_polynomial_0x40021234002123" "expand" } } */
--- gcc/testsuite/gcc.dg/crc-builtin-rev-target32.c.jj  2024-11-28 
15:36:31.198828829 +0100
+++ gcc/testsuite/gcc.dg/crc-builtin-rev-target32.c     2025-01-12 
19:13:01.603189231 +0100
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target int32plus } */
+/* { dg-additional-options "-fdump-rtl-expand-details" } */
 
 #include <stdint-gcc.h>
 
@@ -33,6 +34,6 @@ int32_t rev_crc32_data32 ()
   return __builtin_rev_crc32_data32 (0xffffffff, 0x123546ff, 0x4002123);
 }
 
-/* { dg-final { scan-assembler "crc_table_for_crc_8_polynomial_0x12|mul"} } */
-/* { dg-final { scan-assembler "crc_table_for_crc_16_polynomial_0x1021|mul"} } 
*/
-/* { dg-final { scan-assembler 
"crc_table_for_crc_32_polynomial_0x4002123|mul"} } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_8_polynomial_0x12" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_16_polynomial_0x1021" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_32_polynomial_0x4002123" "expand" } } */
--- gcc/testsuite/gcc.dg/crc-builtin-rev-target64.c.jj  2024-11-28 
15:36:31.198828829 +0100
+++ gcc/testsuite/gcc.dg/crc-builtin-rev-target64.c     2025-01-12 
19:13:21.869902823 +0100
@@ -1,5 +1,6 @@
 /* { dg-do compile { target lp64 } } */
 /* { dg-require-effective-target int32plus } */
+/* { dg-additional-options "-fdump-rtl-expand-details" } */
 
 #include <stdint-gcc.h>
 
@@ -57,6 +58,6 @@ int64_t rev_crc64_data64 ()
                                     0x40021234002123);
 }
 
-/* { dg-final { scan-assembler "crc_table_for_crc_8_polynomial_0x12|mul" } } */
-/* { dg-final { scan-assembler "crc_table_for_crc_16_polynomial_0x1021|mul" } 
} */
-/* { dg-final { scan-assembler "crc_table_for_crc_32_polynomial_0x4002123|mul" 
} } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_8_polynomial_0x12" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_16_polynomial_0x1021" "expand" } } */
+/* { dg-final { scan-rtl-dump ";; (?:using optab for|emitting crc table) 
crc_32_polynomial_0x4002123" "expand" } } */
--- gcc/testsuite/gcc.dg/pr117997.c.jj  2025-01-11 14:06:56.395319031 +0100
+++ gcc/testsuite/gcc.dg/pr117997.c     2025-01-11 14:10:22.995431371 +0100
@@ -0,0 +1,112 @@
+/* PR tree-optimization/117997 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-fpie" { target pie } } */
+
+__attribute__((noipa)) unsigned char
+foo (const unsigned char *data, unsigned int len)
+{
+  __builtin_abort ();
+}
+
+__attribute__((noipa)) unsigned short
+bar (const unsigned char *data, unsigned int len)
+{
+  __builtin_abort ();
+}
+
+static unsigned char
+baz (unsigned char byte, unsigned char crc)
+{
+  unsigned int i;
+  crc ^= byte;
+  for (i = 0; i < 8; i++)
+    crc = (crc << 1) ^ ((crc >> 7) ? 0x07 : 0);
+  return crc;
+}
+
+static unsigned short
+qux (unsigned char byte, unsigned short crc)
+{
+  unsigned int i;
+  crc ^= byte << 8;
+  for (i = 0; i < 8; i++)
+    crc = (crc << 1) ^ ((crc >> 15) ? 0x8005 : 0);
+  return crc;
+}
+
+__attribute__((noipa)) int
+corge (const unsigned char *data)
+{
+  const unsigned char expected[] = {
+    0x00, 0x07, 0x1b, 0x48, 0xe3, 0xbc, 0x2f, 0xd8,
+    0x3e, 0x85, 0xa4, 0x44, 0xff, 0xd0, 0x14, 0x41,
+    0xb0, 0x6e, 0x73, 0x27, 0x99, 0xad, 0x28, 0xbd,
+    0x72, 0x16, 0x24, 0xbd, 0x6e, 0x5e, 0xc7, 0x06
+  };
+  for (int i = 0; i < 32; ++i)
+    if (data[i] != expected[i])
+      __builtin_abort ();
+  return 1;
+}
+
+static int
+garply (const unsigned char *data, unsigned long size)
+{
+  unsigned int i;
+  unsigned char crc0, crc1;
+  crc0 = 0;
+  crc1 = foo (data, 0);
+  if (crc1 != crc0)
+    return 0;
+  for (i = 0; i < size; i++)
+    {
+      crc0 = baz (data[i], crc0);
+      crc1 = foo (data, i + 1);
+      if (crc1 != crc0)
+       return 0;
+    }
+  return 1;
+}
+
+static int
+freddy (const unsigned char *data, unsigned long size)
+{
+  unsigned int i;
+  unsigned short crc0, crc1;
+  crc0 = 0;
+  crc1 = bar (data, 0);
+  if (crc1 != crc0)
+    return 0;
+  for (i = 0; i < size; i++)
+    {
+      crc0 = qux (data[i], crc0);
+      crc1 = bar (data, i + 1);
+      if (crc1 != crc0)
+       return 0;
+    }
+  return 1;
+}
+
+__attribute__((noipa)) int
+blah (void)
+{
+  unsigned int i;
+  unsigned char data[64] = { 0 };
+  for (i = 1; i < 64; i++)
+    data[i] = baz (i % 256, data[i - 1]);
+  if (corge (data))
+    return 0;
+  if (!garply (data, 64))
+    return 1;
+  if (!freddy (data, 64))
+    return 1;
+   return 1;
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 && blah ())
+    __builtin_abort ();
+}
--- gcc/testsuite/gcc.dg/pr118415.c.jj  2025-01-11 14:09:40.818020888 +0100
+++ gcc/testsuite/gcc.dg/pr118415.c     2025-01-11 14:09:26.782217070 +0100
@@ -0,0 +1,25 @@
+/* PR middle-end/118415 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+unsigned char crc_table_for_crc_8_polynomial_0x7[2] = { 0xff, 0xff };
+
+static unsigned char
+foo (unsigned char byte, unsigned char crc)
+{
+  unsigned int i;
+  crc ^= byte;
+  for (i = 0; i < 8; i++)
+    crc = (crc << 1) ^ ((crc >> 7) ? 0x07 : 0);
+  return crc;
+}
+
+int
+main ()
+{
+  volatile unsigned char byte = 1;
+  volatile unsigned char crc = 0;
+  crc = foo (byte, crc);
+  if (__CHAR_BIT__ == 8 && crc != 7)
+    __builtin_abort ();
+}

        Jakub

Reply via email to