On 1/2/25 4:28 AM, Jakub Jelinek wrote:
Hi!

The embed-17.C testcase is miscompiled and pr118214.C testcase used to be
miscompiled on the trunk before I've temporarily reverted the
r15-6339 C++ large initializer speed-up commit in r15-6448.
The problem is that reshape_* is only sometimes allowed to modify the given
CONSTRUCTOR in place (when reuse is true, so
                first_initializer_p
                 && (complain & tf_error)
                 && !CP_AGGREGATE_TYPE_P (elt_type)
                 && !TREE_SIDE_EFFECTS (first_initializer_p)
) and at other times is not allowed to change it.  But the RAW_DATA_CST
handling was modifying those in place always, by peeling off whatever
was needed for the processing of the current element or set of elements
and leaving the rest in the original CONSTRUCTOR_ELTS, either as
RAW_DATA_CST with adjusted RAW_DATA_POINTER/RAW_DATA_LENGTH, or turning
it into INTEGER_CST if it would be a RAW_DATA_LENGTH == 1 RAW_DATA_CST.

The following patch fixes that by adding raw_idx member into
struct reshape_iter where we for the RAW_DATA_CST current elements track
offset into the current RAW_DATA_CST (how many elements were processed
from it already) and modifying the original CONSTRUCTOR_ELTS only if reuse
is true and we used the whole RAW_DATA_CST (with zero raw_idx); which means
just modifying its type in place.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-01-02  Jakub Jelinek  <ja...@redhat.com>

        PR c++/118214
        * decl.cc (struct reshape_iter): Add raw_idx member.
        (cp_maybe_split_raw_data): Add inc_cur parameter, set *inc_cur,
        don't modify original CONSTRUCTOR, use d->raw_idx to track index
        into a RAW_DATA_CST d->cur->value.
        (consume_init): Adjust cp_maybe_split_raw_data caller, increment
        d->cur when cur_inc is true.
        (reshape_init_array_1): Don't modify original CONSTRUCTOR when
        handling RAW_DATA_CST d->cur->value and !reuse, instead use
        d->raw_idx to track index into RAW_DATA_CST.
        (reshape_single_init): Initialize iter.raw_idx.
        (reshape_init_class): Adjust for introduction of d->raw_idx,
        adjust cp_maybe_split_raw_data caller, do d->cur++ if inc_cur
        rather than when it returns non-NULL.
        (reshape_init_r): Likewise.
        (reshape_init): Initialize d.raw_idx.

        * g++.dg/cpp/embed-17.C: New test.
        * g++.dg/cpp0x/pr118214.C: New test.

--- gcc/cp/decl.cc.jj   2024-12-27 16:03:52.622536496 +0100
+++ gcc/cp/decl.cc      2024-12-30 13:35:31.585034994 +0100
@@ -6816,11 +6816,13 @@ check_for_uninitialized_const_var (tree
  
  /* Structure holding the current initializer being processed by reshape_init.
     CUR is a pointer to the current element being processed, END is a pointer
-   after the last element present in the initializer.  */
+   after the last element present in the initializer and RAW_IDX is index into
+   RAW_DATA_CST if that is CUR elt.  */
  struct reshape_iter
  {
    constructor_elt *cur;
    constructor_elt *end;
+  unsigned raw_idx;
  };
static tree reshape_init_r (tree, reshape_iter *, tree, tsubst_flags_t);
@@ -6888,18 +6890,20 @@ is_direct_enum_init (tree type, tree ini
  }
/* Helper function for reshape_init*. Split first element of
-   RAW_DATA_CST and save the rest to d->cur->value.  */
+   RAW_DATA_CST or return NULL for other elements.  Set *INC_CUR
+   to true if the whole d->cur has been consumed.  */
static tree
-cp_maybe_split_raw_data (reshape_iter *d)
+cp_maybe_split_raw_data (reshape_iter *d, bool *inc_cur)
  {
+  *inc_cur = true;
    if (TREE_CODE (d->cur->value) != RAW_DATA_CST)
      return NULL_TREE;
-  tree ret = *raw_data_iterator (d->cur->value, 0);
-  ++RAW_DATA_POINTER (d->cur->value);
-  --RAW_DATA_LENGTH (d->cur->value);
-  if (RAW_DATA_LENGTH (d->cur->value) == 1)
-    d->cur->value = *raw_data_iterator (d->cur->value, 0);
+  tree ret = *raw_data_iterator (d->cur->value, d->raw_idx++);
+  if (d->raw_idx != (unsigned) RAW_DATA_LENGTH (d->cur->value))
+    *inc_cur = false;
+  else
+    d->raw_idx = 0;
    return ret;
  }
@@ -6911,9 +6915,11 @@ cp_maybe_split_raw_data (reshape_iter *d
  static tree
  consume_init (tree init, reshape_iter *d)
  {
-  if (tree raw_init = cp_maybe_split_raw_data (d))
-    return raw_init;
-  d->cur++;
+  bool inc_cur;
+  if (tree raw_init = cp_maybe_split_raw_data (d, &inc_cur))
+    init = raw_init;
+  if (inc_cur)
+    d->cur++;
    return init;
  }
@@ -6972,10 +6978,8 @@ reshape_init_array_1 (tree elt_type, tre
      {
        tree elt_init;
        constructor_elt *old_cur = d->cur;
-      const char *old_raw_data_ptr = NULL;
-
-      if (TREE_CODE (d->cur->value) == RAW_DATA_CST)
-       old_raw_data_ptr = RAW_DATA_POINTER (d->cur->value);
+      unsigned int old_raw_idx = d->raw_idx;
+      bool old_raw_data_cst = TREE_CODE (d->cur->value) == RAW_DATA_CST;
if (d->cur->index)
        CONSTRUCTOR_IS_DESIGNATED_INIT (new_init) = true;
@@ -6988,25 +6992,23 @@ reshape_init_array_1 (tree elt_type, tre
          && !vector_p)
        {
          elt_init = d->cur->value;
-         if (!sized_array_p
-             || ((unsigned) RAW_DATA_LENGTH (d->cur->value)
-                 <= max_index_cst - index + 1))
-           d->cur++;
+         unsigned int off = d->raw_idx;
+         unsigned int len = RAW_DATA_LENGTH (elt_init) - off;
+         if (!sized_array_p || len <= max_index_cst - index + 1)
+           {
+             d->cur++;
+             d->raw_idx = 0;
+           }
          else
            {
-             unsigned int len = max_index_cst - index + 1;
-             if ((unsigned) RAW_DATA_LENGTH (d->cur->value) == len + 1)
-               d->cur->value
-                 = build_int_cst (integer_type_node,
-                                  *(const unsigned char *)
-                                  RAW_DATA_POINTER (d->cur->value) + len);
-             else
-               {
-                 d->cur->value = copy_node (elt_init);
-                 RAW_DATA_LENGTH (d->cur->value) -= len;
-                 RAW_DATA_POINTER (d->cur->value) += len;
-               }
+             len = max_index_cst - index + 1;
+             d->raw_idx += len;
+           }
+         if (!reuse || off || d->cur == old_cur)
+           {
+             elt_init = copy_node (elt_init);
              RAW_DATA_LENGTH (elt_init) = len;
+             RAW_DATA_POINTER (elt_init) += off;
            }
          TREE_TYPE (elt_init) = elt_type;
        }
@@ -7017,7 +7019,7 @@ reshape_init_array_1 (tree elt_type, tre
        if (elt_init == error_mark_node)
        return error_mark_node;
        tree idx = size_int (index);
-      if (reuse && old_raw_data_ptr && d->cur == old_cur)
+      if (reuse && old_raw_data_cst && d->cur == old_cur)
        {
          /* We need to stop reusing as some RAW_DATA_CST in the original
             ctor had to be split.  */
@@ -7058,9 +7060,7 @@ reshape_init_array_1 (tree elt_type, tre
        /* This can happen with an invalid initializer (c++/54501).  */
        if (d->cur == old_cur
          && !sized_array_p
-         && (old_raw_data_ptr == NULL
-             || (TREE_CODE (d->cur->value) == RAW_DATA_CST
-                 && RAW_DATA_POINTER (d->cur->value) == old_raw_data_ptr)))
+         && d->raw_idx == old_raw_idx)
        break;
if (TREE_CODE (elt_init) == RAW_DATA_CST)
@@ -7129,7 +7129,7 @@ reshape_single_init (tree type, tree ini
    /* We could also implement this by wrapping init in a new CONSTRUCTOR and
       calling reshape_init, but this way can just live on the stack.  */
    constructor_elt elt = { /*index=*/NULL_TREE, init };
-  reshape_iter iter = { &elt, &elt + 1 };
+  reshape_iter iter = { &elt, &elt + 1, 0 };
    return reshape_init_r (type, &iter,
                         /*first_initializer_p=*/NULL_TREE,
                         complain);
@@ -7190,12 +7190,9 @@ reshape_init_class (tree type, reshape_i
      {
        tree field_init;
        constructor_elt *old_cur = d->cur;
-      const char *old_raw_data_ptr = NULL;
+      unsigned old_raw_idx = d->raw_idx;
        bool direct_desig = false;
- if (TREE_CODE (d->cur->value) == RAW_DATA_CST)
-       old_raw_data_ptr = RAW_DATA_POINTER (d->cur->value);
-
        /* Handle C++20 designated initializers.  */
        if (d->cur->index)
        {
@@ -7322,11 +7319,7 @@ reshape_init_class (tree type, reshape_i
        if (field_init == error_mark_node)
        return error_mark_node;
- if (d->cur == old_cur
-         && d->cur->index
-         && (old_raw_data_ptr == NULL
-             || (TREE_CODE (d->cur->value) == RAW_DATA_CST
-                 && RAW_DATA_POINTER (d->cur->value) == old_raw_data_ptr)))
+      if (d->cur == old_cur && d->cur->index && d->raw_idx == old_raw_idx)
        {
          /* This can happen with an invalid initializer for a flexible
             array member (c++/54441).  */
@@ -7362,7 +7355,8 @@ reshape_init_class (tree type, reshape_i
    if (last_was_pack_expansion)
      {
        tree init = d->cur->value;
-      if (tree raw_init = cp_maybe_split_raw_data (d))
+      bool inc_cur;
+      if (tree raw_init = cp_maybe_split_raw_data (d, &inc_cur))
        init = raw_init;
        CONSTRUCTOR_APPEND_ELT (CONSTRUCTOR_ELTS (new_init),
                              last_was_pack_expansion, init);
@@ -7432,12 +7426,18 @@ reshape_init_r (tree type, reshape_iter
        {
          vec<constructor_elt, va_gc> *v = 0;
          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init);
-         tree raw_init = cp_maybe_split_raw_data (d);
+         bool inc_cur;
+         reshape_iter dsave = *d;
+         tree raw_init = cp_maybe_split_raw_data (d, &inc_cur);
          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
                                  raw_init ? raw_init : d->cur->value);
          if (has_designator_problem (d, complain))
-           return error_mark_node;
-         if (!raw_init)
+           {
+             if (!inc_cur)
+               *d = dsave;

Why restore *d on the error path? Won't the caller just return error_mark_node as well?

+             return error_mark_node;
+           }
+         if (inc_cur)
            d->cur++;
          init = build_constructor (init_list_type_node, v);
        }
@@ -7705,6 +7705,7 @@ reshape_init (tree type, tree init, tsub
    /* Recurse on this CONSTRUCTOR.  */
    d.cur = &(*v)[0];
    d.end = d.cur + v->length ();
+  d.raw_idx = 0;
new_init = reshape_init_r (type, &d, init, complain);
    if (new_init == error_mark_node)
--- gcc/testsuite/g++.dg/cpp/embed-17.C.jj      2024-12-30 11:35:19.872880361 
+0100
+++ gcc/testsuite/g++.dg/cpp/embed-17.C 2024-12-30 11:40:15.301700534 +0100
@@ -0,0 +1,24 @@
+// PR c++/118214
+// { dg-do run { target c++11 } }
+// { dg-options "" }
+
+struct A { int a[256]; };
+unsigned char b[] = {
+#embed __FILE__ limit (160)
+};
+
+void
+foo (A a)
+{
+  for (int i = 0; i < 256; ++i)
+    if (a.a[i] != (i < sizeof (b) ? b[i] : 0))
+      __builtin_abort ();
+}
+
+int
+main ()
+{
+  foo ({
+#embed __FILE__ limit (160)
+       });
+}
--- gcc/testsuite/g++.dg/cpp0x/pr118214.C.jj    2024-12-30 13:41:25.572219186 
+0100
+++ gcc/testsuite/g++.dg/cpp0x/pr118214.C       2024-12-30 13:40:40.018838914 
+0100
@@ -0,0 +1,26 @@
+// PR c++/118214
+// { dg-do run { target c++11 } }
+
+struct A { int a[256]; };
+
+void
+foo (A a)
+{
+  for (int i = 0; i < 256; ++i)
+    if (a.a[i] != (i < 130 && (i & 1) == 0))
+      __builtin_abort ();
+}
+
+int
+main ()
+{
+  foo ({ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+        1, 0, });
+}

        Jakub


Reply via email to