date:20250528

[PATCH v4 13/20] Refactor riscv target parsing to take string_slice.

2025-05-28 Thread Alfie Richards

This is a quick refactor of the riscv target processing code
to take a string_slice rather than a decl.

The reason for this is to enable it to work with target_clones
where merging logic requires reasoning about each version string
individually in the front end.

This refactor primarily serves just to get this working. Ideally the
logic here would be further refactored, as currently there is no way to
check if a parse fails or not without emitting an error.
This makes things difficult for later patches which intend to emit a
warning and ignore unrecognised/not parsed target_clone values rather
than erroring, which can't currently be achieved with the current riscv
code.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_process_target_version_str): New
function.
* config/riscv/riscv-target-attr.cc (riscv_process_target_attr): 
Refactor to take
string_slice.
(riscv_process_target_version_str): Ditto.
* config/riscv/riscv.cc (parse_features_for_version): Refactor to take
string_slice.
(riscv_compare_version_priority): Ditto.
(dispatch_function_versions): Change to pass location.
---
 gcc/config/riscv/riscv-protos.h   |  2 ++
 gcc/config/riscv/riscv-target-attr.cc | 14 +---
 gcc/config/riscv/riscv.cc | 50 ++-
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2bedd878a04..1efe45d63e6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -813,6 +813,8 @@ riscv_option_valid_attribute_p (tree, tree, tree, int);
 extern bool
 riscv_option_valid_version_attribute_p (tree, tree, tree, int);
 extern bool
+riscv_process_target_version_str (string_slice, location_t);
+extern bool
 riscv_process_target_version_attr (tree, location_t);
 extern void
 riscv_override_options_internal (struct gcc_options *);
diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index 1d968655f95..d3f06fb15d4 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -354,11 +354,11 @@ num_occurrences_in_str (char c, char *str)
and update the global target options space.  */
 
 bool
-riscv_process_target_attr (const char *args,
+riscv_process_target_attr (string_slice args,
   location_t loc,
   const struct riscv_attribute_info *attrs)
 {
-  size_t len = strlen (args);
+  size_t len = args.size ();
 
   /* No need to emit warning or error on empty string here, generic code 
already
  handle this case.  */
@@ -369,7 +369,7 @@ riscv_process_target_attr (const char *args,
 
   std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get ();
-  strcpy (str_to_check, args);
+  strncpy (str_to_check, args.begin (), args.size ());
 
   /* Used to catch empty spaces between semi-colons i.e.
  attribute ((target ("attr1;;attr2"))).  */
@@ -391,8 +391,7 @@ riscv_process_target_attr (const char *args,
 
   if (num_attrs != num_semicolons + 1)
 {
-  error_at (loc, "malformed % attribute",
-   args);
+  error_at (loc, "malformed % attribute", &args);
   return false;
 }
 
@@ -513,6 +512,11 @@ riscv_process_target_version_attr (tree args, location_t 
loc)
   return riscv_process_target_attr (str, loc, riscv_target_version_attrs);
 }
 
+bool
+riscv_process_target_version_str (string_slice str, location_t loc)
+{
+  return riscv_process_target_attr (str, loc, riscv_target_version_attrs);
+}
 
 /* Implement TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P.  This is used to
process attribute ((target_version ("..."))).  */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 946658e0d5e..ddeb321cb44 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13092,31 +13092,22 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
   return default_mode_for_floating_type (ti);
 }
 
-/* This parses the attribute arguments to target_version in DECL and modifies
-   the feature mask and priority required to select those targets.  */
-static void
-parse_features_for_version (tree decl,
+/* This parses STR and modifies the feature mask and priority required to
+   select those targets.  */
+static bool
+parse_features_for_version (string_slice version_str,
+   location_t loc,
struct riscv_feature_bits &res,
int &priority)
 {
-  tree version_attr = lookup_attribute ("target_version",
-   DECL_ATTRIBUTES (decl));
-  if (version_attr == NULL_TREE)
+  gcc_assert (version_str.is_valid ());
+  if (version_str == "default")
 {
   res.length = 0;
   priority = 0;
-  return;
+  return true;
 }
 
-  const char *version_string = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE
-

[PATCH v4 07/20] Change make_attribute to take string_slice.

2025-05-28 Thread Alfie Richards

gcc/ChangeLog:

* attribs.cc (make_attribute): Change arguments.
* attribs.h (make_attribute): Change arguments.

Approved by Richard Sandiford.
---
 gcc/attribs.cc | 16 +---
 gcc/attribs.h  |  2 +-
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index f6667839c01..3fce9d62525 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1076,21 +1076,15 @@ apply_tm_attr (tree fndecl, tree attr)
it to CHAIN.  */
 
 tree
-make_attribute (const char *name, const char *arg_name, tree chain)
+make_attribute (string_slice name, string_slice arg_name, tree chain)
 {
-  tree attr_name;
-  tree attr_arg_name;
-  tree attr_args;
-  tree attr;
-
-  attr_name = get_identifier (name);
-  attr_arg_name = build_string (strlen (arg_name), arg_name);
-  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
-  attr = tree_cons (attr_name, attr_args, chain);
+  tree attr_name = get_identifier_with_length (name.begin (), name.size ());
+  tree attr_arg_name = build_string (arg_name.size (), arg_name.begin ());
+  tree attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
+  tree attr = tree_cons (attr_name, attr_args, chain);
   return attr;
 }
 
-
 /* Common functions used for target clone support.  */
 
 /* Comparator function to be used in qsort routine to sort attribute
diff --git a/gcc/attribs.h b/gcc/attribs.h
index 4b946390f76..b8b6838599c 100644
--- a/gcc/attribs.h
+++ b/gcc/attribs.h
@@ -45,7 +45,7 @@ extern bool cxx11_attribute_p (const_tree);
 extern tree get_attribute_name (const_tree);
 extern tree get_attribute_namespace (const_tree);
 extern void apply_tm_attr (tree, tree);
-extern tree make_attribute (const char *, const char *, tree);
+extern tree make_attribute (string_slice, string_slice, tree);
 extern bool attribute_ignored_p (tree);
 extern bool attribute_ignored_p (const attribute_spec *const);
 extern bool any_nonignored_attribute_p (tree);
-- 
2.34.1

[PATCH v1] testsuite: Remove spurious comments [PR117025]

2025-05-28 Thread Alejandro Colomar

PR c/117025

gcc/testsuite/ChangeLog:

* gcc.dg/countof-vla.c: Remove spurious comments.
* gcc.dg/countof-zero-compile.c: Remove spurious comments.

Fixes: 517c9487f8fd (2025-05-27; "c: Add _Countof operator [PR117025]")
Reported-by: Sam James 
Signed-off-by: Alejandro Colomar 
---

Hi Sam,

Thanks for catching that!  Here's a fix.  We can just drop those
comments.


Have a lovely day!
Alex


 gcc/testsuite/gcc.dg/countof-vla.c  | 16 
 gcc/testsuite/gcc.dg/countof-zero-compile.c |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/countof-vla.c 
b/gcc/testsuite/gcc.dg/countof-vla.c
index cc225df20689..68e650c4c844 100644
--- a/gcc/testsuite/gcc.dg/countof-vla.c
+++ b/gcc/testsuite/gcc.dg/countof-vla.c
@@ -6,7 +6,7 @@ void fix_fix (int i,
  int (*x)[_Countof (*a)],
  short (*)[_Generic(x, int (*)[3]: 1)]);
 void fix_var (int i,
- char (*a)[3][i], /* dg-warn "variable" */
+ char (*a)[3][i],
  int (*x)[_Countof (*a)],
  short (*)[_Generic(x, int (*)[3]: 1)]);
 void fix_uns (int i,
@@ -15,20 +15,20 @@ void fix_uns (int i,
  short (*)[_Generic(x, int (*)[3]: 1)]);
 
 void var_fix (int i,
- char (*a)[i][5], /* dg-warn "variable" */
- int (*x)[_Countof (*a)]); /* dg-warn "variable" */
+ char (*a)[i][5],
+ int (*x)[_Countof (*a)]);
 void var_var (int i,
- char (*a)[i][i], /* dg-warn "variable" */
- int (*x)[_Countof (*a)]); /* dg-warn "variable" */
+ char (*a)[i][i],
+ int (*x)[_Countof (*a)]);
 void var_uns (int i,
- char (*a)[i][*], /* dg-warn "variable" */
- int (*x)[_Countof (*a)]); /* dg-warn "variable" */
+ char (*a)[i][*],
+ int (*x)[_Countof (*a)]);
 
 void uns_fix (int i,
  char (*a)[*][5],
  int (*x)[_Countof (*a)]);
 void uns_var (int i,
- char (*a)[*][i], /* dg-warn "variable" */
+ char (*a)[*][i],
  int (*x)[_Countof (*a)]);
 void uns_uns (int i,
  char (*a)[*][*],
diff --git a/gcc/testsuite/gcc.dg/countof-zero-compile.c 
b/gcc/testsuite/gcc.dg/countof-zero-compile.c
index b561186166c3..bae9cb6969d3 100644
--- a/gcc/testsuite/gcc.dg/countof-zero-compile.c
+++ b/gcc/testsuite/gcc.dg/countof-zero-compile.c
@@ -19,7 +19,7 @@ void zro_fix (int i,
  int (*x)[_Countof (*a)],
  short (*)[_Generic(x, int (*)[0]: 1)]);
 void zro_var (int i,
- char (*a)[0][i], /* dg-warn "variable" */
+ char (*a)[0][i],
  int (*x)[_Countof (*a)],
  short (*)[_Generic(x, int (*)[0]: 1)]);
 void zro_uns (int i,

Range-diff against v0:
-:   > 1:  567a0c44c1ea testsuite: Remove spurious comments 
[PR117025]
-- 
2.49.0

[PATCH v4 17/20] Support mixing of target_clones and target_version.

2025-05-28 Thread Alfie Richards

Add support for a FMV set defined by a combination of target_clones and
target_version definitions.

Additionally, change is_function_default_version to consider a function
declaration annotated with target_clones containing default to be a
default version.

Lastly, add support for the case that a target_clone has all versions filtered
out and therefore the declaration should be removed. This is relevant as now
the default could be defined in a target_version, so a target_clones no longer
necessarily contains the default.

This takes advantage of refactoring done in previous patches changing
how target_clones are expanded and how conflicting decls are handled.

gcc/ChangeLog:

* attribs.cc (is_function_default_version): Update to handle
target_clones.
* cgraph.h (FOR_EACH_FUNCTION_REMOVABLE): New macro.
* multiple_target.cc (expand_target_clones): Update logic to delete
empty target_clones and modify diagnostic.
(ipa_target_clone): Update to use
FOR_EACH_FUNCTION_REMOVABLE.

gcc/c-family/ChangeLog:

* c-attribs.cc: Add support for target_version and target_clone mixing.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-and-mvc1.C: New test.
* g++.target/aarch64/mv-and-mvc2.C: New test.
* g++.target/aarch64/mv-and-mvc3.C: New test.
* g++.target/aarch64/mv-and-mvc4.C: New test.
---
 gcc/attribs.cc| 10 -
 gcc/c-family/c-attribs.cc |  9 +---
 gcc/cgraph.h  |  7 
 gcc/multiple_target.cc| 24 +--
 .../g++.target/aarch64/mv-and-mvc1.C  | 38 +
 .../g++.target/aarch64/mv-and-mvc2.C  | 29 +
 .../g++.target/aarch64/mv-and-mvc3.C  | 41 +++
 .../g++.target/aarch64/mv-and-mvc4.C  | 38 +
 8 files changed, 183 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-and-mvc1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-and-mvc2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-and-mvc3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-and-mvc4.C

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 06785eaa136..2ca82674f7c 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1242,7 +1242,8 @@ make_dispatcher_decl (const tree decl)
With the target attribute semantics, returns true if the function is marked
as default with the target version.
With the target_version attribute semantics, returns true if the function
-   is either not annotated, or annotated as default.  */
+   is either not annotated, annotated as default, or is a target_clone
+   containing the default declaration.  */
 
 bool
 is_function_default_version (const tree decl)
@@ -1259,6 +1260,13 @@ is_function_default_version (const tree decl)
 }
   else
 {
+  if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (decl)))
+   {
+ int num_defaults = 0;
+ get_clone_versions (decl, &num_defaults);
+ return num_defaults > 0;
+   }
+
   attr = lookup_attribute ("target_version", DECL_ATTRIBUTES (decl));
   if (!attr)
return true;
diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index b5287f0da06..a4e657d9ffd 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -249,13 +249,6 @@ static const struct attribute_spec::exclusions 
attr_target_clones_exclusions[] =
   ATTR_EXCL ("always_inline", true, true, true),
   ATTR_EXCL ("target", TARGET_HAS_FMV_TARGET_ATTRIBUTE,
 TARGET_HAS_FMV_TARGET_ATTRIBUTE, TARGET_HAS_FMV_TARGET_ATTRIBUTE),
-  ATTR_EXCL ("target_version", true, true, true),
-  ATTR_EXCL (NULL, false, false, false),
-};
-
-static const struct attribute_spec::exclusions 
attr_target_version_exclusions[] =
-{
-  ATTR_EXCL ("target_clones", true, true, true),
   ATTR_EXCL (NULL, false, false, false),
 };
 
@@ -543,7 +536,7 @@ const struct attribute_spec c_common_gnu_attributes[] =
  attr_target_exclusions },
   { "target_version", 1, 1, true, false, false, false,
  handle_target_version_attribute,
- attr_target_version_exclusions },
+ NULL },
   { "target_clones",  1, -1, true, false, false, false,
  handle_target_clones_attribute,
  attr_target_clones_exclusions },
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 0eed6a9d46d..fb89a7b5919 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -3093,6 +3093,13 @@ symbol_table::next_function_with_gimple_body 
(cgraph_node *node)
for ((node) = symtab->first_function (); (node); \
(node) = symtab->next_function ((node)))
 
+/* Walk all functions but precompute so a node can be deleted if needed.  */
+#define FOR_EACH_FUNCTION_REMOVABLE(node) \
+   cg

[PATCH v4 19/20] Add diagnostic tests for Aarch64 FMV.

2025-05-28 Thread Alfie Richards

Add tests covering many FMV errors for Aarch64, including
redeclaration, and mixing target_clones and target_versions.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-and-mvc-error1.C: New test.
* g++.target/aarch64/mv-and-mvc-error2.C: New test.
* g++.target/aarch64/mv-and-mvc-error3.C: New test.
* g++.target/aarch64/mv-error1.C: New test.
* g++.target/aarch64/mv-error2.C: New test.
* g++.target/aarch64/mv-error3.C: New test.
* g++.target/aarch64/mv-error4.C: New test.
* g++.target/aarch64/mv-error5.C: New test.
* g++.target/aarch64/mv-error6.C: New test.
* g++.target/aarch64/mv-error7.C: New test.
* g++.target/aarch64/mv-error8.C: New test.
* g++.target/aarch64/mvc-error1.C: New test.
* g++.target/aarch64/mvc-error2.C: New test.
* g++.target/aarch64/mvc-warning1.C: Modified test.
---
 .../g++.target/aarch64/mv-and-mvc-error1.C| 10 +
 .../g++.target/aarch64/mv-and-mvc-error2.C| 10 +
 .../g++.target/aarch64/mv-and-mvc-error3.C|  9 
 gcc/testsuite/g++.target/aarch64/mv-error1.C  | 19 +
 gcc/testsuite/g++.target/aarch64/mv-error2.C  | 10 +
 gcc/testsuite/g++.target/aarch64/mv-error3.C  | 13 
 gcc/testsuite/g++.target/aarch64/mv-error4.C  | 10 +
 gcc/testsuite/g++.target/aarch64/mv-error5.C  |  9 
 gcc/testsuite/g++.target/aarch64/mv-error6.C  | 21 +++
 gcc/testsuite/g++.target/aarch64/mv-error7.C  | 12 +++
 gcc/testsuite/g++.target/aarch64/mv-error8.C  | 13 
 gcc/testsuite/g++.target/aarch64/mvc-error1.C | 10 +
 gcc/testsuite/g++.target/aarch64/mvc-error2.C | 10 +
 .../g++.target/aarch64/mvc-warning1.C | 12 +--
 14 files changed, 166 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error4.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error5.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error6.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error7.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mv-error8.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mvc-error1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/mvc-error2.C

diff --git a/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C 
b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C
new file mode 100644
index 000..00d3826f757
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+/* { dg-additional-options "-Wno-experimental-fmv-target" } */
+
+__attribute__ ((target_version ("dotprod"))) int
+foo () { return 3; } /* { dg-message "previous definition" } */
+
+__attribute__ ((target_clones ("dotprod", "sve"))) int
+foo () { return 1; } /* { dg-error "conflicting .dotprod. versions" } */
diff --git a/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C 
b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C
new file mode 100644
index 000..bf8a4112a21
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+/* { dg-additional-options "-Wno-experimental-fmv-target" } */
+
+__attribute__ ((target_version ("default"))) int
+foo () { return 1; } /* { dg-message "old declaration" } */
+
+__attribute__ ((target_clones ("dotprod", "sve"))) float
+foo () { return 3; } /* { dg-error "ambiguating new declaration of" } */
diff --git a/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error3.C 
b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error3.C
new file mode 100644
index 000..3233a98d1ad
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error3.C
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+/* { dg-additional-options "-Wno-experimental-fmv-target" } */
+
+float foo () { return 1; } /* { dg-message "previous definition" } */
+
+__attribute__ ((target_clones ("default", "dotprod", "sve"))) float
+foo () { return 3; } /* { dg-error "conflicting .default. versions" } */
diff --git a/gcc/testsuite/g++.target/aarch64/mv-error1.C 
b/gcc/testsuite/g++.target/aarch64/mv-error1.C
new file mode 100644
index 000..0b9642c9ab6
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-error1.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0"

[PATCH v4 12/20] Refactor FMV name mangling.

2025-05-28 Thread Alfie Richards

This patch is an overhaul of how FMV name mangling works. Previously
mangling logic was duplicated in several places across both target
specific and independent code. This patch changes this such that all
mangling is done in targetm.mangle_decl_assembler_name (including for the
dispatched symbol and dispatcher resolver).

This allows for the removing of previous hacks, such as where the default
mangled decl's assembler name was unmangled to then remangle all versions
and the resolver and dispatched symbol.

This introduces a change (shown in test changes) for the assembler name of the
dispatched symbol for a x86 versioned function set. Previously it used the
function name mangled twice. This was hard to reproduce without hacks I
wasn't comfortable with. Therefore, the mangling is changed to instead append
".ifunc" which matches clang's behavior.

This change also refactors expand_target_clone using
targetm.mangle_decl_assembler_name for mangling and get_clone_versions.
It is modified such that if the target_clone is in a FMV structure
the ordering is preserved once expanded. This is used later for ACLE semantics
and target_clone/target_version mixing.

gcc/ChangeLog:

* attribs.cc (make_dispatcher_decl): Move duplicated cgraph logic into
this function and change to use targetm.mangle_decl_assembler_name for
mangling.
* cgraph.cc (delete_function_version): Made public static member of
cgraph_node.
* cgraph.h (delete_function_version): Ditto.
* config/aarch64/aarch64.cc (aarch64_parse_fmv_features): Change to
support string_slice.
(aarch64_process_target_version_attr): Ditto.
(get_feature_mask_for_version): Ditto.
(aarch64_mangle_decl_assembler_name): Add logic for mangling dispatched
symbol and resolver.
(get_suffixed_assembler_name): Removed.
(make_resolver_func): Refactor to use
aarch64_mangle_decl_assembler_name for mangling.
(aarch64_generate_version_dispatcher_body): Remove remangling.
(aarch64_get_function_versions_dispatcher): Refactor to remove
duplicated cgraph logic.
* config/i386/i386-features.cc (is_valid_asm_symbol): Moved from
multiple_target.cc.
(create_new_asm_name): Ditto.
(ix86_mangle_function_version_assembler_name): Refactor to use
clone_identifier and to mangle default.
(ix86_mangle_decl_assembler_name): Add logic for mangling dispatched
symbol and resolver.
(ix86_get_function_versions_dispatcher): Remove duplicated cgraph
logic.
(make_resolver_func): Refactor to use ix86_mangle_decl_assembler_name
for mangling.
* config/riscv/riscv.cc (riscv_mangle_decl_assembler_name): Add logic
for FMV mangling.
(get_suffixed_assembler_name): Removed.
(make_resolver_func): Refactor to use riscv_mangle_decl_assembler_name
for mangling.
(riscv_generate_version_dispatcher_body): Remove unnecessary remangling.
(riscv_get_function_versions_dispatcher): Remove duplicated cgraph
logic.
* config/rs6000/rs6000.cc (rs6000_mangle_decl_assembler_name): New
function.
(rs6000_get_function_versions_dispatcher): Remove duplicated cgraph
logic.
(make_resolver_func): Refactor to use rs6000_mangle_decl_assembler_name
for mangling.
(is_valid_asm_symbol): Move from multiple_target.cc.
(create_new_asm_name): Ditto.
(rs6000_mangle_function_version_assembler_name): New function.
* multiple_target.cc (create_dispatcher_calls): Remove mangling code.
(get_attr_str): Removed.
(separate_attrs): Ditto.
(is_valid_asm_symbol): Moved to target specific.
(create_new_asm_name): Ditto.
(expand_target_clones): Refactor to use
targetm.mangle_decl_assembler_name for mangling and be more general.
* tree.cc (get_target_clone_attr_len): Removed.
* tree.h (get_target_clone_attr_len): Removed.

gcc/cp/ChangeLog:

* decl.cc (maybe_mark_function_versioned): Change to insert function 
version
and therefore record assembler name.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv-symbols1.C: Update x86 FMV mangling.
* g++.target/i386/mv-symbols3.C: Ditto.
* g++.target/i386/mv-symbols4.C: Ditto.
* g++.target/i386/mv-symbols5.C: Ditto.
---
 gcc/attribs.cc  |  45 +++-
 gcc/cgraph.cc   |   4 +-
 gcc/cgraph.h|   2 +
 gcc/config/aarch64/aarch64.cc   | 163 +---
 gcc/config/i386/i386-features.cc| 108 +---
 gcc/config/riscv/riscv.cc   | 110 +++-
 gcc/config/rs6000/rs6000.cc | 115 +++--
 gcc/cp/decl.cc  |   7 +
 gcc/multiple_target.cc  | 262 +++-
 gcc/testsu

[PATCH v4 15/20] Change target_version semantics to follow ACLE specification.

2025-05-28 Thread Alfie Richards

This patch changes the semantics of target_version and target_clones attributes
to match the behavior described in the Arm C Language extension.

The changes to behavior are:

- The scope and signature of an FMV function set is now that of the default
  version.
- The FMV resolver is now created at the locations of the default version
  implementation. Previously this was at the first call to an FMV function.
- When a TU has a single annotated function version, it gets mangled.
  - This includes a lone annotated default version.

This only affects targets with TARGET_HAS_FMV_TARGET_ATTRIBUTE set to false.
Currently that is aarch64 and riscv.

This is achieved by:

- Skipping the existing FMV dispatching code at C++ gimplification and instead
  making use of the target_clones dispatching code in multiple_target.cc.
  (This fixes PR target/118313 for aarch64 and riscv).
- Splitting target_clones pass in two, an early and late pass, where the early
  pass handles cases where multiple declarations are used to define a version,
  and the late pass handles target semantics targets, and cases where a FMV
  set is defined by a single target_clones decl.
- Changing the logic in add_candidates and resolve_address of overloaded
  function to prevent resolution of any version except a default version.
  (thus making the default version determine scope and signature of the
  versioned function set).
- Adding logic for dispatching a lone annotated default version in
  multiple_target.cc
  - As an annotated default version gets mangled, an alias is created from the
dispatched symbol to the default version as no ifunc resolution is required
in this case. (i.e., an alias from `_Z3foov_` to `_Z3foov.default`)
- Adding logic to symbol_table::remove_unreachable_nodes and analyze_functions
  that a reference to the default function version also implies a possible
  reference to the other versions (so they shouldn't be deleted and do need to
  be analyzed).

gcc/ChangeLog:

PR target/118313
* cgraphunit.cc (analyze_functions): Add logic for target version
dependencies.
* ipa.cc (symbol_table::remove_unreachable_nodes): Ditto.
* multiple_target.cc (create_dispatcher_calls): Change to support
target version semantics.
(ipa_target_clone): Change to dispatch all function sets in
target_version semantics, and to have early and late pass.
(is_simple_target_clones_case): New function.
* config/aarch64/aarch64.cc: (aarch64_get_function_versions_dispatcher):
Refactor with the assumption that the DECL node will be default.
* config/riscv/riscv.cc: (riscv_get_function_versions_dispatcher):
Refactor with the assumption that the DECL node will be default.
* passes.def: Split target_clones pass into early and late version.

gcc/cp/ChangeLog:

PR target/118313
* call.cc (add_candidates): Change to not resolve non-default versions 
in
target_version semantics.
* class.cc (resolve_address_of_overloaded_function): Ditto.
* cp-gimplify.cc (cp_genericize_r): Change logic to not apply for
target_version semantics.
* decl.cc (start_decl): Change to mark and therefore mangle all
target_version decls.
(start_preparsed_function): Ditto.
* typeck.cc (cp_build_function_call_vec): Add error for calling 
unresolvable
non-default node in target_version semantics.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-1.C: Change for target_version semantics.
* g++.target/aarch64/mv-symbols2.C: Ditto.
* g++.target/aarch64/mv-symbols3.C: Ditto.
* g++.target/aarch64/mv-symbols4.C: Ditto.
* g++.target/aarch64/mv-symbols5.C: Ditto.
* g++.target/aarch64/mvc-symbols3.C: Ditto.
* g++.target/riscv/mv-symbols2.C: Ditto.
* g++.target/riscv/mv-symbols3.C: Ditto.
* g++.target/riscv/mv-symbols4.C: Ditto.
* g++.target/riscv/mv-symbols5.C: Ditto.
* g++.target/riscv/mvc-symbols3.C: Ditto.
* g++.target/aarch64/mv-symbols10.C: New test.
* g++.target/aarch64/mv-symbols11.C: New test.
* g++.target/aarch64/mv-symbols12.C: New test.
* g++.target/aarch64/mv-symbols13.C: New test.
* g++.target/aarch64/mv-symbols6.C: New test.
* g++.target/aarch64/mv-symbols7.C: New test.
* g++.target/aarch64/mv-symbols8.C: New test.
* g++.target/aarch64/mv-symbols9.C: New test.
---
 gcc/cgraphunit.cc |   9 ++
 gcc/config/aarch64/aarch64.cc |  43 ++
 gcc/config/riscv/riscv.cc |  43 ++
 gcc/cp/call.cc|  10 ++
 gcc/cp/class.cc   |  13 +-
 gcc/cp/cp-gimplify.cc |  11 +-
 gcc/cp/decl.cc|  14 ++
 gcc/cp/typeck.cc  |  10 ++
 gcc/ipa.cc

[PATCH v4 18/20] Fix FMV return type ambiguation

2025-05-28 Thread Alfie Richards

Add logic for the case of two FMV annotated functions with identical
signature other than the return type.

Previously this was ignored; this changes the behavior to emit a diagnostic.

gcc/cp/ChangeLog:
PR c++/119498
* decl.cc (duplicate_decls): Change logic to not always exclude FMV
annotated functions in cases of return type non-ambiguation.
---
 gcc/cp/decl.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 4a374fa29e3..6494944e3ba 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -2022,8 +2022,11 @@ duplicate_decls (tree newdecl, tree olddecl, bool 
hiding, bool was_hidden)
}
  /* For function versions, params and types match, but they
 are not ambiguous.  */
- else if ((!DECL_FUNCTION_VERSIONED (newdecl)
-   && !DECL_FUNCTION_VERSIONED (olddecl))
+ else if (((!DECL_FUNCTION_VERSIONED (newdecl)
+&& !DECL_FUNCTION_VERSIONED (olddecl))
+   || !comptypes (TREE_TYPE (TREE_TYPE (newdecl)),
+  TREE_TYPE (TREE_TYPE (olddecl)),
+  COMPARE_STRICT))
   /* Let constrained hidden friends coexist for now, we'll
  check satisfaction later.  */
   && !member_like_constrained_friend_p (newdecl)
-- 
2.34.1

[PATCH v4 06/20] Refactor record_function_versions.

2025-05-28 Thread Alfie Richards

Renames record_function_versions to add_function_version, and makes it
explicit that it is adding a single version to the function structure.

Additionally, change the insertion point to always maintain priority ordering
of the versions.

This allows for removing logic for moving the default to the first
position which was duplicated across target specific code and enables
easier reasoning about function sets.

gcc/ChangeLog:

* cgraph.cc (cgraph_node::record_function_versions): Refactor and
rename to...
(cgraph_node::add_function_version): new function.
* cgraph.h (cgraph_node::record_function_versions): Refactor and
rename to...
(cgraph_node::add_function_version): new function.
* config/aarch64/aarch64.cc (aarch64_get_function_versions_dispatcher):
Remove reordering.
* config/i386/i386-features.cc (ix86_get_function_versions_dispatcher):
Remove reordering.
* config/riscv/riscv.cc (riscv_get_function_versions_dispatcher):
Remove reordering.
* config/rs6000/rs6000.cc (rs6000_get_function_versions_dispatcher):
Remove reordering.

gcc/cp/ChangeLog:

* decl.cc (maybe_version_functions): Change record_function_versions
call to add_function_version.
---
 gcc/cgraph.cc| 75 +++-
 gcc/cgraph.h |  6 +--
 gcc/config/aarch64/aarch64.cc| 34 +++
 gcc/config/i386/i386-features.cc | 33 +++---
 gcc/config/riscv/riscv.cc| 38 +++-
 gcc/config/rs6000/rs6000.cc  | 35 +++
 gcc/cp/decl.cc   |  8 +++-
 7 files changed, 78 insertions(+), 151 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 6ae6a97f6f5..feaeebec40b 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -231,45 +231,60 @@ cgraph_node::delete_function_version_by_decl (tree decl)
   decl_node->remove ();
 }
 
-/* Record that DECL1 and DECL2 are semantically identical function
+/* Add decl to the structure of semantically identical function versions.
+   The node is inserted at the point maintaining the priority ordering on the
versions.  */
 void
-cgraph_node::record_function_versions (tree decl1, tree decl2)
+cgraph_node::add_function_version (cgraph_function_version_info *fn_v,
+  tree decl)
 {
-  cgraph_node *decl1_node = cgraph_node::get_create (decl1);
-  cgraph_node *decl2_node = cgraph_node::get_create (decl2);
-  cgraph_function_version_info *decl1_v = NULL;
-  cgraph_function_version_info *decl2_v = NULL;
-  cgraph_function_version_info *before;
-  cgraph_function_version_info *after;
-
-  gcc_assert (decl1_node != NULL && decl2_node != NULL);
-  decl1_v = decl1_node->function_version ();
-  decl2_v = decl2_node->function_version ();
-
-  if (decl1_v != NULL && decl2_v != NULL)
-return;
-
-  if (decl1_v == NULL)
-decl1_v = decl1_node->insert_new_function_version ();
+  cgraph_node *decl_node = cgraph_node::get_create (decl);
+  cgraph_function_version_info *decl_v = NULL;
 
-  if (decl2_v == NULL)
-decl2_v = decl2_node->insert_new_function_version ();
+  gcc_assert (decl_node != NULL);
 
-  /* Chain decl2_v and decl1_v.  All semantically identical versions
- will be chained together.  */
+  decl_v = decl_node->function_version ();
 
-  before = decl1_v;
-  after = decl2_v;
+  /* If the nodes are already linked, skip.  */
+  if (decl_v != NULL && (decl_v->next || decl_v->prev))
+return;
 
-  while (before->next != NULL)
-before = before->next;
+  if (decl_v == NULL)
+decl_v = decl_node->insert_new_function_version ();
+
+  gcc_assert (decl_v);
+  gcc_assert (fn_v);
+
+  /* Go to start of the FMV structure.  */
+  while (fn_v->prev)
+fn_v = fn_v->prev;
+
+  cgraph_function_version_info *insert_point_before = NULL;
+  cgraph_function_version_info *insert_point_after = fn_v;
+
+  /* Find the insertion point for the new version to maintain ordering.
+ The default node must always go at the beginning.  */
+  if (!is_function_default_version (decl))
+while (insert_point_after
+  && (targetm.compare_version_priority
+(decl, insert_point_after->this_node->decl) > 0
+  || is_function_default_version
+   (insert_point_after->this_node->decl)
+  || lookup_attribute
+   ("target_clones",
+DECL_ATTRIBUTES (insert_point_after->this_node->decl
+  {
+   insert_point_before = insert_point_after;
+   insert_point_after = insert_point_after->next;
+  }
 
-  while (after->prev != NULL)
-after= after->prev;
+  decl_v->prev = insert_point_before;
+  decl_v->next= insert_point_after;
 
-  before->next = after;
-  after->prev = before;
+  if (insert_point_before)
+insert_point_before->next = decl_v;
+  if (insert_point_after)
+insert_point_after->prev = decl_v;
 }
 
 /* Initialize callgraph dump file.

[PATCH v4 08/20] Add get_clone_versions and get_version functions.

2025-05-28 Thread Alfie Richards

This is a reimplementation of get_target_clone_attr_len,
get_attr_str, and separate_attrs using string_slice and auto_vec to make
memory management and use simpler.

Adds get_target_version helper function to get the target_version string
from a decl.

gcc/c-family/ChangeLog:

* c-attribs.cc (handle_target_clones_attribute): Change to use
get_clone_versions.

gcc/ChangeLog:

* tree.cc (get_clone_versions): New function.
(get_clone_attr_versions): New function.
(get_target_version): New function.
* tree.h (get_clone_versions): New function.
(get_clone_attr_versions): New function.
(get_target_version): New function.
---
 gcc/c-family/c-attribs.cc |  4 ++-
 gcc/tree.cc   | 59 +++
 gcc/tree.h| 11 
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 5a0e3d328ba..5dff489fcca 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -6132,7 +6132,9 @@ handle_target_clones_attribute (tree *node, tree name, 
tree ARG_UNUSED (args),
}
}
 
-  if (get_target_clone_attr_len (args) == -1)
+  auto_vec versions= get_clone_attr_versions (args, NULL);
+
+  if (versions.length () == 1)
{
  warning (OPT_Wattributes,
   "single % attribute is ignored");
diff --git a/gcc/tree.cc b/gcc/tree.cc
index eccfcc89da4..fdcdfb336bc 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -15372,6 +15372,65 @@ get_target_clone_attr_len (tree arglist)
   return str_len_sum;
 }
 
+/* Returns an auto_vec of string_slices containing the version strings from
+   ARGLIST.  DEFAULT_COUNT is incremented for each default version found.  */
+
+auto_vec
+get_clone_attr_versions (const tree arglist, int *default_count)
+{
+  gcc_assert (TREE_CODE (arglist) == TREE_LIST);
+  auto_vec versions;
+
+  static const char separator_str[] = {TARGET_CLONES_ATTR_SEPARATOR, 0};
+  string_slice separators = string_slice (separator_str);
+
+  for (tree arg = arglist; arg; arg = TREE_CHAIN (arg))
+{
+  string_slice str = string_slice (TREE_STRING_POINTER (TREE_VALUE (arg)));
+  while (str.is_valid ())
+   {
+ string_slice attr = string_slice::tokenize (&str, separators);
+ attr = attr.strip ();
+
+ if (attr == "default" && default_count)
+   (*default_count)++;
+ versions.safe_push (attr);
+   }
+}
+  return versions;
+}
+
+/* Returns an auto_vec of string_slices containing the version strings from
+   the target_clone attribute from DECL.  DEFAULT_COUNT is incremented for each
+   default version found.  */
+auto_vec
+get_clone_versions (const tree decl, int *default_count)
+{
+  tree attr = lookup_attribute ("target_clones", DECL_ATTRIBUTES (decl));
+  if (!attr)
+return auto_vec ();
+  tree arglist = TREE_VALUE (attr);
+  return get_clone_attr_versions (arglist, default_count);
+}
+
+/* If DECL has a target_version attribute, returns a string_slice containing 
the
+   attribute value.  Otherwise, returns string_slice::invalid.
+   Only works for target_version due to target attributes allowing multiple
+   string arguments to specify one target.  */
+string_slice
+get_target_version (const tree decl)
+{
+  gcc_assert (!TARGET_HAS_FMV_TARGET_ATTRIBUTE);
+
+  tree attr = lookup_attribute ("target_version", DECL_ATTRIBUTES (decl));
+
+  if (!attr)
+return string_slice::invalid ();
+
+  return string_slice (TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
+  .strip ();
+}
+
 void
 tree_cc_finalize (void)
 {
diff --git a/gcc/tree.h b/gcc/tree.h
index 99f26177628..a89f3cf7189 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "tree-core.h"
 #include "options.h"
+#include "vec.h"
 
 /* Convert a target-independent built-in function code to a combined_fn.  */
 
@@ -7052,4 +7053,14 @@ extern tree get_attr_nonstring_decl (tree, tree * = 
NULL);
 
 extern int get_target_clone_attr_len (tree);
 
+/* Returns the version string for a decl with target_version attribute.
+   Returns an invalid string_slice if no attribute is present.  */
+extern string_slice get_target_version (const tree);
+/* Returns a vector of the version strings from a target_clones attribute on
+   a decl.  Can also record the number of default versions found.  */
+extern auto_vec get_clone_versions (const tree, int * = NULL);
+/* Returns a vector of the version strings from a target_clones attribute
+   directly.  */
+extern auto_vec get_clone_attr_versions (const tree, int *);
+
 #endif  /* GCC_TREE_H  */
-- 
2.34.1

[PATCH v4 14/20] Add reject_target_clone hook for filtering target_clone versions.

2025-05-28 Thread Alfie Richards

This patch introduces the TARGET_REJECT_FUNCTION_CLONE_VERSION hook
which is used to determine if a target_clones version string parses.

If true is returned, a warning is emitted and from then on the version
is ignored.

This is as specified in the Arm C Language Extension. The purpose of this
is to allow some portability of code using target_clones attributes.

Currently this is only properly implemented for the Aarch64 backend.

For riscv which is the only other backend which uses target_version
semantics a partial implementation is present, where this hook is used
to check parsing, in which errors will be emitted on a failed parse
rather than warnings. A refactor of the riscv parsing logic would be
required to enable this functionality fully.

This fixes PR 118339 where parse failures could cause ICE in Aarch64.

gcc/ChangeLog:

PR target/118339
* target.def: Add reject_target_clone_version hook.
* tree.cc (get_clone_attr_versions): Add filter and location argument.
(get_clone_versions): Update call to get_clone_attr_versions.
* tree.h (get_clone_attr_versions): Add filter and location argument.
* config/aarch64/aarch64.cc (aarch64_reject_target_clone_version):
New function
(TARGET_REJECT_FUNCTION_CLONE_VERSION): New define.
* config/riscv/riscv.cc (riscv_reject_target_clone_version):
New function.
(TARGET_REJECT_FUNCTION_CLONE_VERSION): New define.
* doc/tm.texi: Regenerated.
* doc/tm.texi.in: Add documentation for new hook.
* hooks.h (hook_stringslice_locationt_false): New function.
* hooks.cc (hook_stringslice_locationt_false): New function.

gcc/c-family/ChangeLog:

* c-attribs.cc (handle_target_clones_attribute): Update to emit warnings
for rejected versions.
---
 gcc/c-family/c-attribs.cc | 26 +-
 gcc/config/aarch64/aarch64.cc | 20 
 gcc/config/riscv/riscv.cc | 18 ++
 gcc/doc/tm.texi   |  5 +
 gcc/doc/tm.texi.in|  2 ++
 gcc/hooks.cc  |  6 ++
 gcc/hooks.h   |  3 +++
 gcc/target.def|  8 
 gcc/tree.cc   | 12 ++--
 gcc/tree.h|  8 ++--
 10 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 5dff489fcca..b5287f0da06 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -6132,12 +6132,28 @@ handle_target_clones_attribute (tree *node, tree name, 
tree ARG_UNUSED (args),
}
}
 
-  auto_vec versions= get_clone_attr_versions (args, NULL);
-
-  if (versions.length () == 1)
-   {
+  int num_defaults = 0;
+  auto_vec versions= get_clone_attr_versions (args,
+ &num_defaults,
+ DECL_SOURCE_LOCATION (*node),
+ false);
+
+  for (auto v : versions)
+   if (targetm.reject_function_clone_version
+ (v, DECL_SOURCE_LOCATION (*node)))
  warning (OPT_Wattributes,
-  "single % attribute is ignored");
+  "invalid % version %qB ignored",
+  &v);
+
+  /* Lone target_clones version is always ignored for target attr 
semantics.
+Only ignore under target_version semantics if it is a default
+version.  */
+  if (versions.length () == 1 && (TARGET_HAS_FMV_TARGET_ATTRIBUTE
+ || num_defaults == 1))
+   {
+ if (TARGET_HAS_FMV_TARGET_ATTRIBUTE)
+   warning (OPT_Wattributes,
+"single % attribute is ignored");
  *no_add_attrs = true;
}
   else
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 99e351fb65b..43ac50c7734 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -31229,6 +31229,23 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode 
crc_mode,
 }
 }
 
+bool
+aarch64_reject_target_clone_version (string_slice str,
+location_t loc ATTRIBUTE_UNUSED)
+{
+  str = str.strip ();
+
+  if (str == "default")
+return false;
+
+  enum aarch_parse_opt_result parse_res;
+  auto isa_flags = aarch64_asm_isa_flags;
+  parse_res = aarch64_parse_fmv_features (str, &isa_flags, NULL, NULL);
+
+  /* Reject any version which does not parse.  */
+  return parse_res != AARCH_PARSE_OK;
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -32052,6 +32069,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_OPTION_FUNCTION_VERSIONS
 #define TARGET_OPTION_FUNCTION_VERSIONS aarch64_common_function_versions
 
+#undef TARGET_REJECT_FUNCTION_CLONE_VERSION
+#define TARGET_REJECT_FUNCTION_CLONE_VERSION 
aarch64_reject_target_clone_version
+
 #undef TARGET_COMPARE_VERSION_PRIORITY
 #def

Re: [PATCH] [RFC] RISC-V: Add extra check to help choosing multilib with equivalent arch.

2025-05-28 Thread Kito Cheng

I thought this issue should be fixed once we implement those
implication rules correctly? Is march=rv32imaf_zca/mabi=ilp32 still
unable to select march=rv32imac/mabi=ilp32 after this[1]
patch?

[1] 
https://github.com/gcc-mirror/gcc/commit/42ce61eaefc4db70e2e7ea2d8ef091daa458eb48

On Wed, May 28, 2025 at 4:04 PM  wrote:
>
> From: Yunze Zhu 
>
> Currently when choosing multilib set for target like 
> march=rv32imaf_zca/mabi=ilp32,
> gnu toolchain reports "Cannot find suitable multilib set".
> This is because in current dependent extension zca implies c when has 
> combinations of extensions: Zca, F_Zca_Zcf or FD_Zca_Zcf_Zcd,
> and f_zca is not one of these combinations and therefore extension c can not 
> be implied,
> and multilib set march=rv32imac/mabi=ilp32 cannot be selected.
> The most accurate method to fix this problem is changing multilib in 
> MULTILIB_REQUIRED: march=rv32imac/mabi=ilp32
> to an equivalent one: march=rv32ima_zca/mabi=ilp32.
> However, this method may cause compatibility issues with multilib path in 
> previous toolchain.
> There is an alternative method that add an extra check in multilib selection 
> functions,
> which checks whether c extension in multilibs is subset of zc* extensions in 
> arch string.
> By this method not only totally matched multilib sets but equivalent multilib 
> subsets could be selected.
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc 
> (riscv_subset_list::match_score_inc_p): New Function.
> * config/riscv/riscv-subset.h: New Function.
> ---
>  gcc/common/config/riscv/riscv-common.cc | 27 +
>  gcc/config/riscv/riscv-subset.h |  2 ++
>  2 files changed, 29 insertions(+)
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc 
> b/gcc/common/config/riscv/riscv-common.cc
> index a6d8763f032..f43899bb413 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -412,12 +412,39 @@ riscv_subset_list::match_score (riscv_subset_list 
> *list) const
>for (s = list->m_head; s != NULL; s = s->next)
>  if (this->lookup (s->name.c_str ()) != NULL)
>score++;
> +else if (this->match_score_inc_p (s->name.c_str (), list))
> +  score++;
>  else
>return 0;
>
>return score;
>  }
>
> +/* Check if given extension is equivalent to one or group of extensions
> +in given subset list.  */
> +bool
> +riscv_subset_list::match_score_inc_p (std::string name,
> +riscv_subset_list *multilib) const
> +{
> +  if (name.compare ("c") != 0 || this->lookup ("zca") == NULL)
> +return false;
> +
> +  /* Check equivalent requirment when having d extension in multilib.  */
> +  if (multilib->lookup ("d") != NULL)
> +{
> +  if (multilib->xlen () == 32)
> +   return this->lookup ("zcf") != NULL && this->lookup ("zcd") != NULL;
> +  else
> +   return this->lookup ("zcd") != NULL;
> +}
> +
> +  /* Check equivalent requirment when having f extension in multilib.  */
> +  if (multilib->lookup ("f") != NULL && multilib->xlen () == 32)
> +return this->lookup ("zcf") != NULL;
> +
> +  return true;
> +}
> +
>  /* Get the rank for single-letter subsets, lower value meaning higher
> priority.  */
>
> diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
> index c5d9fab4de9..f80210cd755 100644
> --- a/gcc/config/riscv/riscv-subset.h
> +++ b/gcc/config/riscv/riscv-subset.h
> @@ -114,6 +114,8 @@ public:
>
>int match_score (riscv_subset_list *) const;
>
> +  bool match_score_inc_p (std::string, riscv_subset_list *) const;
> +
>void set_loc (location_t);
>
>void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; }
> --
> 2.47.1
>

[PATCH v4 16/20] Refactor FMV frontend conflict and merging logic and hooks.

2025-05-28 Thread Alfie Richards

This change refactors FMV handling in the frontend to allow greater
reasoning about versions in shared code.

This is needed for allowing target_clones and target_versions to be used
together in a function set, as there are then two distinct concerns when
encountering two declarations that previously were conflated:

1. Are these two declarations completely distinct FMV declarations
(ie. the sets of versions they define have no overlap). If so, they don't
conflict so there is no need to merge and both can be pushed.
2. For two declarations that aren't completely distinct, are they matching
and therefore mergeable. (ie. two target_clone decls that define the same set
of versions, or an un-annotated declaration, and a target_clones definition
containing the default version). If so, continue to the existing merging logic
to try to merge these and diagnose if it's not possible.
If not, then diagnose the conflicting declarations.

To do this the common_function_versions function has been renamed
distinct_function_versions (meaning, are the version sets defined by these
two decls completely distinct from each other).

The common function version hook was modified to instead take two
string_slice's (each representing a single version) and determine if they
define the same version.

A new function, called mergeable_version_decls is added, which checks
if two decls (with overlapping version sets) can be merged (only in terms of
the attributes; the existing logic is used to detect other mergeability conflicts
like redefinition).

This change also records the conflicting version string so that it can be
included in diagnostics.

This only affects targets with TARGET_HAS_FMV_TARGET_ATTRIBUTE set to false
(i.e. aarch64 and riscv); the existing logic for i386 and ppc is unchanged.
This also means the common version hook is only used for aarch64 and riscv.

gcc/ChangeLog:

* attribs.cc (common_function_versions): Change to an error, existing
logic moved to distinct_version_decls.
* attribs.h (common_function_versions): Change arguments.
* config/aarch64/aarch64.cc (aarch64_common_function_versions):
New function.
* config/riscv/riscv.cc (riscv_common_function_versions): New function.
* doc/tm.texi: Regenerated.
* target.def: Change common_function_versions hook.
* tree.cc (distinct_version_decls): New function.
(mergeable_version_decls): Ditto.
* tree.h (distinct_version_decls): New function.
(mergeable_version_decls): Ditto.
* hooks.h (hook_stringslice_stringslice_unreachable): New function.
* hooks.cc (hook_stringslice_stringslice_unreachable): New function.

gcc/cp/ChangeLog:

* class.cc (resolve_address_of_overloaded_function): Updated to use
distinct_version_decls instead of common_function_version hook.
* cp-tree.h (decls_match): Updated to use
distinct_version_decls instead of common_function_version hook.
* decl.cc (decls_match): Refactor to use distinct_version_decls and
to pass through conflicting_version argument.
(maybe_version_functions): Updated to use
distinct_version_decls instead of common_function_version hook.
(duplicate_decls): Add logic to handle conflicting unmergeable decls
and improve diagnostics for conflicting versions.
* decl2.cc (check_classfn): Updated to use
distinct_version_decls instead of common_function_version hook.
---
 gcc/attribs.cc|  74 ++--
 gcc/attribs.h |   3 +-
 gcc/config/aarch64/aarch64.cc |  16 ++-
 gcc/config/riscv/riscv.cc |  30 ++---
 gcc/cp/class.cc   |   4 +-
 gcc/cp/cp-tree.h  |   2 +-
 gcc/cp/decl.cc|  43 +--
 gcc/cp/decl2.cc   |   2 +-
 gcc/doc/tm.texi   |  11 +-
 gcc/hooks.cc  |   7 ++
 gcc/hooks.h   |   1 +
 gcc/target.def|  13 +--
 gcc/tree.cc   | 204 ++
 gcc/tree.h|   6 +
 14 files changed, 305 insertions(+), 111 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index c75fd1371fd..06785eaa136 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1086,7 +1086,14 @@ make_attribute (string_slice name, string_slice 
arg_name, tree chain)
   return attr;
 }
 
-/* Common functions used for target clone support.  */
+/* Used for targets with target_version semantics.  */
+
+bool
+common_function_versions (string_slice fn1 ATTRIBUTE_UNUSED,
+ string_slice fn2 ATTRIBUTE_UNUSED)
+{
+  gcc_unreachable ();
+}
 
 /* Comparator function to be used in qsort routine to sort attribute
specification strings to "target".  */
@@ -1177,71 +1184,6 @@ sorted_attr_string (tree arglist)
   return ret_str;
 }
 
-
-/* This function returns true if FN1 and FN2 are versions of the same function,
-   that is, the target strings of

[PATCH v4 09/20] Add assembler_name to cgraph_function_version_info.

2025-05-28 Thread Alfie Richards

Add the assembler_name member to cgraph_function_version_info to store
the base assembler name of the function set, before FMV mangling. This is
used in later patches for refactoring FMV mangling.

gcc/ChangeLog:

* cgraph.cc (cgraph_node::insert_new_function_version): Record
assembler_name.
* cgraph.h (struct cgraph_function_version_info): Add assembler_name.
---
 gcc/cgraph.cc | 1 +
 gcc/cgraph.h  | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index feaeebec40b..23f7748e49e 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -187,6 +187,7 @@ cgraph_node::insert_new_function_version (void)
   version_info_node = NULL;
   version_info_node = ggc_cleared_alloc ();
   version_info_node->this_node = this;
+  version_info_node->assembler_name = DECL_ASSEMBLER_NAME (this->decl);
 
   if (cgraph_fnver_htab == NULL)
 cgraph_fnver_htab = hash_table::create_ggc (2);
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 6759505bf33..4a4fb7302b1 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -856,6 +856,9 @@ struct GTY((for_user)) cgraph_function_version_info {
  dispatcher. The dispatcher decl is an alias to the resolver
  function decl.  */
   tree dispatcher_resolver;
+
+  /* The assmbly name of the function set before version mangling.  */
+  tree assembler_name;
 };
 
 #define DEFCIFCODE(code, type, string) CIF_ ## code,
-- 
2.34.1

Defuse 'RESULT_DECL' check in 'pass_nrv' for offloading compilation [PR119835] (was: Disable 'pass_nrv' for offloading compilation [PR119835])

2025-05-28 Thread Thomas Schwinge

Hi!

On 2025-05-28T09:18:29+0200, Richard Biener  wrote:
> On Tue, 27 May 2025, Thomas Schwinge wrote:
>> "'GIMPLE_RETURN' vs. 'RESULT_DECL' if 'aggregate_value_p'" isn't actually
>> a GIMPLE semantics invariant, thanks.  I conclude that in case that this
>> "invariant" is violated, that's not a problem for RTL expansion of
>> 'GIMPLE_RETURN', which is then handled like all the other cases where
>> "we are not returning the current function's RESULT_DECL".
>> 
>> I'm not sure whether just disabling the 'assert' in
>> 'gcc/tree-nrv.cc:pass_nrv::execute' is conceptually right (or may
>> potentially drive that pass into an inconsistent state), and as we of
>> course intend to eventually fix this issue properly (thanks for your
>> ideas in PR119835!), so for now, I propose to simply
>> "Disable 'pass_nrv' for offloading compilation [PR119835]", see attached.
>> Any comments before I push that?
>
> I'm not sure you can disable this pass - it runs even at -O0

No, runs only for 'optimize > 0'.

(I guess you were looking at 'pass_return_slot', living in the same
file.)

> so parts
> of it might be required for correctness, since some types cannot be
> copied.  Maybe RTL expansion will apply NRV if that's the case,
> irrespective of whether the flag is set, but maybe not.
>
> I think a more appropriate solution would be to simply change
> the assert as follows

> --- a/gcc/tree-nrv.cc
> +++ b/gcc/tree-nrv.cc
> @@ -171,12 +171,12 @@ pass_nrv::execute (function *fun)
>  
>   if (greturn *return_stmt = dyn_cast  (stmt))
> {
> - /* In a function with an aggregate return value, the
> -gimplifier has changed all non-empty RETURN_EXPRs to
> -return the RESULT_DECL.  */
> + /* In a function with an aggregate return value, if
> +there is a return that does not return RESULT_DECL
> +we cannot perform NRV optimizations.  */
>   ret_val = gimple_return_retval (return_stmt);
> - if (ret_val)
> -   gcc_assert (ret_val == result);
> + if (ret_val && ret_val != result)
> +   return 0;
> }
>   else if (gimple_has_lhs (stmt)

Ah, right, in this scanning stage, no code transformations have been done
yet, so we may still 'return 0;' (..., which then effectively also
disables the pass).

But, really also lose the check for non-offloading configurations, or do
this defensive variant only '#ifdef ACCEL_COMPILER', as in the attached
"Defuse 'RESULT_DECL' check in 'pass_nrv' for offloading compilation 
[PR119835]"?


Grüße
 Thomas


>From 6f391a97d49072f3ff32ea397e3d70ad9103c196 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 27 May 2025 16:02:05 +0200
Subject: [PATCH] Defuse 'RESULT_DECL' check in 'pass_nrv' for offloading
 compilation [PR119835]

... to avoid running into ICEs per PR119835, until that's resolved properly.

	PR middle-end/119835
	gcc/
	* tree-nrv.cc (pass_nrv::gate) [ACCEL_COMPILER]: 'return false;'.
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c:
	'#pragma GCC optimize "-fno-inline"'.
	* testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New.
	* testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust.

Co-authored-by: Richard Biener 
---
 gcc/tree-nrv.cc| 10 +-
 .../libgomp.c-c++-common/target-abi-struct-1-O0.c  |  2 +-
 .../libgomp.c-c++-common/target-abi-struct-1.c |  1 +
 .../testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c |  6 +-
 4 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c

diff --git a/gcc/tree-nrv.cc b/gcc/tree-nrv.cc
index 180ce39de4c..171d4c19f3e 100644
--- a/gcc/tree-nrv.cc
+++ b/gcc/tree-nrv.cc
@@ -176,7 +176,15 @@ pass_nrv::execute (function *fun)
 		 return the RESULT_DECL.  */
 	  ret_val = gimple_return_retval (return_stmt);
 	  if (ret_val)
-		gcc_assert (ret_val == result);
+		{
+#ifdef ACCEL_COMPILER
+		  /* PR119835 */
+		  if (ret_val != result)
+		return 0;
+#else
+		  gcc_assert (ret_val == result);
+#endif
+		}
 	}
 	  else if (gimple_has_lhs (stmt)
 		   && gimple_get_lhs (stmt) == result)
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
index 35ec75d648d..9bf949a1f06 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
@@ -1,3 +1,3 @@
 /* { dg-additional-options -O0 } */
 
-#include "../libgomp.oacc-c-c++-common/abi-struct-1.c"
+#include "target-abi-struct-1.c"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c
new file mode 100644
index 000..d9268af55cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct

[PATCH v4 05/20] Update is_function_default_version to work with target_version.

2025-05-28 Thread Alfie Richards

Notably this respects target_version semantics where an unannotated
function can be the default version.

gcc/ChangeLog:

* attribs.cc (is_function_default_version): Add target_version logic.

Approved by Richard Sandiford.
---
 gcc/attribs.cc | 27 ---
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 56dd18c2fa8..f6667839c01 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1279,18 +1279,31 @@ make_dispatcher_decl (const tree decl)
   return func_decl;
 }
 
-/* Returns true if DECL is multi-versioned using the target attribute, and this
-   is the default version.  This function can only be used for targets that do
-   not support the "target_version" attribute.  */
+/* Returns true if DECL a multiversioned default.
+   With the target attribute semantics, returns true if the function is marked
+   as default with the target version.
+   With the target_version attribute semantics, returns true if the function
+   is either not annotated, or annotated as default.  */
 
 bool
 is_function_default_version (const tree decl)
 {
-  if (TREE_CODE (decl) != FUNCTION_DECL
-  || !DECL_FUNCTION_VERSIONED (decl))
+  tree attr;
+  if (TREE_CODE (decl) != FUNCTION_DECL)
 return false;
-  tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
-  gcc_assert (attr);
+  if (TARGET_HAS_FMV_TARGET_ATTRIBUTE)
+{
+  if (!DECL_FUNCTION_VERSIONED (decl))
+   return false;
+  attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
+  gcc_assert (attr);
+}
+  else
+{
+  attr = lookup_attribute ("target_version", DECL_ATTRIBUTES (decl));
+  if (!attr)
+   return true;
+}
   attr = TREE_VALUE (TREE_VALUE (attr));
   return (TREE_CODE (attr) == STRING_CST
  && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
-- 
2.34.1

[PATCH v4 11/20] Add clone_identifier function.

2025-05-28 Thread Alfie Richards

This is similar to clone_function_name and its siblings but takes an
identifier tree node rather than a function declaration.

This is to be used in conjunction with the identifier node stored in
cgraph_function_version_info::assembler_name to mangle FMV functions in
later patches.

gcc/ChangeLog:

* cgraph.h (clone_identifier): New function.
* cgraphclones.cc (clone_identifier): New function.
(clone_function_name): Refactored to use clone_identifier.
---
 gcc/cgraph.h|  1 +
 gcc/cgraphclones.cc | 16 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 55812cc09a2..d6d8e066da6 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -2630,6 +2630,7 @@ tree clone_function_name (const char *name, const char 
*suffix,
 tree clone_function_name (tree decl, const char *suffix,
  unsigned long number);
 tree clone_function_name (tree decl, const char *suffix);
+tree clone_identifier (tree decl, const char *suffix);
 
 void tree_function_versioning (tree, tree, vec *,
   ipa_param_adjustments *,
diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index 5332a433317..6b650849a63 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -557,6 +557,14 @@ clone_function_name (tree decl, const char *suffix)
   /* For consistency this needs to behave the same way as
  ASM_FORMAT_PRIVATE_NAME does, but without the final number
  suffix.  */
+  return clone_identifier (identifier, suffix);
+}
+
+/* Return a new clone of ID ending with the string SUFFIX.  */
+
+tree
+clone_identifier (tree id, const char *suffix)
+{
   char *separator = XALLOCAVEC (char, 2);
   separator[0] = symbol_table::symbol_suffix_separator ();
   separator[1] = 0;
@@ -565,15 +573,11 @@ clone_function_name (tree decl, const char *suffix)
 #else
   const char *prefix = "";
 #endif
-  char *result = ACONCAT ((prefix,
-  IDENTIFIER_POINTER (identifier),
-  separator,
-  suffix,
-  (char*)0));
+  char *result = ACONCAT (
+(prefix, IDENTIFIER_POINTER (id), separator, suffix, (char *) 0));
   return get_identifier (result);
 }
 
-
 /* Create callgraph node clone with new declaration.  The actual body will be
copied later at compilation stage.  The name of the new clone will be
constructed from the name of the original node, SUFFIX and NUM_SUFFIX.
-- 
2.34.1

[PATCH v4 20/20] Remove FMV beta warning.

2025-05-28 Thread Alfie Richards

This patch removes the warning for target_version and target_clones
in aarch64 as it is now spec compliant.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_process_target_version_attr):
Remove warning.
* config/aarch64/aarch64.opt: Mark -Wno-experimental-fmv-target
deprecated.
* doc/invoke.texi: Ditto.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-1.C: Remove option.
* g++.target/aarch64/mv-and-mvc-error1.C: Ditto.
* g++.target/aarch64/mv-and-mvc-error2.C: Ditto.
* g++.target/aarch64/mv-and-mvc-error3.C: Ditto.
* g++.target/aarch64/mv-and-mvc1.C: Ditto.
* g++.target/aarch64/mv-and-mvc2.C: Ditto.
* g++.target/aarch64/mv-and-mvc3.C: Ditto.
* g++.target/aarch64/mv-and-mvc4.C: Ditto.
* g++.target/aarch64/mv-error1.C: Ditto.
* g++.target/aarch64/mv-error2.C: Ditto.
* g++.target/aarch64/mv-error3.C: Ditto.
* g++.target/aarch64/mv-error4.C: Ditto.
* g++.target/aarch64/mv-error5.C: Ditto.
* g++.target/aarch64/mv-error6.C: Ditto.
* g++.target/aarch64/mv-error7.C: Ditto.
* g++.target/aarch64/mv-error8.C: Ditto.
* g++.target/aarch64/mv-pragma.C: Ditto.
* g++.target/aarch64/mv-symbols1.C: Ditto.
* g++.target/aarch64/mv-symbols10.C: Ditto.
* g++.target/aarch64/mv-symbols11.C: Ditto.
* g++.target/aarch64/mv-symbols12.C: Ditto.
* g++.target/aarch64/mv-symbols13.C: Ditto.
* g++.target/aarch64/mv-symbols2.C: Ditto.
* g++.target/aarch64/mv-symbols3.C: Ditto.
* g++.target/aarch64/mv-symbols4.C: Ditto.
* g++.target/aarch64/mv-symbols5.C: Ditto.
* g++.target/aarch64/mv-symbols6.C: Ditto.
* g++.target/aarch64/mv-symbols7.C: Ditto.
* g++.target/aarch64/mv-symbols8.C: Ditto.
* g++.target/aarch64/mv-symbols9.C: Ditto.
* g++.target/aarch64/mvc-error1.C: Ditto.
* g++.target/aarch64/mvc-error2.C: Ditto.
* g++.target/aarch64/mvc-symbols1.C: Ditto.
* g++.target/aarch64/mvc-symbols2.C: Ditto.
* g++.target/aarch64/mvc-symbols3.C: Ditto.
* g++.target/aarch64/mvc-symbols4.C: Ditto.
* g++.target/aarch64/mv-warning1.C: Removed.
* g++.target/aarch64/mvc-warning1.C: Removed.
---
 gcc/config/aarch64/aarch64.cc| 9 -
 gcc/config/aarch64/aarch64.opt   | 2 +-
 gcc/doc/invoke.texi  | 5 +
 gcc/testsuite/g++.target/aarch64/mv-1.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error3.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc1.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc2.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc3.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc4.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error1.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error2.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error3.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error4.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error5.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error6.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error7.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error8.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-pragma.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols1.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols10.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols11.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols12.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols13.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols2.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols3.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols4.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols5.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols6.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols7.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols8.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols9.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-warning1.C   | 9 -
 gcc/testsuite/g++.target/aarch64/mvc-error1.C| 1 -
 gcc/testsuite/g++.target/aarch64/mvc-error2.C| 1 -
 gcc/testsuite/g++.target/aarch64/mvc-symbols1.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mvc-symbols2.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mvc-symbols3.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mvc-symbols4.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mvc-warning1.C  | 1 -
 41 files changed, 2 insertions(+), 60 deletions(-)
 delete mode 100644 gcc/testsuite/g++.target/aarch64/mv-warning1.C

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aar

Do not erase static profile by 0 autofdo profile

2025-05-28 Thread Jan Hubicka

Hi,
This patch makes auto-fdo more careful about keeping info we have
from static profile prediction.

If all counters in function are 0, we can keep the original static profile.
Having all 0 profile is not very useful especially because 0 in autofdo is not
very informative and the code still may have been executed in the train run.
I added comment about adding GUESSED_GLOBAL0_AFDO which would still preserve
info that the function is not hot in the profile, but I would like to do this
incrementally.

If function has non-zero counters, we can still keep info about zero being
reliable from static prediction (i.e. after EH or with cold attribute).

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

* auto-profile.cc (update_count_by_afdo_count): New function.
(afdo_set_bb_count): Add debug output; only set count if it is
non-zero.
(afdo_find_equiv_class): Add debug output.
(afdo_calculate_branch_prob): Fix formatting.
(afdo_annotate_cfg): Add debug output; do not erase static
profile if autofdo profile is all 0.

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 91d829908d2..3eefb970fde 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -1061,6 +1061,19 @@ set_bb_annotated (basic_block bb, bb_set *annotated)
   annotated->insert (bb);
 }
 
+/* Update profile_count by known autofdo count.  */
+void
+update_count_by_afdo_count (profile_count *count, gcov_type c)
+{
+  if (c)
+*count = profile_count::from_gcov_type (c).afdo ();
+  /* In case we have guessed profile which is already zero, preserve
+ quality info.  */
+  else if (count->nonzero_p ()
+  || count->quality () == GUESSED)
+*count = profile_count::zero ().afdo ();
+}
+
 /* For a given BB, set its execution count. Attach value profile if a stmt
is not in PROMOTED, because we only want to promote an indirect call once.
Return TRUE if BB is annotated.  */
@@ -1071,6 +1084,8 @@ afdo_set_bb_count (basic_block bb, const stmt_set 
&promoted)
   gimple_stmt_iterator gsi;
   gcov_type max_count = 0;
   bool has_annotated = false;
+  if (dump_file)
+fprintf (dump_file, " Looking up AFDO count of bb %i\n", bb->index);
 
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 {
@@ -1082,6 +1097,12 @@ afdo_set_bb_count (basic_block bb, const stmt_set 
&promoted)
 {
   if (info.count > max_count)
 max_count = info.count;
+ if (dump_file && info.count)
+   {
+ fprintf (dump_file, "  count %" PRIu64 " in stmt: ",
+  (int64_t)info.count);
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+   }
   has_annotated = true;
   if (info.targets.size () > 0
   && promoted.find (stmt) == promoted.end ())
@@ -1112,6 +1133,13 @@ afdo_set_bb_count (basic_block bb, const stmt_set 
&promoted)
{
  if (info.count > max_count)
max_count = info.count;
+ if (dump_file && info.count)
+   {
+ fprintf (dump_file,
+  "  phi op in BB %i with count %" PRIu64": ",
+  bb_succ->index, (int64_t)info.count);
+ print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
+   }
  has_annotated = true;
}
}
@@ -1121,7 +1149,14 @@ afdo_set_bb_count (basic_block bb, const stmt_set 
&promoted)
return false;
 }
 
-  bb->count = profile_count::from_gcov_type (max_count).afdo ();
+  if (max_count)
+{
+  update_count_by_afdo_count (&bb->count, max_count);
+  if (dump_file)
+   fprintf (dump_file,
+" Annotated bb %i with count %" PRId64 "\n",
+bb->index, (int64_t)max_count);
+}
   return true;
 }
 
@@ -1154,6 +1189,14 @@ afdo_find_equiv_class (bb_set *annotated_bb)
  bb1->aux = bb;
  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
{
+ if (dump_file)
+   {
+ fprintf (dump_file,
+  "  Copying count of bb %i to bb %i; count is:",
+  bb1->index,
+  bb->index);
+ bb1->count.dump (dump_file);
+   }
  bb->count = bb1->count;
  set_bb_annotated (bb, annotated_bb);
}
@@ -1166,6 +1209,14 @@ afdo_find_equiv_class (bb_set *annotated_bb)
  bb1->aux = bb;
  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
{
+ if (dump_file)
+   {
+ fprintf (dump_file,
+  "  Copying count of bb %i to bb %i; count is:",
+  bb1->index,
+  bb->index);
+ bb1->count.dump (dump_file);
+   }

[PATCH] RISC-V: Add 'bclr+binv' peephole2 optimization.

2025-05-28 Thread Jiawei

This patch adds a peephole2 optimization that combines a 'bclr' followed by
a 'binv' into a single 'bset' instruction when the Zbs extension is enabled.

The motivation for this patch is that PR116398 limits 2→2 RTL combinations,
which prevents certain simplifications in the combiner pass. As a result,
combining 'bclr' and 'binv' through standard RTL combination is not feasible
when Zbs is enabled. An example is the testcase
g++.target/riscv/redundant-bitmap-2.C[1] from Jeff Law's patch[2].

PR116398: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4d7a634f6d41029811cdcbd5f7282b5b07890094
[1] https://godbolt.org/z/dhYoTMY1v
[2] 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=05daf617ea22e1d818295ed2d037456937e23530

gcc/ChangeLog:

* config/riscv/bitmanip.md (*bset_2): New pattern.
* config/riscv/peephole.md: Ditto.

Signed-off-by: Jiawei 
---
 gcc/config/riscv/bitmanip.md |  9 +
 gcc/config/riscv/peephole.md | 16 
 2 files changed, 25 insertions(+)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 21426f49679..1bd66c4aa19 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -615,6 +615,15 @@
   "bset\t%0,x0,%1"
   [(set_attr "type" "bitmanip")])
 
+(define_insn "*bset_2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (ior:X (match_operand:X 1 "register_operand" "r")
+  (ashift:X (const_int 1)
+(match_operand:QI 2 "register_operand" "r"]
+  "TARGET_ZBS"
+  "bset\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
 ;; The result will always have bits 32..63 clear, so the zero-extend
 ;; is redundant.  We could split it to bset_1, but it seems
 ;; unnecessary.
diff --git a/gcc/config/riscv/peephole.md b/gcc/config/riscv/peephole.md
index b5cc1924c76..1d5d15e9005 100644
--- a/gcc/config/riscv/peephole.md
+++ b/gcc/config/riscv/peephole.md
@@ -39,6 +39,22 @@
   operands[5] = GEN_INT (INTVAL (operands[2]) - INTVAL (operands[5]));
 })
 
+;; ZBS
+(define_peephole2
+  [(set (match_operand:X 1 "register_operand")
+   (and:X (rotate:X (const_int -2)
+(match_operand:QI 3 "register_operand"))
+  (match_operand:X 2 "register_operand")))
+   (set (match_operand:X 0 "register_operand")
+   (xor:X (ashift:X (const_int 1)
+(match_dup 3))
+  (match_dup 1)))]
+  "TARGET_ZBS"
+  [(set (match_dup 0)
+   (ior:X (match_dup 2)
+  (ashift:X (const_int 1)
+(match_dup 3])
+
 ;; ZCMP
 (define_peephole2
   [(set (match_operand:X 0 "a0a1_reg_operand")
-- 
2.43.0

Re: [PATCH] i386: Use Shuffles instead of shifts for Reduction in AMD znver4/5

2025-05-28 Thread Jan Hubicka

> gcc/ChangeLog:
> 
>   * config/i386/i386-expand.cc (emit_reduc_half): Use shuffles to
>   generate reduc half for V4SI, similar modes.
>   * config/i386/i386.h (TARGET_SSE_REDUCTION_PREFER_PSHUF): New Macro.
>   * config/i386/x86-tune.def (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF):
>   New tuning.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/i386/reduc-pshuf.c: New test.
> ---
>  gcc/config/i386/i386-expand.cc  | 28 ++---
>  gcc/config/i386/i386.h  |  2 ++
>  gcc/config/i386/x86-tune.def|  5 
>  gcc/testsuite/gcc.target/i386/reduc-pshuf.c | 14 +++
>  4 files changed, 46 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/reduc-pshuf.c
> 
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 7fd03c88630..c7aec716a55 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -18724,9 +18724,31 @@ emit_reduc_half (rtx dest, rtx src, int i)
>  case E_V8HFmode:
>  case E_V4SImode:
>  case E_V2DImode:
> -  d = gen_reg_rtx (V1TImode);
> -  tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
> - GEN_INT (i / 2));
> +  if (TARGET_SSE_REDUCTION_PREFER_PSHUF) {
> +if (i == 128) {
> +  d = gen_reg_rtx(V4SImode);
> +  tem = gen_sse2_pshufd_1(
> +  d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), GEN_INT(2),
> +  GEN_INT(3), GEN_INT(2), GEN_INT(3));
> +} else if (i == 64) {
> +  d = gen_reg_rtx(V4SImode);
> +  tem = gen_sse2_pshufd_1(
> +  d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), GEN_INT(1),
> +  GEN_INT(1), GEN_INT(1), GEN_INT(1));
> +} else if (i == 32) {
> +  d = gen_reg_rtx(V8HImode);
> +  tem = gen_sse2_pshuflw_1(
> +  d, force_reg(V8HImode, gen_lowpart(V8HImode, src)), GEN_INT(1),
> +  GEN_INT(1), GEN_INT(1), GEN_INT(1));
> +} else {
> +  d = gen_reg_rtx(V1TImode);
> +  tem =
> +  gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src), GEN_INT(i / 
> 2));
> +}
> +  } else {
> +d = gen_reg_rtx(V1TImode);
> +tem = gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src), GEN_INT(i / 
> 2));

Instead of duplicating gen_sse2_lshrv1ti3 it is probably cleaner to
simply break after each gen_sse_pshuw call and remove else.

OK with that change
Honza

Re: Defuse 'RESULT_DECL' check in 'pass_nrv' for offloading compilation [PR119835] (was: Disable 'pass_nrv' for offloading compilation [PR119835])

2025-05-28 Thread Richard Biener

On Wed, 28 May 2025, Thomas Schwinge wrote:

> Hi!
> 
> On 2025-05-28T09:18:29+0200, Richard Biener  wrote:
> > On Tue, 27 May 2025, Thomas Schwinge wrote:
> >> "'GIMPLE_RETURN' vs. 'RESULT_DECL' if 'aggregate_value_p'" isn't actually
> >> a GIMPLE semantics invariant, thanks.  I conclude that in case that this
> >> "invariant" is violated, that's not a problem for RTL expansion of
> >> 'GIMPLE_RETURN', which is then handled like all the other cases where
> >> "we are not returning the current function's RESULT_DECL".
> >> 
> >> I'm not sure whether just disabling the 'assert' in
> >> 'gcc/tree-nrv.cc:pass_nrv::execute' is conceptually right (or may
> >> potentially drive that pass into an inconsistent state), and as we of
> >> course intend to eventually fix this issue properly (thanks for your
> >> ideas in PR119835!), so for now, I propose to simply
> >> "Disable 'pass_nrv' for offloading compilation [PR119835]", see attached.
> >> Any comments before I push that?
> >
> > I'm not sure you can disable this pass - it runs even at -O0
> 
> No, runs only for 'optimize > 0'.
> 
> (I guess you were looking at 'pass_return_slot', living in the same
> file.)
> 
> > so parts
> > of it might be required for correctness, since some types cannot be
> > copied.  Maybe RTL expansion will apply NRV if that's the case,
> > irrespective of whether the flag is set, but maybe not.
> >
> > I think a more appropriate solution would be to simply change
> > the assert as follows
> 
> > --- a/gcc/tree-nrv.cc
> > +++ b/gcc/tree-nrv.cc
> > @@ -171,12 +171,12 @@ pass_nrv::execute (function *fun)
> >  
> >   if (greturn *return_stmt = dyn_cast  (stmt))
> > {
> > - /* In a function with an aggregate return value, the
> > -gimplifier has changed all non-empty RETURN_EXPRs to
> > -return the RESULT_DECL.  */
> > + /* In a function with an aggregate return value, if
> > +there is a return that does not return RESULT_DECL
> > +we cannot perform NRV optimizations.  */
> >   ret_val = gimple_return_retval (return_stmt);
> > - if (ret_val)
> > -   gcc_assert (ret_val == result);
> > + if (ret_val && ret_val != result)
> > +   return 0;
> > }
> >   else if (gimple_has_lhs (stmt)
> 
> Ah, right, in this scanning stage, no code transformations have been done
> yet, so we may still 'return 0;' (..., which then effectively also
> disables the pass).
> 
> But, really also lose the check for non-offloading configurations, or do
> this defensive variant only '#ifdef ACCEL_COMPILER', as in the attached
> "Defuse 'RESULT_DECL' check in 'pass_nrv' for offloading compilation 
> [PR119835]"?

Nah, I don't like #ifdef ACCEL_COMPILER sprinkled around.  You could
amend the comment to mention that while gimplification tries to ensure
the result is the RESULT_DECL this breaks for offloading.

Richard.
 
> 
> Grüße
>  Thomas
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH v25 1/3] c: Add _Countof operator

2025-05-28 Thread Sam James

Alejandro Colomar  writes:

> [...]
> diff --git a/gcc/testsuite/gcc.dg/countof-vla.c 
> b/gcc/testsuite/gcc.dg/countof-vla.c
> new file mode 100644
> index ..cc225df20689
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/countof-vla.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-std=c2y -pedantic-errors -Wvla-parameter" } */
> +
> +void fix_fix (int i,
> +   char (*a)[3][5],
> +   int (*x)[_Countof (*a)],
> +   short (*)[_Generic(x, int (*)[3]: 1)]);
> +void fix_var (int i,
> +   char (*a)[3][i], /* dg-warn "variable" */

'dg-warn' is not a valid dg directive. It should be 'dg-warning', but it
needs to be surrounded by braces too, i.e.

{ dg-warning "variable" }

[PATCH] i386: Use Shuffles instead of shifts for Reduction in AMD znver4/5

2025-05-28 Thread Pranav Gorantla

On AMD znver4 and znver5 targets, vpshufd and vpsrldq have latencies of 1 and 2
and throughputs of 4 (2 on znver4) and 2, respectively. It is better to generate
shuffles instead of shifts wherever possible. In this patch we try to
generate an appropriate shuffle instruction to copy the higher half to the lower
half instead of a simple right shift during horizontal vector reduction.

gcc/ChangeLog:

* config/i386/i386-expand.cc (emit_reduc_half): Use shuffles to
generate reduc half for V4SI, similar modes.
* config/i386/i386.h (TARGET_SSE_REDUCTION_PREFER_PSHUF): New Macro.
* config/i386/x86-tune.def (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF):
New tuning.

gcc/testsuite/ChangeLog:

* gcc.target/i386/reduc-pshuf.c: New test.
---
 gcc/config/i386/i386-expand.cc  | 28 ++---
 gcc/config/i386/i386.h  |  2 ++
 gcc/config/i386/x86-tune.def|  5 
 gcc/testsuite/gcc.target/i386/reduc-pshuf.c | 14 +++
 4 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/reduc-pshuf.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 7fd03c88630..c7aec716a55 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -18724,9 +18724,31 @@ emit_reduc_half (rtx dest, rtx src, int i)
 case E_V8HFmode:
 case E_V4SImode:
 case E_V2DImode:
-  d = gen_reg_rtx (V1TImode);
-  tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
-   GEN_INT (i / 2));
+  if (TARGET_SSE_REDUCTION_PREFER_PSHUF) {
+if (i == 128) {
+  d = gen_reg_rtx(V4SImode);
+  tem = gen_sse2_pshufd_1(
+  d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), GEN_INT(2),
+  GEN_INT(3), GEN_INT(2), GEN_INT(3));
+} else if (i == 64) {
+  d = gen_reg_rtx(V4SImode);
+  tem = gen_sse2_pshufd_1(
+  d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), GEN_INT(1),
+  GEN_INT(1), GEN_INT(1), GEN_INT(1));
+} else if (i == 32) {
+  d = gen_reg_rtx(V8HImode);
+  tem = gen_sse2_pshuflw_1(
+  d, force_reg(V8HImode, gen_lowpart(V8HImode, src)), GEN_INT(1),
+  GEN_INT(1), GEN_INT(1), GEN_INT(1));
+} else {
+  d = gen_reg_rtx(V1TImode);
+  tem =
+  gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src), GEN_INT(i / 
2));
+}
+  } else {
+d = gen_reg_rtx(V1TImode);
+tem = gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src), GEN_INT(i / 
2));
+  }
   break;
 case E_V8SFmode:
   if (i == 256)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 5aa056ff553..ef1700da0e7 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -491,6 +491,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
 #define TARGET_ALIGN_TIGHT_LOOPS \
 ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+#define TARGET_SSE_REDUCTION_PREFER_PSHUF \
+   ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF]
 
 
 /* Feature tests against the various architecture variations.  */
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index e6044c6032e..f7213de9c48 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -572,6 +572,11 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
 DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
  "sse_movcc_use_blendv", ~m_CORE_ATOM)
 
+/* X86_TUNE_V4SI_REDUCTION_PREFER_SHUFD: Prefer pshuf to reduce V16QI,
+   V8HI, V8HI, V4SI, V4FI, V2DI modes when lshr are costlier. */
+DEF_TUNE (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF, 
+   "sse_reduction_prefer_pshuf", m_ZNVER4 | m_ZNVER5)
+
 /*/
 /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
 /*/
diff --git a/gcc/testsuite/gcc.target/i386/reduc-pshuf.c 
b/gcc/testsuite/gcc.target/i386/reduc-pshuf.c
new file mode 100644
index 000..26998afc14c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/reduc-pshuf.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=znver5 " } */
+
+#define N 32
+#define T short
+T foo(T *a) {
+T sum = 0;
+for ( int i = 0 ; i < N ; i++ )
+sum += a[i];
+return sum;
+}
+
+/* { dg-final { scan-assembler-times "vpsrl" 0 } } */
+/* { dg-final { scan-assembler-times "vpshuf" 3 } } */
-- 
2.34.1

[PATCH v4 10/20] Add dispatcher_resolver_function and is_target_clone flags to cgraph_node.

2025-05-28 Thread Alfie Richards

These are needed to correctly mangle FMV declarations.

gcc/ChangeLog:

* cgraph.h (struct cgraph_node): Add dispatcher_resolver_function and
is_target_clone.
---
 gcc/cgraph.h | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 4a4fb7302b1..55812cc09a2 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -907,7 +907,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public 
symtab_node
   used_as_abstract_origin (false),
   lowered (false), process (false), frequency (NODE_FREQUENCY_NORMAL),
   only_called_at_startup (false), only_called_at_exit (false),
-  tm_clone (false), dispatcher_function (false), calls_comdat_local 
(false),
+  tm_clone (false), dispatcher_function (false),
+  dispatcher_resolver_function (false), is_target_clone (false),
+  calls_comdat_local (false),
   icf_merged (false), nonfreeing_fn (false), merged_comdat (false),
   merged_extern_inline (false), parallelized_function (false),
   split_part (false), indirect_call_target (false), local (false),
@@ -1465,6 +1467,12 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
   unsigned tm_clone : 1;
   /* True if this decl is a dispatcher for function versions.  */
   unsigned dispatcher_function : 1;
+  /* True if this decl is a resolver for function versions.  */
+  unsigned dispatcher_resolver_function : 1;
+  /* True this is part of a multiversioned set and this version comes from a
+ target_clone attribute.  Or if this is a dispatched symbol or resolver
+ and the default version comes from a target_clones.  */
+  unsigned is_target_clone : 1;
   /* True if this decl calls a COMDAT-local function.  This is set up in
  compute_fn_summary and inline_call.  */
   unsigned calls_comdat_local : 1;
-- 
2.34.1

[PATCH] libstdc++: Pass small trivial types by value in polymorphic wrappers

2025-05-28 Thread Tomasz Kamiński

This patch adjusts the passing of parameters for move_only_function,
copyable_function and function_ref. Types that are declared as being passed
by value in the signature template argument are passed by value to the invoker
when they are small (at most two pointers), trivially move constructible and
trivially destructible. The latter guarantees that passing them by value has no
user-visible side effects.

In particular, this extends the set of types forwarded by value, which was
previously limited to scalars, to also include specializations of std::span and
std::string_view, and similar standard and program-defined types.

Checking the suitability of the parameter types requires the types to be
complete.
As a consequence, the implementation imposes requirements on instantiation of
move_only_function and copyable_function. To avoid producing errors from
the implementation details, a static_assert was added to the partial
specializations of copyable_function, move_only_function and function_ref.
The static assertion uses the existing __is_complete_or_unbounded, as array type
parameters are automatically decayed in a function type.

The standard already specifies in [res.on.functions] p2.5 that instantiating
these partial specializations with incomplete types leads to undefined behavior.

libstdc++-v3/ChangeLog:

* include/bits/funcwrap.h (__polyfunc::__pass_by_rref): Define.
(__polyfunc::__param_t): Update to use __pass_by_rref.
* include/bits/cpyfunc_impl.h: Assert that all parameter types
are complete.
* include/bits/funcref_impl.h: Likewise.
* include/bits/mofunc_impl.h: Likewise.
* testsuite/20_util/copyable_function/call.cc: New test.
* testsuite/20_util/function_ref/call.cc: New test.
* testsuite/20_util/move_only_function/call.cc: New test.
* testsuite/20_util/copyable_function/conv.cc: New test.
* testsuite/20_util/function_ref/conv.cc: New test.
* testsuite/20_util/move_only_function/conv.cc: New test.
* testsuite/20_util/copyable_function/incomplete_neg.cc: New test.
* testsuite/20_util/function_ref/incomplete_neg.cc: New test.
* testsuite/20_util/move_only_function/incomplete_neg.cc: New test.
---
Tested on x86_64-linux. OK for trunk?

 libstdc++-v3/include/bits/cpyfunc_impl.h  |  4 +++
 libstdc++-v3/include/bits/funcref_impl.h  |  4 +++
 libstdc++-v3/include/bits/funcwrap.h  | 18 +-
 libstdc++-v3/include/bits/mofunc_impl.h   |  4 +++
 .../20_util/copyable_function/call.cc |  7 ++--
 .../20_util/copyable_function/conv.cc | 35 +++
 .../copyable_function/incomplete_neg.cc   | 18 ++
 .../testsuite/20_util/function_ref/call.cc| 10 +++---
 .../testsuite/20_util/function_ref/conv.cc| 34 ++
 .../20_util/function_ref/incomplete_neg.cc| 18 ++
 .../20_util/move_only_function/call.cc|  7 ++--
 .../20_util/move_only_function/conv.cc| 35 +++
 .../move_only_function/incomplete_neg.cc  | 18 ++
 13 files changed, 200 insertions(+), 12 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/20_util/copyable_function/incomplete_neg.cc
 create mode 100644 
libstdc++-v3/testsuite/20_util/function_ref/incomplete_neg.cc
 create mode 100644 
libstdc++-v3/testsuite/20_util/move_only_function/incomplete_neg.cc

diff --git a/libstdc++-v3/include/bits/cpyfunc_impl.h 
b/libstdc++-v3/include/bits/cpyfunc_impl.h
index bc44cd3e313..f1918ddf87a 100644
--- a/libstdc++-v3/include/bits/cpyfunc_impl.h
+++ b/libstdc++-v3/include/bits/cpyfunc_impl.h
@@ -64,6 +64,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_MOF_REF noexcept(_Noex)>
 : __polyfunc::_Cpy_base
 {
+  static_assert(
+   (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) && ...),
+   "each parameter type must be a complete class");
+
   using _Base = __polyfunc::_Cpy_base;
   using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
   using _Signature = _Invoker::_Signature;
diff --git a/libstdc++-v3/include/bits/funcref_impl.h 
b/libstdc++-v3/include/bits/funcref_impl.h
index 1e19866035f..44c992281be 100644
--- a/libstdc++-v3/include/bits/funcref_impl.h
+++ b/libstdc++-v3/include/bits/funcref_impl.h
@@ -68,6 +68,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 class function_ref<_Res(_ArgTypes...) _GLIBCXX_MOF_CV
   noexcept(_Noex)>
 {
+  static_assert(
+   (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) && ...),
+   "each parameter type must be a complete class");
+
   using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
   using _Signature = _Invoker::_Signature;
 
diff --git a/libstdc++-v3/include/bits/funcwrap.h 
b/libstdc++-v3/include/bits/funcwrap.h
index cf261bcd4c8..4fa9e1e84f2 100644
--- a/libstdc++-v3/include/bits/funcwrap.h
+++ b/libstdc++-v3/include

Re: [PATCH][GCC16][GCC15] aarch64: Add support for FUJITSU-MONAKA (-mcpu=fujitsu-monaka) CPU

2025-05-28 Thread Kyrylo Tkachov

Hi Yuta-san

> On 23 May 2025, at 07:49, Yuta Mukai (Fujitsu)  wrote:
> 
> Hello,
> 
> We would like to enable features for FUJITSU-MONAKA that were implemented in 
> GCC after we added support for FUJITSU-MONAKA.
> As the features were implemented in GCC15, we also want to backport it to 
> GCC15.
> 
> Thanks to Andre Vieira for notifying us.
> 
> Bootstrapped/regtested on aarch64-unknown-linux-gnu.
> 
> We would be grateful if someone could push this on our behalf, as we do not 
> have write access.

Thanks, this is ok and I’ve pushed it to trunk with an adjusted ChangeLog entry.
I’ll push a backport to the GCC 15 branch next week after some simple smoke 
testing.

Kyrill

   2025-05-23  Yuta Mukai  

   gcc/ChangeLog:

   * config/aarch64/aarch64-cores.def (fujitsu-monaka): Update ISA
   features.

> 
> Thanks,
> Yuta
> --
> Yuta Mukai
> Fujitsu Limited
> 
> <0001-aarch64-Enable-newly-implemented-features-for-FUJITS.patch>

RE: [PATCH] doc: Fix extend.texi menu

2025-05-28 Thread Jiang, Haochen

> From: Jakub Jelinek 
> Sent: Wednesday, May 28, 2025 3:41 PM
> 
> On Wed, May 28, 2025 at 10:44:20AM +0800, Haochen Jiang wrote:
> > gcc/ChangeLog:
> >
> > * doc/extend.texi (C Extensions): Add missing menu items.
> > ---
> >  gcc/doc/extend.texi | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index
> > edd3a0d96c5..44d492a9d40 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -31,6 +31,7 @@ extensions, accepted by GCC in C90 mode and in C++.
> >  * Thread-Local::Per-thread variables.
> >  * OpenMP::  Multiprocessing extensions.
> >  * OpenACC:: Extensions for offloading code to accelerator 
> > devices.
> > +* _Countof::   The number of elements of arrays.
> >  * Inline::  Defining inline functions (as fast as macros).
> >  * Volatiles::   What constitutes an access to a volatile object.
> >  * Using Assembly Language with C:: Instructions and extensions for
> interfacing C with assembler.
> 
> All the surrounding entries are using spaces but the new line uses tabs.
> Please use spaces for consistency and align "The" right below "Extensions".
> 
> Ok for trunk with that change.
> 

Committed with that change.

Thx,
Haochen

Re: [PATCH v2 2/3] vect: Remove non-SLP paths in strided slp/elementwise.

2025-05-28 Thread Richard Biener

On Tue, May 27, 2025 at 6:02 PM Robin Dapp  wrote:
>
> This removes the non-SLP paths that were made unreachable in the
> previous patch.

This short series is OK.  Please squash the two commits before pushing.

Thanks,
Richard.

> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths.
> ---
>  gcc/tree-vect-stmts.cc | 49 --
>  1 file changed, 18 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 07e309d02e5..3710694ac75 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -10689,8 +10689,7 @@ vectorizable_load (vec_info *vinfo,
>   first_dr_info = dr_info;
> }
>
> -  if (1 && grouped_load
> - && memory_access_type == VMAT_STRIDED_SLP)
> +  if (grouped_load && memory_access_type == VMAT_STRIDED_SLP)
> {
>   group_size = DR_GROUP_SIZE (first_stmt_info);
>   ref_type = get_group_alias_ptr_type (first_stmt_info);
> @@ -10830,22 +10829,20 @@ vectorizable_load (vec_info *vinfo,
>   ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
> }
>
> -  if (1)
> +  /* For SLP permutation support we need to load the whole group,
> +not only the number of vector stmts the permutation result
> +fits in.  */
> +  if (slp_perm)
> {
> - /* For SLP permutation support we need to load the whole group,
> -not only the number of vector stmts the permutation result
> -fits in.  */
> - if (slp_perm)
> -   {
> - /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
> -variable VF.  */
> - unsigned int const_vf = vf.to_constant ();
> - ncopies = CEIL (group_size * const_vf, const_nunits);
> - dr_chain.create (ncopies);
> -   }
> - else
> -   ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
> + /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
> +variable VF.  */
> + unsigned int const_vf = vf.to_constant ();
> + ncopies = CEIL (group_size * const_vf, const_nunits);
> + dr_chain.create (ncopies);
> }
> +  else
> +   ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
> +
>unsigned int group_el = 0;
>unsigned HOST_WIDE_INT
> elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
> @@ -10883,14 +10880,13 @@ vectorizable_load (vec_info *vinfo,
> CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp);
>
>   group_el += lnel;
> - if (0
> - || group_el == group_size)
> + if (group_el == group_size)
> {
>   n_groups++;
>   /* When doing SLP make sure to not load elements from
>  the next vector iteration, those will not be accessed
>  so just use the last element again.  See PR107451.  */
> - if (0 || known_lt (n_groups, vf))
> + if (known_lt (n_groups, vf))
> {
>   tree newoff = copy_ssa_name (running_off);
>   gimple *incr
> @@ -10938,19 +10934,10 @@ vectorizable_load (vec_info *vinfo,
>
>   if (!costing_p)
> {
> - if (1)
> -   {
> - if (slp_perm)
> -   dr_chain.quick_push (gimple_assign_lhs (new_stmt));
> - else
> -   slp_node->push_vec_def (new_stmt);
> -   }
> + if (slp_perm)
> +   dr_chain.quick_push (gimple_assign_lhs (new_stmt));
>   else
> -   {
> - if (j == 0)
> -   *vec_stmt = new_stmt;
> - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
> -   }
> +   slp_node->push_vec_def (new_stmt);
> }
> }
>if (slp_perm)
> --
> 2.49.0
>

[PATCH] [RFC] RISC-V: Add extra check to help choosing multilib with equivalent arch.

2025-05-28 Thread yunzezhu

From: Yunze Zhu 

Currently when choosing multilib set for target like 
march=rv32imaf_zca/mabi=ilp32,
gnu toolchain reports "Cannot find suitable multilib set".
This is because, under the current implication rules, zca implies c only for
these combinations of extensions: Zca, F_Zca_Zcf or FD_Zca_Zcf_Zcd.
f_zca is not one of these combinations, so extension c cannot be implied
and the multilib set march=rv32imac/mabi=ilp32 cannot be selected.
The most accurate method to fix this problem is changing multilib in 
MULTILIB_REQUIRED: march=rv32imac/mabi=ilp32
to an equivalent one: march=rv32ima_zca/mabi=ilp32.
However, this method may cause compatibility issues with multilib path in 
previous toolchain.
There is an alternative method that add an extra check in multilib selection 
functions,
which checks whether c extension in multilibs is subset of zc* extensions in 
arch string.
By this method not only totally matched multilib sets but equivalent multilib 
subsets could be selected.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc 
(riscv_subset_list::match_score_inc_p): New Function.
* config/riscv/riscv-subset.h: New Function.
---
 gcc/common/config/riscv/riscv-common.cc | 27 +
 gcc/config/riscv/riscv-subset.h |  2 ++
 2 files changed, 29 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index a6d8763f032..f43899bb413 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -412,12 +412,39 @@ riscv_subset_list::match_score (riscv_subset_list *list) 
const
   for (s = list->m_head; s != NULL; s = s->next)
 if (this->lookup (s->name.c_str ()) != NULL)
   score++;
+else if (this->match_score_inc_p (s->name.c_str (), list))
+  score++;
 else
   return 0;
 
   return score;
 }
 
+/* Check if given extension is equivalent to one or group of extensions
+in given subset list.  */
+bool
+riscv_subset_list::match_score_inc_p (std::string name,
+riscv_subset_list *multilib) const
+{
+  if (name.compare ("c") != 0 || this->lookup ("zca") == NULL)
+return false;
+
+  /* Check equivalent requirement when having d extension in multilib.  */
+  if (multilib->lookup ("d") != NULL)
+{
+  if (multilib->xlen () == 32)
+   return this->lookup ("zcf") != NULL && this->lookup ("zcd") != NULL;
+  else
+   return this->lookup ("zcd") != NULL;
+}
+
+  /* Check equivalent requirement when having f extension in multilib.  */
+  if (multilib->lookup ("f") != NULL && multilib->xlen () == 32)
+return this->lookup ("zcf") != NULL;
+
+  return true;
+}
+
 /* Get the rank for single-letter subsets, lower value meaning higher
priority.  */
 
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index c5d9fab4de9..f80210cd755 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -114,6 +114,8 @@ public:
 
   int match_score (riscv_subset_list *) const;
 
+  bool match_score_inc_p (std::string, riscv_subset_list *) const;
+
   void set_loc (location_t);
 
   void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; }
-- 
2.47.1

[Repost PATCH v4 00/20] FMV refactor and ACLE compliance.

2025-05-28 Thread Alfie Richards

Hi all,

Reposting this as requested by Jeff Law.

-

Another update to this series.

This patch changes the version info structure to be sorted by
priority. This allows easier reasoning for optimisations and prevents having to
calculate the priority of functions repeatedly.

The other change is that the target_clones pass was split in two. This is
because the target_clones pass now dispatches the target_versions and
target_clones, and different versions may have arbitrarily different bodies.
Therefore, allowing passes like efvp before dispatching made some invalid
optimisations.
However, as Alice Carlotti (alice.carlo...@arm.com) pointed out offline, the
target_clones pass was likely put in this position late as for target_clones
it is valid, as all the versions have the same body.
So I split it in two. In the early stage complicated cases where there are
multiple decls are expanded and dispatched. In the later stages, the simple
case of a lone target_clones decl is dispatched (as is always the case
for TARGET_HAS_FMV_TARGET_ATTRIBUTE targets).

Regression tested and bootstrapped for aarch64-none-linux-gnu
and x86_64-unknown-linux-gnu.

Cross compiled and checked FMV tests for riscv and powerpc.

Hoping for GCC16 stage 1 for this.

I have a Forgejo PR if reviewers want to try using that for review:
https://forge.sourceware.org/gcc/gcc-TEST/pulls/49

Kind regards,
Alfie

Change log
==

V4:
- Changed version_info structure to be sorted by priority
- Split the target_clones pass into early/late stages
- Split out fix for PR c++/119498

V3: https://gcc.gnu.org/pipermail/gcc-patches/2025-March/679488.html
- Added reject target_clones version logic and hook
- Added pretty print for string_slice
- Refactored merging and conflict logic in front end
- Improved diagnostics

V2: https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675960.html
- Changed recording of assembly name to be done in version info initialisation
- Changed behaviour for a lone default decl

V1: 
https://gcc.gnu.org/pipermail/gcc-patches/2025-February/674973.html
- Initial

Alfie Richards (18):
  Add string_slice class.
  Remove unnecessary `record` argument from maybe_version_functions.
  Update is_function_default_version to work with target_version (Approved).
  Refactor record_function_versions.
  Change make_attribute to take string_slice (Approved).
  Add get_clone_versions and get_target_version functions.
  Add assembler_name to cgraph_function_version_info.
  Add dispatcher_resolver_function and is_target_clone flags to
cgraph_node.
  Add clone_identifier function.
  Refactor FMV name mangling.
  Refactor riscv target parsing to take string_slice.
  Add reject_target_clone hook for filtering target_clone versions.
  Change target_version semantics to follow ACLE specification.
  Refactor FMV frontend conflict and merging logic and hooks.
  Support mixing of target_clones and target_version.
  Fix FMV return type ambiguation
  Add diagnostic tests for Aarch64 FMV.
  Remove FMV beta warning.

Alice Carlotti (2):
  Add PowerPC FMV symbol tests.
  Add x86 FMV symbol tests

 gcc/attribs.cc| 170 ---
 gcc/attribs.h |   5 +-
 gcc/c-family/c-attribs.cc |  33 +-
 gcc/c-family/c-format.cc  |   7 +
 gcc/c-family/c-format.h   |   1 +
 gcc/cgraph.cc |  80 ++--
 gcc/cgraph.h  |  29 +-
 gcc/cgraphclones.cc   |  16 +-
 gcc/cgraphunit.cc |   9 +
 gcc/config/aarch64/aarch64.cc | 273 +---
 gcc/config/aarch64/aarch64.opt|   2 +-
 gcc/config/i386/i386-features.cc  | 141 +++---
 gcc/config/riscv/riscv-protos.h   |   2 +
 gcc/config/riscv/riscv-target-attr.cc |  14 +-
 gcc/config/riscv/riscv.cc | 267 +--
 gcc/config/rs6000/rs6000.cc   | 150 +--
 gcc/cp/call.cc|  10 +
 gcc/cp/class.cc   |  19 +-
 gcc/cp/cp-gimplify.cc |  11 +-
 gcc/cp/cp-tree.h  |   4 +-
 gcc/cp/decl.cc|  90 +++-
 gcc/cp/decl2.cc   |   2 +-
 gcc/cp/typeck.cc  |  10 +
 gcc/doc/invoke.texi   |   5 +-
 gcc/doc/tm.texi   |  16 +-
 gcc/doc/tm.texi.in|   2 +
 gcc/hooks.cc  |  13 +
 gcc/hooks.h   |   4 +
 gcc/ipa.cc|  11 +
 gcc/multiple_target.cc| 421 ++
 gcc/passes.def|   3 +-
 gcc/pretty-print.cc

[PATCH v4 03/20] Add string_slice class.

2025-05-28 Thread Alfie Richards

The string_slice inherits from array_slice and is used to refer to a
substring of an array that is memory managed elsewhere without modifying
the underlying array.

For example, this is useful in cases such as when needing to refer to a
substring of an attribute in the syntax tree.

Adds some minimal helper functions for string_slice,
such as a strtok alternative, equality operators, strcmp, and a function
to strip whitespace from the beginning and end of a string_slice.

gcc/c-family/ChangeLog:

* c-format.cc (local_string_slice_node): New node type.
(asm_fprintf_char_table): New entry.
(init_dynamic_diag_info): Add support for string_slice.
* c-format.h (T_STRING_SLICE): New node type.

gcc/ChangeLog:

* pretty-print.cc (format_phase_2): Add support for string_slice.
* vec.cc (string_slice::tokenize): New method.
(strcmp): New implementation for string_slice.
(string_slice::strip): New method.
(test_string_slice_initializers): New test.
(test_string_slice_tokenize): Ditto.
(test_string_slice_strcmp): Ditto.
(test_string_slice_equality): Ditto.
(test_string_slice_inequality): Ditto.
(test_string_slice_invalid): Ditto.
(test_string_slice_strip): Ditto.
(vec_cc_tests): Add new tests.
* vec.h (class string_slice): New class.
(strcmp): New implementation for string_slice.
---
 gcc/c-family/c-format.cc |   7 ++
 gcc/c-family/c-format.h  |   1 +
 gcc/pretty-print.cc  |  10 ++
 gcc/vec.cc   | 207 +++
 gcc/vec.h|  45 +
 5 files changed, 270 insertions(+)

diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc
index 211d20dd25b..dd650d9d520 100644
--- a/gcc/c-family/c-format.cc
+++ b/gcc/c-family/c-format.cc
@@ -70,6 +70,7 @@ static GTY(()) tree local_event_ptr_node;
 static GTY(()) tree local_pp_element_ptr_node;
 static GTY(()) tree local_gimple_ptr_node;
 static GTY(()) tree local_cgraph_node_ptr_node;
+static GTY(()) tree local_string_slice_node;
 static GTY(()) tree locus;
 
 static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
@@ -770,6 +771,7 @@ static const format_char_info asm_fprintf_char_table[] =
   { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "c",  NULL }, \
   { "r",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","//cR",   NULL 
}, \
   { "@",   1, STD_C89, { T_EVENT_PTR,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL }, \
+  { "B",   1, STD_C89, { T_STRING_SLICE,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "",   NULL }, \
   { "e",   1, STD_C89, { T_PP_ELEMENT_PTR,   BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"", NULL }, \
   { "<",   0, STD_C89, NOARGUMENTS, "",  "<",   NULL }, \
   { ">",   0, STD_C89, NOARGUMENTS, "",  ">",   NULL }, \
@@ -5211,6 +5213,11 @@ init_dynamic_diag_info (void)
   || local_cgraph_node_ptr_node == void_type_node)
 local_cgraph_node_ptr_node = get_named_type ("cgraph_node");
 
+  /* Similar to the above but for string_slice*.  */
+  if (!local_string_slice_node
+  || local_string_slice_node == void_type_node)
+local_string_slice_node = get_named_type ("string_slice");
+
   /* Similar to the above but for diagnostic_event_id_t*.  */
   if (!local_event_ptr_node
   || local_event_ptr_node == void_type_node)
diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 323338cb8e7..d44d3862d83 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -317,6 +317,7 @@ struct format_kind_info
 #define T89_G   { STD_C89, NULL, &local_gimple_ptr_node }
 #define T_CGRAPH_NODE   { STD_C89, NULL, &local_cgraph_node_ptr_node }
 #define T_EVENT_PTR{ STD_C89, NULL, &local_event_ptr_node }
+#define T_STRING_SLICE{ STD_C89, NULL, &local_string_slice_node }
 #define T_PP_ELEMENT_PTR{ STD_C89, NULL, &local_pp_element_ptr_node }
 #define T89_T   { STD_C89, NULL, &local_tree_type_node }
 #define T89_V  { STD_C89, NULL, T_V }
diff --git a/gcc/pretty-print.cc b/gcc/pretty-print.cc
index abd6c0b528f..aacd43420dd 100644
--- a/gcc/pretty-print.cc
+++ b/gcc/pretty-print.cc
@@ -2035,6 +2035,16 @@ format_phase_2 (pretty_printer *pp,
pp_string (pp, va_arg (*text.m_args_ptr, const char *));
  break;
 
+   case 'B':
+ {
+   string_slice s = *va_arg (*text.m_args_ptr, string_slice *);
+   if (quote)
+ pp_quoted_string (pp, s.begin (), s.size ());
+   else
+ pp_string_n (pp, s.begin (), s.size ());
+   break;
+ }
+
case 'p':
  pp_pointer (pp, va_arg (*text.m_args_ptr, void *));

[PATCH v4 08/20] Add get_clone_versions and get_target_version functions.

2025-05-28 Thread Alfie Richards

This is a reimplementation of get_target_clone_attr_len,
get_attr_str, and separate_attrs using string_slice and auto_vec to make
memory management and use simpler.

Adds get_target_version helper function to get the target_version string
from a decl.

gcc/c-family/ChangeLog:

* c-attribs.cc (handle_target_clones_attribute): Change to use
get_clone_versions.

gcc/ChangeLog:

* tree.cc (get_clone_versions): New function.
(get_clone_attr_versions): New function.
(get_target_version): New function.
* tree.h (get_clone_versions): New function.
(get_clone_attr_versions): New function.
(get_target_version): New function.
---
 gcc/c-family/c-attribs.cc |  4 ++-
 gcc/tree.cc   | 59 +++
 gcc/tree.h| 11 
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 5a0e3d328ba..5dff489fcca 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -6132,7 +6132,9 @@ handle_target_clones_attribute (tree *node, tree name, 
tree ARG_UNUSED (args),
}
}
 
-  if (get_target_clone_attr_len (args) == -1)
+  auto_vec versions= get_clone_attr_versions (args, NULL);
+
+  if (versions.length () == 1)
{
  warning (OPT_Wattributes,
   "single % attribute is ignored");
diff --git a/gcc/tree.cc b/gcc/tree.cc
index eccfcc89da4..fdcdfb336bc 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -15372,6 +15372,65 @@ get_target_clone_attr_len (tree arglist)
   return str_len_sum;
 }
 
+/* Returns an auto_vec of string_slices containing the version strings from
+   ARGLIST.  DEFAULT_COUNT is incremented for each default version found.  */
+
+auto_vec
+get_clone_attr_versions (const tree arglist, int *default_count)
+{
+  gcc_assert (TREE_CODE (arglist) == TREE_LIST);
+  auto_vec versions;
+
+  static const char separator_str[] = {TARGET_CLONES_ATTR_SEPARATOR, 0};
+  string_slice separators = string_slice (separator_str);
+
+  for (tree arg = arglist; arg; arg = TREE_CHAIN (arg))
+{
+  string_slice str = string_slice (TREE_STRING_POINTER (TREE_VALUE (arg)));
+  while (str.is_valid ())
+   {
+ string_slice attr = string_slice::tokenize (&str, separators);
+ attr = attr.strip ();
+
+ if (attr == "default" && default_count)
+   (*default_count)++;
+ versions.safe_push (attr);
+   }
+}
+  return versions;
+}
+
+/* Returns an auto_vec of string_slices containing the version strings from
+   the target_clone attribute from DECL.  DEFAULT_COUNT is incremented for each
+   default version found.  */
+auto_vec
+get_clone_versions (const tree decl, int *default_count)
+{
+  tree attr = lookup_attribute ("target_clones", DECL_ATTRIBUTES (decl));
+  if (!attr)
+return auto_vec ();
+  tree arglist = TREE_VALUE (attr);
+  return get_clone_attr_versions (arglist, default_count);
+}
+
+/* If DECL has a target_version attribute, returns a string_slice containing 
the
+   attribute value.  Otherwise, returns string_slice::invalid.
+   Only works for target_version due to target attributes allowing multiple
+   string arguments to specify one target.  */
+string_slice
+get_target_version (const tree decl)
+{
+  gcc_assert (!TARGET_HAS_FMV_TARGET_ATTRIBUTE);
+
+  tree attr = lookup_attribute ("target_version", DECL_ATTRIBUTES (decl));
+
+  if (!attr)
+return string_slice::invalid ();
+
+  return string_slice (TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
+  .strip ();
+}
+
 void
 tree_cc_finalize (void)
 {
diff --git a/gcc/tree.h b/gcc/tree.h
index 99f26177628..a89f3cf7189 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "tree-core.h"
 #include "options.h"
+#include "vec.h"
 
 /* Convert a target-independent built-in function code to a combined_fn.  */
 
@@ -7052,4 +7053,14 @@ extern tree get_attr_nonstring_decl (tree, tree * = 
NULL);
 
 extern int get_target_clone_attr_len (tree);
 
+/* Returns the version string for a decl with target_version attribute.
+   Returns an invalid string_slice if no attribute is present.  */
+extern string_slice get_target_version (const tree);
+/* Returns a vector of the version strings from a target_clones attribute on
+   a decl.  Can also record the number of default versions found.  */
+extern auto_vec get_clone_versions (const tree, int * = NULL);
+/* Returns a vector of the version strings from a target_clones attribute
+   directly.  */
+extern auto_vec get_clone_attr_versions (const tree, int *);
+
 #endif  /* GCC_TREE_H  */
-- 
2.34.1

[PATCH v4 04/20] Remove unnecessary `record` argument from maybe_version_functions.

2025-05-28 Thread Alfie Richards

Previously, the `record` argument in maybe_version_functions allowed the
call to cgraph_node::record_function_versions to be skipped.  However,
this was only skipped when both decls were already marked as versioned,
in which case we trigger the early exit in record_function_versions
instead. Therefore, the argument is unnecessary.

gcc/cp/ChangeLog:

* class.cc (add_method): Remove argument.
* cp-tree.h (maybe_version_functions): Ditto.
* decl.cc (decls_match): Ditto.
(maybe_version_functions): Ditto.
---
 gcc/cp/class.cc  |  2 +-
 gcc/cp/cp-tree.h |  2 +-
 gcc/cp/decl.cc   | 13 +
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index 2b694b98e56..93f1a1bdd81 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -1402,7 +1402,7 @@ add_method (tree type, tree method, bool via_using)
   /* If these are versions of the same function, process and
 move on.  */
   if (TREE_CODE (fn) == FUNCTION_DECL
- && maybe_version_functions (method, fn, true))
+ && maybe_version_functions (method, fn))
continue;
 
   if (DECL_INHERITED_CTOR (method))
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 55f986e25c1..898054c2891 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7122,7 +7122,7 @@ extern void determine_local_discriminator (tree, tree = 
NULL_TREE);
 extern bool member_like_constrained_friend_p   (tree);
 extern bool fns_correspond (tree, tree);
 extern int decls_match (tree, tree, bool = true);
-extern bool maybe_version_functions(tree, tree, bool);
+extern bool maybe_version_functions(tree, tree);
 extern bool validate_constexpr_redeclaration   (tree, tree);
 extern bool merge_default_template_args(tree, tree, bool);
 extern tree duplicate_decls(tree, tree,
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 4e97093b134..9cb56eac4a9 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -1216,9 +1216,7 @@ decls_match (tree newdecl, tree olddecl, bool 
record_versions /* = true */)
  && targetm.target_option.function_versions (newdecl, olddecl))
{
  if (record_versions)
-   maybe_version_functions (newdecl, olddecl,
-(!DECL_FUNCTION_VERSIONED (newdecl)
- || !DECL_FUNCTION_VERSIONED (olddecl)));
+   maybe_version_functions (newdecl, olddecl);
  return 0;
}
 }
@@ -1285,11 +1283,11 @@ maybe_mark_function_versioned (tree decl)
 }
 
 /* NEWDECL and OLDDECL have identical signatures.  If they are
-   different versions adjust them and return true.
-   If RECORD is set to true, record function versions.  */
+   different versions adjust them, record function versions, and return
+   true.  */
 
 bool
-maybe_version_functions (tree newdecl, tree olddecl, bool record)
+maybe_version_functions (tree newdecl, tree olddecl)
 {
   if (!targetm.target_option.function_versions (newdecl, olddecl))
 return false;
@@ -1312,8 +1310,7 @@ maybe_version_functions (tree newdecl, tree olddecl, bool 
record)
   maybe_mark_function_versioned (newdecl);
 }
 
-  if (record)
-cgraph_node::record_function_versions (olddecl, newdecl);
+  cgraph_node::record_function_versions (olddecl, newdecl);
 
   return true;
 }
-- 
2.34.1

Re: [PATCH 0/3] Redirect to specific target based on TARGET_VERSION_COMPATIBLE

2025-05-28 Thread Alfie Richards




On 28/05/2025 03:36, Jeff Law wrote:



On 5/27/25 2:36 AM, Alfie Richards wrote:

Hi Jeff,

On 22/05/2025 21:02, Jeff Law wrote:



On 5/22/25 9:05 AM, Alfie Richards wrote:

Hi Jeff,

I sent this patch with my implementation a while ago:
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681043.html

There hasn't been any feedback on that patch yet.

These patches are still useful and I would like to go ahead with 
them. I am in favour of using my implementation as it is a bit 
stronger, but it also requires my larger FMV series to be approved 
first.
Can you ping your larger FMV series?  I strongly suspect everyone is 
digging out from everything that queued up while the trunk was in 
bugfixing stages.



Here's the series: https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681047.html


I'd love any feedback on that and to get it moving.

You submitted the original at a bad time, right near a release ;-)

Can you repost the series, I think it's like 20 patches and things may 
have moved since then.  It's virtually certain I don't have them in my 
inbox right now ;-)  I may have thought they were primarily ARM and punted 
assuming Richard S. would take care of them.



Just reposted now!

Just FYI, there is a V5 in progress addressing Jason’s C++ diagnostics. 
Only a minor diagnostics change so far.


Thank you for the advice and the help!

Thanks,
Alfie


Thanks,

Jeff

[PATCH v4 02/20] Add x86 FMV symbol tests

2025-05-28 Thread Alfie Richards

From: Alice Carlotti 

This is for testing the x86 mangling of FMV versioned function
assembly names.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv-symbols1.C: New test.
* g++.target/i386/mv-symbols2.C: New test.
* g++.target/i386/mv-symbols3.C: New test.
* g++.target/i386/mv-symbols4.C: New test.
* g++.target/i386/mv-symbols5.C: New test.
* g++.target/i386/mvc-symbols1.C: New test.
* g++.target/i386/mvc-symbols2.C: New test.
* g++.target/i386/mvc-symbols3.C: New test.
* g++.target/i386/mvc-symbols4.C: New test.

Co-authored-by: Alfie Richards 
---
 gcc/testsuite/g++.target/i386/mv-symbols1.C  | 68 
 gcc/testsuite/g++.target/i386/mv-symbols2.C  | 56 
 gcc/testsuite/g++.target/i386/mv-symbols3.C  | 44 +
 gcc/testsuite/g++.target/i386/mv-symbols4.C  | 50 ++
 gcc/testsuite/g++.target/i386/mv-symbols5.C  | 56 
 gcc/testsuite/g++.target/i386/mvc-symbols1.C | 44 +
 gcc/testsuite/g++.target/i386/mvc-symbols2.C | 29 +
 gcc/testsuite/g++.target/i386/mvc-symbols3.C | 35 ++
 gcc/testsuite/g++.target/i386/mvc-symbols4.C | 23 +++
 9 files changed, 405 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/mv-symbols1.C
 create mode 100644 gcc/testsuite/g++.target/i386/mv-symbols2.C
 create mode 100644 gcc/testsuite/g++.target/i386/mv-symbols3.C
 create mode 100644 gcc/testsuite/g++.target/i386/mv-symbols4.C
 create mode 100644 gcc/testsuite/g++.target/i386/mv-symbols5.C
 create mode 100644 gcc/testsuite/g++.target/i386/mvc-symbols1.C
 create mode 100644 gcc/testsuite/g++.target/i386/mvc-symbols2.C
 create mode 100644 gcc/testsuite/g++.target/i386/mvc-symbols3.C
 create mode 100644 gcc/testsuite/g++.target/i386/mvc-symbols4.C

diff --git a/gcc/testsuite/g++.target/i386/mv-symbols1.C 
b/gcc/testsuite/g++.target/i386/mv-symbols1.C
new file mode 100644
index 000..1290299aea5
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/mv-symbols1.C
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target("default")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target("arch=slm")))
+int foo ()
+{
+  return 3;
+}
+
+__attribute__((target("sse4.2")))
+int foo ()
+{
+  return 5;
+}
+
+__attribute__((target("sse4.2")))
+int foo (int)
+{
+  return 6;
+}
+
+__attribute__((target("arch=slm")))
+int foo (int)
+{
+  return 4;
+}
+
+__attribute__((target("default")))
+int foo (int)
+{
+  return 2;
+}
+
+int bar()
+{
+  return foo ();
+}
+
+int bar(int x)
+{
+  return foo (x);
+}
+
+/* When updating any of the symbol names in these tests, make sure to also
+   update any tests for their absence in mvc-symbolsN.C */
+
+/* { dg-final { scan-assembler-times "\n_Z3foov:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.arch_slm:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.sse4.2:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tcall\t_Z7_Z3foovv\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z7_Z3foovv, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times 
"\n\t\.set\t_Z7_Z3foovv,_Z3foov\.resolver\n" 1 } } */
+
+/* { dg-final { scan-assembler-times "\n_Z3fooi:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.arch_slm:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.sse4.2:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tcall\t_Z7_Z3fooii\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z7_Z3fooii, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times 
"\n\t\.set\t_Z7_Z3fooii,_Z3fooi\.resolver\n" 1 } } */
diff --git a/gcc/testsuite/g++.target/i386/mv-symbols2.C 
b/gcc/testsuite/g++.target/i386/mv-symbols2.C
new file mode 100644
index 000..8b75565d78d
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/mv-symbols2.C
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target("default")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target("arch=slm")))
+int foo ()
+{
+  return 3;
+}
+
+__attribute__((target("sse4.2")))
+int foo ()
+{
+  return 5;
+}
+
+__attribute__((target("sse4.2")))
+int foo (int)
+{
+  return 6;
+}
+
+__attribute__((target("arch=slm")))
+int foo (int)
+{
+  return 4;
+}
+
+__attribute__((target("default")))
+int foo (int)
+{
+  return 2;
+}
+
+/* When updating any of the symbol names in these tests, make sure to also
+   update any tests for their absence in mvc-symbolsN.C */
+
+/* { dg-final { scan-assembler-times "\n_Z3foov:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.arch_slm:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.sse4.2:\n" 1 } } */
+/* { dg-final { scan-assembl

[PATCH v4 01/20] Add PowerPC FMV symbol tests.

2025-05-28 Thread Alfie Richards

From: Alice Carlotti 

This tests the mangling of function assembly names when annotated with
target_clones attributes.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/mvc-symbols1.C: New test.
* g++.target/powerpc/mvc-symbols2.C: New test.
* g++.target/powerpc/mvc-symbols3.C: New test.
* g++.target/powerpc/mvc-symbols4.C: New test.

Co-authored-by: Alfie Richards 
---
 .../g++.target/powerpc/mvc-symbols1.C | 47 +++
 .../g++.target/powerpc/mvc-symbols2.C | 35 ++
 .../g++.target/powerpc/mvc-symbols3.C | 41 
 .../g++.target/powerpc/mvc-symbols4.C | 29 
 4 files changed, 152 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/powerpc/mvc-symbols1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/mvc-symbols2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/mvc-symbols3.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/mvc-symbols4.C

diff --git a/gcc/testsuite/g++.target/powerpc/mvc-symbols1.C 
b/gcc/testsuite/g++.target/powerpc/mvc-symbols1.C
new file mode 100644
index 000..9424382bf14
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/mvc-symbols1.C
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target_clones("default", "cpu=power6", "cpu=power6x")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target_clones("cpu=power6x", "cpu=power6", "default")))
+int foo (int)
+{
+  return 2;
+}
+
+int bar()
+{
+  return foo ();
+}
+
+int bar(int x)
+{
+  return foo (x);
+}
+
+/* { dg-final { scan-assembler-times "\n_Z3foov\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tbl _Z3foov\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3foov, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3foov,_Z3foov\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.default\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6\n" 1 } } 
*/
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6x\n" 0 } 
} */
+
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tbl _Z3fooi\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3fooi, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3fooi,_Z3fooi\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3fooi\.default\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3fooi\.cpu_power6\n" 0 } } 
*/
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3fooi\.cpu_power6x\n" 1 } 
} */
diff --git a/gcc/testsuite/g++.target/powerpc/mvc-symbols2.C 
b/gcc/testsuite/g++.target/powerpc/mvc-symbols2.C
new file mode 100644
index 000..edf54480efd
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/mvc-symbols2.C
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target_clones("default", "cpu=power6", "cpu=power6x")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target_clones("cpu=power6x", "cpu=power6", "default")))
+int foo (int)
+{
+  return 2;
+}
+
+/* { dg-final { scan-assembler-times "\n_Z3foov\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3foov, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3foov,_Z3foov\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.default\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6\n" 1 } } 
*/
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6x\n" 0 } 
} */
+
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3fooi, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3fooi,_Z3fooi\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.q

Re: Disable 'pass_nrv' for offloading compilation [PR119835] (was: [PATCH] Verify 'GIMPLE_RETURN' vs. 'RESULT_DECL' if 'aggregate_value_p' [PR119835])

2025-05-28 Thread Richard Biener

On Tue, 27 May 2025, Thomas Schwinge wrote:

> Hi!
> 
> On 2025-05-23T17:01:31+0200, Richard Biener  wrote:
> > Am 23.05.2025 um 16:49 schrieb Thomas Schwinge :
> >> This fell out of me looking into PR119835.  This doesn't resolve the 
> >> underlying
> >> issue, but instead of failing GIMPLE semantics verification just by chance 
> >> in
> >> the 'GIMPLE pass: nrv' context, it makes the issue observable generally.
> >> (... thereby regressing a small number of offloading test cases where host 
> >> vs.
> >> offload compilers disagree on 'aggregate_value_p' for functions that return
> >> aggregate types.)
> >> 
> >> This cross-references just the three places in the code that I ran into;
> >> likely there are more?
> >> 
> >> No regressions for powerpc64le-unknown-linux-gnu, x86_64-pc-linux-gnu 
> >> bootstrap
> >> and 'make check' (without offloading configured).
> >
> > I think this is a step in the wrong direction in absence of quoting the 
> > wrong thing that happens downstream when we violate this (an assert does 
> > not qualify).  ESP. When at the same time we allow the actual thing 
> > returned to be a register (aka SSA name)
> 
> ACK; you certainly understand GIMPLE and RTL expansion semantics better
> than I do.  ;-)
> 
> You're also implicitly telling me that 
> "'GIMPLE_RETURN' vs. 'RESULT_DECL' if 'aggregate_value_p'" isn't actually
> a GIMPLE semantics invariant, thanks.  I conclude that in case that this
> "invariant" is violated, that's not a problem for RTL expansion of
> 'GIMPLE_RETURN', which is then handled like all the other cases where
> "we are not returning the current function's RESULT_DECL".
> 
> I'm not sure whether just disabling the 'assert' in
> 'gcc/tree-nrv.cc:pass_nrv::execute' is conceptually right (or may
> potentially drive that pass into an inconsistent state), and as we of
> course intend to eventually fix this issue properly (thanks for your
> ideas in PR119835!), so for now, I propose to simply
> "Disable 'pass_nrv' for offloading compilation [PR119835]", see attached.
> Any comments before I push that?

I'm not sure you can disable this pass - it runs even at -O0 so parts
of it might be required for correctness, since some types cannot be
copied.  Maybe RTL expansion will apply NRV if that's the case,
irrespective of whether the flag is set, but maybe not.

I think a more appropriate solution would be to simply change
the assert as follows

diff --git a/gcc/tree-nrv.cc b/gcc/tree-nrv.cc
index 180ce39de4c..3a036bc2c82 100644
--- a/gcc/tree-nrv.cc
+++ b/gcc/tree-nrv.cc
@@ -171,12 +171,12 @@ pass_nrv::execute (function *fun)
 
  if (greturn *return_stmt = dyn_cast  (stmt))
{
- /* In a function with an aggregate return value, the
-gimplifier has changed all non-empty RETURN_EXPRs to
-return the RESULT_DECL.  */
+ /* In a function with an aggregate return value, if
+there is a return that does not return RESULT_DECL
+we cannot perform NRV optimizations.  */
  ret_val = gimple_return_retval (return_stmt);
- if (ret_val)
-   gcc_assert (ret_val == result);
+ if (ret_val && ret_val != result)
+   return 0;
}
  else if (gimple_has_lhs (stmt)


Richard.

> 
> Grüße
>  Thomas
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

DEFAULT_PCC_STRUCT_RETURN on NetBSD vs Linux

2025-05-28 Thread John Paul Adrian Glaubitz

Hi,

while looking into building GCC with 4 bytes alignment by default on
m68k, I ran into the following definition in gcc/config/m68k/linux.h:

/* For m68k SVR4, structures are returned using the reentrant
   technique.  */
#undef PCC_STATIC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 0

For NetBSD (gcc/config/m68k/netbsd-elf.h), we have:

/* The svr4 ABI for the m68k says that records and unions are returned
   in memory.  */

#undef DEFAULT_PCC_STRUCT_RETURN 
#define DEFAULT_PCC_STRUCT_RETURN 1

Two questions:

Shouldn't the #undef in linux.h undefine DEFAULT_PCC_STRUCT_RETURN and not
PCC_STATIC_STRUCT_RETURN? And, secondly, shouldn't the comment in linux.h
be corrected, since apparently linux.h and netbsd-elf.h disagree on how
the SVR4 ABI specifies that structs and unions are returned?

In particular, it seems that DEFAULT_PCC_STRUCT_RETURN is always 0 on Linux
and always 1 on NetBSD according to a brief "git grep 
DEFAULT_PCC_STRUCT_RETURN".

Thanks,
Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer
`. `'   Physicist
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

Re: [PATCH] doc: Fix extend.texi menu

2025-05-28 Thread Jakub Jelinek

On Wed, May 28, 2025 at 10:44:20AM +0800, Haochen Jiang wrote:
> gcc/ChangeLog:
> 
>   * doc/extend.texi (C Extensions): Add missing menu items.
> ---
>  gcc/doc/extend.texi | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index edd3a0d96c5..44d492a9d40 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -31,6 +31,7 @@ extensions, accepted by GCC in C90 mode and in C++.
>  * Thread-Local::Per-thread variables.
>  * OpenMP::  Multiprocessing extensions.
>  * OpenACC:: Extensions for offloading code to accelerator 
> devices.
> +* _Countof:: The number of elements of arrays.
>  * Inline::  Defining inline functions (as fast as macros).
>  * Volatiles::   What constitutes an access to a volatile object.
>  * Using Assembly Language with C:: Instructions and extensions for 
> interfacing C with assembler.

All the surrounding entries are using spaces but the new line uses tabs.
Please use spaces for consistency and align "The" right below "Extensions".

Ok for trunk with that change.

Jakub

Re: [PATCH v2 0/3] Refine the avg_floor with fixed point vaadd

2025-05-28 Thread Robin Dapp


LGTM, thanks.

--
Regards
Robin

回复：[PATCH] [RFC] RISC-V: Add extra check to help choosing multilib with equivalent arch.

2025-05-28 Thread yunzezhu

> I thought this issue should be fixed when we implement those
> implication rules correctly? Does march=rv32imaf_zca/mabi=ilp32 still
> not able select march=rv32imac/mabi=ilp32 still happen after this[1]
> patch?
> 
> [1] 
> https://github.com/gcc-mirror/gcc/commit/42ce61eaefc4db70e2e7ea2d8ef091daa458eb48
>  
>   >
Yes, march=rv32imaf_zca/mabi=ilp32 is still not able to select
march=rv32imac/mabi=ilp32.
In my opinion, in order to imply C from zca and the F extension, the arch must
contain zcf, because C+F is equivalent to F+Zca+Zcf and vice versa. The arch
rv32imaf_zca contains F and zca but no zcf, so we cannot imply C, and therefore
the multilib rv32imac/mabi=ilp32 cannot be selected.
--
发件人：Kito Cheng 
发送时间：2025年5月28日(周三) 18:11
收件人：yunzezhu
抄　送："gcc-patches"
主　题：Re: [PATCH] [RFC] RISC-V: Add extra check to help choosing multilib with 
equivalent arch.
I thought this issue should be fixed when we implement those
implication rules correctly? Does march=rv32imaf_zca/mabi=ilp32 still
not able select march=rv32imac/mabi=ilp32 still happen after this[1]
patch?
[1] 
https://github.com/gcc-mirror/gcc/commit/42ce61eaefc4db70e2e7ea2d8ef091daa458eb48
 

On Wed, May 28, 2025 at 4:04 PM  wrote:
>
> From: Yunze Zhu 
>
> Currently when choosing multilib set for target like 
> march=rv32imaf_zca/mabi=ilp32,
> gnu toolchain reports "Cannot find suitable multilib set".
> This is because in current dependent extension zca implies c when has 
> combinations of extensions: Zca, F_Zca_Zcf or FD_Zca_Zcf_Zcd,
> and f_zca is not one of these combinations and therefore extension c can not 
> be implied,
> and multilib set march=rv32imac/mabi=ilp32 cannot be selected.
> The most accurate method to fix this problem is changing multilib in 
> MULTILIB_REQUIRED: march=rv32imac/mabi=ilp32
> to an equivalent one: march=rv32ima_zca/mabi=ilp32.
> However, this method may cause compatibility issues with multilib path in 
> previos toolchain.
> There is an alternative method that add an extra check in multilib selection 
> functions,
> which checks whether c extension in multilibs is subset of zc* extensions in 
> arch string.
> By this method not only totally matched multilib sets but equivalent multilib 
> subsets could be selected.
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc (riscv_subset_list::match_score_inc_p): 
> New Function.
> * config/riscv/riscv-subset.h: New Function.
> ---
> gcc/common/config/riscv/riscv-common.cc | 27 +
> gcc/config/riscv/riscv-subset.h | 2 ++
> 2 files changed, 29 insertions(+)
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc 
> b/gcc/common/config/riscv/riscv-common.cc
> index a6d8763f032..f43899bb413 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -412,12 +412,39 @@ riscv_subset_list::match_score (riscv_subset_list 
> *list) const
> for (s = list->m_head; s != NULL; s = s->next)
> if (this->lookup (s->name.c_str ()) != NULL)
> score++;
> + else if (this->match_score_inc_p (s->name.c_str (), list))
> + score++;
> else
> return 0;
>
> return score;
> }
>
> +/* Check if given extension is equivalent to one or group of extensions
> +in given subset list. */
> +bool
> +riscv_subset_list::match_score_inc_p (std::string name,
> + riscv_subset_list *multilib) const
> +{
> + if (name.compare ("c") != 0 || this->lookup ("zca") == NULL)
> + return false;
> +
> + /* Check equivalent requirment when having d extension in multilib. */
> + if (multilib->lookup ("d") != NULL)
> + {
> + if (multilib->xlen () == 32)
> + return this->lookup ("zcf") != NULL && this->lookup ("zcd") != NULL;
> + else
> + return this->lookup ("zcd") != NULL;
> + }
> +
> + /* Check equivalent requirment when having f extension in multilib. */
> + if (multilib->lookup ("f") != NULL && multilib->xlen () == 32)
> + return this->lookup ("zcf") != NULL;
> +
> + return true;
> +}
> +
> /* Get the rank for single-letter subsets, lower value meaning higher
> priority. */
>
> diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
> index c5d9fab4de9..f80210cd755 100644
> --- a/gcc/config/riscv/riscv-subset.h
> +++ b/gcc/config/riscv/riscv-subset.h
> @@ -114,6 +114,8 @@ public:
>
> int match_score (riscv_subset_list *) const;
>
> + bool match_score_inc_p (std::string, riscv_subset_list *) const;
> +
> void set_loc (location_t);
>
> void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; }
> --
> 2.47.1
>

Re: [PATCH v4 1/8] libstdc++: Improve naming and whitespace for extents.

2025-05-28 Thread Tomasz Kaminski

On Mon, May 26, 2025 at 4:06 PM Luc Grosheintz 
wrote:

> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan(__mdspan::_ExtentsStorage): Change name
> of private member _M_dynamic_extens to _M_dyn_exts.
> * include/std/mdspan(extents): Change name of private member
> from _M_dynamic_extents to _M_exts.
> * include/std/mdspan: Fix two instances of
> whitespace errors: `for(` -> `for (`.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
LGTM.

>  libstdc++-v3/include/std/mdspan | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index bcf2fa60fea..0f49b0e09a0 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -69,12 +69,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> //
> // If __r is the index of a dynamic extent, then
> // _S_dynamic_index[__r] is the index of that extent in
> -   // _M_dynamic_extents.
> +   // _M_dyn_exts.
> static constexpr auto _S_dynamic_index = [] consteval
> {
>   array __ret;
>   size_t __dyn = 0;
> - for(size_t __i = 0; __i < _S_rank; ++__i)
> + for (size_t __i = 0; __i < _S_rank; ++__i)
> {
>   __ret[__i] = __dyn;
>   __dyn += _S_is_dyn(_Extents[__i]);
> @@ -105,7 +105,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> {
>   auto __se = _Extents[__r];
>   if (__se == dynamic_extent)
> -   return _M_dynamic_extents[_S_dynamic_index[__r]];
> +   return _M_dyn_exts[_S_dynamic_index[__r]];
>   else
> return __se;
> }
> @@ -114,12 +114,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   constexpr void
>   _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
>   {
> -   for(size_t __i = 0; __i < _S_rank_dynamic; ++__i)
> +   for (size_t __i = 0; __i < _S_rank_dynamic; ++__i)
>   {
> size_t __di = __i;
> if constexpr (_OtherRank != _S_rank_dynamic)
>   __di = _S_dynamic_index_inv[__i];
> -   _M_dynamic_extents[__i] = _S_int_cast(__get_extent(__di));
> +   _M_dyn_exts[__i] = _S_int_cast(__get_extent(__di));
>   }
>   }
>
> @@ -146,7 +146,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>private:
> using _S_storage = __array_traits<_IndexType,
> _S_rank_dynamic>::_Type;
> -   [[no_unique_address]] _S_storage _M_dynamic_extents{};
> +   [[no_unique_address]] _S_storage _M_dyn_exts{};
>};
>
>  template
> @@ -197,7 +197,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> if constexpr (rank() == 0)
>   __builtin_trap();
> else
> - return _M_dynamic_extents._M_extent(__r);
> + return _M_exts._M_extent(__r);
>}
>
>constexpr
> @@ -233,14 +233,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> requires (_S_is_compatible_extents<_OExtents...>())
> constexpr explicit(_S_ctor_explicit<_OIndexType, _OExtents...>())
> extents(const extents<_OIndexType, _OExtents...>& __other) noexcept
> -   : _M_dynamic_extents(__other._M_dynamic_extents)
> +   : _M_exts(__other._M_exts)
> { }
>
>template<__mdspan::__valid_index_type... _OIndexTypes>
> requires (sizeof...(_OIndexTypes) == rank()
>   || sizeof...(_OIndexTypes) == rank_dynamic())
> constexpr explicit extents(_OIndexTypes... __exts) noexcept
> -   : _M_dynamic_extents(span sizeof...(_OIndexTypes)>(
> +   : _M_exts(span(
> initializer_list{_S_storage::_S_int_cast(__exts)...}))
> { }
>
> @@ -248,7 +248,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> requires (_Nm == rank() || _Nm == rank_dynamic())
> constexpr explicit(_Nm != rank_dynamic())
> extents(span<_OIndexType, _Nm> __exts) noexcept
> -   : _M_dynamic_extents(span(__exts))
> +   : _M_exts(span(__exts))
> { }
>
>
> @@ -256,7 +256,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> requires (_Nm == rank() || _Nm == rank_dynamic())
> constexpr explicit(_Nm != rank_dynamic())
> extents(const array<_OIndexType, _Nm>& __exts) noexcept
> -   : _M_dynamic_extents(span(__exts))
> +   : _M_exts(span(__exts))
> { }
>
>template
> @@ -278,7 +278,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  private:
>using _S_storage = __mdspan::_ExtentsStorage<
> _IndexType, array{_Extents...}>;
> -  [[no_unique_address]] _S_storage _M_dynamic_extents;
> +  [[no_unique_address]] _S_storage _M_exts;
>
>template
> friend class extents;
> --
> 2.49.0
>
>

Re: [PATCH v4 2/8] libstdc++: Implement layout_left from mdspan.

2025-05-28 Thread Tomasz Kaminski

On Mon, May 26, 2025 at 4:15 PM Luc Grosheintz 
wrote:

> Implements the parts of layout_left that don't depend on any of the
> other layouts.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (layout_left): New class.
> * src/c++23/std.cc.in: Add layout_left.
>
> Signed-off-by: Luc Grosheintz 
> ---

Looks very good.
Only a few minor suggestions here:
* marking internal helpers noexcept (as they are called from noexcept
members)
* using extent_type instead of _Extents in public facing API
* _M_extents is currently public, and should be private.

>  libstdc++-v3/include/std/mdspan  | 304 ++-
>  libstdc++-v3/src/c++23/std.cc.in |   1 +
>  2 files changed, 304 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index 0f49b0e09a0..d81072596b4 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -144,6 +144,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   { return __exts[__i]; });
>   }
>
> +   static constexpr span
> +   _S_static_extents(size_t __begin, size_t __end) noexcept
> +   {
> + return {_Extents.data() + __begin, _Extents.data() + __end};
> +   }
> +
> +   constexpr span
> +   _M_dynamic_extents(size_t __begin, size_t __end) const noexcept
> +   requires (_Extents.size() > 0)
> +   {
> + return {_M_dyn_exts + _S_dynamic_index[__begin],
> + _M_dyn_exts + _S_dynamic_index[__end]};
> +   }
> +
>private:
> using _S_storage = __array_traits<_IndexType,
> _S_rank_dynamic>::_Type;
> [[no_unique_address]] _S_storage _M_dyn_exts{};
> @@ -160,6 +174,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> || _Extent <= numeric_limits<_IndexType>::max();
>}
>
> +  namespace __mdspan
> +  {
> +template
> +  constexpr span
> +  __static_extents(size_t __begin = 0, size_t __end =
> _Extents::rank())
>
The called accessor is noexcept, declare it also here.

> +  { return _Extents::_S_storage::_S_static_extents(__begin, __end); }
> +
> +template
> +  constexpr span
> +  __dynamic_extents(const _Extents& __exts, size_t __begin = 0,
> +   size_t __end = _Extents::rank())
>
Same here.

> +  {
> +   return __exts._M_exts._M_dynamic_extents(__begin, __end);
> +  }
> +  }
> +
>template
>  class extents
>  {
> @@ -251,7 +281,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> : _M_exts(span(__exts))
> { }
>
> -
>template<__mdspan::__valid_index_type _OIndexType,
> size_t _Nm>
> requires (_Nm == rank() || _Nm == rank_dynamic())
> constexpr explicit(_Nm != rank_dynamic())
> @@ -276,6 +305,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> }
>
>  private:
> +  friend span
> +  __mdspan::__static_extents(size_t, size_t);
> +
> +  friend span
> +  __mdspan::__dynamic_extents(const extents&, size_t,
> size_t);
> +
>using _S_storage = __mdspan::_ExtentsStorage<
> _IndexType, array{_Extents...}>;
>[[no_unique_address]] _S_storage _M_exts;
> @@ -286,6 +321,58 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>namespace __mdspan
>{
> +template
> +  constexpr bool
>
We call this function from __exts_prod on dynamic exts,
so I think we should make it noexcept.

> +  __contains_zero(span<_Tp, _Nm> __exts)

+  {
> +   for (size_t __i = 0; __i < __exts.size(); ++__i)
> + if (__exts[__i] == 0)
> +   return true;
> +   return false;
> +  }
> +
> +constexpr size_t
>
+__static_extents_prod(const auto& __sta_exts)
>
Similarly make it noexcept.

> +{
> +  size_t __ret = 1;
> +  for (auto __factor : __sta_exts)
> +   if (__factor != dynamic_extent)
> + __ret *= __factor;
> +  return __ret;
> +}
> +
> +template
> +  constexpr typename _Extents::index_type
> +  __exts_prod(const _Extents& __exts, size_t __begin, size_t __end)
> noexcept
> +  {
> +   using _IndexType = typename _Extents::index_type;
> +
> +   auto __sta_exts = __static_extents<_Extents>(__begin, __end);
> +   size_t __sta_prod = __static_extents_prod(__sta_exts);
>
This seems to be unused.

> +
> +   size_t __ret = 1;
> +   if constexpr (_Extents::rank_dynamic() != _Extents::rank())
>
I would integrate the zero check here:
if (!(__ret = __static_extents_prod(__sta_exts)))
  return 0;

> + __ret = __static_extents_prod(__sta_exts);
> +
> +   if (__ret == 0)
> + return 0;
> +
> +   if constexpr (_Extents::rank_dynamic() > 0)
> + for (auto __factor : __dynamic_extents(__exts, __begin, __end))
> +   __ret *= size_t(__factor);
> +   return _IndexType(__ret);
> +  }
> +
> +template
> +  constexpr typename _Extents::index_type
> +  __fwd_prod(const _Extents& __exts, size_t __r) noexcept
> +  {

Do not drop AFDO profile if entry block has count of 0

2025-05-28 Thread Jan Hubicka

Hi,
with normal profile feedback, checking that the entry block count is non-zero
is quite a reliable check for the presence of a non-zero profile in the body,
since the function body can only be executed if the entry block was executed.
With autofdo this is not true, since the entry block may just execute too few
times to be recorded.  As a consequence we currently drop the AFDO profile
quite often.  This patch fixes it.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

* predict.cc (rebuild_frequencies): look harder for presence
of profile feedback.

diff --git a/gcc/predict.cc b/gcc/predict.cc
index 16dd9b01112..872f54d957a 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -,11 +,14 @@ rebuild_frequencies (void)
   bool inconsistency_found = false;
   bool uninitialized_probablity_found = false;
   bool uninitialized_count_found = false;
+  bool feedback_found = false;
 
   cfun->cfg->count_max = profile_count::uninitialized ();
   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
 {
   cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
+  if (bb->count.nonzero_p () && bb->count.quality () >= AFDO)
+   feedback_found = true;
   /* Uninitialized count may be result of inlining or an omision in an
  optimization pass.  */
   if (!bb->count.initialized_p ())
@@ -4516,8 +4519,7 @@ rebuild_frequencies (void)
  Propagating from probabilities would make profile look consistent, but
  because probablities after code duplication may not be representative
  for a given run, we would only propagate the error further.  */
-  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
-  && !uninitialized_count_found)
+  if (feedback_found && !uninitialized_count_found)
 {
   if (dump_file)
fprintf (dump_file,

Re: [PATCH v3 2/2] gimple-fold: extend vector simplification to match scalar bitwise optimizations [PR119196]

2025-05-28 Thread Icen Zeyada

Hi Richard,
I've implemented some of your suggested changes, but I'm not entirely sure 
there's an elegant way to handle the second one:
> "So here you'd want to verify we can to LT_EXPR for the types involved, and 
> the cases which simplify to constant_boolean_node do not need any such check. 
> Possibly the same issue applies to the cases below; I did not verify."
Most of those expressions are selected from `code1` or `code2` in the switch 
statements, while the rest—like the example you mentioned—are their folded or 
simplified forms (e.g., `NE` and `LE` becoming `LT`). How can I determine those 
expressions at the start of the simplification?
Or are you suggesting that `expand_vec_cmp_expr_p` should be distributed within 
the functions—that is, inside the conditionals that decide which expression to 
return? So we would end up with something like:
```
(if (code1 == NE_EXPR
&& code2 == LE_EXPR
&& cmp == 0
&& (allbits
|| (VECTOR_BOOLEAN_TYPE_P (type)
&& expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR
(lt @c0 (convert @1)))
```
...applied across all expressions?

Kind Regards,
Icen


From: Richard Biener 
Sent: 27 May 2025 13:47
To: Icen Zeyada 
Cc: gcc-patches@gcc.gnu.org ; jeffreya...@gmail.com 
; i...@airs.com ; Richard Earnshaw 
; pins...@gmail.com ; Victor Do 
Nascimento ; Tamar Christina 

Subject: Re: [PATCH v3 2/2] gimple-fold: extend vector simplification to match 
scalar bitwise optimizations [PR119196]

On Wed, 21 May 2025, Icen Zeyada wrote:

> Generalize existing scalar gimple_fold rules to apply the same
> bitwise comparison simplifications to vector types.  Previously, an
> expression like
>
> (x < y) && (x > y)
>
> would fold to `false` if x and y are scalars, but equivalent vector
> comparisons were left untouched.  This patch enables folding of
> patterns of the form
>
> (cmp x y) bit_and (cmp x y)
> (cmp x y) bit_ior (cmp x y)
> (cmp x y) bit_xor (cmp x y)
>
> for vector operands as well, ensuring consistent optimization across
> all data types.
>
> PR tree-optimization/119196
>
> gcc/ChangeLog:
>
>   * match.pd: Allow scalar optimizations with bitwise AND/OR/XOR to apply 
> to vectors.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/vector-compare-5.c: Add new test for vector 
> compare simplification.
>
> Signed-off-by: Icen Zeyada 
> ---
>  gcc/match.pd  | 16 -
>  .../gcc.target/aarch64/vector-compare-5.c | 67 +++
>  2 files changed, 81 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 611f05ef9f9c..7a7df6aeb6c5 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3635,6 +3635,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if ((TREE_CODE (@1) == INTEGER_CST
> && TREE_CODE (@2) == INTEGER_CST)
>|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +   || (VECTOR_TYPE_P (TREE_TYPE (@1))

Note this does not verify we are doing a vector compare, our IL
allows vector ==/!= vector to scalar bool compares.  The appropriate
test should be VECTOR_BOOLEAN_TYPE_P (type) to check for
a vector compare (to gate an expand_vec_cmp_expr_p check)
and for the bitwise_equal_p guard your change looks OK.

> +   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))

The expand_vec_cmp_expr_p is misplaced - we generate not 'code2'
but a comparison code depending on it, like for

  (if (code1 == NE_EXPR
   && code2 == LE_EXPR
   && cmp == 0
   && allbits)
   (lt @c0 (convert @1)))

so here you'd want to verify we can do LT_EXPR for the types involved
and the cases which simplify to constant_boolean_node do not need
any such check.  Possibly the same issue applies to the cases below,
I did not verify.

Thanks,
Richard.

> || POINTER_TYPE_P (TREE_TYPE (@1)))
>&& bitwise_equal_p (@1, @2)))
>  (with
> @@ -3712,6 +3714,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>(if ((TREE_CODE (@1) == INTEGER_CST
>&& TREE_CODE (@2) == INTEGER_CST)
> || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +   || (VECTOR_TYPE_P (TREE_TYPE (@1))
> +   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
>|| POINTER_TYPE_P (TREE_TYPE (@1)))
>   && operand_equal_p (@1, @2)))
> (with
> @@ -3762,6 +3766,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if ((TREE_CODE (@1) == INTEGER_CST
> && TREE_CODE (@2) == INTEGER_CST)
>|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +   || (VECTOR_TYPE_P (TREE_TYPE (@1))
> +   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
>|| POINTER_TYPE_P (TREE_TYPE (@1)))
>&& bitwise_equal_p (@1, @2)))
>  (with
> @@ -3885,7 +3891,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>rcmp (ne le gt ne lt ge)
>(simplify
> (op:c (cmp1:c @0 @1) (cmp2 @0 @1))
> -   (if (

RE: [PATCH] i386: Use Shuffles instead of shifts for Reduction in AMD znver4/5

2025-05-28 Thread Gorantla, Pranav

[AMD Official Use Only - AMD Internal Distribution Only]

> -Original Message-
> From: Jan Hubicka 
> Sent: Wednesday, May 28, 2025 3:52 PM
> To: Gorantla, Pranav 
> Cc: gcc-patches@gcc.gnu.org; Kumar, Venkataramanan
> ; ubiz...@gmail.com
> Subject: Re: [PATCH] i386: Use Shuffles instead of shifts for Reduction in AMD
> znver4/5
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> > gcc/ChangeLog:
> >
> >   * config/i386/i386-expand.cc (emit_reduc_half): Use shuffles to
> >   generate reduc half for V4SI, similar modes.
> >   * config/i386/i386.h (TARGET_SSE_REDUCTION_PREFER_PSHUF): New
> Macro.
> >   * config/i386/x86-tune.def
> (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF):
> >   New tuning.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/i386/reduc-pshuf.c: New test.
> > ---
> >  gcc/config/i386/i386-expand.cc  | 28 ++---
> >  gcc/config/i386/i386.h  |  2 ++
> >  gcc/config/i386/x86-tune.def|  5 
> >  gcc/testsuite/gcc.target/i386/reduc-pshuf.c | 14 +++
> >  4 files changed, 46 insertions(+), 3 deletions(-)  create mode 100644
> > gcc/testsuite/gcc.target/i386/reduc-pshuf.c
> >
> > diff --git a/gcc/config/i386/i386-expand.cc
> > b/gcc/config/i386/i386-expand.cc index 7fd03c88630..c7aec716a55 100644
> > --- a/gcc/config/i386/i386-expand.cc
> > +++ b/gcc/config/i386/i386-expand.cc
> > @@ -18724,9 +18724,31 @@ emit_reduc_half (rtx dest, rtx src, int i)
> >  case E_V8HFmode:
> >  case E_V4SImode:
> >  case E_V2DImode:
> > -  d = gen_reg_rtx (V1TImode);
> > -  tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
> > - GEN_INT (i / 2));
> > +  if (TARGET_SSE_REDUCTION_PREFER_PSHUF) {
> > +if (i == 128) {
> > +  d = gen_reg_rtx(V4SImode);
> > +  tem = gen_sse2_pshufd_1(
> > +  d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), 
> > GEN_INT(2),
> > +  GEN_INT(3), GEN_INT(2), GEN_INT(3));
> > +} else if (i == 64) {
> > +  d = gen_reg_rtx(V4SImode);
> > +  tem = gen_sse2_pshufd_1(
> > +  d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), 
> > GEN_INT(1),
> > +  GEN_INT(1), GEN_INT(1), GEN_INT(1));
> > +} else if (i == 32) {
> > +  d = gen_reg_rtx(V8HImode);
> > +  tem = gen_sse2_pshuflw_1(
> > +  d, force_reg(V8HImode, gen_lowpart(V8HImode, src)), 
> > GEN_INT(1),
> > +  GEN_INT(1), GEN_INT(1), GEN_INT(1));
> > +} else {
> > +  d = gen_reg_rtx(V1TImode);
> > +  tem =
> > +  gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src), GEN_INT(i 
> > / 2));
> > +}
> > +  } else {
> > +d = gen_reg_rtx(V1TImode);
> > +tem = gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src),
> > + GEN_INT(i / 2));
>
> Instead of duplicating gen_sse2_lshrv1ti3 it is probably cleaner to simply 
> break after
> each gen_sse_pshuw call and remove else.
>
> OK with that change
> Honza
Updated the patch as suggested. Could you please commit the patch on my behalf,
as I don't have write permission?

Thanks
Pranav


>From 5070975a014dbfd0ca8ccb279a50d2266c2a6a18 Mon Sep 17 00:00:00 2001
From: Pranav Gorantla 
Date: Wed, 28 May 2025 10:05:46 +0530
Subject: [PATCH v1] i386: Use Shuffles instead of shifts for Reduction in AMD
 znver4/5

On AMD znver4 and znver5 targets, vpshufd and vpsrldq have latencies of 1
and 2 and throughputs of 4 (2 for znver4) and 2, respectively.  It is better
to generate shuffles instead of shifts wherever possible.  In this patch we
try to generate the appropriate shuffle instruction to copy the higher half
to the lower half instead of a simple right shift during horizontal vector
reduction.

gcc/ChangeLog:

* config/i386/i386-expand.cc (emit_reduc_half): Use shuffles to
generate reduc half for V4SI, similar modes.
* config/i386/i386.h (TARGET_SSE_REDUCTION_PREFER_PSHUF): New Macro.
* config/i386/x86-tune.def (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF):
New tuning.

gcc/testsuite/ChangeLog:

* gcc.target/i386/reduc-pshuf.c: New test.
---
 gcc/config/i386/i386-expand.cc  | 29 -
 gcc/config/i386/i386.h  |  2 ++
 gcc/config/i386/x86-tune.def|  5 
 gcc/testsuite/gcc.target/i386/reduc-pshuf.c | 14 ++
 4 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/reduc-pshuf.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 7fd03c88630..96333c3c18e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -18724,9 +18724,36 @@ emit_reduc_half (rtx dest, rtx src, int i)
 case E_V8HFmode:
 case E_V4SImode:
 case E_V2DImode:
+  if (TARGET_SSE_REDUCTION_PREFER_PSHUF)
+{
+  i

Re: [PATCH v3 2/2] gimple-fold: extend vector simplification to match scalar bitwise optimizations [PR119196]

2025-05-28 Thread Richard Biener

On Wed, 28 May 2025, Icen Zeyada wrote:

> Hi Richard,
> I've implemented some of your suggested changes, but I'm not entirely sure 
> there's an elegant way to handle the second one:
> > "So here you'd want to verify we can do LT_EXPR for the types involved, and 
> > the cases which simplify to constant_boolean_node do not need any such 
> > check. Possibly the same issue applies to the cases below; I did not 
> > verify."
> Most of those expressions are selected from `code1` or `code2` in the switch 
> statements, while the rest—like the example you mentioned—are their folded or 
> simplified forms (e.g., `NE` and `LE` becoming `LT`). How can I determine 
> those expressions at the start of the simplification?
> Or are you suggesting that `expand_vec_cmp_expr_p` should be distributed 
> within the functions—that is, inside the conditionals that decide which 
> expression to return? So we would end up with something like:
> ```
> (if (code1 == NE_EXPR
> && code2 == LE_EXPR
> && cmp == 0
> && (allbits
> || (VECTOR_BOOLEAN_TYPE_P (type)
> && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR
> (lt @c0 (convert @1)))
> ```
> ...applied across all expressions?

Yes, this is what I would suggest.

Richard.

> Kind Regards,
> Icen
> 
> 
> From: Richard Biener 
> Sent: 27 May 2025 13:47
> To: Icen Zeyada 
> Cc: gcc-patches@gcc.gnu.org ; jeffreya...@gmail.com 
> ; i...@airs.com ; Richard Earnshaw 
> ; pins...@gmail.com ; Victor Do 
> Nascimento ; Tamar Christina 
> 
> Subject: Re: [PATCH v3 2/2] gimple-fold: extend vector simplification to 
> match scalar bitwise optimizations [PR119196]
> 
> On Wed, 21 May 2025, Icen Zeyada wrote:
> 
> > Generalize existing scalar gimple_fold rules to apply the same
> > bitwise comparison simplifications to vector types.  Previously, an
> > expression like
> >
> > (x < y) && (x > y)
> >
> > would fold to `false` if x and y are scalars, but equivalent vector
> > comparisons were left untouched.  This patch enables folding of
> > patterns of the form
> >
> > (cmp x y) bit_and (cmp x y)
> > (cmp x y) bit_ior (cmp x y)
> > (cmp x y) bit_xor (cmp x y)
> >
> > for vector operands as well, ensuring consistent optimization across
> > all data types.
> >
> > PR tree-optimization/119196
> >
> > gcc/ChangeLog:
> >
> >   * match.pd: Allow scalar optimizations with bitwise AND/OR/XOR to 
> > apply to vectors.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/aarch64/vector-compare-5.c: Add new test for vector 
> > compare simplification.
> >
> > Signed-off-by: Icen Zeyada 
> > ---
> >  gcc/match.pd  | 16 -
> >  .../gcc.target/aarch64/vector-compare-5.c | 67 +++
> >  2 files changed, 81 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 611f05ef9f9c..7a7df6aeb6c5 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3635,6 +3635,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (if ((TREE_CODE (@1) == INTEGER_CST
> > && TREE_CODE (@2) == INTEGER_CST)
> >|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> > +   || (VECTOR_TYPE_P (TREE_TYPE (@1))
> 
> Note this does not verify we are doing a vector compare, our IL
> allows vector ==/!= vector to scalar bool compares.  The appropriate
> test should be VECTOR_BOOLEAN_TYPE_P (type) to check for
> a vector compare (to gate an expand_vec_cmp_expr_p check)
> and for the bitwise_equal_p guard your change looks OK.
> 
> > +   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
> 
> The expand_vec_cmp_expr_p is misplaced - we generate not 'code2'
> but a comparison code depending on it, like for
> 
>   (if (code1 == NE_EXPR
>&& code2 == LE_EXPR
>&& cmp == 0
>&& allbits)
>(lt @c0 (convert @1)))
> 
> so here you'd want to verify we can do LT_EXPR for the types involved
> and the cases which simplify to constant_boolean_node do not need
> any such check.  Possibly the same issue applies to the cases below,
> I did not verify.
> 
> Thanks,
> Richard.
> 
> > || POINTER_TYPE_P (TREE_TYPE (@1)))
> >&& bitwise_equal_p (@1, @2)))
> >  (with
> > @@ -3712,6 +3714,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >(if ((TREE_CODE (@1) == INTEGER_CST
> >&& TREE_CODE (@2) == INTEGER_CST)
> > || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> > +   || (VECTOR_TYPE_P (TREE_TYPE (@1))
> > +   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
> >|| POINTER_TYPE_P (TREE_TYPE (@1)))
> >   && operand_equal_p (@1, @2)))
> > (with
> > @@ -3762,6 +3766,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (if ((TREE_CODE (@1) == INTEGER_CST
> > && TREE_CODE (@2) == INTEGER_CST)
> >|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> > +   || (VECTOR_TYPE_P

Re: [PATCH 30/61] MSA: Make MSA and microMIPS R5 unsupported

2025-05-28 Thread Aleksandar Rakic

HTEC Public

Hi,

Could you please let us know if you have any comments
on the latest version of this patch?

Kind regards,
Aleksandar Rakic


From: Aleksandar Rakic 
Sent: Wednesday, May 14, 2025 4:06 PM
To: Sam James; Xi Ruoyao
Cc: Jeff Law; gcc-patches@gcc.gnu.org; Djordje Todorovic; c...@mips.com
Subject: Re: [PATCH 30/61] MSA: Make MSA and microMIPS R5 unsupported

Hi,

Could you please let us know if you have any comments
on the latest version of this patch?

Kind regards,
Aleksandar Rakic

Re: [PATCH 11/61] Fix unsafe comparison against stack_pointer_rtx

2025-05-28 Thread Aleksandar Rakic

HTEC Public

Hi,

Could you please let us know if you have any comments
on the latest reply on this patch?

Kind regards,
Aleksandar Rakic


From: Aleksandar Rakic 
Sent: Wednesday, May 14, 2025 3:55 PM
To: Jeff Law; gcc-patches@gcc.gnu.org
Cc: Djordje Todorovic; c...@mips.com
Subject: Re: [PATCH 11/61] Fix unsafe comparison against stack_pointer_rtx

Hi,

> > GCC can modify a rtx which was created using stack_pointer_rtx.
> > This means that just doing a straight address comparison of a rtx
> > against stack_pointer_rtx to see whether it is the stack pointer
> > register will not be correct in all cases.
> Umm, no.  There is one and only one stack_pointer_rtx.  If something is
> modifying stack_pointer_rtx, then that's a bug.  This feels like it's
> papering over a problem elsewhere.  At the least it would need a better
> explanation of how/why you're getting addresses that reference the same
> hard register as the stack pointer, but which aren't stack_pointer_rtx.

> I vaguely recall a problem in this space from regrename.cc, but I
> thought we fixed that long ago.

> Jeff

I searched the targets other than the MIPS target and found out that
there are the commits 74dc3e9 and c21242e that test for a REGNO instead
of comparing the rtx to stack_pointer_rtx directly.

Also, I found that the commit d60e544 states the following:
"We can't just compare with STACK_POINTER_RTX because the reference to
the stack pointer might be in some other mode. In particular, an explict
clobber in an asm statement will result in a QImode clober."
I am wondering if this could be a satisfying explanation.

Kind regards,
Aleksandar

Re: [PATCH 0/4] c++: Support modules streaming some internal structures with no DECL_CONTEXT

2025-05-28 Thread Patrick Palka

On Thu, 22 May 2025, Nathaniel Shead wrote:

> This patch series adds support for streaming some internal declarations
> in C++20 modules that we previously would ICE on.  

I like this patch series a lot, thanks for working on it!  I can't
approve but it looks pretty good to me.

> 
> The series has been successfully bootstrapped and regtested on
> x86_64-pc-linux-gnu.  Additionally, modules.exp now passes with
> '--target_board=unix/-fsanitize=undefined'.
> 
> Nathaniel Shead (4):
>   c++: Add flag to detect underlying representative of bitfield decls
>   c++/modules: Implement streaming of uncontexted TYPE_DECLs [PR98735]
>   c++/modules: Support streaming new size cookie for constexpr
> [PR120040]
>   c++/modules: Avoid name clashes when streaming internal labels
> [PR98375,PR118904]
> 
>  gcc/cp/constexpr.cc|   2 +-
>  gcc/cp/cp-gimplify.cc  |   5 +-
>  gcc/cp/init.cc |  10 +-
>  gcc/cp/module.cc   | 170 +++--
>  gcc/stor-layout.cc |   1 +
>  gcc/testsuite/g++.dg/modules/pr120040_a.C  |  19 +++
>  gcc/testsuite/g++.dg/modules/pr120040_b.C  |  15 ++
>  gcc/testsuite/g++.dg/modules/src-loc-1.h   |   6 +
>  gcc/testsuite/g++.dg/modules/src-loc-1_a.H |   7 +
>  gcc/testsuite/g++.dg/modules/src-loc-1_b.C |   5 +
>  gcc/testsuite/g++.dg/modules/src-loc-1_c.C |  16 ++
>  gcc/testsuite/g++.dg/modules/ubsan-1_a.C   |  10 ++
>  gcc/testsuite/g++.dg/modules/ubsan-1_b.C   |  14 ++
>  gcc/testsuite/g++.dg/ubsan/module-1-aux.cc |  12 ++
>  gcc/testsuite/g++.dg/ubsan/module-1.C  |  11 ++
>  gcc/tree-core.h|   1 +
>  gcc/tree.cc|  51 +++
>  gcc/tree.h |  12 ++
>  gcc/ubsan.cc   |  16 +-
>  19 files changed, 350 insertions(+), 33 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/modules/pr120040_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/pr120040_b.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1.h
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_a.H
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_b.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_c.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/ubsan-1_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/ubsan-1_b.C
>  create mode 100644 gcc/testsuite/g++.dg/ubsan/module-1-aux.cc
>  create mode 100644 gcc/testsuite/g++.dg/ubsan/module-1.C
> 
> -- 
> 2.47.0
> 
>

Re: [PATCH, Fortran] Bug 119856 - Missing commas in I/O formats not diagnosed by default at compile time.

2025-05-28 Thread Jerry D


On 5/28/25 10:09 AM, Steve Kargl wrote:

On Wed, May 28, 2025 at 08:11:05AM -0700, Jerry D wrote:

The attached patch is simple and self explanatory in the git log entry.

Regression tested on X86_64-linux-gnu.

OK for trunk?



Yes, with one question.


commit 845768cbead03f76265e491bcf5ea6de7020ff39
Author: Jerry DeLisle 
Date:   Wed May 28 07:56:12 2025 -0700

 Fortran: Adjust handling of optional comma in FORMAT.
 
 This change adjusts the error messages for optional commas

 in format strings to give a warning at compile time unless
 -std=legacy is used. This is more consistent with the
 runtime library. The comma separator should really not be
 encouraged as it is non-standard fortran.


Is that last sentence correct?  I would think that the
comma separator is not only encouraged, but it is required



Pushed with fixing the comment. And ... with my fat fingers I reversed 
two digits of the PR number in the log and I do not know how to fix that.


Jerry

Re: [PATCH] fortran: add constant input support for trig functions with half-revolutions

2025-05-28 Thread Harald Anlauf


On 5/28/25 19:51, Tobias Burnus wrote:

Hi Yuao,

Yuao Ma wrote:
[…]

Done.


LGTM :-) I have now applied it as r16-938-ge8fdd55ec90749.

Thanks for the patch!

Tobias




This breaks bootstrap here on openSUSE Leap 15.6 with mpfr-4.0.2:

../../gcc-trunk/gcc/fortran/simplify.cc: In function 'gfc_expr* 
gfc_simplify_cospi(gfc_expr*)':
../../gcc-trunk/gcc/fortran/simplify.cc:2305:3: error: 'mpfr_fmod_ui' 
was not declared in this scope; did you mean 'mpfr_fmodquo'?

 2305 |   mpfr_fmod_ui (cs, n, 2, GFC_RND_MODE);
  |   ^~~~
  |   mpfr_fmodquo
../../gcc-trunk/gcc/fortran/simplify.cc: In function 'gfc_expr* 
gfc_simplify_sinpi(gfc_expr*)':
../../gcc-trunk/gcc/fortran/simplify.cc:2346:3: error: 'mpfr_fmod_ui' 
was not declared in this scope; did you mean 'mpfr_fmodquo'?

 2346 |   mpfr_fmod_ui (sn, n, 2, GFC_RND_MODE);
  |   ^~~~
  |   mpfr_fmodquo

Re: [PATCH] fortran: add constant input support for trig functions with half-revolutions

2025-05-28 Thread Tobias Burnus


Hi Yuao,

Yuao Ma wrote:
[…]

Done.


LGTM :-) I have now applied it as r16-938-ge8fdd55ec90749.

Thanks for the patch!

Tobias

Re: [PATCH v1 0/3] RISC-V: Combine vec_duplicate + vmul.vv to vmul.vx on GR2VR cost

2025-05-28 Thread Robin Dapp


This patch would like to introduce the combine of vec_dup + vmul.vv into
vmul.vx on the cost value of GR2VR.  The late-combine will take place if
the cost of GR2VR is zero, or reject the combine if non-zero like 1, 15
in test.  There will be two cases for the combine:


OK.

--
Regards
Robin

Re: [PATCH v2] c++: Unwrap type traits defined in terms of builtins within diagnostics [PR117294]

2025-05-28 Thread Patrick Palka

On Tue, 27 May 2025, Nathaniel Shead wrote:

> On Wed, Nov 27, 2024 at 11:45:40AM -0500, Patrick Palka wrote:
> > On Fri, 8 Nov 2024, Nathaniel Shead wrote:
> > 
> > > Does this approach seem reasonable?  I'm pretty sure that the way I've
> > > handled the templating here is unideal but I'm not sure what a neat way
> > > to do what I'm trying to do here would be; any comments are welcome.
> > 
> > Clever approach, I like it!
> > 
> > > 
> > > -- >8 --
> > > 
> > > Currently, concept failures of standard type traits just report
> > > 'expression X evaluates to false'.  However, many type traits are
> > > actually defined in terms of compiler builtins; we can do better here.
> > > For instance, 'is_constructible_v' could go on to explain why the type
> > > is not constructible, or 'is_invocable_v' could list potential
> > > candidates.
> > 
> > That'd be a great improvement.
> > 
> > > 
> > > As a first step to supporting that we need to be able to map the
> > > standard type traits to the builtins that they use.  Rather than adding
> > > another list that would need to be kept up-to-date whenever a builtin is
> > > added, this patch instead tries to detect any variable template defined
> > > directly in terms of a TRAIT_EXPR.
> > > 
> > > To avoid false positives, we ignore any variable templates that have any
> > > specialisations (partial or explicit), even if we wouldn't have chosen
> > > that specialisation anyway.  This shouldn't affect any of the standard
> > > library type traits that I could see.
> > 
> > You should be able to tsubst the TEMPLATE_ID_EXPR directly and look at
> > its TI_PARTIAL_INFO in order to determine which (if any) partial
> > specialization was selected.  And if an explicit specialization was
> > selected the resulting VAR_DECL will have DECL_TEMPLATE_SPECIALIZATION
> > set.
> > 
> > > ...[snip]...
> > 
> > If we substituted the TEMPLATE_ID_EXPR as a whole we could use the
> > DECL_TI_ARGS of that IIUC?
> > 
> 
> Thanks for your comments, they were very helpful.  Here's a totally new
> approach which I'm much happier with.  I've also removed the "disable in
> case any specialisation exists" logic, as on further reflection I don't
> imagine this to be the kind of issue I thought it might have been.
> 
> With this patch,
> 
>   template 
>   constexpr bool is_default_constructible_v = __is_constructible(T);
> 
>   template 
>   concept default_constructible = is_default_constructible_v;
> 
>   static_assert(default_constructible);
> 
> now emits the following error:
> 
>   test.cpp:6:15: error: static assertion failed
>   6 | static_assert(default_constructible);
> |   ^~~
>   test.cpp:6:15: note: constraints not satisfied
>   test.cpp:4:9:   required by the constraints of ‘template concept 
> default_constructible’
>   test.cpp:4:33: note:   ‘void’ is not default constructible
>   4 | concept default_constructible = is_default_constructible_v;
> | ^
> 
> There's still a lot of improvements to be made in this area, I think:
> 
> - I haven't yet looked into updating the specific diagnostics emitted by
>   the traits; I'd like to try to avoid too much code duplication with
>   the implementation in cp/semantics.cc.  (I also don't think the manual
>   indentation at the start of the message is particularly helpful?)

For is_xible / is_convertible etc, perhaps they could use a 'complain'
parameter that they propagate through instead of always passing tf_none,
similar to build_invoke?  Then we can call those predicates directly
from diagnose_trait_expr with complain=tf_error so that they elaborate
why they failed.

Agreed about the extra indentation

> 
> - The message doesn't print the mapping '[with T = void]'; I tried a
>   couple of things but this doesn't currently look especially
>   straight-forward, as we don't currently associate the args with the
>   normalised atomic constraint of the declaration.

Maybe we can still print the

 note: the expression ‘normal [with T = void]’ evaluated to ‘false’

note alongside the extended diagnostics?  Which would mean moving the
maybe_diagnose_standard_trait call a bit lower in
diagnose_atomic_constraint.

This would arguably make the diagnostic even noisier, but IMHO the
parameter mapping is too important a piece of information to omit.

> 
> - Just generally I think there's still a lot of noise in the diagnostic,
>   and I find the back-to-front ordering of 'required by...' confusing.

Agreed in general, but in the case of these type trait diagnostics I
think noisiness is kind of inevitable especially if we start explaining
in detail why the trait is unsatisfied (as e.g. PR117294 requests).

> 
> Depending on how much time I find myself with I might take a look at
> some of these further issues later, but in the meantime, does this look
> like an improvement over the status quo?
> 
> Bootstrapped and regtested on x86_64-pc-l

Re: [PATCH] c++/modules: Support re-streaming TU_LOCAL_ENTITYs [PR120412]

2025-05-28 Thread Patrick Palka

On Sat, 24 May 2025, Nathaniel Shead wrote:

> Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk/15?

LGTM FWIW

> 
> -- >8 --
> 
> When emitting a primary module interface, we must re-stream any TU-local
> entities that we saw in a partition.  This patch adds the missing
> members from core_vals.
> 
> As a drive-by fix, in some cases we might have a typedef referring to a
> TU-local entity; we need to handle that case as well.
> 
>   PR c++/120412
> 
> gcc/cp/ChangeLog:
> 
>   * module.cc (trees_out::core_vals): Write TU_LOCAL_ENTITY bits.
>   (trees_in::core_vals): Read it.
>   (trees_in::tree_node): Handle TU_LOCAL_ENTITY typedefs.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/modules/internal-14_a.C: New test.
>   * g++.dg/modules/internal-14_b.C: New test.
>   * g++.dg/modules/internal-14_c.C: New test.
> 
> Signed-off-by: Nathaniel Shead 
> ---
>  gcc/cp/module.cc | 15 ++-
>  gcc/testsuite/g++.dg/modules/internal-14_a.C | 17 +
>  gcc/testsuite/g++.dg/modules/internal-14_b.C |  6 ++
>  gcc/testsuite/g++.dg/modules/internal-14_c.C |  9 +
>  4 files changed, 46 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/modules/internal-14_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/internal-14_b.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/internal-14_c.C
> 
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index 17c040d26b0..16322fddde7 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -6774,6 +6774,13 @@ trees_out::core_vals (tree t)
>if (streaming_p ())
>   WU (((lang_tree_node *)t)->trait_expression.kind);
>break;
> +
> +case TU_LOCAL_ENTITY:
> +  WT (((lang_tree_node *)t)->tu_local_entity.name);
> +  if (state)
> + state->write_location
> +   (*this, ((lang_tree_node *)t)->tu_local_entity.loc);
> +  break;
>  }
>  
>if (CODE_CONTAINS_STRUCT (code, TS_TYPED))
> @@ -7317,6 +7324,11 @@ trees_in::core_vals (tree t)
>RT (((lang_tree_node *)t)->trait_expression.type2);
>RUC (cp_trait_kind, ((lang_tree_node *)t)->trait_expression.kind);
>break;
> +
> +case TU_LOCAL_ENTITY:
> +  RT (((lang_tree_node *)t)->tu_local_entity.name);
> +  ((lang_tree_node *)t)->tu_local_entity.loc
> + = state->read_location (*this);
>  }
>  
>if (CODE_CONTAINS_STRUCT (code, TS_TYPED))
> @@ -10125,7 +10137,8 @@ trees_in::tree_node (bool is_use)
>   && dump ("Read %stypedef %C:%N",
>DECL_IMPLICIT_TYPEDEF_P (res) ? "implicit " : "",
>TREE_CODE (res), res);
> -   res = TREE_TYPE (res);
> +   if (TREE_CODE (res) != TU_LOCAL_ENTITY)
> + res = TREE_TYPE (res);
>   }
>break;
>  
> diff --git a/gcc/testsuite/g++.dg/modules/internal-14_a.C 
> b/gcc/testsuite/g++.dg/modules/internal-14_a.C
> new file mode 100644
> index 000..07eb9658951
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/internal-14_a.C
> @@ -0,0 +1,17 @@
> +// PR c++/120412
> +// { dg-additional-options "-fmodules -std=c++20 -Wtemplate-names-tu-local" }
> +// { dg-module-cmi m:part }
> +
> +export module m:part;
> +
> +export template 
> +auto fun1(F) {
> +  return true;
> +}
> +
> +using Dodgy = decltype([]{});
> +
> +export template 
> +auto fun2(T&&) {  // { dg-warning "TU-local" }
> +  return fun1(Dodgy{});
> +}
> diff --git a/gcc/testsuite/g++.dg/modules/internal-14_b.C 
> b/gcc/testsuite/g++.dg/modules/internal-14_b.C
> new file mode 100644
> index 000..ad3b09d0722
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/internal-14_b.C
> @@ -0,0 +1,6 @@
> +// PR c++/120412
> +// { dg-additional-options "-fmodules -std=c++20 -Wtemplate-names-tu-local" }
> +// { dg-module-cmi m }
> +
> +export module m;
> +export import :part;
> diff --git a/gcc/testsuite/g++.dg/modules/internal-14_c.C 
> b/gcc/testsuite/g++.dg/modules/internal-14_c.C
> new file mode 100644
> index 000..4f8e785ce87
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/internal-14_c.C
> @@ -0,0 +1,9 @@
> +// PR c++/120412
> +// { dg-additional-options "-fmodules -std=c++20" }
> +
> +import m;
> +
> +int main() {
> +  // { dg-error "instantiation exposes TU-local entity '(fun1|Dodgy)'" "" { 
> target *-*-* } 0 }
> +  fun2(123);  // { dg-message "required from here" }
> +}
> -- 
> 2.47.0
> 
>

[PATCH] libstdc++: Implement C++23 P1659R3 starts_with and ends_with

2025-05-28 Thread Patrick Palka

Change in this version:

  * Add test using an integer-class distance type, based on views::iota.

-- >8 --

This implements ranges::starts_with and ranges::ends_with from the C++23
paper P1659R3.  The logic of these algorithms is contained in an _S_impl
member function that takes optional size parameters __n1 and __n2 of the
two ranges, where -1 means the corresponding size is not known.

libstdc++-v3/ChangeLog:

* include/bits/ranges_algo.h (__starts_with_fn, starts_with):
Define.
(__ends_with_fn, ends_with): Define.
* include/bits/version.def (ranges_starts_ends_with): Define.
* include/bits/version.h: Regenerate.
* include/std/algorithm: Provide __cpp_lib_ranges_starts_ends_with.
* src/c++23/std.cc.in (ranges::starts_with): Export.
(ranges::ends_with): Export.
* testsuite/25_algorithms/ends_with/1.cc: New test.
* testsuite/25_algorithms/starts_with/1.cc: New test.

Reviewed-by: Tomasz Kamiński 
---
 libstdc++-v3/include/bits/ranges_algo.h   | 248 ++
 libstdc++-v3/include/bits/version.def |   8 +
 libstdc++-v3/include/bits/version.h   |  10 +
 libstdc++-v3/include/std/algorithm|   1 +
 libstdc++-v3/src/c++23/std.cc.in  |   4 +
 .../testsuite/25_algorithms/ends_with/1.cc| 165 
 .../testsuite/25_algorithms/starts_with/1.cc  | 158 +++
 7 files changed, 594 insertions(+)
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/ends_with/1.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/starts_with/1.cc

diff --git a/libstdc++-v3/include/bits/ranges_algo.h 
b/libstdc++-v3/include/bits/ranges_algo.h
index f36e7dd59911..ac5ef1e38520 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -438,6 +438,254 @@ namespace ranges
 
   inline constexpr __search_n_fn search_n{};
 
+#if __glibcxx_ranges_starts_ends_with // C++ >= 23
+  struct __starts_with_fn
+  {
+template _Sent1,
+input_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
+typename _Pred = ranges::equal_to,
+typename _Proj1 = identity, typename _Proj2 = identity>
+  requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2>
+  constexpr bool
+  operator()(_Iter1 __first1, _Sent1 __last1,
+_Iter2 __first2, _Sent2 __last2, _Pred __pred = {},
+_Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const
+  {
+   iter_difference_t<_Iter1> __n1 = -1;
+   iter_difference_t<_Iter2> __n2 = -1;
+   if constexpr (sized_sentinel_for<_Sent1, _Iter1>)
+ __n1 = __last1 - __first1;
+   if constexpr (sized_sentinel_for<_Sent2, _Iter2>)
+ __n2 = __last2 - __first2;
+   return _S_impl(std::move(__first1), __last1, __n1,
+  std::move(__first2), __last2, __n2,
+  std::move(__pred),
+  std::move(__proj1), std::move(__proj2));
+  }
+
+template
+  requires indirectly_comparable, iterator_t<_Range2>,
+_Pred, _Proj1, _Proj2>
+  constexpr bool
+  operator()(_Range1&& __r1, _Range2&& __r2, _Pred __pred = {},
+_Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const
+  {
+   range_difference_t<_Range1> __n1 = -1;
+   range_difference_t<_Range1> __n2 = -1;
+   if constexpr (sized_range<_Range1>)
+ __n1 = ranges::size(__r1);
+   if constexpr (sized_range<_Range2>)
+ __n2 = ranges::size(__r2);
+   return _S_impl(ranges::begin(__r1), ranges::end(__r1), __n1,
+  ranges::begin(__r2), ranges::end(__r2), __n2,
+  std::move(__pred),
+  std::move(__proj1), std::move(__proj2));
+  }
+
+  private:
+template
+  static constexpr bool
+  _S_impl(_Iter1 __first1, _Sent1 __last1, iter_difference_t<_Iter1> __n1,
+ _Iter2 __first2, _Sent2 __last2, iter_difference_t<_Iter2> __n2,
+ _Pred __pred, _Proj1 __proj1, _Proj2 __proj2)
+  {
+   if (__first2 == __last2) [[unlikely]]
+ return true;
+   else if (__n1 == -1 || __n2 == -1)
+ return ranges::mismatch(std::move(__first1), __last1,
+ std::move(__first2), __last2,
+ std::move(__pred),
+ std::move(__proj1), std::move(__proj2)).in2 
== __last2;
+   else if (__n1 < __n2)
+ return false;
+   else if constexpr (random_access_iterator<_Iter1>)
+ return ranges::equal(__first1, __first1 + 
iter_difference_t<_Iter1>(__n2),
+  std::move(__first2), __last2,
+  std::move(__pred),
+  std::move(__proj1), std::move(__proj2));
+   else
+ return ranges::equal(counted_iterator(std::move(__first1),
+

Re: [PATCH 07/61] Testsuite: Fix tests properly for compact-branches

2025-05-28 Thread Aleksandar Rakic

HTEC Public

Hi,

Could you please let us know if you have any comments
on the latest reply on this patch?

Kind regards,
Aleksandar Rakic


From: Aleksandar Rakic 
Sent: Wednesday, April 23, 2025 1:08 PM
To: Jeff Law; gcc-patches@gcc.gnu.org
Cc: Djordje Todorovic; c...@mips.com
Subject: Re: [PATCH 07/61] Testsuite: Fix tests properly for compact-branches

Hi,

> This likely needs to be updated for the trunk.

> Before:


>  === gcc Summary ===

> # of expected passes95
> # of unexpected failures25


> After:
>  === gcc Summary ===

> # of expected passes70
> # of unexpected failures50

> Clearly not going in the right direction.  Configured as
> mips64el-linux-gnuabi64.  Running just the near-far-?.c tests.

> Jeff

I would like to inform you that the version 2 of this patch with the
appropriate ChangeLog entry is available at the following link:

https://gcc.gnu.org/pipermail/gcc-patches/2025-March/677827.html

Please find attached scripts that I used for building the GCC
cross-compiler and for running the GCC testsuite for the
mips64-r6-linux-gnu target.
The script run_mips_gcc_testsuite is meant to be run inside the
$BUILD_DIR/gcc-build directory with the following arguments:

--sys-root=$SYSROOT --test-driver=mips.exp --test-regex="near-far-?.c"

I ran the near-far-?.c tests and all of them passed:

Before:
=== gcc Summary ===

# of expected passes168

After:
=== gcc Summary ===

# of expected passes168

Kind regards,
Aleksandar

[Patch] libgomp: Add OpenACC's acc_memcpy_device{,_async} routines [PR93226]

2025-05-28 Thread Tobias Burnus


We somehow missed implementing these OpenACC 2.6 functions (the
Fortran routines are newer: 3.3). It is actually used, at least,
by two SPEC hpc/accel (soma + lbm) tests (and OpenACC_VV) - and
it was trivial to implement, which was my workaround to make them
compile.

Besides adding the same-device copy function, it also adds some
shortcuts (size 0 → do nothing; same ptr (shared mem or same device)
→ do nothing, using memcpy not memmove per OpenACC semantics.)

Unless there are comments, I intend to commit the attached patch
on Friday.

Tobias
libgomp: Add OpenACC's acc_memcpy_device{,_async} routines [PR93226]

libgomp/ChangeLog:

	PR libgomp/93226
	* libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New
	prototype.
	* libgomp.h (struct acc_dispatch_t): Add dev2dev_func.
	(gomp_copy_dev2dev): New prototype.
	* libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}.
	* libgomp.texi (acc_memcpy_device): New.
	* oacc-mem.c (memcpy_tofrom_device): Change to take from/to
	device boolean; use memcpy not memmove; add early return if
	size == 0 or same device + same ptr.
	(acc_memcpy_to_device, acc_memcpy_to_device_async,
	acc_memcpy_from_device, acc_memcpy_from_device_async): Update.
	(acc_memcpy_device, acc_memcpy_device_async): New.
	* openacc.f90 (acc_memcpy_device, acc_memcpy_device_async):
	Add interface.
	* openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async):
	Likewise.
	* openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add
	prototype.
	* plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev):
	Update comment.
	(GOMP_OFFLOAD_openacc_async_dev2host): Update call.
	(GOMP_OFFLOAD_openacc_async_dev2dev): New.
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_openacc_async_dev2dev):
	New.
	* target.c (gomp_copy_dev2dev): New.
	(gomp_load_plugin_for_device): Load dev2dev and async_dev2dev.
	* testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test.
	* testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test.

 libgomp/libgomp-plugin.h   |   2 +
 libgomp/libgomp.h  |   4 +
 libgomp/libgomp.map|   6 ++
 libgomp/libgomp.texi   |  39 +++
 libgomp/oacc-mem.c |  44 +---
 libgomp/openacc.f90|  22 
 libgomp/openacc.h  |   4 +-
 libgomp/openacc_lib.h  |  24 +
 libgomp/plugin/plugin-gcn.c|  17 +++-
 libgomp/plugin/plugin-nvptx.c  |  43 
 libgomp/target.c   |  14 +++
 .../acc_memcpy_device-1.c  |  96 +
 .../libgomp.oacc-fortran/acc_memcpy_device-1.f90   | 113 +
 13 files changed, 409 insertions(+), 19 deletions(-)

diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 924fc1f44b1..50c89feaf73 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -200,6 +200,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size
 		 struct goacc_asyncqueue *);
 extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
 		 struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t,
+		struct goacc_asyncqueue *);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 6030f9d0a2c..ed4e23ae3e1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1360,6 +1360,7 @@ typedef struct acc_dispatch_t
 __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func;
 __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func;
 __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
+__typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func;
   } async;
 
   __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func;
@@ -1467,6 +1468,9 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *,
 extern void gomp_copy_dev2host (struct gomp_device_descr *,
 struct goacc_asyncqueue *, void *, const void *,
 size_t);
+extern void gomp_copy_dev2dev (struct gomp_device_descr *,
+			   struct goacc_asyncqueue *, void *, const void *,
+			   size_t);
 extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
 extern bool gomp_attach_pointer (struct gomp_device_descr *,
  struct goacc_asyncqueue *, splay_tree,
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index eae2f53bab1..ad9787ca4c0 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -609,6 +609,12 @@ OACC_2.6 {
 	acc_get_property_string_h_;
 } OACC_2.5.1;
 
+OACC_2.6.1 {
+  global:
+	acc_memcpy_device;
+	acc_memcpy_device_asy

Re: [PATCH] libstdc++: Fix flat_map::operator[] for const lvalue keys [PR120432]

2025-05-28 Thread Patrick Palka

On Wed, 28 May 2025, Tomasz Kaminski wrote:

> 
> 
> On Tue, May 27, 2025 at 7:08 PM Patrick Palka  wrote:
>   Tested on x86_64-pc-linux-gnu, does this look OK for trunk/15?
> 
>   The 'volatile' issue from that PR Will be fixed in a separate patch as
>   operator[] isn't the only operation that's affected.
> 
>   -- >8 --
> 
>   The const lvalue operator[] overload wasn't properly forwarding the key
>   type to the generic overload.
> 
>           PR libstdc++/120432
> 
>   libstdc++-v3/ChangeLog:
> 
>           * include/std/flat_map (_Flat_map_base::operator[]): Correct
>           forwarding from the const lvalue key overload.
>           * testsuite/23_containers/flat_map/1.cc (test08): New test.
>           * testsuite/23_containers/flat_multimap/1.cc (test08): New test.
>   ---
>    libstdc++-v3/include/std/flat_map                      |  2 +-
>    libstdc++-v3/testsuite/23_containers/flat_map/1.cc     | 10 ++
>    .../testsuite/23_containers/flat_multimap/1.cc         | 10 ++
>    3 files changed, 21 insertions(+), 1 deletion(-)
> 
>   diff --git a/libstdc++-v3/include/std/flat_map 
> b/libstdc++-v3/include/std/flat_map
>   index 6593988d213c..4d9ced1e8191 100644
>   --- a/libstdc++-v3/include/std/flat_map
>   +++ b/libstdc++-v3/include/std/flat_map
>   @@ -1142,7 +1142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>          // element access
>          mapped_type&
>          operator[](const key_type& __x)
>   -      { return operator[](__x); }
>   +      { return operator[](__x); }
> 
> Given that the operator[] that we are forwarding to is implemented as:
>         { return try_emplace(std::forward<_Key2>(__x)).first->second; }
> I would just call try_emplace directly:

Good point, the implementation is a simple one-liner either way, and it
addresses the volatile key issue.  Like so?

-- >8 --

Subject: [PATCH] libstdc++: Fix flat_map::operator[] for const lvalue keys
 [PR120432]

The const lvalue operator[] overload wasn't properly forwarding the key
type to the generic overload, causing a hard error for const keys.

This patch fixes this by making the non-template overloads call
try_emplace directly instead, which means we can remove the non-standard
same_as constraint on the generic overload.

PR libstdc++/120432

libstdc++-v3/ChangeLog:

* include/std/flat_map (flat_map::operator[]): Make the
non-template overloads call try_emplace directly.  Remove
non-standard same_as constraint on the template overload.
* testsuite/23_containers/flat_map/1.cc (test08): New test.
---
 libstdc++-v3/include/std/flat_map  |  6 +++---
 libstdc++-v3/testsuite/23_containers/flat_map/1.cc | 10 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/std/flat_map 
b/libstdc++-v3/include/std/flat_map
index 6593988d213c..5f9a2eda1939 100644
--- a/libstdc++-v3/include/std/flat_map
+++ b/libstdc++-v3/include/std/flat_map
@@ -1142,14 +1142,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // element access
   mapped_type&
   operator[](const key_type& __x)
-  { return operator[](__x); }
+  { return try_emplace(__x).first->second; }
 
   mapped_type&
   operator[](key_type&& __x)
-  { return operator[](std::move(__x)); }
+  { return try_emplace(std::move(__x)).first->second; }
 
   template
-   requires same_as, _Key> || 
__transparent_comparator<_Compare>
+   requires __transparent_comparator<_Compare>
mapped_type&
operator[](_Key2&& __x)
{ return try_emplace(std::forward<_Key2>(__x)).first->second; }
diff --git a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc 
b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
index a9690208b09f..2af516410279 100644
--- a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
+++ b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
@@ -253,6 +253,15 @@ test07()
   VERIFY( std::ranges::equal(m, (std::pair[]){{3,4}}) );
 }
 
+void
+test08()
+{
+  // PR libstdc++/120432 - flat_map operator[] is broken for const lvalue keys
+  std::flat_map m;
+  const int k = 42;
+  m[k] = 0;
+}
+
 int
 main()
 {
@@ -266,4 +275,5 @@ main()
   test05();
   test06();
   test07();
+  test08();
 }
-- 
2.49.0.654.g845c48a16a

[PATCH 3/3] OpenMP: Handle more cases in user/condition selector

2025-05-28 Thread Sandra Loosemore

Tobias had noted that the C front end was not treating C23 constexprs
as constant in the user/condition selector property, which led to
missed opportunities to resolve metadirectives at parse time.
Additionally neither C nor C++ was permitting the expression to have
pointer or floating-point type -- the former being a common idiom in
other C/C++ conditional expressions.  By using the existing front-end
hooks for the implicit conversion to bool in conditional expressions,
we also get free support for using a C++ class object that has a bool
conversion operator in the user/condition selector.

gcc/c/ChangeLog
* c-parser.cc (c_parser_omp_context_selector): Call
convert_lvalue_to_rvalue and c_objc_common_truthvalue_conversion
on the expression for OMP_TRAIT_PROPERTY_BOOL_EXPR.

gcc/cp/ChangeLog
* parser.cc (cp_parser_omp_context_selector): Call
convert_from_reference and condition_conversion on the expression
for OMP_TRAIT_PROPERTY_BOOL_EXPR.
* pt.cc (tsubst_omp_context_selector): Likewise.

gcc/testsuite/ChangeLog
* c-c++-common/gomp/declare-variant-2.c: Update expected output.
* c-c++-common/gomp/metadirective-condition-constexpr.c: New.
* c-c++-common/gomp/metadirective-condition.c: New.
* c-c++-common/gomp/metadirective-error-recovery.c: Update expected
output.
* g++.dg/gomp/metadirective-condition-class.C: New.
---
 gcc/c/c-parser.cc | 19 ++--
 gcc/cp/parser.cc  |  9 +++-
 gcc/cp/pt.cc  | 23 +++---
 .../c-c++-common/gomp/declare-variant-2.c |  2 +-
 .../gomp/metadirective-condition-constexpr.c  | 13 ++
 .../gomp/metadirective-condition.c| 25 +++
 .../gomp/metadirective-error-recovery.c   |  9 +++-
 .../gomp/metadirective-condition-class.C  | 43 +++
 8 files changed, 129 insertions(+), 14 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/gomp/metadirective-condition-constexpr.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-condition.c
 create mode 100644 gcc/testsuite/g++.dg/gomp/metadirective-condition-class.C

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index faef65879c6..ec1d7698aac 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -26818,17 +26818,30 @@ c_parser_omp_context_selector (c_parser *parser, enum 
omp_tss_code set,
  break;
case OMP_TRAIT_PROPERTY_DEV_NUM_EXPR:
case OMP_TRAIT_PROPERTY_BOOL_EXPR:
- t = c_parser_expr_no_commas (parser, NULL).value;
+ {
+   c_expr texpr = c_parser_expr_no_commas (parser, NULL);
+   texpr = convert_lvalue_to_rvalue (token->location, texpr,
+ true, true);
+   t = texpr.value;
+ }
  if (t == error_mark_node)
return error_mark_node;
  mark_exp_read (t);
- t = c_fully_fold (t, false, NULL);
- if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+ if (property_kind == OMP_TRAIT_PROPERTY_BOOL_EXPR)
+   {
+ t = c_objc_common_truthvalue_conversion (token->location,
+  t,
+  boolean_type_node);
+ if (t == error_mark_node)
+   return error_mark_node;
+   }
+ else if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
{
  error_at (token->location,
"property must be integer expression");
  return error_mark_node;
}
+ t = c_fully_fold (t, false, NULL);
  properties = make_trait_property (NULL_TREE, t, properties);
  break;
case OMP_TRAIT_PROPERTY_CLAUSE_LIST:
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 022e4db4650..20ad6f160b3 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -49877,7 +49877,14 @@ cp_parser_omp_context_selector (cp_parser *parser, 
enum omp_tss_code set,
  && !value_dependent_expression_p (t))
{
  t = fold_non_dependent_expr (t);
- if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+ t = convert_from_reference (t);
+ if (property_kind == OMP_TRAIT_PROPERTY_BOOL_EXPR)
+   {
+ t = condition_conversion (t);
+ if (t == error_mark_node)
+   return error_mark_node;
+   }
+ else if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
{
  error_at (token->location,
"property must be integer expression");
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index c687fdc71a3..40d8ebd15a5 100644
--- a/gcc

Re: [PATCH] libstdc++: Pass small trivial types by value in polymorphic wrappers

2025-05-28 Thread Patrick Palka

On Wed, 28 May 2025, Tomasz Kamiński wrote:

> This patch adjust the passing of parameters for the move_only_function,
> copyable_function and function_ref. For types that are declared as being 
> passed
> by value in signature template argument, the are passed by value to the 
> invoker,

they

> when they are small (at most two pointers), trivially move constructible and
> trivially destructible. The later guarantees that passing them by value has 
> not

latter

> user visible side effects.
> 
> In particular, this extents the set of types forwarded by value, that was

extends

> previously limited to scalars, to also include specializations of std::span 
> and
> std::string_view, and similar standard and program defined-types.
> 
> Checking the suitability of the parameter types requires the types to be 
> complete.
> As consequence implementation imposes requirements on instantiation of
> move_only_function and copyable_function. To avoid producing the errors from
> the implementation details, a static assertion was added to partial
> specializations of copyable_function, move_only_function and function_ref.
> The static assertion uses existing __is_complete_or_unbounded, as arrays type
> parameters are automatically decayed in function type.
> 
> Standard already specifies in [res.on.functions] p2.5 that instantiating these
> partial specialization with incomplete types leads to undefined behavior.
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/bits/funcwrap.h (__polyfunc::__pass_by_rref): Define.
>   (__polyfunc::__param_t): Update to use __pass_by_rref.
>   * include/bits/cpyfunc_impl.h:: Assert that are parameters type
>   are complete.
>   * include/bits/funcref_impl.h: Likewise.
>   * include/bits/mofunc_impl.h: Likewise.
>   * testsuite/20_util/copyable_function/call.cc: New test.
>   * testsuite/20_util/function_ref/call.cc: New test.
>   * testsuite/20_util/move_only_function/call.cc: New test.
>   * testsuite/20_util/copyable_function/conv.cc: New test.
>   * testsuite/20_util/function_ref/conv.cc: New test.
>   * testsuite/20_util/move_only_function/conv.cc: New test.
>   * testsuite/20_util/copyable_function/incomplete_neg.cc: New test.
>   * testsuite/20_util/function_ref/incomplete_neg.cc: New test.
>   * testsuite/20_util/move_only_function/incomplete_neg.cc: New test.
> ---
> Tested on x86_64-linux. OK for trunk?
> 
>  libstdc++-v3/include/bits/cpyfunc_impl.h  |  4 +++
>  libstdc++-v3/include/bits/funcref_impl.h  |  4 +++
>  libstdc++-v3/include/bits/funcwrap.h  | 18 +-
>  libstdc++-v3/include/bits/mofunc_impl.h   |  4 +++
>  .../20_util/copyable_function/call.cc |  7 ++--
>  .../20_util/copyable_function/conv.cc | 35 +++
>  .../copyable_function/incomplete_neg.cc   | 18 ++
>  .../testsuite/20_util/function_ref/call.cc| 10 +++---
>  .../testsuite/20_util/function_ref/conv.cc| 34 ++
>  .../20_util/function_ref/incomplete_neg.cc| 18 ++
>  .../20_util/move_only_function/call.cc|  7 ++--
>  .../20_util/move_only_function/conv.cc| 35 +++
>  .../move_only_function/incomplete_neg.cc  | 18 ++
>  13 files changed, 200 insertions(+), 12 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/20_util/copyable_function/incomplete_neg.cc
>  create mode 100644 
> libstdc++-v3/testsuite/20_util/function_ref/incomplete_neg.cc
>  create mode 100644 
> libstdc++-v3/testsuite/20_util/move_only_function/incomplete_neg.cc
> 
> diff --git a/libstdc++-v3/include/bits/cpyfunc_impl.h 
> b/libstdc++-v3/include/bits/cpyfunc_impl.h
> index bc44cd3e313..f1918ddf87a 100644
> --- a/libstdc++-v3/include/bits/cpyfunc_impl.h
> +++ b/libstdc++-v3/include/bits/cpyfunc_impl.h
> @@ -64,6 +64,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   _GLIBCXX_MOF_REF noexcept(_Noex)>
>  : __polyfunc::_Cpy_base
>  {
> +  static_assert(
> + (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) && ...),
> + "each parameter type must be a complete class");
> +
>using _Base = __polyfunc::_Cpy_base;
>using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
>using _Signature = _Invoker::_Signature;
> diff --git a/libstdc++-v3/include/bits/funcref_impl.h 
> b/libstdc++-v3/include/bits/funcref_impl.h
> index 1e19866035f..44c992281be 100644
> --- a/libstdc++-v3/include/bits/funcref_impl.h
> +++ b/libstdc++-v3/include/bits/funcref_impl.h
> @@ -68,6 +68,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  class function_ref<_Res(_ArgTypes...) _GLIBCXX_MOF_CV
>  noexcept(_Noex)>
>  {
> +  static_assert(
> + (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) && ...),
> + "each parameter type must be a complete class");
> +
>using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
>using _Si

[PATCH 1/3] OpenMP: Fix ICE in metadirective recovery after error [PR120180]

2025-05-28 Thread Sandra Loosemore

It's not clear whether a metadirective in a loop nest is supposed to
be valid, but GCC certainly shouldn't be ICE'ing after diagnosing it
as an error.

gcc/c/ChangeLog
PR c/120180
* c-parser.cc (c_parser_omp_metadirective): Only consume the
token if it is the expected ')'.

gcc/cp/ChangeLog
PR c/120180
* parser.cc (cp_parser_omp_metadirective): Only consume the
token if it is the expected ')'.

gcc/testsuite/ChangeLog
PR c/120180
* c-c++-common/gomp/pr120180.c: New.
---
 gcc/c/c-parser.cc  |  7 ---
 gcc/cp/parser.cc   |  7 ---
 gcc/testsuite/c-c++-common/gomp/pr120180.c | 22 ++
 3 files changed, 30 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/pr120180.c

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 8a63dc54c79..4a25bf283a3 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -29249,7 +29249,10 @@ c_parser_omp_metadirective (c_parser *parser, bool 
*if_p)
  goto add;
case CPP_CLOSE_PAREN:
  if (nesting_depth-- == 0)
-   break;
+   {
+ c_parser_consume_token (parser);
+ break;
+   }
  goto add;
default:
add:
@@ -29261,8 +29264,6 @@ c_parser_omp_metadirective (c_parser *parser, bool 
*if_p)
  break;
}
 
-  c_parser_consume_token (parser);
-
   if (!skip)
{
  c_token eol_token;
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 1fb9e7fd872..3e0b58e3492 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -51610,7 +51610,10 @@ cp_parser_omp_metadirective (cp_parser *parser, 
cp_token *pragma_tok,
  goto add;
case CPP_CLOSE_PAREN:
  if (nesting_depth-- == 0)
-   break;
+   {
+ cp_lexer_consume_token (parser->lexer);
+ break;
+   }
  goto add;
default:
add:
@@ -51622,8 +51625,6 @@ cp_parser_omp_metadirective (cp_parser *parser, 
cp_token *pragma_tok,
  break;
}
 
-  cp_lexer_consume_token (parser->lexer);
-
   if (!skip)
{
  cp_token eol_token = {};
diff --git a/gcc/testsuite/c-c++-common/gomp/pr120180.c 
b/gcc/testsuite/c-c++-common/gomp/pr120180.c
new file mode 100644
index 000..cb5a0d5a819
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/pr120180.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+
+/* This test used to ICE after erroring on the metadirective in the
+   loop nest.  */
+
+int main()
+{
+  int blksize = 15000;
+  double *qq;
+  int i, k, nq;
+
+  #pragma omp metadirective when(user={condition(0)}: target teams distribute 
parallel for collapse(2) map(qq[:0]) private(i)) \
+when(user={condition(0)}: target teams distribute 
parallel for map(qq[:0]) private(i)) \
+when(user={condition(1)}: target teams loop 
collapse(2) map(qq[:0]) private(i))
+  for(k=0; k

[PATCH 0/3] OpenMP: clean up metadirective issues

2025-05-28 Thread Sandra Loosemore

This group of patches is for some lingering problems with
metadirective, which was a relatively late addition to GCC 15.  The
first two address ICE-after-invalid problems in the C and C++ front
ends, and I think are suitable for backport to the GCC 15 branch as
well as trunk.  The third patch addresses a missed-optimization
situation and adds new functionality to the user/condition selector;
the OpenMP spec is rather vague on this so it's not clear to me if the
existing behavior is a bug or not, but apparently some external testsuites
do depend on implicit conversion of pointer to bool here, which is
rejected without this patch (and triggers the ICE fixed by part 2).

OK for trunk, at least?

Sandra Loosemore (3):
  OpenMP: Fix ICE in metadirective recovery after error [PR120180]
  OpenMP: Fix ICE and other issues in C/C++ metadirective error
recovery.
  OpenMP: Handle more cases in user/condition selector

 gcc/c/c-parser.cc | 106 +-
 gcc/cp/parser.cc  |  25 +++--
 gcc/cp/pt.cc  |  23 ++--
 .../c-c++-common/gomp/declare-variant-2.c |  15 ++-
 .../gomp/metadirective-condition-constexpr.c  |  13 +++
 .../gomp/metadirective-condition.c|  25 +
 .../gomp/metadirective-error-recovery.c   |  25 +
 gcc/testsuite/c-c++-common/gomp/pr120180.c|  22 
 .../gomp/metadirective-condition-class.C  |  43 +++
 9 files changed, 245 insertions(+), 52 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/gomp/metadirective-condition-constexpr.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-condition.c
 create mode 100644 
gcc/testsuite/c-c++-common/gomp/metadirective-error-recovery.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/pr120180.c
 create mode 100644 gcc/testsuite/g++.dg/gomp/metadirective-condition-class.C

-- 
2.34.1

[PATCH 2/3] OpenMP: Fix ICE and other issues in C/C++ metadirective error recovery.

2025-05-28 Thread Sandra Loosemore

The new testcase included in this patch used to ICE in gcc after
diagnosing the first error, and in g++ it only diagnosed the error in
the first metadirective, ignoring the second one.  The solution is to
make error recovery in the C front end more like that in the C++ front
end, and remove the code in both front ends that previously tried to
skip all the way over the following statement (instead of just to the
end of the metadirective pragma) after an error.

gcc/c/ChangeLog
* c-parser.cc (c_parser_skip_to_closing_brace): New, copied from
the equivalent function in the C++ front end.
(c_parser_skip_to_end_of_block_or_statement): Pass false to
the error flag.
(c_parser_omp_context_selector): Immediately return error_mark_node
after giving an error that the integer trait property is invalid,
similarly to C++ front end.
(c_parser_omp_context_selector_specification): Likewise handle
error return from c_parser_omp_context_selector similarly to C++.
(c_parser_omp_metadirective): Do not call
c_parser_skip_to_end_of_block_or_statement after an error.

gcc/cp/ChangeLog
* parser.cc (cp_parser_omp_metadirective): Do not call
cp_parser_skip_to_end_of_block_or_statement after an error.

gcc/testsuite/ChangeLog
* c-c++-common/gomp/declare-variant-2.c: Adjust patterns now that
C and C++ now behave similarly.
* c-c++-common/gomp/metadirective-error-recovery.c: New.
---
 gcc/c/c-parser.cc | 84 ++-
 gcc/cp/parser.cc  |  9 +-
 .../c-c++-common/gomp/declare-variant-2.c | 13 ++-
 .../gomp/metadirective-error-recovery.c   | 20 +
 4 files changed, 90 insertions(+), 36 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/gomp/metadirective-error-recovery.c

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 4a25bf283a3..faef65879c6 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1420,6 +1420,51 @@ c_parser_skip_to_end_of_parameter (c_parser *parser)
   parser->error = false;
 }
 
+/* Skip tokens until a non-nested closing curly brace is the next
+   token, or there are no more tokens. Return true in the first case,
+   false otherwise.  */
+
+static bool
+c_parser_skip_to_closing_brace (c_parser *parser)
+{
+  unsigned nesting_depth = 0;
+
+  while (true)
+{
+  c_token *token = c_parser_peek_token (parser);
+
+  switch (token->type)
+   {
+   case CPP_PRAGMA_EOL:
+ if (!parser->in_pragma)
+   break;
+ /* FALLTHRU */
+   case CPP_EOF:
+ /* If we've run out of tokens, stop.  */
+ return false;
+
+   case CPP_CLOSE_BRACE:
+ /* If the next token is a non-nested `}', then we have reached
+the end of the current block.  */
+ if (nesting_depth-- == 0)
+   return true;
+ break;
+
+   case CPP_OPEN_BRACE:
+ /* If the next token is a `{', then we are entering a new
+block.  Consume the entire block.  */
+ ++nesting_depth;
+ break;
+
+   default:
+ break;
+   }
+
+  /* Consume the token.  */
+  c_parser_consume_token (parser);
+}
+}
+
 /* Expect to be at the end of the pragma directive and consume an
end of line marker.  */
 
@@ -1535,7 +1580,7 @@ c_parser_skip_to_end_of_block_or_statement (c_parser 
*parser,
 here for secondary error recovery, after parser->error has
 been cleared.  */
  c_parser_consume_pragma (parser);
- c_parser_skip_to_pragma_eol (parser);
+ c_parser_skip_to_pragma_eol (parser, false);
  parser->error = save_error;
  continue;
 
@@ -26774,19 +26819,17 @@ c_parser_omp_context_selector (c_parser *parser, enum 
omp_tss_code set,
case OMP_TRAIT_PROPERTY_DEV_NUM_EXPR:
case OMP_TRAIT_PROPERTY_BOOL_EXPR:
  t = c_parser_expr_no_commas (parser, NULL).value;
- if (t != error_mark_node)
-   {
- mark_exp_read (t);
- t = c_fully_fold (t, false, NULL);
- if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
-   error_at (token->location,
- "property must be integer expression");
- else
-   properties = make_trait_property (NULL_TREE, t,
- properties);
-   }
- else
+ if (t == error_mark_node)
return error_mark_node;
+ mark_exp_read (t);
+ t = c_fully_fold (t, false, NULL);
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+   {
+ error_at (token->location,
+   "property must be integer expression");
+ return error_mark_node;
+   }
+ properties = make_trait_property (NULL_

Re: [PATCH v4 0/8] Implement layouts from mdspan.

2025-05-28 Thread Tomasz Kaminski

I have reviewed and posted feedback up to, but not including layout_stride
today.
Will try to finish tomorrow.
Thank you again for continuous work on the patches.

On Tue, May 27, 2025 at 4:40 PM Tomasz Kaminski  wrote:

>
>
> On Tue, May 27, 2025 at 4:32 PM Luc Grosheintz 
> wrote:
>
>> Since, I believe now we're through the larger questions about
>> how to implement layouts. If reviewing all three over and over
>> is too painful, it might now make sense to split the patch into
>> separate patches, one per layout.
>>
> I think we are OK. As you mentioned we are past general discussion,
> so I need to do a more thorough review, checking against the standard.
> I will try to book some time for this this week.
>
>
>> On 5/26/25 16:04, Luc Grosheintz wrote:
>> > This follows up on:
>> > https://gcc.gnu.org/pipermail/libstdc++/2025-May/061572.html
>> >
>> > Note that this patch series can only be applied after merging:
>> > https://gcc.gnu.org/pipermail/libstdc++/2025-May/061653.html
>> >
>> > The important changes since v3 are:
>> >* Fixed and tested several related overflow issues that occurred in
>> >  extents of size 0 by using `size_t` to compute products.
>> >* Fixed and tested default ctors.
>> >* Add missing code for module support.
>> >* Documented deviation from standard.
>> >
>> > The smaller changes include:
>> >* Squashed the three small commits that make cosmetic changes to
>> >  std::extents.
>> >* Remove layout_left related changes from the layout_stride commit.
>> >* Remove superfluous `mapping(extents_type(__exts))`.
>> >* Fix indenting and improve comment in layout_stride.
>> >* Add an easy check for representable required_span_size to
>> >  layout_stride.
>> >* Inline __dynamic_extents_prod
>> >
>> > Thank you Tomasz for all the great reviews!
>> >
>> > Luc Grosheintz (8):
>> >libstdc++: Improve naming and whitespace for extents.
>> >libstdc++: Implement layout_left from mdspan.
>> >libstdc++: Add tests for layout_left.
>> >libstdc++: Implement layout_right from mdspan.
>> >libstdc++: Add tests for layout_right.
>> >libstdc++: Implement layout_stride from mdspan.
>> >libstdc++: Add tests for layout_stride.
>> >libstdc++: Make layout_left(layout_stride) noexcept.
>> >
>> >   libstdc++-v3/include/std/mdspan   | 711 +-
>> >   libstdc++-v3/src/c++23/std.cc.in  |   5 +-
>> >   .../mdspan/layouts/class_mandate_neg.cc   |  42 ++
>> >   .../23_containers/mdspan/layouts/ctors.cc | 459 +++
>> >   .../23_containers/mdspan/layouts/empty.cc |  78 ++
>> >   .../23_containers/mdspan/layouts/mapping.cc   | 568 ++
>> >   .../23_containers/mdspan/layouts/stride.cc| 500 
>> >   7 files changed, 2349 insertions(+), 14 deletions(-)
>> >   create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc
>> >   create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/layouts/ctors.cc
>> >   create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/layouts/empty.cc
>> >   create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/layouts/mapping.cc
>> >   create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/layouts/stride.cc
>> >
>>
>>

Re: [PATCH v4 5/8] libstdc++: Add tests for layout_right.

2025-05-28 Thread Tomasz Kaminski

On Mon, May 26, 2025 at 4:13 PM Luc Grosheintz 
wrote:

> Adds tests for layout_right and for the parts of layout_left that depend
> on layout_right.
>
> libstdc++-v3/ChangeLog:
>
> * testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc: Add
> tests for layout_right.
> * testsuite/23_containers/mdspan/layouts/ctors.cc: Add tests for
> layout_right and the interaction with layout_left.
> * testsuite/23_containers/mdspan/layouts/mapping.cc: ditto.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
LGTM. One single comment, and as you will see, I am scraping the bottom of
the barrel with them.

>  .../mdspan/layouts/class_mandate_neg.cc   |  1 +
>  .../23_containers/mdspan/layouts/ctors.cc | 64 +++
>  .../23_containers/mdspan/layouts/empty.cc |  1 +
>  .../23_containers/mdspan/layouts/mapping.cc   | 78 ---
>  4 files changed, 134 insertions(+), 10 deletions(-)
>
> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc
> index b276fbd333e..a41bad988d2 100644
> ---
> a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc
> +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc
> @@ -18,5 +18,6 @@ template
>};
>
>  A a_left; // { dg-error "required
> from" }
> +A a_right;   // { dg-error "required
> from" }
>
>  // { dg-prune-output "must be representable as index_type" }
> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/ctors.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/ctors.cc
> index 18d9743a57b..cc719dfee10 100644
> --- a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/ctors.cc
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/ctors.cc
> @@ -269,6 +269,66 @@ namespace from_same_layout
>  }
>  }
>
> +// ctor: mapping(layout_{right,left}::mapping)
> +namespace from_left_or_right
> +{
> +  template +  typename OExtents>
> +constexpr void
> +verify_ctor(OExtents oexts)
> +{
> +  using SMapping = typename SLayout::mapping;
> +  using OMapping = typename OLayout::mapping;
> +
> +  constexpr bool expected = std::is_convertible_v;
> +  if constexpr (expected)
> +   verify_nothrow_convertible(OMapping(oexts));
> +  else
> +   verify_nothrow_constructible(OMapping(oexts));
> +}
> +
> +  template
> +constexpr bool
> +test_ctor()
> +{
> +  assert_not_constructible<
> +   typename SLayout::mapping>,
> +   typename OLayout::mapping>>();
> +
> +  verify_ctor>(
> +   std::extents{});
> +
> +  verify_ctor>(
> +   std::extents{});
> +
> +  assert_not_constructible<
> +   typename SLayout::mapping>,
> +   typename OLayout::mapping>>();
> +
> +  verify_ctor>(
> +   std::extents{});
> +
> +  verify_ctor>(
> +   std::extents{});
> +
> +  verify_ctor>(
> +   std::extents{});
> +
> +  assert_not_constructible<
> +   typename SLayout::mapping>,
> +   typename OLayout::mapping>>();
> +  return true;
> +}
> +
> +  template
> +constexpr void
> +test_all()
> +{
> +  test_ctor();
> +  static_assert(test_ctor());
> +}
> +}
> +
>  template
>constexpr void
>test_all()
> @@ -282,5 +342,9 @@ int
>  main()
>  {
>test_all();
> +  test_all();
> +
> +  from_left_or_right::test_all();
> +  from_left_or_right::test_all();
>return 0;
>  }
> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/empty.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/empty.cc
> index 8cca8171d12..e95eacd80b6 100644
> --- a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/empty.cc
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/empty.cc
> @@ -66,5 +66,6 @@ int
>  main()
>  {
>static_assert(test_all());
> +  static_assert(test_all());
>return 0;
>  }
> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/mapping.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/mapping.cc
> index a5be1166617..40a0c828cc4 100644
> --- a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/mapping.cc
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/mapping.cc
> @@ -293,6 +293,15 @@ template<>
>  VERIFY(m.stride(1) == 3);
>}
>
> +template<>
> +  constexpr void
> +  test_stride_2d()
> +  {
> +std::layout_right::mapping> m;
> +VERIFY(m.stride(0) == 5);
> +VERIFY(m.stride(1) == 1);
> +  }
> +
>  template
>constexpr void
>test_stride_3d();
> @@ -307,6 +316,16 @@ template<>
>  VERIFY(m.stride(2) == 3*5);
>}
>
> +template<>
> +  constexpr void
> +  test_stride_3d()
> +  {
> +std::layout_right::mapping m(std::dextents(3, 5, 7));
> +VERIFY(m.stride(0) == 35);
>
I prefer to say 5 * 7 here, as for layout_left.

> +VERIFY(m.stride(1) == 7);
> +VERIFY(m.stride(2) == 1);
>

Re: [PATCH v5] libstdc++: Implement C++23 P1659R3 starts_with and ends_with

2025-05-28 Thread Tomasz Kaminski

On Wed, May 28, 2025 at 4:27 PM Patrick Palka  wrote:

>
> >
> >
> > On Tue, May 20, 2025 at 6:32 PM Patrick Palka  wrote:
> >   On Tue, 20 May 2025, Tomasz Kaminski wrote:
> >
> >   > I think I do not have any more suggestions for cases to check,
> so the impl LGTM.
> >
> >   It's cool how many optimizations we came up with for this
> algorithm :)
> >
> >   >
> >   > On Tue, May 20, 2025 at 4:33 PM Patrick Palka 
> wrote:
> >   >   Changes in v5:
> >   > * dispatch to starts_with for the both-bidi/common range
> case
> >   >
> >   >   Changes in v4:
> >   > * optimize the both-bidi/common ranges case, as
> suggested by
> >   >   Tomasz
> >   > * add tests for that code path
> >   >
> >   >   Changes in v3:
> >   > * Use the forward_range code path for a (non-sized)
> bidirectional
> >   >   haystack, since it's slightly fewer
> increments/decrements
> >   >   overall.
> >   > * Fix wrong iter_difference_t cast in starts_with.
> >   >
> >   >   Changes in v2:
> >   > Addressed Tomasz's review comments, namely:
> >   > * Added explicit iter_difference_t casts
> >   > * Made _S_impl member private
> >   > * Optimized sized bidirectional case of ends_with
> >   > * Rearranged control flow of starts_with::_S_impl
> >   >
> >   >   Still left to do:
> >   > * Add tests for integer-class types
> >   > * Still working on a better commit description ;)
> >   >
> >   >   -- >8 --
> >   >
> >   >   libstdc++-v3/ChangeLog:
> >   >
> >   >   * include/bits/ranges_algo.h (__starts_with_fn,
> starts_with):
> >   >   Define.
> >   >   (__ends_with_fn, ends_with): Define.
> >   >   * include/bits/version.def
> (ranges_starts_ends_with): Define.
> >   >   * include/bits/version.h: Regenerate.
> >   >   * include/std/algorithm: Provide
> __cpp_lib_ranges_starts_ends_with.
> >   >   * src/c++23/std.cc.in (ranges::starts_with):
> Export.
> >   >   (ranges::ends_with): Export.
> >   >   * testsuite/25_algorithms/ends_with/1.cc: New test.
> >   >   * testsuite/25_algorithms/starts_with/1.cc: New
> test.
> >   >   ---
> >   >libstdc++-v3/include/bits/ranges_algo.h   | 247
> ++
> >   >libstdc++-v3/include/bits/version.def |   8 +
> >   >libstdc++-v3/include/bits/version.h   |  10 +
> >   >libstdc++-v3/include/std/algorithm|   1 +
> >   >libstdc++-v3/src/c++23/std.cc.in  |   4 +
> >   >.../testsuite/25_algorithms/ends_with/1.cc| 135
> ++
> >   >.../testsuite/25_algorithms/starts_with/1.cc  | 128
> +
> >   >7 files changed, 533 insertions(+)
> >   >create mode 100644
> libstdc++-v3/testsuite/25_algorithms/ends_with/1.cc
> >   >create mode 100644
> libstdc++-v3/testsuite/25_algorithms/starts_with/1.cc
> >   >
> >   >   diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> b/libstdc++-v3/include/bits/ranges_algo.h
> >   >   index f36e7dd59911..60f7bf841f3f 100644
> >   >   --- a/libstdc++-v3/include/bits/ranges_algo.h
> >   >   +++ b/libstdc++-v3/include/bits/ranges_algo.h
> >   >   @@ -438,6 +438,253 @@ namespace ranges
> >   >
> >   >  inline constexpr __search_n_fn search_n{};
> >   >
> >   >   +#if __glibcxx_ranges_starts_ends_with // C++ >= 23
> >   >   +  struct __starts_with_fn
> >   >   +  {
> >   >   +template
> _Sent1,
> >   >   +input_iterator _Iter2, sentinel_for<_Iter2>
> _Sent2,
> >   >   +typename _Pred = ranges::equal_to,
> >   >   +typename _Proj1 = identity, typename _Proj2 =
> identity>
> >   >   +  requires indirectly_comparable<_Iter1, _Iter2,
> _Pred, _Proj1, _Proj2>
> >   >   +  constexpr bool
> >   >   +  operator()(_Iter1 __first1, _Sent1 __last1,
> >   >   +_Iter2 __first2, _Sent2 __last2, _Pred
> __pred = {},
> >   >   +_Proj1 __proj1 = {}, _Proj2 __proj2 = {})
> const
> >   >   +  {
> >   >   +   iter_difference_t<_Iter1> __n1 = -1;
> >   >   +   iter_difference_t<_Iter2> __n2 = -1;
> >   >   +   if constexpr (sized_sentinel_for<_Sent1, _Iter1>)
> >   >   + __n1 = __last1 - __first1;
> >   >   +   if constexpr (sized_sentinel_for<_Sent2, _Iter2>)
> >   >   + __n2 = __last2 - __first2;
> >   >   +   return _S_impl(std::move(_

Re: [PATCH] gimple-fold: Implement simple copy propagation for aggregates [PR14295]

2025-05-28 Thread Andrew Pinski

On Mon, May 26, 2025 at 1:40 PM Andrew Pinski  wrote:
>
> On Mon, May 26, 2025 at 5:36 AM Richard Biener
>  wrote:
> >
> > On Sun, May 18, 2025 at 10:58 PM Andrew Pinski  
> > wrote:
> > >
> > > This implements a simple copy propagation for aggregates in the similar
> > > fashion as we already do for copy prop of zeroing.
> > >
> > > Right now this only looks at the previous vdef statement but this allows 
> > > us
> > > to catch a lot of cases that show up in C++ code.
> > >
> > > Also deletes aggregate copies that are to the same location (PR57361), 
> > > this was
> > > already done in DSE but we should do it here also since it is simple to 
> > > add and
> > > when doing a copy to a temporary and back to itself should be deleted too.
> > > So we need a variant that tests DSE and one for forwprop.
> > >
> > > Also adds a variant of pr22237.c which was found while working on this 
> > > patch.
> > >
> > > PR tree-optimization/14295
> > > PR tree-optimization/108358
> > > PR tree-optimization/114169
> > >
> > > gcc/ChangeLog:
> > >
> > > * tree-ssa-forwprop.cc (optimize_agr_copyprop): New function.
> > > (pass_forwprop::execute): Call optimize_agr_copyprop for 
> > > load/store statements.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.dg/tree-ssa/20031106-6.c: Un-xfail. Add scan for forwprop1.
> > > * g++.dg/opt/pr66119.C: Disable forwprop since that does
> > > the copy prop now.
> > > * gcc.dg/tree-ssa/pr108358-a.c: New test.
> > > * gcc.dg/tree-ssa/pr114169-1.c: New test.
> > > * gcc.c-torture/execute/builtins/pr22237-1-lib.c: New test.
> > > * gcc.c-torture/execute/builtins/pr22237-1.c: New test.
> > > * gcc.dg/tree-ssa/pr57361.c: Disable forwprop1.
> > > * gcc.dg/tree-ssa/pr57361-1.c: New test.
> > >
> > > Signed-off-by: Andrew Pinski 
> > > ---
> > >  gcc/testsuite/g++.dg/opt/pr66119.C|   2 +-
> > >  .../execute/builtins/pr22237-1-lib.c  |  27 +
> > >  .../execute/builtins/pr22237-1.c  |  57 ++
> > >  gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c|   8 +-
> > >  gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c|  33 ++
> > >  gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c|  39 +++
> > >  gcc/testsuite/gcc.dg/tree-ssa/pr57361-1.c |   9 ++
> > >  gcc/testsuite/gcc.dg/tree-ssa/pr57361.c   |   2 +-
> > >  gcc/tree-ssa-forwprop.cc  | 103 ++
> > >  9 files changed, 276 insertions(+), 4 deletions(-)
> > >  create mode 100644 
> > > gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
> > >  create mode 100644 
> > > gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr57361-1.c
> > >
> > > diff --git a/gcc/testsuite/g++.dg/opt/pr66119.C 
> > > b/gcc/testsuite/g++.dg/opt/pr66119.C
> > > index d1b1845a258..52362e44434 100644
> > > --- a/gcc/testsuite/g++.dg/opt/pr66119.C
> > > +++ b/gcc/testsuite/g++.dg/opt/pr66119.C
> > > @@ -3,7 +3,7 @@
> > > the value of MOVE_RATIO now is.  */
> > >
> > >  /* { dg-do compile  { target { { i?86-*-* x86_64-*-* } && c++11 } }  }  
> > > */
> > > -/* { dg-options "-O3 -mavx -fdump-tree-sra -march=slm -mtune=slm 
> > > -fno-early-inlining" } */
> > > +/* { dg-options "-O3 -mavx -fdump-tree-sra -fno-tree-forwprop -march=slm 
> > > -mtune=slm -fno-early-inlining" } */
> > >  // { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! 
> > > hostedlib } }
> > >
> > >  #include 
> > > diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c 
> > > b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
> > > new file mode 100644
> > > index 000..44032357405
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
> > > @@ -0,0 +1,27 @@
> > > +extern void abort (void);
> > > +
> > > +void *
> > > +memcpy (void *dst, const void *src, __SIZE_TYPE__ n)
> > > +{
> > > +  const char *srcp;
> > > +  char *dstp;
> > > +
> > > +  srcp = src;
> > > +  dstp = dst;
> > > +
> > > +  if (dst < src)
> > > +{
> > > +  if (dst + n > src)
> > > +   abort ();
> > > +}
> > > +  else
> > > +{
> > > +  if (src + n > dst)
> > > +   abort ();
> > > +}
> > > +
> > > +  while (n-- != 0)
> > > +*dstp++ = *srcp++;
> > > +
> > > +  return dst;
> > > +}
> > > diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c 
> > > b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
> > > new file mode 100644
> > > index 000..0a12b0fc9a1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
> > > @@ -0,0 +1,57 @@
> > > +extern void abort (void);
> > > +extern void exit (int);
> > > +struct s { unsigned char a[256]; };
> > > +union u

[Patch] Fortran: gfc_simplify_{cospi,sinpi} - fix for MPFR < 4.2.0 (was: [PATCH] fortran: add constant input support for trig functions with half-revolutions)

2025-05-28 Thread Tobias Burnus


Hi Harald,

Harald Anlauf wrote:

This breaks bootstrap here on openSUSE Leap 15.6 with mpfr-4.0.2:

../../gcc-trunk/gcc/fortran/simplify.cc: In function 'gfc_expr* 
gfc_simplify_cospi(gfc_expr*)':
../../gcc-trunk/gcc/fortran/simplify.cc:2305:3: error: 'mpfr_fmod_ui' 
was not declared in this scope; did you mean 'mpfr_fmodquo'?

 2305 |   mpfr_fmod_ui (cs, n, 2, GFC_RND_MODE);
  |   ^~~~


Interestingly, mpfr_fmod_ui has been added only in 4.2.0,
at the same time as mpfr_sinpi.

Solution is to use mpfr_fmod.

I have now committed the attached patch as r16-940-ga64a7f0a6cf8af.

* * *

I built GCC successfully with mpfr 3.1.6, and "make check-fortran"
in $build/gcc was also successful, except for the following failures:

I get an ICE for gfortran.dg/complex_intrinsic_8.f90 in gfc_simplify_atan:

mpfr/src/init2.c:52: MPFR assertion failed:
   p >= 2 && p <= ((mpfr_prec_t)((mpfr_uprec_t)(~(mpfr_uprec_t)0)>>1))

but that looks unrelated. Likewise (same assert in init2.c) for
arith_power via gfortran.dg/integer_exponentiation_4.f90 and
in gimple_resimplify2 (with -O1) for /gfortran.dg/large_real_kind_2.F90.

As those seem to be mpfr bugs and they don't trigger for the ...pi functions,
I have decided to ignore them when applying the patch.

Sorry for the breakage!

Tobias

PS: I tried to build mpfr 3.1.0 in tree but that had compile issues with my
GCC 14 system compiler; but at least mpfr-3.1.6 seems to build fine. I think
MPFR 4.0.x avoids the assert.


@Yuao: To build mpfr in tree, you can use:
  ./contrib/download_prerequisites
and then build like normal. This installs multiple libraries - but not
all are needed and mpfr 4.1.0 might be too new. Thus, the following is better:

For only mpfr, the simplest is to download it from 
https://www.mpfr.org/history.html
unpack the tarball, and add a symlink 'ln -s mpfr-3.1.6 mpfr'. If you now build
GCC, it will automatically first configure and build 'mpfr' and then use it to 
build
GCC.
Fortran: gfc_simplify_{cospi,sinpi} - fix for MPFR < 4.2.0

gcc/fortran/ChangeLog:

	PR fortran/113152
	* simplify.cc (gfc_simplify_cospi, gfc_simplify_sinpi): Avoid using
	mpfr_fmod_ui in the MPFR < 4.2.0 version.

 gcc/fortran/simplify.cc | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc
index 2ceb479faf5..b25cd2c2388 100644
--- a/gcc/fortran/simplify.cc
+++ b/gcc/fortran/simplify.cc
@@ -2288,10 +2288,10 @@ gfc_simplify_cospi (gfc_expr *x)
 #if MPFR_VERSION >= MPFR_VERSION_NUM(4, 2, 0)
   mpfr_cospi (result->value.real, x->value.real, GFC_RND_MODE);
 #else
-  mpfr_t cs, n, r;
+  mpfr_t cs, n, r, two;
   int s;
 
-  mpfr_inits2 (2 * mpfr_get_prec (x->value.real), cs, n, r, NULL);
+  mpfr_inits2 (2 * mpfr_get_prec (x->value.real), cs, n, r, two, NULL);
 
   mpfr_abs (r, x->value.real, GFC_RND_MODE);
   mpfr_modf (n, r, r, GFC_RND_MODE);
@@ -2302,7 +2302,8 @@ gfc_simplify_cospi (gfc_expr *x)
   return result;
 }
 
-  mpfr_fmod_ui (cs, n, 2, GFC_RND_MODE);
+  mpfr_set_ui (two, 2, GFC_RND_MODE);
+  mpfr_fmod (cs, n, two, GFC_RND_MODE);
   s = mpfr_cmp_ui (cs, 0) == 0 ? 1 : -1;
 
   mpfr_const_pi (cs, GFC_RND_MODE);
@@ -2310,7 +2311,7 @@ gfc_simplify_cospi (gfc_expr *x)
   mpfr_cos (cs, cs, GFC_RND_MODE);
   mpfr_mul_si (result->value.real, cs, s, GFC_RND_MODE);
 
-  mpfr_clears (cs, n, r, NULL);
+  mpfr_clears (cs, n, r, two, NULL);
 #endif
 
   return range_check (result, "COSPI");
@@ -2329,10 +2330,10 @@ gfc_simplify_sinpi (gfc_expr *x)
 #if MPFR_VERSION >= MPFR_VERSION_NUM(4, 2, 0)
   mpfr_sinpi (result->value.real, x->value.real, GFC_RND_MODE);
 #else
-  mpfr_t sn, n, r;
+  mpfr_t sn, n, r, two;
   int s;
 
-  mpfr_inits2 (2 * mpfr_get_prec (x->value.real), sn, n, r, NULL);
+  mpfr_inits2 (2 * mpfr_get_prec (x->value.real), sn, n, r, two, NULL);
 
   mpfr_abs (r, x->value.real, GFC_RND_MODE);
   mpfr_modf (n, r, r, GFC_RND_MODE);
@@ -2343,7 +2344,8 @@ gfc_simplify_sinpi (gfc_expr *x)
   return result;
 }
 
-  mpfr_fmod_ui (sn, n, 2, GFC_RND_MODE);
+  mpfr_set_ui (two, 2, GFC_RND_MODE);
+  mpfr_fmod (sn, n, two, GFC_RND_MODE);
   s = mpfr_cmp_si (x->value.real, 0) < 0 ? -1 : 1;
   s *= mpfr_cmp_ui (sn, 0) == 0 ? 1 : -1;
 
@@ -2352,7 +2354,7 @@ gfc_simplify_sinpi (gfc_expr *x)
   mpfr_sin (sn, sn, GFC_RND_MODE);
   mpfr_mul_si (result->value.real, sn, s, GFC_RND_MODE);
 
-  mpfr_clears (sn, n, r, NULL);
+  mpfr_clears (sn, n, r, two, NULL);
 #endif
 
   return range_check (result, "SINPI");

Re: [PATCH 1/3] OpenMP: Fix ICE in metadirective recovery after error [PR120180]

2025-05-28 Thread Tobias Burnus


Sandra Loosemore wrote:


It's not clear whether a metadirective in a loop nest is supposed to
be valid, but GCC certainly shouldn't be ICE'ing after diagnosing it
as an error.

gcc/c/ChangeLog
PR c/120180
* c-parser.cc (c_parser_omp_metadirective): Only consume the
token if it is the expected ')'.

gcc/cp/ChangeLog
PR c/120180
* parser.cc (cp_parser_omp_metadirective): Only consume the
token if it is the expected ')'.

gcc/testsuite/ChangeLog
PR c/120180
* c-c++-common/gomp/pr12180.c: New.


LGTM; it is IMHO worthwhile to cherry-pick the ICE fix to GCC 15 (as 
proposed by you).


Thanks,

Tobias

Re: DEFAULT_PCC_STRUCT_RETURN on NetBSD vs Linux

2025-05-28 Thread Andreas Schwab

On Mai 28 2025, John Paul Adrian Glaubitz wrote:

> Shouldn't the #undef in linux.h undefine DEFAULT_PCC_STRUCT_RETURN and not
> PCC_STATIC_STRUCT_RETURN?

No, they are separate target options.  PCC_STATIC_STRUCT_RETURN is no
longer defined by default, so this is redundant now.

> And, secondly, shouldn't the comment in linux.h be corrected since
> apparently linux.h and netbsd-elf.h disagree on what the SVR4 ABI
> specifies how structs and unions are returned?

This is controlled by TARGET_RETURN_IN_MEMORY if
DEFAULT_PCC_STRUCT_RETURN is 0.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

Re: [PATCH] RISC-V: Add 'bclr+binv' peephole2 optimization.

2025-05-28 Thread Jeff Law





On 5/28/25 4:23 AM, Jiawei wrote:

This patch adds a peephole2 optimization that combines a 'bclr' followed by
a 'binv' into a single 'bset' instruction when the Zbs extension is enabled.

The motivation for this patch is that PR116398 limits 2→2 RTL combinations,
which prevents certain simplifications in the combiner pass. As a result,
combining 'bclr' and 'binv' through standard RTL combination is not feasible
when Zbs is enabled. An example is the testcase
g++.target/riscv/redundant-bitmap-2.C[1] from Jeff Law's patch[2].

PR116398: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4d7a634f6d41029811cdcbd5f7282b5b07890094
[1] https://godbolt.org/z/dhYoTMY1v
[2] 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=05daf617ea22e1d818295ed2d037456937e23530

gcc/ChangeLog:

* config/riscv/bitmanip.md (*bset_2): New pattern.
* config/riscv/peephole.md: Ditto.

Signed-off-by: Jiawei 
---
  gcc/config/riscv/bitmanip.md |  9 +
  gcc/config/riscv/peephole.md | 16 
  2 files changed, 25 insertions(+)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 21426f49679..1bd66c4aa19 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -615,6 +615,15 @@
"bset\t%0,x0,%1"
[(set_attr "type" "bitmanip")])
  
+(define_insn "*bset_2"

+  [(set (match_operand:X 0 "register_operand" "=r")
+   (ior:X (match_operand:X 1 "register_operand" "r")
+  (ashift:X (const_int 1)
+(match_operand:QI 2 "register_operand" "r"))))]
+  "TARGET_ZBS"
+  "bset\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])

Isn't this case handled by this pattern:


(define_insn "*<bit_optab><mode>"
  [(set (match_operand:X 0 "register_operand" "=r")
(any_or:X (ashift:X (const_int 1)
(match_operand:QI 2 "register_operand" "r"))
  (match_operand:X 1 "register_operand" "r")))]
  "TARGET_ZBS"
  "<bit_optab>\t%0,%1,%2"
  [(set_attr "type" "bitmanip")])
  
Oh, it's the operand order. Your pattern isn't canonical.  If something 
generated that form, it needs to be fixed.  Here's the relevant passage 
that covers canonicalization of associative operators:




For associative operators, a sequence of operators will always chain
to the left; for instance, only the left operand of an integer @code{plus}
can itself be a @code{plus}.  @code{and}, @code{ior}, @code{xor},
@code{plus}, @code{mult}, @code{smin}, @code{smax}, @code{umin}, and
@code{umax} are associative when applied to integers, and sometimes to
floating-point.








+
  ;; The result will always have bits 32..63 clear, so the zero-extend
  ;; is redundant.  We could split it to bset_1, but it seems
  ;; unnecessary.
diff --git a/gcc/config/riscv/peephole.md b/gcc/config/riscv/peephole.md
index b5cc1924c76..1d5d15e9005 100644
--- a/gcc/config/riscv/peephole.md
+++ b/gcc/config/riscv/peephole.md
@@ -39,6 +39,22 @@
operands[5] = GEN_INT (INTVAL (operands[2]) - INTVAL (operands[5]));
  })
  
+;; ZBS

+(define_peephole2
+  [(set (match_operand:X 1 "register_operand")
+   (and:X (rotate:X (const_int -2)
+(match_operand:QI 3 "register_operand"))
+  (match_operand:X 2 "register_operand")))
+   (set (match_operand:X 0 "register_operand")
+   (xor:X (ashift:X (const_int 1)
+(match_dup 3))
+  (match_dup 1)))]
+  "TARGET_ZBS"
+  [(set (match_dup 0)
+   (ior:X (match_dup 2)
+  (ashift:X (const_int 1)
+(match_dup 3))))])
This seems like it would be much better as a combine pattern.   In fact, 
I'm a bit surprised that combine didn't simplify this series of 
operations into an IOR.  So I'd really like to see the .combine dump with 
and without this hunk for the relevant testcase.


Also this patch should include a testcase or an addition/adjustment to 
an existing testcase.



jeff

Re: [PATCH] rtl-ssa: Reject non-address uses of autoinc regs [PR120347]

2025-05-28 Thread Richard Sandiford

Richard Biener  writes:
> On Thu, May 22, 2025 at 12:19 PM Richard Sandiford
>  wrote:
>>
>> As the rtl.texi documentation of RTX_AUTOINC expressions says:
>>
>>   If a register used as the operand of these expressions is used in
>>   another address in an insn, the original value of the register is
>>   used.  Uses of the register outside of an address are not permitted
>>   within the same insn as a use in an embedded side effect expression
>>   because such insns behave differently on different machines and hence
>>   must be treated as ambiguous and disallowed.
>>
>> late-combine was failing to follow this rule.  One option would have
>> been to enforce it during the substitution phase, like combine does.
>> This could either be a dedicated condition in the substitution code
>> or, more generally, an extra condition in can_merge_accesses.
>> (The latter would include extending is_pre_post_modify to uses.)
>>
>> However, since the restriction applies to patterns rather than to
>> actions on patterns, the more robust fix seemed to be to test and reject
>> this case in (a subroutine of) rtl_ssa::recog.  We already do something
>> similar for hard-coded register clobbers.
>>
>> Using vec_rtx_properties isn't the lightest-weight operation
>> out there.  I did wonder about relying on the is_pre_post_modify
>> flag of the definitions in the new_defs array, but that would
>> require callers that create new autoincs to set the flag before
>> calling recog.  Normally these flags are instead updated
>> automatically based on the final pattern.
>>
>> Besides, recog itself has had to traverse the whole pattern,
>> and it is even less light-weight than vec_rtx_properties.
>> At least the pattern should be in cache.
>>
>> Tested on arm-linux-gnueabihf, aarch64-linux-gnu and
>> x86_64-linux-gnu.  OK for trunk and backports?
>
> LGTM, note the 14 branch is currently frozen.

Thanks.  It turns out that I looked at the wrong results for the
arm-linux-gnueabihf testing :-(, and the Linaro CI flagged up a
regression.  Although I think the rtl-ssa fix is still the right
one, it showed up a mistake (of mine) in the rtl_properties walker:
try_to_add_src would drop all flags except IN_NOTE before recursing
into RTX_AUTOINC addresses.

RTX_AUTOINCs only occur in addresses, and so for them, the flags coming
into try_to_add_src are set by:

  unsigned int base_flags = flags & rtx_obj_flags::STICKY_FLAGS;
  ...
  if (MEM_P (x))
{
  ...

  unsigned int addr_flags = base_flags | rtx_obj_flags::IN_MEM_STORE;
  if (flags & rtx_obj_flags::IS_READ)
addr_flags |= rtx_obj_flags::IN_MEM_LOAD;
  try_to_add_src (XEXP (x, 0), addr_flags);
  return;
}

This means that the only flags that can be set are:

- IN_NOTE (the sole member of STICKY_FLAGS)
- IN_MEM_STORE
- IN_MEM_LOAD

Thus dropping all flags except IN_NOTE had the effect of dropping
IN_MEM_STORE and IN_MEM_LOAD, and nothing else.  But those flags
are the ones that mark something as being part of a mem address.
The exclusion was therefore exactly wrong.

So is the patch OK with the extra rtlanal.cc hunk below?  I was wondering
whether it would count as obvious, but the length of the explanation above
suggests not :)

Richard


gcc/
PR rtl-optimization/120347
* rtlanal.cc (rtx_properties::try_to_add_src): Don't drop the
IN_MEM_LOAD and IN_MEM_STORE flags for autoinc registers.
* rtl-ssa/changes.cc (recog_level2): Check whether an
RTX_AUTOINCed register also appears outside of an address.

gcc/testsuite/
PR rtl-optimization/120347
* gcc.dg/torture/pr120347.c: New test.
---
 gcc/rtl-ssa/changes.cc  | 18 ++
 gcc/rtlanal.cc  |  2 +-
 gcc/testsuite/gcc.dg/torture/pr120347.c | 13 +
 3 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr120347.c

diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
index eb579ad3ad7..f7aa6a66cdf 100644
--- a/gcc/rtl-ssa/changes.cc
+++ b/gcc/rtl-ssa/changes.cc
@@ -1106,6 +1106,24 @@ recog_level2 (insn_change &change, add_regno_clobber_fn 
add_regno_clobber)
}
}
 
+  // Per rtl.texi, registers that are modified using RTX_AUTOINC operations
+  // cannot also appear outside an address.
+  vec_rtx_properties properties;
+  properties.add_pattern (pat);
+  for (rtx_obj_reference def : properties.refs ())
+if (def.is_pre_post_modify ())
+  for (rtx_obj_reference use : properties.refs ())
+   if (def.regno == use.regno && !use.in_address ())
+ {
+   if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+   fprintf (dump_file, "register %d is both auto-modified"
+" and used outside an address:\n", def.regno);
+   print_rtl_single (dump_file, pat);
+ }
+   return false;
+ }
+
   // check_asm_operands checks the constraints after RA, s

Re: [PATCH 2/3] OpenMP: Fix ICE and other issues in C/C++ metadirective error recovery.

2025-05-28 Thread Tobias Burnus


Sandra Loosemore wrote:

The new testcase included in this patch used to ICE in gcc after
diagnosing the first error, and in g++ it only diagnosed the error in
the first metadirective, ignoring the second one.  The solution is to
make error recovery in the C front end more like that in the C++ front
end, and remove the code in both front ends that previously tried to
skip all the way over the following statement (instead of just to the
end of the metadirective pragma) after an error.


(If anyone wonders: there is still one use case left for
{c,cp}_parser_skip_to_end_of_block_or_statement - in the handling
of standalone directives (in the non-error case).)


gcc/c/ChangeLog
* c-parser.cc (c_parser_skip_to_closing_brace): New, copied from
the equivalent function in the C++ front end.
(c_parser_skip_to_end_of_block_or_statement): Pass false to
the error flag.
(c_parser_omp_context_selector): Immediately return error_mark_node
after giving an error that the integer trait property is invalid,
similarly to C++ front end.
(c_parser_omp_context_selector_specification): Likewise handle
error return from c_parser_omp_context_selector similarly to C++.
(c_parser_omp_metadirective): Do not call
c_parser_skip_to_end_of_block_or_statement after an error.

gcc/cp/ChangeLog
* parser.cc (cp_parser_omp_metadirective): Do not call
cp_parser_skip_to_end_of_block_or_statement after an error.

gcc/testsuite/ChangeLog
* c-c++-common/gomp/declare-variant-2.c: Adjust patterns now that
C and C++ behave similarly.
* c-c++-common/gomp/metadirective-error-recovery.c: New.


LGTM. Thanks again - and I concur that it is suitable for GCC 15.

Tobias

[PATCH v2 2/4] cfgloopmanip: Add infrastructure for scaling of multi-exit loops [PR117790]

2025-05-28 Thread Alex Coplan

Hi!

This is a v2 of the patch originally posted here:
https://gcc.gnu.org/pipermail/gcc-patches/2025-January/672677.html

It addresses Honza's feedback in this mail:
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681119.html

It was approved with the requested changes above, but it only really
makes sense to push together with the 3/4 and 4/4 patches, and the
latter is waiting on a reply to this mail:
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/682033.html

So I figured I'd post the respin in the meantime.  Re-testing also
showed a small change to the 3/4 patch was needed, hence re-posting the
series.

Bootstrapped/regtested as a series on aarch64-linux-gnu and
x86_64-linux-gnu, no regressions.

Thanks,
Alex

-- >8 --

As it stands, scale_loop_profile doesn't correctly handle loops with
multiple exits.  In particular, in the case where the expected niters
exceeds iteration_bound, scale_loop_profile attempts to reduce the
number of iterations with a call to scale_loop_frequencies, which
multiplies the count of each BB by a given probability.  This
transformation preserves the relationships between the counts of the BBs
within the loop (and thus the edge probabilities stay the same) but this
cannot possibly work for loops with multiple exits, since in order for
the expected niters to reduce (and counts along exit edges to remain the
same), the exit edge probabilities must increase, thus decreasing the
probabilities of the internal edges, meaning that the ratios of the
counts of the BBs inside the loop must change.  So we need a different
approach (not a straightforward multiplicative scaling) to adjust the
expected niters of a loop with multiple exits.

This patch introduces a new helper, flow_scale_loop_freqs, which can be
used to correctly scale the profile of a loop with multiple exits.  It
is parameterized by a probability (with which to scale the header and
therefore the expected niters) and a lambda which gives the desired
counts for the exit edges.  In this patch, to make things simpler,
flow_scale_loop_freqs only handles loop shapes without internal control
flow, and we introduce a predicate can_flow_scale_loop_freqs_p to test
whether a given loop meets these criteria.  This restriction is
reasonable since this patch is motivated by fixing the profile
consistency for early break vectorization, and we don't currently
vectorize loops with internal control flow.  We also fall back to a
multiplicative scaling (the status quo) for loops that
flow_scale_loop_freqs can't handle, so the patch should be a net
improvement.

We wrap the call to flow_scale_loop_freqs in a helper
scale_loop_freqs_with_exit_counts which handles the above-mentioned
fallback.  This wrapper is still generic in that it accepts a lambda to
allow overriding the desired exit edge counts.  We specialize this with
another wrapper, scale_loop_freqs_hold_exit_counts (keeping the
counts along exit edges fixed), which is then used to implement the
niters-scaling case of scale_loop_profile, thus fixing this path through
the function for loops with multiple exits.

Finally, we expose two new wrapper functions in cfgloopmanip.h for use
in subsequent vectorizer patches.  scale_loop_profile_hold_exit_counts
is a variant of scale_loop_profile which assumes we want to keep the
counts along exit edges of the loop fixed through both parts of the
transformation (including the initial probability scale).
scale_loop_freqs_with_new_exit_count is intended to be used in a
subsequent patch when adding a skip edge around the epilog, where the
reduction of count entering the loop is mirrored by a reduced count
along a given exit edge.

gcc/ChangeLog:

PR tree-optimization/117790
* cfgloopmanip.cc (can_flow_scale_loop_freqs_p): New.
(flow_scale_loop_freqs): New.
(scale_loop_freqs_with_exit_counts): New.
(scale_loop_freqs_hold_exit_counts): New.
(scale_loop_profile): Refactor to use the newly-added
scale_loop_profile_1, and use scale_loop_freqs_hold_exit_counts to
correctly handle reducing the expected niters for loops with multiple
exits.
(scale_loop_freqs_with_new_exit_count): New.
(scale_loop_profile_1): New.
(scale_loop_profile_hold_exit_counts): New.
* cfgloopmanip.h (scale_loop_profile_hold_exit_counts): New.
(scale_loop_freqs_with_new_exit_count): New.
---
 gcc/cfgloopmanip.cc | 327 +---
 gcc/cfgloopmanip.h  |   7 +
 2 files changed, 312 insertions(+), 22 deletions(-)

diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
index 2c28437b34d..6b9a94fc2f7 100644
--- a/gcc/cfgloopmanip.cc
+++ b/gcc/cfgloopmanip.cc
@@ -683,17 +683,261 @@ update_loop_exit_probability_scale_dom_bbs (class loop *loop,
   return exit_edge;
 }
 
-/* Scale profile in LOOP by P.
-   If ITERATION_BOUND is not -1, scale even further if loop is predicted
-   to iterate too many times.
-   Before caling this function, preh

[PATCH v2 3/4] vect: Ensure profile consistency when adding epilog guard [PR117790]

2025-05-28 Thread Alex Coplan

This is a v2.  v1 was posted here:
https://gcc.gnu.org/pipermail/gcc-patches/2025-January/672678.html

Changes since v1:
 - Made epilog profile update code more resilient against
   already-inconsistent profiles (e.g. PR120065), avoid ICEing in such a
   situation.

Bootstrapped/regtested as a series on aarch64-linux-gnu and
x86_64-linux-gnu.  OK for trunk?

Thanks,
Alex

-- >8 --

This patch tries to make the CFG profile consistent when adding a guard
edge to skip the epilog during peeling.

The changes can be summarized as follows:
 - We avoid adding the guard edge entirely if the guard condition folds
   to false, otherwise the profile will become inconsistent since
   the cfgcleanup code doesn't attempt to update it on removing the dead
   edge.
 - If the guard condition instead folds to true, we account for this by
   giving the skip edge 100% probability (otherwise the profile will
   again become inconsistent when removing the other now-dead edge).
 - Finally, we use the new helper scale_loop_freqs_with_new_exit_count instead
   of scale_loop_profile to update the epilog frequencies / probabilities.
   We make the assumption here that if the IV exit is taken in the vector loop,
   then it will also be taken in the epilog (and not an early exit).  Since we
   add the guard to the vector iv exit, we know any reduction in count
   associated with the epilog skip should be accounted for by a reduction in the
   epilog's iv exit edge count.

gcc/ChangeLog:

PR tree-optimization/117790
* tree-vect-loop-manip.cc (vect_do_peeling): Attempt to maintain
consistency of the CFG profile when adding an epilog skip edge.

gcc/testsuite/ChangeLog:

PR tree-optimization/117790
* gcc.dg/vect/vect-early-break-profile-1.c: New test.
---
 .../gcc.dg/vect/vect-early-break-profile-1.c  | 10 
 gcc/tree-vect-loop-manip.cc   | 53 ++-
 2 files changed, 50 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c
new file mode 100644
index 000..5387e3a0465
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-fdump-tree-vect-blocks-details" } */
+int a[100];
+void f()
+{
+  for (int i = 0; i < 100 && a[i]; i++)
+a[i]++;
+}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "vect" } } */
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 56a4e9a8b63..127d596ce79 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3564,20 +3564,25 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 
   /* If we have a peeled vector iteration we will never skip the epilog loop
 	 and we can simplify the cfg a lot by not doing the edge split.  */
-  if (skip_epilog
-	  || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-	  && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
+  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
+niters, niters_vector_mult_vf);
+  if ((skip_epilog
+	   || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+	   && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
+	  && !integer_zerop (guard_cond))
 	{
-	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
-niters, niters_vector_mult_vf);
+	  profile_probability prob_skip
+	= integer_onep (guard_cond)
+	? profile_probability::always ()
+	: prob_epilog.invert ();
 
 	  guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
+	  edge enter_e = single_succ_edge (guard_bb);
 	  edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
 	  guard_to = epilog_e->dest;
 	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
 	   skip_vector ? anchor : guard_bb,
-	   prob_epilog.invert (),
-	   irred_flag);
+	   prob_skip, irred_flag);
 	  doms.safe_push (guard_to);
 	  if (vect_epilogues)
 	epilogue_vinfo->skip_this_loop_edge = guard_e;
@@ -3606,15 +3611,37 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 		}
 	}
 
-	  /* Only need to handle basic block before epilog loop if it's not
-	 the guard_bb, which is the case when skip_vector is true.  */
-	  if (guard_bb != bb_before_epilog)
+	  basic_block epilog_ph = loop_preheader_edge (epilog)->src;
+	  if (enter_e->dest->count < guard_e->count ()
+	  || epilog_ph->count < guard_e->count ()
+	  || epilog_e->count () < guard_e->count ())
 	{
-	  prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
+	  if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "epilog profile cannot be updated; "
+ "profile is inconsistent\n");
+	}
+	  else
+	{
+	  profile_probability epilog_scale
+		= (epilog_

Re: [PATCH 04/61] Enable LSAN and TSAN for mips with the 64-bit abi

2025-05-28 Thread Aleksandar Rakic

HTEC Public

Hi,

Could you please let us know if you have any comments
on the latest reply on this patch?

Kind regards,
Aleksandar Rakic


From: Aleksandar Rakic 
Sent: Tuesday, April 22, 2025 9:34 PM
To: Jeff Law; gcc-patches@gcc.gnu.org
Cc: Djordje Todorovic; c...@mips.com; Chao-ying Fu
Subject: Re: [PATCH 04/61] Enable LSAN and TSAN for mips with the 64-bit abi

Hi,

> This is probably OK, but it's unclear to me if it's dependent upon any
> of the earlier changes.  If it's independent of other changes, then it
> could go in now with a suitable ChangeLog entry.

> Jeff

I would like to inform you that the version 2 of this patch is available
at the following link and that it is not dependent upon any of the
earlier changes:

https://gcc.gnu.org/pipermail/gcc-patches/2025-March/677855.html

However, maybe this improvement is going to be sent to upstream
libsanitizer, as explained here:

https://gcc.gnu.org/pipermail/gcc-patches/2025-April/679916.html

Kind regards,
Aleksandar

Re: [PATCH] libstdc++: Fix flat_map::operator[] for const lvalue keys [PR120432]

2025-05-28 Thread Tomasz Kaminski

On Wed, May 28, 2025 at 3:00 PM Patrick Palka  wrote:

> On Wed, 28 May 2025, Tomasz Kaminski wrote:
>
> >
> >
> > On Tue, May 27, 2025 at 7:08 PM Patrick Palka  wrote:
> >   Tested on x86_64-pc-linux-gnu, does this look OK for trunk/15?
> >
> >   The 'volatile' issue from that PR Will be fixed in a separate
> patch as
> >   operator[] isn't the only operation that's affected.
>
LGTM, thanks.

> >
> >   -- >8 --
> >
> >   The const lvalue operator[] overload wasn't properly forwarding
> the key
> >   type to the generic overload.
> >
> >   PR libstdc++/120432
> >
> >   libstdc++-v3/ChangeLog:
> >
> >   * include/std/flat_map (_Flat_map_base::operator[]):
> Correct
> >   forwarding from the const lvalue key overload.
> >   * testsuite/23_containers/flat_map/1.cc (test08): New test.
> >   * testsuite/23_containers/flat_multimap/1.cc (test08): New
> test.
> >   ---
> >libstdc++-v3/include/std/flat_map  |  2 +-
> >libstdc++-v3/testsuite/23_containers/flat_map/1.cc | 10
> ++
> >.../testsuite/23_containers/flat_multimap/1.cc | 10
> ++
> >3 files changed, 21 insertions(+), 1 deletion(-)
> >
> >   diff --git a/libstdc++-v3/include/std/flat_map
> b/libstdc++-v3/include/std/flat_map
> >   index 6593988d213c..4d9ced1e8191 100644
> >   --- a/libstdc++-v3/include/std/flat_map
> >   +++ b/libstdc++-v3/include/std/flat_map
> >   @@ -1142,7 +1142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  // element access
> >  mapped_type&
> >  operator[](const key_type& __x)
> >   -  { return operator[](__x); }
> >   +  { return operator[](__x); }
> >
> > Given that the operator[] that we are forwarding to is implemented as:
> > { return try_emplace(std::forward<_Key2>(__x)).first->second; }
> > I would just call try_emplace directly:
>
> Good point, the implementation is a simple one-liner either way, and it
> addresses the volatile key issue.  Like so?
>
> -- >8 --
>
> Subject: [PATCH] libstdc++: Fix flat_map::operator[] for const lvalue keys
>  [PR120432]
>
> The const lvalue operator[] overload wasn't properly forwarding the key
> type to the generic overload, causing a hard error for const keys.
>
> This patch fixes this by making the non-template overloads call
> try_emplace directly instead, which means we can remove the non-standard
> same_as constraint on the generic overload.
>
> PR libstdc++/120432
>
> libstdc++-v3/ChangeLog:
>
> * include/std/flat_map (flat_map::operator[]): Make the
> non-template overloads call try_emplace directly.  Remove
> non-standard same_as constraint on the template overload.
> * testsuite/23_containers/flat_map/1.cc (test08): New test.
> ---
>  libstdc++-v3/include/std/flat_map  |  6 +++---
>  libstdc++-v3/testsuite/23_containers/flat_map/1.cc | 10 ++
>  2 files changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/flat_map
> b/libstdc++-v3/include/std/flat_map
> index 6593988d213c..5f9a2eda1939 100644
> --- a/libstdc++-v3/include/std/flat_map
> +++ b/libstdc++-v3/include/std/flat_map
> @@ -1142,14 +1142,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>// element access
>mapped_type&
>operator[](const key_type& __x)
> -  { return operator[](__x); }
> +  { return try_emplace(__x).first->second; }
>
>mapped_type&
>operator[](key_type&& __x)
> -  { return operator[](std::move(__x)); }
> +  { return try_emplace(std::move(__x)).first->second; }
>
>template
> -   requires same_as, _Key> ||
> __transparent_comparator<_Compare>
> +   requires __transparent_comparator<_Compare>
> mapped_type&
> operator[](_Key2&& __x)
> { return try_emplace(std::forward<_Key2>(__x)).first->second; }
> diff --git a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> index a9690208b09f..2af516410279 100644
> --- a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> +++ b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> @@ -253,6 +253,15 @@ test07()
>VERIFY( std::ranges::equal(m, (std::pair[]){{3,4}}) );
>  }
>
> +void
> +test08()
> +{
> +  // PR libstdc++/120432 - flat_map operator[] is broken for const lvalue
> keys
> +  std::flat_map m;
> +  const int k = 42;
> +  m[k] = 0;
> +}
> +
>  int
>  main()
>  {
> @@ -266,4 +275,5 @@ main()
>test05();
>test06();
>test07();
> +  test08();
>  }
> --
> 2.49.0.654.g845c48a16a
>

[PATCH, Fortran] Bug 119856 - Missing commas in I/O formats not diagnosed by default at compile time.

2025-05-28 Thread Jerry D


The attached patch is simple and self explanatory in the git log entry.

Regression tested on X86_64-linux-gnu.

OK for trunk?

Regards,

Jerry

commit 845768cbead03f76265e491bcf5ea6de7020ff39
Author: Jerry DeLisle 
Date:   Wed May 28 07:56:12 2025 -0700

Fortran: Adjust handling of optional comma in FORMAT.

This change adjusts the error messages for optional commas
in format strings to give a warning at compile time unless
-std=legacy is used. This is more consistent with the
runtime library. Omitting the comma separator should really not
be encouraged, as doing so is non-standard Fortran.

PR fortran/119856

gcc/fortran/ChangeLog:

* io.cc: Set error check to GFC_STD_LEGACY.

gcc/testsuite/ChangeLog:

* gfortran.dg/comma_format_extension_1.f: Update dg-options to
"-std=legacy".
* gfortran.dg/comma_format_extension_3.f: Likewise.
* gfortran.dg/continuation_13.f90: Likewise.

diff --git a/gcc/fortran/io.cc b/gcc/fortran/io.cc
index b5c9d333749..7466d8fe094 100644
--- a/gcc/fortran/io.cc
+++ b/gcc/fortran/io.cc
@@ -1228,7 +1228,8 @@ between_desc:
 default:
   if (mode != MODE_FORMAT)
 	format_locus.nextc += format_string_pos - 1;
-  if (!gfc_notify_std (GFC_STD_GNU, "Missing comma at %L", &format_locus))
+  if (!gfc_notify_std (GFC_STD_LEGACY,
+	  "Missing comma in FORMAT string at %L", &format_locus))
 	return false;
   /* If we do not actually return a failure, we need to unwind this
  before the next round.  */
@@ -1290,7 +1291,8 @@ extension_optional_comma:
 default:
   if (mode != MODE_FORMAT)
 	format_locus.nextc += format_string_pos;
-  if (!gfc_notify_std (GFC_STD_GNU, "Missing comma at %L", &format_locus))
+  if (!gfc_notify_std (GFC_STD_LEGACY,
+	  "Missing comma in FORMAT string at %L", &format_locus))
 	return false;
   /* If we do not actually return a failure, we need to unwind this
  before the next round.  */
diff --git a/gcc/testsuite/gfortran.dg/comma_format_extension_1.f b/gcc/testsuite/gfortran.dg/comma_format_extension_1.f
index a3a5a98f155..c4b43f01bc3 100644
--- a/gcc/testsuite/gfortran.dg/comma_format_extension_1.f
+++ b/gcc/testsuite/gfortran.dg/comma_format_extension_1.f
@@ -1,5 +1,5 @@
 ! { dg-do compile }
-! { dg-options "" }
+! { dg-options "-std=legacy" }
 ! test that the extension for a missing comma is accepted
 
   subroutine mysub
diff --git a/gcc/testsuite/gfortran.dg/comma_format_extension_3.f b/gcc/testsuite/gfortran.dg/comma_format_extension_3.f
index 0b002249b46..9d974d6b90c 100644
--- a/gcc/testsuite/gfortran.dg/comma_format_extension_3.f
+++ b/gcc/testsuite/gfortran.dg/comma_format_extension_3.f
@@ -3,7 +3,7 @@
 ! did do the correct thing at runtime.
 ! Note the missing , before i1 in the format.
 ! { dg-do run }
-! { dg-options "" }
+! { dg-options "-std=legacy" }
   character*12 c
 
   write (c,100) 0, 1
diff --git a/gcc/testsuite/gfortran.dg/continuation_13.f90 b/gcc/testsuite/gfortran.dg/continuation_13.f90
index 9799b59e86e..475c8963998 100644
--- a/gcc/testsuite/gfortran.dg/continuation_13.f90
+++ b/gcc/testsuite/gfortran.dg/continuation_13.f90
@@ -1,5 +1,5 @@
 ! { dg-do run }
-! { dg-options "-std=gnu" }
+! { dg-options "-std=legacy" }
 ! PR64506
 character(25) :: astring

[PATCH v1 1/3] RISC-V: Combine vec_duplicate + vmul.vv to vmul.vx on GR2VR cost

2025-05-28 Thread pan2 . li

From: Pan Li 

This patch combines vec_duplicate + vmul.vv into vmul.vx, as shown in
the example code below.  The related pattern depends on the cost of
vec_duplicate from GR2VR: late-combine will perform the combination
if the cost of GR2VR is zero, and reject it if the GR2VR cost is
greater than zero.

Assume we have example code like below, GR2VR cost is 0.

  #define DEF_VX_BINARY(T, OP)\
  void\
  test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \
  {   \
for (unsigned i = 0; i < n; i++)  \
  out[i] = in[i] OP x;\
  }

  DEF_VX_BINARY(int32_t, |)

Before this patch:
  10   │ test_vx_binary_or_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ vsetvli a5,zero,e32,m1,ta,ma
  13   │ vmv.v.x v2,a2
  14   │ slli a3,a3,32
  15   │ srli a3,a3,32
  16   │ .L3:
  17   │ vsetvli a5,a3,e32,m1,ta,ma
  18   │ vle32.v v1,0(a1)
  19   │ slli a4,a5,2
  20   │ sub a3,a3,a5
  21   │ add a1,a1,a4
  22   │ vmul.vv v1,v1,v2
  23   │ vse32.v v1,0(a0)
  24   │ add a0,a0,a4
  25   │ bne a3,zero,.L3

After this patch:
  10   │ test_vx_binary_or_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ slli a3,a3,32
  13   │ srli a3,a3,32
  14   │ .L3:
  15   │ vsetvli a5,a3,e32,m1,ta,ma
  16   │ vle32.v v1,0(a1)
  17   │ slli a4,a5,2
  18   │ sub a3,a3,a5
  19   │ add a1,a1,a4
  20   │ vmul.vx v1,v1,a2
  21   │ vse32.v v1,0(a0)
  22   │ add a0,a0,a4
  23   │ bne a3,zero,.L3

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_vx_binary_vec_dup_vec): Add
new case for MULT op.
(expand_vx_binary_vec_vec_dup): Ditto.
* config/riscv/riscv.cc (riscv_rtx_costs): Ditto.
* config/riscv/vector-iterators.md: Add new op mult to no_shift_vx_ops.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-v.cc  | 2 ++
 gcc/config/riscv/riscv.cc| 1 +
 gcc/config/riscv/vector-iterators.md | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index eedcda2b8ff..61627975725 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5536,6 +5536,7 @@ expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx 
op_2,
 case AND:
 case IOR:
 case XOR:
+case MULT:
   icode = code_for_pred_scalar (code, mode);
   break;
 case MINUS:
@@ -5565,6 +5566,7 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx 
op_2,
 case AND:
 case IOR:
 case XOR:
+case MULT:
   icode = code_for_pred_scalar (code, mode);
   break;
 default:
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2cc69b4458a..d3cee96d3aa 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3919,6 +3919,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
  case AND:
  case IOR:
  case XOR:
+ case MULT:
{
  rtx op_0 = XEXP (x, 0);
  rtx op_1 = XEXP (x, 1);
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 77d72a78c1b..2bd99ee5372 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4042,7 +4042,7 @@ (define_code_iterator any_int_binop [plus minus and ior 
xor ashift ashiftrt lshi
 ])
 
 (define_code_iterator any_int_binop_no_shift_vx [
-  plus minus and ior xor
+  plus minus and ior xor mult
 ])
 
 (define_code_iterator any_int_unop [neg not])
-- 
2.43.0

Re: [PATCH] libstdc++: Pass small trivial types by value in polymorphic wrappers

2025-05-28 Thread Tomasz Kaminski

On Wed, May 28, 2025 at 5:14 PM Tomasz Kaminski  wrote:

>
>
> On Wed, May 28, 2025 at 4:53 PM Patrick Palka  wrote:
>
>> On Wed, 28 May 2025, Tomasz Kamiński wrote:
>>
>> > This patch adjust the passing of parameters for the move_only_function,
>> > copyable_function and function_ref. For types that are declared as
>> being passed
>> > by value in signature template argument, the are passed by value to the
>> invoker,
>>
>> they
>>
>> > when they are small (at most two pointers), trivially move
>> constructible and
>> > trivially destructible. The later guarantees that passing them by value
>> has not
>>
>> latter
>>
>> > user visible side effects.
>> >
>> > In particular, this extents the set of types forwarded by value, that
>> was
>>
>> extends
>>
>> > previously limited to scalars, to also include specializations of
>> std::span and
>> > std::string_view, and similar standard and program defined-types.
>> >
>> > Checking the suitability of the parameter types requires the types to
>> be complete.
>> > As consequence implementation imposes requirements on instantiation of
>> > move_only_function and copyable_function. To avoid producing the errors
>> from
>> > the implementation details, and static_assertion was added to partial
>> > specializations of copyable_function, move_only_function and
>> function_ref.
>> > The static assertion uses existing __is_complete_or_unbounded, as
>> arrays type
>> > parameters are automatically decayed in function type.
>> >
>> > Standard already specifies in [res.on.functions] p2.5 that
>> instantiating these
>> > partial specialization with incomplete types leads to undefined
>> behavior.
>> >
>> > libstdc++-v3/ChangeLog:
>> >
>> >   * include/bits/funcwrap.h (__polyfunc::__pass_by_rref): Define.
>> >   (__polyfunc::__param_t): Update to use __pass_by_rref.
>> >   * include/bits/cpyfunc_impl.h: Assert that all parameter types
>> >   are complete.
>> >   * include/bits/funcref_impl.h: Likewise.
>> >   * include/bits/mofunc_impl.h: Likewise.
>> >   * testsuite/20_util/copyable_function/call.cc: New test.
>> >   * testsuite/20_util/function_ref/call.cc: New test.
>> >   * testsuite/20_util/move_only_function/call.cc: New test.
>> >   * testsuite/20_util/copyable_function/conv.cc: New test.
>> >   * testsuite/20_util/function_ref/conv.cc: New test.
>> >   * testsuite/20_util/move_only_function/conv.cc: New test.
>> >   * testsuite/20_util/copyable_function/incomplete_neg.cc: New test.
>> >   * testsuite/20_util/function_ref/incomplete_neg.cc: New test.
>> >   * testsuite/20_util/move_only_function/incomplete_neg.cc: New
>> test.
>> > ---
>> > Tested on x86_64-linux. OK for trunk?
>> >
>> >  libstdc++-v3/include/bits/cpyfunc_impl.h  |  4 +++
>> >  libstdc++-v3/include/bits/funcref_impl.h  |  4 +++
>> >  libstdc++-v3/include/bits/funcwrap.h  | 18 +-
>> >  libstdc++-v3/include/bits/mofunc_impl.h   |  4 +++
>> >  .../20_util/copyable_function/call.cc |  7 ++--
>> >  .../20_util/copyable_function/conv.cc | 35 +++
>> >  .../copyable_function/incomplete_neg.cc   | 18 ++
>> >  .../testsuite/20_util/function_ref/call.cc| 10 +++---
>> >  .../testsuite/20_util/function_ref/conv.cc| 34 ++
>> >  .../20_util/function_ref/incomplete_neg.cc| 18 ++
>> >  .../20_util/move_only_function/call.cc|  7 ++--
>> >  .../20_util/move_only_function/conv.cc| 35 +++
>> >  .../move_only_function/incomplete_neg.cc  | 18 ++
>> >  13 files changed, 200 insertions(+), 12 deletions(-)
>> >  create mode 100644
>> libstdc++-v3/testsuite/20_util/copyable_function/incomplete_neg.cc
>> >  create mode 100644
>> libstdc++-v3/testsuite/20_util/function_ref/incomplete_neg.cc
>> >  create mode 100644
>> libstdc++-v3/testsuite/20_util/move_only_function/incomplete_neg.cc
>> >
>> > diff --git a/libstdc++-v3/include/bits/cpyfunc_impl.h
>> b/libstdc++-v3/include/bits/cpyfunc_impl.h
>> > index bc44cd3e313..f1918ddf87a 100644
>> > --- a/libstdc++-v3/include/bits/cpyfunc_impl.h
>> > +++ b/libstdc++-v3/include/bits/cpyfunc_impl.h
>> > @@ -64,6 +64,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >   _GLIBCXX_MOF_REF noexcept(_Noex)>
>> >  : __polyfunc::_Cpy_base
>> >  {
>> > +  static_assert(
>> > + (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) &&
>> ...),
>> > + "each parameter type must be a complete class");
>> > +
>> >using _Base = __polyfunc::_Cpy_base;
>> >using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
>> >using _Signature = _Invoker::_Signature;
>> > diff --git a/libstdc++-v3/include/bits/funcref_impl.h
>> b/libstdc++-v3/include/bits/funcref_impl.h
>> > index 1e19866035f..44c992281be 100644
>> > --- a/libstdc++-v3/include/bits/funcref_impl.h
>> > +++ b/libstdc++-v3/include/bits/funcref_impl.h
>> > @@ -68,6 +68,10

[PATCH v1 3/3] RISC-V: Add test for vec_duplicate + vmul.vv combine case 1 with GR2VR cost 0, 1 and 2

2025-05-28 Thread pan2 . li

From: Pan Li 

Add asm dump check tests for the vec_duplicate + vmul.vv combine to
vmul.vx, with a GR2VR cost of 0, 1 and 2.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c: Add asm
check for vmul.vx combine.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c  | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c  | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c  | 2 ++
 12 files changed, 24 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
index 58dc66dcec9..a1b24f710e0 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
@@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, 
VX_BINARY_REVERSE_BODY_X16)
 DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY_X16)
 DEF_VX_BINARY_CASE_1_WRAP(T, |, or, VX_BINARY_BODY_X16)
 DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X16)
+DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY_X16)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X16)
 /* { dg-final { scan-assembler {vand.vx} } } */
 /* { dg-final { scan-assembler {vor.vx} } } */
 /* { dg-final { scan-assembler {vxor.vx} } } */
+/* { dg-final { scan-assembler {vmul.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c
index b13ec16983c..53bd7448bfe 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c
@@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, 
VX_BINARY_REVERSE_BODY_X4)
 DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY_X4)
 DEF_VX_BINARY_CASE_1_WRAP(T, |, or, VX_BINARY_BODY_X4)
 DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X4)
+DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY_X4)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X4)
 /* { dg-final { scan-assembler {vand.vx} } } */
 /* { dg-final { scan-assembler {vor.vx} } } */
 /* { dg-final { scan-assembler {vxor.vx} } } */
+/* { dg-final { scan-assembler {vmul.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c
index cd861a4ba6b..73cb89d2ad6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c
@@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, 
VX_BINARY_REVERSE_BODY)
 DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY)
 DEF_VX_BINARY_CASE_1_WRAP(T, |, or, VX_BINARY_BODY)
 DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY)
+DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY)
 /* { dg-final { scan-assembler {vand.vx} } } */
 /* { dg-final { scan-assembler {vor.vx} } } */
 /* { dg-final { scan-assembler {vxor.vx} } } */
+/* { dg-final { scan-assembler {vmul.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/a

[PATCH v1 2/3] RISC-V: Add test for vec_duplicate + vmul.vv combine case 0 with GR2VR cost 0, 2 and 15

2025-05-28 Thread pan2 . li

From: Pan Li 

Add asm dump check tests for the vec_duplicate + vmul.vv combine to
vmul.vx, with a GR2VR cost of 0, 2 and 15.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c: Add asm check
for vmul.vx combine.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add test
data for vmul run test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i8.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/vx_vf/vx-1-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx_binary_data.h  | 196 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i16.c |  15 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i32.c |  15 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i64.c |  15 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i8.c  |  15 ++
 17 files changed, 280 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i8.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c
index b9be0f674ae..144d1bad6af 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c
@@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_0_WRAP(T, -, rsub);
 DEF_VX_BINARY_CASE_0_WRAP(T, &, and)
 DEF_VX_BINARY_CASE_0_WRAP(T, |, or)
 DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor)
+DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
 
 /* { dg-final { scan-assembler-times {vadd.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vsub.vx} 1 } } */
@@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor)
 /* { dg-final { scan-assembler-times {vand.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vor.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vxor.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c
index 2a84980cb50..74d35d13cf6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c
@@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_0_WRAP(T, -, rsub);
 DEF_VX_BINARY_CASE_0_WRAP(T, &, and)
 DEF_VX_BINARY_CASE_0_WRAP(T, |, or)
 DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor)
+DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
 
 /* { dg-final { scan-assembler-times {vadd.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vsub.vx} 1 } } */
@@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor)
 /* { dg-final { scan-assembler-times {vand.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vor.vx} 1 } } */
 /* { dg-final { scan-assembler-times {vxor.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c
index 9c7ea5fa413..ac512ff7fbd 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c
@@ -11,6 +11,7 @@ DEF_VX_BI

[PATCH v1 0/3] RISC-V: Combine vec_duplicate + vmul.vv to vmul.vx on GR2VR cost

2025-05-28 Thread pan2 . li

From: Pan Li 

This patch series introduces combining vec_dup + vmul.vv into vmul.vx,
based on the cost value of GR2VR.  The late-combine pass performs the
combine if the cost of GR2VR is zero, and rejects it if the cost is
non-zero (like 1, 15 in the tests).  There are two cases for the combine:

Case 0:
 |   ...
 |   vmv.v.x
 | L1:
 |   vmul.vv
 |   J L1
 |   ...

Case 1:
 |   ...
 | L1:
 |   vmv.v.x
 |   vmul.vv
 |   J L1
 |   ...

Both will be combined to below if the cost of GR2VR is zero.
 |   ...
 | L1:
 |   vmul.vx
 |   J L1
 |   ...

The following test suites pass with this patch series.
* The rv64gcv fully regression test.

Pan Li (3):
  RISC-V: Combine vec_duplicate + vmul.vv to vmul.vx on GR2VR cost
  RISC-V: Add test for vec_duplicate + vmul.vv combine case 0 with GR2VR cost 
0, 2 and 15
  RISC-V: Add test for vec_duplicate + vmul.vv combine case 1 with GR2VR cost 
0, 1 and 2

 gcc/config/riscv/riscv-v.cc   |   2 +
 gcc/config/riscv/riscv.cc |   1 +
 gcc/config/riscv/vector-iterators.md  |   2 +-
 .../riscv/rvv/autovec/vx_vf/vx-1-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx_binary_data.h  | 196 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i16.c |  15 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i32.c |  15 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i64.c |  15 ++
 .../rvv/autovec/vx_vf/vx_vmul-run-1-i8.c  |  15 ++
 32 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i8.c

-- 
2.43.0

Re: [PATCH] libstdc++: Pass small trivial types by value in polymorphic wrappers

2025-05-28 Thread Tomasz Kaminski

On Wed, May 28, 2025 at 4:53 PM Patrick Palka  wrote:

> On Wed, 28 May 2025, Tomasz Kamiński wrote:
>
> > This patch adjust the passing of parameters for the move_only_function,
> > copyable_function and function_ref. For types that are declared as being
> passed
> > by value in signature template argument, the are passed by value to the
> invoker,
>
> they
>
> > when they are small (at most two pointers), trivially move constructible
> and
> > trivially destructible. The later guarantees that passing them by value
> has not
>
> latter
>
> > user visible side effects.
> >
> > In particular, this extents the set of types forwarded by value, that was
>
> extends
>
> > previously limited to scalars, to also include specializations of
> std::span and
> > std::string_view, and similar standard and program defined-types.
> >
> > Checking the suitability of the parameter types requires the types to be
> complete.
> > As consequence implementation imposes requirements on instantiation of
> > move_only_function and copyable_function. To avoid producing the errors
> from
> > the implementation details, and static_assertion was added to partial
> > specializations of copyable_function, move_only_function and
> function_ref.
> > The static assertion uses existing __is_complete_or_unbounded, as arrays
> type
> > parameters are automatically decayed in function type.
> >
> > Standard already specifies in [res.on.functions] p2.5 that instantiating
> these
> > partial specialization with incomplete types leads to undefined behavior.
> >
> > libstdc++-v3/ChangeLog:
> >
> >   * include/bits/funcwrap.h (__polyfunc::__pass_by_rref): Define.
> >   (__polyfunc::__param_t): Update to use __pass_by_rref.
> >   * include/bits/cpyfunc_impl.h:: Assert that are parameters type
> >   are complete.
> >   * include/bits/funcref_impl.h: Likewise.
> >   * include/bits/mofunc_impl.h: Likewise.
> >   * testsuite/20_util/copyable_function/call.cc: New test.
> >   * testsuite/20_util/function_ref/call.cc: New test.
> >   * testsuite/20_util/move_only_function/call.cc: New test.
> >   * testsuite/20_util/copyable_function/conv.cc: New test.
> >   * testsuite/20_util/function_ref/conv.cc: New test.
> >   * testsuite/20_util/move_only_function/conv.cc: New test.
> >   * testsuite/20_util/copyable_function/incomplete_neg.cc: New test.
> >   * testsuite/20_util/function_ref/incomplete_neg.cc: New test.
> >   * testsuite/20_util/move_only_function/incomplete_neg.cc: New test.
> > ---
> > Tested on x86_54-linux. OK for trunk?
> >
> >  libstdc++-v3/include/bits/cpyfunc_impl.h  |  4 +++
> >  libstdc++-v3/include/bits/funcref_impl.h  |  4 +++
> >  libstdc++-v3/include/bits/funcwrap.h  | 18 +-
> >  libstdc++-v3/include/bits/mofunc_impl.h   |  4 +++
> >  .../20_util/copyable_function/call.cc |  7 ++--
> >  .../20_util/copyable_function/conv.cc | 35 +++
> >  .../copyable_function/incomplete_neg.cc   | 18 ++
> >  .../testsuite/20_util/function_ref/call.cc| 10 +++---
> >  .../testsuite/20_util/function_ref/conv.cc| 34 ++
> >  .../20_util/function_ref/incomplete_neg.cc| 18 ++
> >  .../20_util/move_only_function/call.cc|  7 ++--
> >  .../20_util/move_only_function/conv.cc| 35 +++
> >  .../move_only_function/incomplete_neg.cc  | 18 ++
> >  13 files changed, 200 insertions(+), 12 deletions(-)
> >  create mode 100644
> libstdc++-v3/testsuite/20_util/copyable_function/incomplete_neg.cc
> >  create mode 100644
> libstdc++-v3/testsuite/20_util/function_ref/incomplete_neg.cc
> >  create mode 100644
> libstdc++-v3/testsuite/20_util/move_only_function/incomplete_neg.cc
> >
> > diff --git a/libstdc++-v3/include/bits/cpyfunc_impl.h
> b/libstdc++-v3/include/bits/cpyfunc_impl.h
> > index bc44cd3e313..f1918ddf87a 100644
> > --- a/libstdc++-v3/include/bits/cpyfunc_impl.h
> > +++ b/libstdc++-v3/include/bits/cpyfunc_impl.h
> > @@ -64,6 +64,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >   _GLIBCXX_MOF_REF noexcept(_Noex)>
> >  : __polyfunc::_Cpy_base
> >  {
> > +  static_assert(
> > + (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) &&
> ...),
> > + "each parameter type must be a complete class");
> > +
> >using _Base = __polyfunc::_Cpy_base;
> >using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
> >using _Signature = _Invoker::_Signature;
> > diff --git a/libstdc++-v3/include/bits/funcref_impl.h
> b/libstdc++-v3/include/bits/funcref_impl.h
> > index 1e19866035f..44c992281be 100644
> > --- a/libstdc++-v3/include/bits/funcref_impl.h
> > +++ b/libstdc++-v3/include/bits/funcref_impl.h
> > @@ -68,6 +68,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  class function_ref<_Res(_ArgTypes...) _GLIBCXX_MOF_CV
> >  noexcept(_Noex)>
> >  {
> > +  static_assert(
>

[PATCH v2] libstdc++: Pass small trivial types by value in polymorphic wrappers

2025-05-28 Thread Tomasz Kamiński

This patch adjusts the passing of parameters for move_only_function,
copyable_function and function_ref. Types that are declared as being passed
by value in the signature template argument are now passed by value to the
invoker
when they are small (at most two pointers), trivially move constructible and
trivially destructible. The latter guarantees that passing them by value has no
user-visible side effects.

In particular, this extends the set of types forwarded by value, which was
previously limited to scalars, to also include specializations of std::span and
std::string_view, and similar standard and program-defined types.

Checking the suitability of the parameter types requires the types to be
complete.
As a consequence, the implementation imposes requirements on instantiation of
move_only_function and copyable_function. To avoid producing errors from
the implementation details, a static assertion was added to the partial
specializations of copyable_function, move_only_function and function_ref.
The static assertion uses the existing __is_complete_or_unbounded, as array type
parameters are automatically decayed in a function type.

The standard already specifies in [res.on.functions] p2.5 that instantiating these
partial specializations with incomplete types leads to undefined behavior.

libstdc++-v3/ChangeLog:

* include/bits/funcwrap.h (__polyfunc::__pass_by_rref): Define.
(__polyfunc::__param_t): Update to use __pass_by_rref.
* include/bits/cpyfunc_impl.h: Assert that all parameter types
are complete.
* include/bits/funcref_impl.h: Likewise.
* include/bits/mofunc_impl.h: Likewise.
* testsuite/20_util/copyable_function/call.cc: New test.
* testsuite/20_util/function_ref/call.cc: New test.
* testsuite/20_util/move_only_function/call.cc: New test.
* testsuite/20_util/copyable_function/conv.cc: New test.
* testsuite/20_util/function_ref/conv.cc: New test.
* testsuite/20_util/move_only_function/conv.cc: New test.
* testsuite/20_util/copyable_function/incomplete_neg.cc: New test.
* testsuite/20_util/function_ref/incomplete_neg.cc: New test.
* testsuite/20_util/move_only_function/incomplete_neg.cc: New test.

Reviewed-by: Patrick Palka 
---
Changes in v2:
- fixed typos in commit description
- break line before __pass_by_rref
- add comment why is_reference needs to be checked before sizeof

OK for trunk?

 libstdc++-v3/include/bits/cpyfunc_impl.h  |  4 +++
 libstdc++-v3/include/bits/funcref_impl.h  |  4 +++
 libstdc++-v3/include/bits/funcwrap.h  | 21 ++-
 libstdc++-v3/include/bits/mofunc_impl.h   |  4 +++
 .../20_util/copyable_function/call.cc |  7 ++--
 .../20_util/copyable_function/conv.cc | 35 +++
 .../copyable_function/incomplete_neg.cc   | 18 ++
 .../testsuite/20_util/function_ref/call.cc| 10 +++---
 .../testsuite/20_util/function_ref/conv.cc| 34 ++
 .../20_util/function_ref/incomplete_neg.cc| 18 ++
 .../20_util/move_only_function/call.cc|  7 ++--
 .../20_util/move_only_function/conv.cc| 35 +++
 .../move_only_function/incomplete_neg.cc  | 18 ++
 13 files changed, 203 insertions(+), 12 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/20_util/copyable_function/incomplete_neg.cc
 create mode 100644 
libstdc++-v3/testsuite/20_util/function_ref/incomplete_neg.cc
 create mode 100644 
libstdc++-v3/testsuite/20_util/move_only_function/incomplete_neg.cc

diff --git a/libstdc++-v3/include/bits/cpyfunc_impl.h 
b/libstdc++-v3/include/bits/cpyfunc_impl.h
index bc44cd3e313..f1918ddf87a 100644
--- a/libstdc++-v3/include/bits/cpyfunc_impl.h
+++ b/libstdc++-v3/include/bits/cpyfunc_impl.h
@@ -64,6 +64,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_MOF_REF noexcept(_Noex)>
 : __polyfunc::_Cpy_base
 {
+  static_assert(
+   (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) && ...),
+   "each parameter type must be a complete class");
+
   using _Base = __polyfunc::_Cpy_base;
   using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
   using _Signature = _Invoker::_Signature;
diff --git a/libstdc++-v3/include/bits/funcref_impl.h 
b/libstdc++-v3/include/bits/funcref_impl.h
index 1e19866035f..44c992281be 100644
--- a/libstdc++-v3/include/bits/funcref_impl.h
+++ b/libstdc++-v3/include/bits/funcref_impl.h
@@ -68,6 +68,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 class function_ref<_Res(_ArgTypes...) _GLIBCXX_MOF_CV
   noexcept(_Noex)>
 {
+  static_assert(
+   (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) && ...),
+   "each parameter type must be a complete class");
+
   using _Invoker = __polyfunc::_Invoker<_Noex, _Res, _ArgTypes...>;
   using _Signature = _Invoker::_Signature;
 
diff --git a/libstdc++-v3/include/bi

Re: [PATCH] libstdc++: Pass small trivial types by value in polymorphic wrappers

2025-05-28 Thread Patrick Palka

On Wed, 28 May 2025, Tomasz Kaminski wrote:

> 
> 
> On Wed, May 28, 2025 at 4:53 PM Patrick Palka  wrote:
>   On Wed, 28 May 2025, Tomasz Kamiński wrote:
> 
>   > This patch adjust the passing of parameters for the 
> move_only_function,
>   > copyable_function and function_ref. For types that are declared as 
> being passed
>   > by value in signature template argument, the are passed by value to 
> the invoker,
> 
>   they
> 
>   > when they are small (at most two pointers), trivially move 
> constructible and
>   > trivially destructible. The later guarantees that passing them by 
> value has not
> 
>   latter
> 
>   > user visible side effects.
>   >
>   > In particular, this extents the set of types forwarded by value, that 
> was
> 
>   extends
> 
>   > previously limited to scalars, to also include specializations of 
> std::span and
>   > std::string_view, and similar standard and program defined-types.
>   >
>   > Checking the suitability of the parameter types requires the types to 
> be complete.
>   > As consequence implementation imposes requirements on instantiation of

I think you want "As a consequence, the"

>   > move_only_function and copyable_function. To avoid producing the 
> errors from
>   > the implementation details, and static_assertion was added to partial

a static assertion

>   > specializations of copyable_function, move_only_function and 
> function_ref.
>   > The static assertion uses existing __is_complete_or_unbounded, as 
> arrays type
>   > parameters are automatically decayed in function type.
>   >
>   > Standard already specifies in [res.on.functions] p2.5 that 
> instantiating these
>   > partial specialization with incomplete types leads to undefined 
> behavior.
>   >
>   > libstdc++-v3/ChangeLog:
>   >
>   >       * include/bits/funcwrap.h (__polyfunc::__pass_by_rref): Define.
>   >       (__polyfunc::__param_t): Update to use __pass_by_rref.
>   >       * include/bits/cpyfunc_impl.h:: Assert that are parameters type
>   >       are complete.
>   >       * include/bits/funcref_impl.h: Likewise.
>   >       * include/bits/mofunc_impl.h: Likewise.
>   >       * testsuite/20_util/copyable_function/call.cc: New test.
>   >       * testsuite/20_util/function_ref/call.cc: New test.
>   >       * testsuite/20_util/move_only_function/call.cc: New test.
>   >       * testsuite/20_util/copyable_function/conv.cc: New test.
>   >       * testsuite/20_util/function_ref/conv.cc: New test.
>   >       * testsuite/20_util/move_only_function/conv.cc: New test.
>   >       * testsuite/20_util/copyable_function/incomplete_neg.cc: New 
> test.
>   >       * testsuite/20_util/function_ref/incomplete_neg.cc: New test.
>   >       * testsuite/20_util/move_only_function/incomplete_neg.cc: New 
> test.
>   > ---
>   > Tested on x86_54-linux. OK for trunk?
>   >
>   >  libstdc++-v3/include/bits/cpyfunc_impl.h      |  4 +++
>   >  libstdc++-v3/include/bits/funcref_impl.h      |  4 +++
>   >  libstdc++-v3/include/bits/funcwrap.h          | 18 +-
>   >  libstdc++-v3/include/bits/mofunc_impl.h       |  4 +++
>   >  .../20_util/copyable_function/call.cc         |  7 ++--
>   >  .../20_util/copyable_function/conv.cc         | 35 
> +++
>   >  .../copyable_function/incomplete_neg.cc       | 18 ++
>   >  .../testsuite/20_util/function_ref/call.cc    | 10 +++---
>   >  .../testsuite/20_util/function_ref/conv.cc    | 34 ++
>   >  .../20_util/function_ref/incomplete_neg.cc    | 18 ++
>   >  .../20_util/move_only_function/call.cc        |  7 ++--
>   >  .../20_util/move_only_function/conv.cc        | 35 
> +++
>   >  .../move_only_function/incomplete_neg.cc      | 18 ++
>   >  13 files changed, 200 insertions(+), 12 deletions(-)
>   >  create mode 100644 
> libstdc++-v3/testsuite/20_util/copyable_function/incomplete_neg.cc
>   >  create mode 100644 
> libstdc++-v3/testsuite/20_util/function_ref/incomplete_neg.cc
>   >  create mode 100644 
> libstdc++-v3/testsuite/20_util/move_only_function/incomplete_neg.cc
>   >
>   > diff --git a/libstdc++-v3/include/bits/cpyfunc_impl.h 
> b/libstdc++-v3/include/bits/cpyfunc_impl.h
>   > index bc44cd3e313..f1918ddf87a 100644
>   > --- a/libstdc++-v3/include/bits/cpyfunc_impl.h
>   > +++ b/libstdc++-v3/include/bits/cpyfunc_impl.h
>   > @@ -64,6 +64,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   >                           _GLIBCXX_MOF_REF noexcept(_Noex)>
>   >      : __polyfunc::_Cpy_base
>   >      {
>   > +      static_assert(
>   > +     (std::__is_complete_or_unbounded(__type_identity<_ArgTypes>()) 
> && ...),
>   > +     "each parameter type must be a complete class");
>   > +
>

[PATCH] fortran: add constant input support for trig functions with half-revolutions

2025-05-28 Thread Yuao Ma

Hi Tobias,

> you will notice that the PR is not recognized. The format as mentioned before 
> is "PR component/number". Namely:

Thanks for the reminder! I'll use `-p` to double-check PR numbers going
forward.

> The second part is not what you are doing, you are actually changing the
> call from gfc_resolve_trigd{,2} to gfc_resolve_trig{,2}.

Done.

> > + gfc_error ("If first argument of ATAN2PI at %L is zero, then the " +
> > "second argument must not be zero", + &y->where);
> >
> I am a non-native speaker, but I think there is a "the" missing before
> "first".

You're right, I've corrected this and the two existing instances.

> BTW: If you have '(1)', you need to escape it with '\\(1\\)' or as the
> (...) don't matter, just use '.1.' as pattern. For '[...]' you need to
> make sure that [...] is not read as pattern range (such as '[a-z]'),
> i.e. use '\\\[-1, 1\\\]' (albeit it also works with only two \\).

Yeah, I forgot to use double escaping. Done.

> BTW: You could also use "intrinsic :: acospi" - which tells the compiler
> that the function is supposed to be an intrinsic.

Done. This will make the test case much cleaner!

Yuao




0001-fortran-add-constant-input-support-for-trig-function.patch
Description: 0001-fortran-add-constant-input-support-for-trig-function.patch

Re: [PATCH 01/61] Multilib changes

2025-05-28 Thread Aleksandar Rakic

HTEC Public

Hi,

Could you please let us know if you have any comments
on the latest reply on this patch?

Kind regards,
Aleksandar Rakic


From: Aleksandar Rakic 
Sent: Tuesday, April 22, 2025 9:00 PM
To: Jeff Law; gcc-patches@gcc.gnu.org
Cc: Djordje Todorovic; c...@mips.com; Robert Suchanek; Matthew Fortune
Subject: Re: [PATCH 01/61] Multilib changes

Hi,

> So I'm not at all concerned about the mips specific bits of this patch.
> After all, they only affect mips ports and the changes seem sensible.
> They would need a ChangeLog entry to go forward through.

> What is concerning is the config.ml change which has no comments about
> what it's doing or justification in the cover letter.

> Similarly it's not clear why we need a blob of mips specific code in
> configure.ac and the files autogenerated from that.

> Jeff

I would like to inform you that version 2 of this patch is available
at the following link:

https://gcc.gnu.org/pipermail/gcc-patches/2025-March/677811.html

Kind regards,
Aleksandar

[PATCH v2 4/4] vect: Fix scale_profile_for_vect_loop for multiple exits [PR117790]

2025-05-28 Thread Alex Coplan

This is just a rebase of the v1 patch, currently waiting on a conclusion
of the discussion here:
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/682033.html

Tested as a series on aarch64-linux-gnu and x86_64-linux-gnu.  OK for
trunk?

Thanks,
Alex

-- >8 --

This adjusts scale_profile_for_vect_loop to do the right thing for loops with
multiple exits, namely by using scale_loop_profile_hold_exit_counts instead of
scale_loop_profile and scaling the expected niters by 1 / VF.

gcc/ChangeLog:

PR tree-optimization/117790
* tree-vect-loop.cc (scale_profile_for_vect_loop): Use
scale_loop_profile_hold_exit_counts instead of scale_loop_profile.  Drop
the exit edge parameter, since the code now handles multiple exits.
Adjust the caller ...
(vect_transform_loop): ... here.

gcc/testsuite/ChangeLog:

PR tree-optimization/117790
* gcc.dg/vect/vect-early-break-profile-2.c: New test.
---
 .../gcc.dg/vect/vect-early-break-profile-2.c  | 21 +++
 gcc/tree-vect-loop.cc | 21 ++-
 2 files changed, 27 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c
new file mode 100644
index 000..03c67802b74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-fdump-tree-vect-blocks-details" } */
+int DECPOWERS[11];
+int multies[] = {5, 3, 1049, 0};
+short decNumberSquareRoot_accnext;
+int decNumberSquareRoot_accunits;
+void decGetDigits(short *, int);
+void decNumberSquareRoot() {
+  int exponent, drop = 0;
+  for (;; drop++) {
+if (exponent >= 0)
+  break;
+if (decNumberSquareRoot_accnext * multies[drop] >> 7 * DECPOWERS[drop])
+  break;
+exponent++;
+  }
+  if (drop)
+decGetDigits(&decNumberSquareRoot_accnext, decNumberSquareRoot_accunits);
+}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "vect" } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 2d1a6883e6b..41b9d553765 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11391,7 +11391,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
profile.  */
 
 static void
-scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool flat)
+scale_profile_for_vect_loop (class loop *loop, unsigned vf, bool flat)
 {
   /* For flat profiles do not scale down proportionally by VF and only
  cap by known iteration count bounds.  */
@@ -11422,18 +11422,10 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl
   vf /= 2;
 }
 
-  if (entry_count.nonzero_p ())
-set_edge_probability_and_rescale_others
-	(exit_e,
-	 entry_count.probability_in (loop->header->count / vf));
-  /* Avoid producing very large exit probability when we do not have
- sensible profile.  */
-  else if (exit_e->probability < profile_probability::always () / (vf * 2))
-set_edge_probability_and_rescale_others (exit_e, exit_e->probability * vf);
-  loop->latch->count = single_pred_edge (loop->latch)->count ();
-
-  scale_loop_profile (loop, profile_probability::always () / vf,
-		  get_likely_max_loop_iterations_int (loop));
+  const auto likely_max_niters = get_likely_max_loop_iterations_int (loop);
+  scale_loop_profile_hold_exit_counts (loop,
+   profile_probability::always () / vf,
+   likely_max_niters);
 }
 
 /* Helper function to pass to simplify_replace_tree to enable replacing tree's
@@ -12010,8 +12002,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
 			  assumed_vf) - 1
 	 : wi::udiv_floor (loop->nb_iterations_estimate + bias_for_assumed,
 			   assumed_vf) - 1);
-  scale_profile_for_vect_loop (loop, LOOP_VINFO_IV_EXIT (loop_vinfo),
-			   assumed_vf, flat);
+  scale_profile_for_vect_loop (loop, assumed_vf, flat);
 
   if (dump_enabled_p ())
 {

Re: [PATCH v4 4/8] libstdc++: Implement layout_right from mdspan.

2025-05-28 Thread Tomasz Kaminski

On Mon, May 26, 2025 at 4:15 PM Luc Grosheintz 
wrote:

> Implement the parts of layout_left that depend on layout_right; and the
> parts of layout_right that don't depend on layout_stride.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (layout_right): New class.
> * src/c++23/std.cc.in: Add layout_right.
>
> Signed-off-by: Luc Grosheintz 
>
LGTM. Only some very subjective comments regarding parentheses.
Also added some comments on possible future improvements for the extents
converting constructor.

> ---
>  libstdc++-v3/include/std/mdspan  | 153 ++-
>  libstdc++-v3/src/c++23/std.cc.in |   1 +
>  2 files changed, 153 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index d81072596b4..7daa0713716 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -397,6 +397,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>class mapping;
>};
>
> +  struct layout_right
> +  {
> +template
> +  class mapping;
> +  };
> +
>namespace __mdspan
>{
>  template
> @@ -489,7 +495,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   _Mapping>;
>
>  template
> -  concept __standardized_mapping = __mapping_of _Mapping>;
> +  concept __standardized_mapping = __mapping_of
> +  || __mapping_of _Mapping>;
>
>  template
>concept __mapping_like = requires
> @@ -539,6 +546,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> : mapping(__other.extents(), __mdspan::__internal_ctor{})
> { }
>
> +  template
> +   requires (_Extents::rank() <= 1
> + && is_constructible_v<_Extents, _OExtents>)
>
I got confused for a moment by the parenthesization here. My preference would be
to use (_Extents::rank() <= 1) && is_constructible_v<_Extents, _OExtents>.

> +   constexpr explicit(!is_convertible_v<_OExtents, _Extents>)
> +   mapping(const layout_right::mapping<_OExtents>& __other) noexcept
> +   : mapping(__other.extents(), __mdspan::__internal_ctor{})
> +   { }
> +
>constexpr mapping&
>operator=(const mapping&) noexcept = default;
>
> @@ -606,6 +621,142 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> [[no_unique_address]] _Extents _M_extents{};
>  };
>
> +  namespace __mdspan
> +  {
> +template
> +  constexpr typename _Extents::index_type
> +  __linear_index_right(const _Extents& __exts, _Indices... __indices)
> +  {
> +   using _IndexType = typename _Extents::index_type;
> +   array<_IndexType, sizeof...(__indices)> __ind_arr{__indices...};
> +   _IndexType __res = 0;
> +   if constexpr (sizeof...(__indices) > 0)
> + {
> +   _IndexType __mult = 1;
> +   auto __update = [&, __pos = __exts.rank()](_IndexType) mutable
> + {
> +   --__pos;
> +   __res += __ind_arr[__pos] * __mult;
> +   __mult *= __exts.extent(__pos);
> + };
> +   (__update(__indices), ...);
> + }
> +   return __res;
> +  }
> +  }
> +
> +  template
> +class layout_right::mapping
> +{
> +public:
> +  using extents_type = _Extents;
> +  using index_type = typename extents_type::index_type;
> +  using size_type = typename extents_type::size_type;
> +  using rank_type = typename extents_type::rank_type;
> +  using layout_type = layout_right;
> +
> +  static_assert(__mdspan::__representable_size<_Extents, index_type>,
> +   "The size of extents_type must be representable as index_type");
> +
> +  constexpr
> +  mapping() noexcept = default;
> +
> +  constexpr
> +  mapping(const mapping&) noexcept = default;
> +
> +  constexpr
> +  mapping(const _Extents& __extents) noexcept
> +  : _M_extents(__extents)
> +  {
> __glibcxx_assert(__mdspan::__is_representable_extents(_M_extents)); }
> +
> +  template
> +   requires (is_constructible_v)
>
I do not think these parentheses are necessary. Are they?

> +   constexpr explicit(!is_convertible_v<_OExtents, extents_type>)
> +   mapping(const mapping<_OExtents>& __other) noexcept
> +   : mapping(__other.extents(), __mdspan::__internal_ctor{})
> +   { }
> +
> +  template
> +   requires (extents_type::rank() <= 1
> +   && is_constructible_v)
>
Same comment regarding parenthesization.

> +   constexpr explicit(!is_convertible_v<_OExtents, extents_type>)
> +   mapping(const layout_left::mapping<_OExtents>& __other) noexcept
> +   : mapping(__other.extents(), __mdspan::__internal_ctor{})
> +   { }
> +
> +  constexpr mapping&
> +  operator=(const mapping&) noexcept = default;
> +
> +  constexpr const _Extents&
> +  extents() const noexcept { return _M_extents; }
> +
> +  constexpr index_type
> +  required_span_size() const noexcept
> +  { return __mdspan::__fwd_prod(_M_extents, extent

Re: [PATCH v5] libstdc++: Implement C++23 P1659R3 starts_with and ends_with

2025-05-28 Thread Patrick Palka


> 
> 
> On Tue, May 20, 2025 at 6:32 PM Patrick Palka  wrote:
>   On Tue, 20 May 2025, Tomasz Kaminski wrote:
> 
>   > I think I do not have any more suggestions for cases to check, so the 
> impl LGTM.
> 
>   It's cool how many optimizations we came up with for this algorithm :)
> 
>   >
>   > On Tue, May 20, 2025 at 4:33 PM Patrick Palka  
> wrote:
>   >       Changes in v5:
>   >         * dispatch to starts_with for the both-bidi/common range case
>   >
>   >       Changes in v4:
>   >         * optimize the both-bidi/common ranges case, as suggested by
>   >           Tomasz
>   >         * add tests for that code path
>   >
>   >       Changes in v3:
>   >         * Use the forward_range code path for a (non-sized) 
> bidirectional
>   >           haystack, since it's slightly fewer increments/decrements
>   >           overall.
>   >         * Fix wrong iter_difference_t cast in starts_with.
>   >
>   >       Changes in v2:
>   >         Addressed Tomasz's review comments, namely:
>   >         * Added explicit iter_difference_t casts
>   >         * Made _S_impl member private
>   >         * Optimized sized bidirectional case of ends_with
>   >         * Rearranged control flow of starts_with::_S_impl
>   >
>   >       Still left to do:
>   >         * Add tests for integer-class types
>   >         * Still working on a better commit description ;)
>   >
>   >       -- >8 --
>   >
>   >       libstdc++-v3/ChangeLog:
>   >
>   >               * include/bits/ranges_algo.h (__starts_with_fn, 
> starts_with):
>   >               Define.
>   >               (__ends_with_fn, ends_with): Define.
>   >               * include/bits/version.def (ranges_starts_ends_with): 
> Define.
>   >               * include/bits/version.h: Regenerate.
>   >               * include/std/algorithm: Provide 
> __cpp_lib_ranges_starts_ends_with.
>   >               * src/c++23/std.cc.in (ranges::starts_with): Export.
>   >               (ranges::ends_with): Export.
>   >               * testsuite/25_algorithms/ends_with/1.cc: New test.
>   >               * testsuite/25_algorithms/starts_with/1.cc: New test.
>   >       ---
>   >        libstdc++-v3/include/bits/ranges_algo.h       | 247 
> ++
>   >        libstdc++-v3/include/bits/version.def         |   8 +
>   >        libstdc++-v3/include/bits/version.h           |  10 +
>   >        libstdc++-v3/include/std/algorithm            |   1 +
>   >        libstdc++-v3/src/c++23/std.cc.in              |   4 +
>   >        .../testsuite/25_algorithms/ends_with/1.cc    | 135 ++
>   >        .../testsuite/25_algorithms/starts_with/1.cc  | 128 +
>   >        7 files changed, 533 insertions(+)
>   >        create mode 100644 
> libstdc++-v3/testsuite/25_algorithms/ends_with/1.cc
>   >        create mode 100644 
> libstdc++-v3/testsuite/25_algorithms/starts_with/1.cc
>   >
>   >       diff --git a/libstdc++-v3/include/bits/ranges_algo.h 
> b/libstdc++-v3/include/bits/ranges_algo.h
>   >       index f36e7dd59911..60f7bf841f3f 100644
>   >       --- a/libstdc++-v3/include/bits/ranges_algo.h
>   >       +++ b/libstdc++-v3/include/bits/ranges_algo.h
>   >       @@ -438,6 +438,253 @@ namespace ranges
>   >
>   >          inline constexpr __search_n_fn search_n{};
>   >
>   >       +#if __glibcxx_ranges_starts_ends_with // C++ >= 23
>   >       +  struct __starts_with_fn
>   >       +  {
>   >       +    template 
> _Sent1,
>   >       +            input_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
>   >       +            typename _Pred = ranges::equal_to,
>   >       +            typename _Proj1 = identity, typename _Proj2 = 
> identity>
>   >       +      requires indirectly_comparable<_Iter1, _Iter2, _Pred, 
> _Proj1, _Proj2>
>   >       +      constexpr bool
>   >       +      operator()(_Iter1 __first1, _Sent1 __last1,
>   >       +                _Iter2 __first2, _Sent2 __last2, _Pred __pred 
> = {},
>   >       +                _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const
>   >       +      {
>   >       +       iter_difference_t<_Iter1> __n1 = -1;
>   >       +       iter_difference_t<_Iter2> __n2 = -1;
>   >       +       if constexpr (sized_sentinel_for<_Sent1, _Iter1>)
>   >       +         __n1 = __last1 - __first1;
>   >       +       if constexpr (sized_sentinel_for<_Sent2, _Iter2>)
>   >       +         __n2 = __last2 - __first2;
>   >       +       return _S_impl(std::move(__first1), __last1, __n1,
>   >       +                      std::move(__first2), __last2, __n2,
>   >       +                      std::move(__pred),
>   >       +                      std::move(__proj1), std::move(__proj2));

Re: [PATCH v2] ext-dce: Don't refine live width with SUBREG mode if !TRULY_NOOP_TRUNCATION_MODES_P [PR 120050]

2025-05-28 Thread Richard Sandiford

Sorry for the slow reply, had a few days off.

Xi Ruoyao  writes:
> If we see a promoted subreg and TRULY_NOOP_TRUNCATION says the
> truncation is not a noop, then all bits of the inner reg are live.  We
> cannot reduce the live mask to that of the mode of the subreg.
>
> gcc/ChangeLog:
>
>   PR rtl-optimization/120050
>   * ext-dce.cc (ext_dce_process_uses): Break early if a SUBREG in
>   rhs is promoted and the truncation from the inner mode to the
>   outer mode is not a noop when handling SETs.
> ---
>
> Bootstrapped on mips64el-linux-gnuabi64.  Ok for trunk?
>
>  gcc/ext-dce.cc | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
> index a0343950141..3b21e68b90c 100644
> --- a/gcc/ext-dce.cc
> +++ b/gcc/ext-dce.cc
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "print-rtl.h"
>  #include "dbgcnt.h"
>  #include "diagnostic-core.h"
> +#include "target.h"
>  
>  /* These should probably move into a C++ class.  */
>  static vec livein;
> @@ -764,13 +765,20 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj,
>We don't want to mark those bits live unnecessarily
>as that inhibits extension elimination in important
>cases such as those in Coremark.  So we need that
> -  outer code.  */
> +  outer code.
> +
> +  But if !TRULY_NOOP_TRUNCATION_MODES_P, those bits
> +  may be actually alive with any promoted subreg
> +  regardless of the outer code.  See PR 120050.  */

How about expanding on this a bit:

 If !TRULY_NOOP_TRUNCATION_MODES_P holds true for
 the subreg, then the mode change performed by Y
 would normally need to be a TRUNCATE rather than
 a SUBREG.  It is probably the guarantee provided
 by SUBREG_PROMOTED_VAR_P that allows the SUBREG
 in Y as an exception.  We must therefore preserve
 that guarantee and treat the upper bits of the
 inner register as live regardless of the outer code.
 See PR 120050.  */

OK with that change, thanks.

Richard

> if (!REG_P (SUBREG_REG (y))
> || (SUBREG_PROMOTED_VAR_P (y)
> && ((GET_CODE (SET_SRC (x)) == SIGN_EXTEND
>  && SUBREG_PROMOTED_SIGNED_P (y))
> || (GET_CODE (SET_SRC (x)) == ZERO_EXTEND
> -   && SUBREG_PROMOTED_UNSIGNED_P (y)
> +   && SUBREG_PROMOTED_UNSIGNED_P (y))
> +   || !TRULY_NOOP_TRUNCATION_MODES_P (
> + GET_MODE (y),
> + GET_MODE (SUBREG_REG (y))
>   break;
>  
> bit = subreg_lsb (y).to_constant ();

[PATCH v4 06/10] AArch64: recognize `+cmpbr` option

2025-05-28 Thread Karl Meakin

Add the `+cmpbr` option to enable the FEAT_CMPBR architectural
extension.

gcc/ChangeLog:

* config/aarch64/aarch64-option-extensions.def (cmpbr): New
option.
* config/aarch64/aarch64.h (TARGET_CMPBR): New macro.
* doc/invoke.texi (cmpbr): New option.
---
 gcc/config/aarch64/aarch64-option-extensions.def | 2 ++
 gcc/config/aarch64/aarch64.h | 3 +++
 gcc/doc/invoke.texi  | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index dbbb021f05a..1c3e69799f5 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -249,6 +249,8 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "mops")
 
 AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
 
+AARCH64_OPT_EXTENSION("cmpbr", CMPBR, (), (), (), "cmpbr")
+
 AARCH64_OPT_EXTENSION("lse128", LSE128, (LSE), (), (), "lse128")
 
 AARCH64_OPT_EXTENSION("d128", D128, (LSE128), (), (), "d128")
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index e8bd8c73c12..d5c4a42e96d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -202,326 +202,329 @@ constexpr auto AARCH64_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
   = AARCH64_ISA_MODE_SM_OFF;
 constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
   = aarch64_feature_flags (AARCH64_DEFAULT_ISA_MODE);
 
 #endif
 
 /* Macros to test ISA flags.
 
There is intentionally no macro for AARCH64_FL_CRYPTO, since this flag bit
is not always set when its constituent features are present.
Check (TARGET_AES && TARGET_SHA2) instead.  */
 
 #define AARCH64_HAVE_ISA(X) (bool (aarch64_isa_flags & AARCH64_FL_##X))
 
 #define AARCH64_ISA_MODE((aarch64_isa_flags & AARCH64_FL_ISA_MODES).val[0])
 
 /* The current function is a normal non-streaming function.  */
 #define TARGET_NON_STREAMING AARCH64_HAVE_ISA (SM_OFF)
 
 /* The current function has a streaming body.  */
 #define TARGET_STREAMING AARCH64_HAVE_ISA (SM_ON)
 
 /* The current function has a streaming-compatible body.  */
 #define TARGET_STREAMING_COMPATIBLE \
   ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)
 
 /* PSTATE.ZA is enabled in the current function body.  */
 #define TARGET_ZA AARCH64_HAVE_ISA (ZA_ON)
 
 /* AdvSIMD is supported in the default configuration, unless disabled by
-mgeneral-regs-only or by the +nosimd extension.  The set of available
instructions is then subdivided into:
 
- the "base" set, available both in SME streaming mode and in
  non-streaming mode
 
- the full set, available only in non-streaming mode.  */
 #define TARGET_BASE_SIMD AARCH64_HAVE_ISA (SIMD)
 #define TARGET_SIMD (TARGET_BASE_SIMD && TARGET_NON_STREAMING)
 #define TARGET_FLOAT AARCH64_HAVE_ISA (FP)
 
 /* AARCH64_FL options necessary for system register implementation.  */
 
 /* Define AARCH64_FL aliases for architectural features which are protected
by -march flags in binutils but which receive no special treatment by GCC.
 
Such flags are inherited from the Binutils definition of system registers
and are mapped to the architecture in which the feature is implemented.  */
 #define AARCH64_FL_RASAARCH64_FL_V8A
 #define AARCH64_FL_LORAARCH64_FL_V8_1A
 #define AARCH64_FL_PANAARCH64_FL_V8_1A
 #define AARCH64_FL_AMUAARCH64_FL_V8_4A
 #define AARCH64_FL_SCXTNUMAARCH64_FL_V8_5A
 #define AARCH64_FL_ID_PFR2AARCH64_FL_V8_5A
 
 /* Armv8.9-A extension feature bits defined in Binutils but absent from GCC,
aliased to their base architecture.  */
 #define AARCH64_FL_AIEAARCH64_FL_V8_9A
 #define AARCH64_FL_DEBUGv8p9  AARCH64_FL_V8_9A
 #define AARCH64_FL_FGT2   AARCH64_FL_V8_9A
 #define AARCH64_FL_ITEAARCH64_FL_V8_9A
 #define AARCH64_FL_PFAR   AARCH64_FL_V8_9A
 #define AARCH64_FL_PMUv3_ICNTRAARCH64_FL_V8_9A
 #define AARCH64_FL_PMUv3_SS   AARCH64_FL_V8_9A
 #define AARCH64_FL_PMUv3p9AARCH64_FL_V8_9A
 #define AARCH64_FL_RASv2  AARCH64_FL_V8_9A
 #define AARCH64_FL_S1PIE  AARCH64_FL_V8_9A
 #define AARCH64_FL_S1POE  AARCH64_FL_V8_9A
 #define AARCH64_FL_S2PIE  AARCH64_FL_V8_9A
 #define AARCH64_FL_S2POE  AARCH64_FL_V8_9A
 #define AARCH64_FL_SCTLR2 AARCH64_FL_V8_9A
 #define AARCH64_FL_SEBEP  AARCH64_FL_V8_9A
 #define AARCH64_FL_SPE_FDSAARCH64_FL_V8_9A
 #define AARCH64_FL_TCR2   AARCH64_FL_V8_9A
 
 #define TARGET_V8R AARCH64_HAVE_ISA (V8R)
 #define TARGET_V9A AARCH64_HAVE_ISA (V9A)
 
 
 /* SHA2 is an optional extension to AdvSIMD.  */
 #define TARGET_SHA2 AARCH64_HAVE_ISA (SHA2)
 
 /* SHA3 is an optional extension to AdvSIMD.  */
 #define TARGET_SHA3 AARCH64_HAVE_ISA (SHA3)
 
 /* AES is an optional extension to AdvSIMD.  */
 #define TARGET_AES AARCH64_HAVE_ISA (AES)
 
 /* SM is an optiona

[PATCH v4 05/10] AArch64: make `far_branch` attribute a boolean

2025-05-28 Thread Karl Meakin

The `far_branch` attribute only ever takes the values 0 or 1, so make it
a `no/yes` valued string attribute instead.

gcc/ChangeLog:

* config/aarch64/aarch64.md (far_branch): Replace 0/1 with
no/yes.
(aarch64_bcond): Handle rename.
(aarch64_cbz1): Likewise.
(*aarch64_tbz1): Likewise.
(@aarch64_tbz): Likewise.
---
 gcc/config/aarch64/aarch64.md | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c31ad4fc16e..b61e3e5a72f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -554,16 +554,14 @@ (define_attr "mode_enabled" "false,true"
 ;; Attribute that controls whether an alternative is enabled or not.
 (define_attr "enabled" "no,yes"
   (if_then_else (and (eq_attr "arch_enabled" "yes")
 (eq_attr "mode_enabled" "true"))
(const_string "yes")
(const_string "no")))
 
 ;; Attribute that specifies whether we are dealing with a branch to a
 ;; label that is far away, i.e. further away than the maximum/minimum
 ;; representable in a signed 21-bits number.
-;; 0 :=: no
-;; 1 :=: yes
-(define_attr "far_branch" "" (const_int 0))
+(define_attr "far_branch" "no,yes" (const_string "no"))
 
 ;; Attribute that specifies whether the alternative uses MOVPRFX.
 (define_attr "movprfx" "no,yes" (const_string "no"))
@@ -759,45 +757,45 @@ (define_expand "cbranchcc4"
 ;; Emit `B`, assuming that the condition is already in the CC register.
 (define_insn "aarch64_bcond"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand 1 "cc_register")
 (const_int 0)])
   (label_ref (match_operand 2))
   (pc)))]
   ""
   {
 /* GCC's traditional style has been to use "beq" instead of "b.eq", etc.,
but the "." is required for SVE conditions.  */
 bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode;
 if (get_attr_length (insn) == 8)
   return aarch64_gen_far_branch (operands, 2, "Lbcond",
 use_dot_p ? "b.%M0\\t" : "b%M0\\t");
 else
   return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2";
   }
   [(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc))
   (const_int BRANCH_LEN_N_1MiB))
   (lt (minus (match_dup 2) (pc))
   (const_int BRANCH_LEN_P_1MiB)))
  (const_int 4)
  (const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc))
   (const_int BRANCH_LEN_N_1MiB))
   (lt (minus (match_dup 2) (pc))
   (const_int BRANCH_LEN_P_1MiB)))
- (const_int 0)
- (const_int 1)))]
+ (const_string "no")
+ (const_string "yes")))]
 )
 
 ;; For a 24-bit immediate CST we can optimize the compare for equality
 ;; and branch sequence from:
 ;; mov x0, #imm1
 ;; movkx0, #imm2, lsl 16 /* x0 contains CST.  */
 ;; cmp x1, x0
 ;; b .Label
 ;; into the shorter:
 ;; sub x0, x1, #(CST & 0xfff000)
 ;; subsx0, x0, #(CST & 0x000fff)
 ;; b .Label
@@ -829,77 +827,77 @@ (define_insn_and_split "*aarch64_bcond_wide_imm"
 ;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ`
 (define_insn "aarch64_cbz1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
(const_int 0))
   (label_ref (match_operand 1))
   (pc)))]
   "!aarch64_track_speculation"
   {
 if (get_attr_length (insn) == 8)
   return aarch64_gen_far_branch (operands, 1, "Lcb", "\\t%0, ");
 else
   return "\\t%0, %l1";
   }
   [(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 1) (pc))
   (const_int BRANCH_LEN_N_1MiB))
   (lt (minus (match_dup 1) (pc))
   (const_int BRANCH_LEN_P_1MiB)))
  (const_int 4)
  (const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc))
   (const_int BRANCH_LEN_N_1MiB))
   (lt (minus (match_dup 2) (pc))
   (const_int BRANCH_LEN_P_1MiB)))
- (const_int 0)
- (const_int 1)))]
+ (const_string "no")
+ (const_string "yes")))]
 )
 
 ;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ`
 (define_insn "*aarch64_tbz1"
   [(set (pc) (if_then_else (LTGE (match_operand:A

[PATCH v4 08/10] AArch64: rules for CMPBR instructions

2025-05-28 Thread Karl Meakin

Add rules for lowering `cbranch4` to CBB/CBH/CB when the
CMPBR extension is enabled.

gcc/ChangeLog:

* config/aarch64/aarch64.md (BRANCH_LEN_P_1Kib): New constant.
(BRANCH_LEN_N_1Kib): Likewise.
(cbranch4): Emit CMPBR instructions if possible.
(cbranch4): New expand rule.
(*aarch64_cb): Likewise.
(*aarch64_cb): Likewise.
* config/aarch64/aarch64.cc (aarch64_cb_rhs): New function.
* config/aarch64/aarch64-protos.h (aarch64_cb_rhs): Likewise.
* config/aarch64/iterators.md (cmpbr_suffix): New mode attr.
* config/aarch64/predicates.md (const_0_to_63_operand): New
predicate.
(aarch64_cb_immediate): Likewise.
(aarch64_cb_operand): Likewise.
(aarch64_cb_short_operand): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cmpbr.c: Update tests.
---
 gcc/config/aarch64/aarch64-protos.h  |   2 +
 gcc/config/aarch64/aarch64.cc|  34 ++
 gcc/config/aarch64/aarch64.md|  87 +++-
 gcc/config/aarch64/iterators.md  |   5 +
 gcc/config/aarch64/predicates.md |  15 +
 gcc/testsuite/gcc.target/aarch64/cmpbr.c | 586 ---
 6 files changed, 343 insertions(+), 386 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 8f37e56d440..842b9a94334 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1131,6 +1131,8 @@ bool aarch64_general_check_builtin_call (location_t, 
vec,
 unsigned int, tree, unsigned int,
 tree *);
 
+bool aarch64_cb_rhs (rtx op, rtx rhs);
+
 namespace aarch64 {
   void report_non_ice (location_t, tree, unsigned int);
   void report_out_of_range (location_t, tree, unsigned int, HOST_WIDE_INT,
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index be5a97294dd..1d4ae73a963 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -944,16 +944,50 @@ static const char *
 svpattern_token (enum aarch64_svpattern pattern)
 {
   switch (pattern)
 {
 #define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
 AARCH64_FOR_SVPATTERN (CASE)
 #undef CASE
 case AARCH64_NUM_SVPATTERNS:
   break;
 }
   gcc_unreachable ();
 }
 
+/* Return true if rhs is an operand suitable for a CB (immediate)
+ * instruction. */
+bool
+aarch64_cb_rhs (rtx op, rtx rhs)
+{
+  if (!CONST_INT_P (rhs))
+return REG_P (rhs);
+
+  HOST_WIDE_INT rhs_val = INTVAL (rhs);
+  rtx_code code = GET_CODE (op);
+
+  switch (code)
+{
+case EQ:
+case NE:
+case GT:
+case GTU:
+case LT:
+case LTU:
+  return IN_RANGE (rhs_val, 0, 63);
+
+case GE:  /* CBGE:   signed greater than or equal */
+case GEU: /* CBHS: unsigned greater than or equal */
+  return IN_RANGE (rhs_val, 1, 64);
+
+case LE:  /* CBLE:   signed less than or equal */
+case LEU: /* CBLS: unsigned less than or equal */
+  return IN_RANGE (rhs_val, -1, 62);
+
+default:
+  return false;
+}
+}
+
 /* Return the location of a piece that is known to be passed or returned
in registers.  FIRST_ZR is the first unused vector argument register
and FIRST_PR is the first unused predicate argument register.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b61e3e5a72f..d54b4b8595f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -697,37 +697,60 @@ (define_insn "jump"
 ;; Maximum PC-relative positive/negative displacements for various branching
 ;; instructions.
 (define_constants
   [
 ;; +/- 128MiB.  Used by B, BL.
 (BRANCH_LEN_P_128MiB  134217724)
 (BRANCH_LEN_N_128MiB -134217728)
 
 ;; +/- 1MiB.  Used by B., CBZ, CBNZ.
 (BRANCH_LEN_P_1MiB  1048572)
 (BRANCH_LEN_N_1MiB -1048576)
 
 ;; +/- 32KiB.  Used by TBZ, TBNZ.
 (BRANCH_LEN_P_32KiB  32764)
 (BRANCH_LEN_N_32KiB -32768)
+
+;; +/- 1KiB.  Used by CBB, CBH, CB.
+(BRANCH_LEN_P_1Kib  1020)
+(BRANCH_LEN_N_1Kib -1024)
   ]
 )
 
 ;; ---
 ;; Conditional jumps
 ;; ---
 
-(define_expand "cbranch4"
+(define_expand "cbranch4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand:GPI 1 "register_operand")
 (match_operand:GPI 2 "aarch64_plus_operand")])
   (label_ref (match_operand 3))
   (pc)))]
   ""
-  "
-  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
-operands[2]);
-  operands[2] = const0_rtx;
-  "
+  {
+  if (TARGET_CMPBR && aarch64_cb_rhs(operands[0], operands[2]))
+{
+  emit_jump_insn (gen_aarch64_cb (oper

Re: [PATCH, Fortran] Bug 119856 - Missing commas in I/O formats not diagnosed by default at compile time.

2025-05-28 Thread Steve Kargl

On Wed, May 28, 2025 at 08:11:05AM -0700, Jerry D wrote:
> The attached patch is simple and self explanatory in the git log entry.
> 
> Regression tested on X86_64-linux-gnu.
> 
> OK for trunk?
> 

Yes, with one question.

> commit 845768cbead03f76265e491bcf5ea6de7020ff39
> Author: Jerry DeLisle 
> Date:   Wed May 28 07:56:12 2025 -0700
> 
> Fortran: Adjust handling of optional comma in FORMAT.
> 
> This change adjusts the error messages for optional commas
> in format strings to give a warning at compile time unless
> -std=legacy is used. This is more consistent with the
> runtime library. The comma separator should really not be
> encouraged as it is non-standard Fortran.

Is that last sentence correct?  I would think that the 
comma separator is not only encouraged, but required.

-- 
Steve

[PATCH v4 02/10] AArch64: reformat branch instruction rules

2025-05-28 Thread Karl Meakin

Make the formatting of the RTL templates in the rules for branch
instructions more consistent with each other.

gcc/ChangeLog:

* config/aarch64/aarch64.md (cbranch4): Reformat.
(cbranchcc4): Likewise.
(condjump): Likewise.
(*compare_condjump): Likewise.
(aarch64_cb1): Likewise.
(*cb1): Likewise.
(tbranch_3): Likewise.
(@aarch64_tb): Likewise.
---
 gcc/config/aarch64/aarch64.md | 77 +--
 1 file changed, 38 insertions(+), 39 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 874df262781..05d86595bb1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -705,229 +705,228 @@ (define_insn "jump"
 (define_expand "cbranch4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand:GPI 1 "register_operand")
 (match_operand:GPI 2 "aarch64_plus_operand")])
-  (label_ref (match_operand 3 "" ""))
+  (label_ref (match_operand 3))
   (pc)))]
   ""
   "
   operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
 operands[2]);
   operands[2] = const0_rtx;
   "
 )
 
 (define_expand "cbranch4"
-  [(set (pc) (if_then_else
-   (match_operator 0 "aarch64_comparison_operator"
-[(match_operand:GPF_F16 1 "register_operand")
- (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")])
-   (label_ref (match_operand 3 "" ""))
-   (pc)))]
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+   [(match_operand:GPF_F16 1 "register_operand")
+(match_operand:GPF_F16 2 
"aarch64_fp_compare_operand")])
+  (label_ref (match_operand 3))
+  (pc)))]
   ""
-  "
+  {
   operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
 operands[2]);
   operands[2] = const0_rtx;
-  "
+  }
 )
 
 (define_expand "cbranchcc4"
-  [(set (pc) (if_then_else
- (match_operator 0 "aarch64_comparison_operator"
-  [(match_operand 1 "cc_register")
-   (match_operand 2 "const0_operand")])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+   [(match_operand 1 "cc_register")
+(match_operand 2 "const0_operand")])
+  (label_ref (match_operand 3))
+  (pc)))]
   ""
-  "")
+  ""
+)
 
 (define_insn "condjump"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
-   [(match_operand 1 "cc_register" "") (const_int 0)])
-  (label_ref (match_operand 2 "" ""))
+   [(match_operand 1 "cc_register")
+(const_int 0)])
+  (label_ref (match_operand 2))
   (pc)))]
   ""
   {
 /* GCC's traditional style has been to use "beq" instead of "b.eq", etc.,
but the "." is required for SVE conditions.  */
 bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode;
 if (get_attr_length (insn) == 8)
   return aarch64_gen_far_branch (operands, 2, "Lbcond",
 use_dot_p ? "b.%M0\\t" : "b%M0\\t");
 else
   return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2";
   }
   [(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
   (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
  (const_int 4)
  (const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
   (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
  (const_int 0)
  (const_int 1)))]
 )
 
 ;; For a 24-bit immediate CST we can optimize the compare for equality
 ;; and branch sequence from:
 ;; mov x0, #imm1
 ;; movkx0, #imm2, lsl 16 /* x0 contains CST.  */
 ;; cmp x1, x0
 ;; b .Label
 ;; into the shorter:
 ;; sub x0, x1, #(CST & 0xfff000)
 ;; subsx0, x0, #(CST & 0x000fff)
 ;; b .Label
 (define_insn_and_split "*compare_condjump"
-  [(set (pc) (if_then_else (EQL
- (match_operand:GPI 0 "register_operand" "r")
- (match_operand:GPI 1 "aarch64_imm24" "n"))
-  (label_ref:P (match_operand 2 "" ""))
+  [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
+

1 2 >

1 - 100 of 136 matches

Mail list logo