[PATCH] [MAINTAINERS] Update my email address

2024-07-01 Thread Claudiu Zissulescu
Update my email address.

ChangeLog:

* MAINTAINERS: Update claziss email address.

Signed-off-by: Claudiu Zissulescu 
---
 MAINTAINERS | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 41319595bb5..ddeea7b497f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -56,7 +56,7 @@ aarch64 port  Kyrylo Tkachov  

 alpha port Richard Henderson   
 amdgcn portJulian Brown
 amdgcn portAndrew Stubbs   
-arc port   Claudiu Zissulescu  
+arc port   Claudiu Zissulescu  
 arm port   Nick Clifton
 arm port   Richard Earnshaw
 arm port   Ramana Radhakrishnan
@@ -267,7 +267,6 @@ check in changes outside of the parts of the compiler they 
maintain.
 
Reviewers
 
-arc port   Claudiu Zissulescu  
 callgraph  Martin Jambor   
 C front endMarek Polacek   
 CTF, BTF   Indu Bhagat 
-- 
2.30.2



[PATCH] Add single-lane SLP support to .GOMP_SIMD_LANE vectorization

2024-07-01 Thread Richard Biener
The following adds support for single-lane SLP .GOMP_SIMD_LANE
vectorization.

This doesn't handle much; in particular, the g++.dg/vect/simd-*.cc
tests with their 'inscan' uses remain unhandled.

* tree-vect-slp.cc (no_arg_map): New.
(vect_get_operand_map): Handle IFN_GOMP_SIMD_LANE.
(vect_build_slp_tree_1): Likewise.
* tree-vect-stmts.cc (vectorizable_call): Handle single-lane SLP
for .GOMP_SIMD_LANE calls.
---
 gcc/tree-vect-slp.cc   | 11 +++
 gcc/tree-vect-stmts.cc | 27 +++
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 3138a815da7..f3743997e9c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -505,6 +505,7 @@ static const int cond_expr_maps[3][5] = {
   { 4, -2, -1, 1, 2 },
   { 4, -1, -2, 2, 1 }
 };
+static const int no_arg_map[] = { 0 };
 static const int arg0_map[] = { 1, 0 };
 static const int arg1_map[] = { 1, 1 };
 static const int arg2_map[] = { 1, 2 };
@@ -585,6 +586,9 @@ vect_get_operand_map (const gimple *stmt, bool 
gather_scatter_p = false,
  case IFN_CTZ:
return arg0_map;
 
+ case IFN_GOMP_SIMD_LANE:
+   return no_arg_map;
+
  default:
break;
  }
@@ -1168,6 +1172,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
  ldst_p = true;
  rhs_code = CFN_MASK_STORE;
}
+ else if (cfn == CFN_GOMP_SIMD_LANE)
+   ;
  else if ((cfn != CFN_LAST
&& cfn != CFN_MASK_CALL
&& internal_fn_p (cfn)
@@ -1271,6 +1277,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
  need_same_oprnds = true;
  first_op1 = gimple_call_arg (call_stmt, 1);
}
+ else if (rhs_code == CFN_GOMP_SIMD_LANE)
+   {
+ need_same_oprnds = true;
+ first_op1 = gimple_call_arg (call_stmt, 1);
+   }
}
   else
{
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 840ff8a3406..270c5a5dd34 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3341,7 +3341,7 @@ vectorizable_call (vec_info *vinfo,
   if (ifn == IFN_LAST && !fndecl)
 {
   if (cfn == CFN_GOMP_SIMD_LANE
- && !slp_node
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1)
  && loop_vinfo
  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
@@ -3487,18 +3487,15 @@ vectorizable_call (vec_info *vinfo,
  /* Build argument list for the vectorized call.  */
  if (slp_node)
{
- vec vec_oprnds0;
-
+ unsigned int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  vect_get_slp_defs (vinfo, slp_node, &vec_defs);
- vec_oprnds0 = vec_defs[0];
 
  /* Arguments are ready.  Create the new vector stmt.  */
- FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
+ for (i = 0; i < vec_num; ++i)
{
  int varg = 0;
  if (masked_loop_p && reduc_idx >= 0)
{
- unsigned int vec_num = vec_oprnds0.length ();
  /* Always true for SLP.  */
  gcc_assert (ncopies == 1);
  vargs[varg++] = vect_get_loop_mask (loop_vinfo,
@@ -3539,11 +3536,26 @@ vectorizable_call (vec_info *vinfo,
  vect_finish_stmt_generation (vinfo, stmt_info,
   new_stmt, gsi);
}
+ else if (cfn == CFN_GOMP_SIMD_LANE)
+   {
+ /* ???  For multi-lane SLP we'd need to build
+{ 0, 0, .., 1, 1, ... }.  */
+ tree cst = build_index_vector (vectype_out,
+i * nunits_out, 1);
+ tree new_var
+   = vect_get_new_ssa_name (vectype_out, vect_simple_var,
+"cst_");
+ gimple *init_stmt = gimple_build_assign (new_var, cst);
+ vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
+ new_temp = make_ssa_name (vec_dest);
+ new_stmt = gimple_build_assign (new_temp, new_var);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+  gsi);
+   }
  else
{
  if (len_opno >= 0 && len_loop_p)
{
- unsigned int vec_num = vec_oprnds0.length ();
  /* Always true for SLP.  */
  gcc_assert (ncopies == 1);
  tree len
@@ -3557,7 +3569,6 @@ vect

[PATCH] tree-optimization/115723 - ICE with .COND_ADD reduction

2024-07-01 Thread Richard Biener
The following fixes an ICE with a .COND_ADD discovered as reduction
even though its else value isn't the reduction chain link but a
constant.  This would be wrong-code with --disable-checking I think.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

PR tree-optimization/115723
* tree-vect-loop.cc (check_reduction_path): For a .COND_ADD
verify the else value also refers to the reduction chain op.

* gcc.dg/vect/pr115723.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr115723.c | 25 +
 gcc/tree-vect-loop.cc| 12 
 2 files changed, 33 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr115723.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr115723.c 
b/gcc/testsuite/gcc.dg/vect/pr115723.c
new file mode 100644
index 000..b98b29d4870
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115723.c
@@ -0,0 +1,25 @@
+/* { dg-additional-options "-ffast-math -fno-unsafe-math-optimizations" } */
+
+#include "tree-vect.h"
+
+double __attribute__((noipa))
+foo (double *x, double *y, int n)
+{
+  double res = 0.;
+  for (int i = 0; i < n; ++i)
+if (y[i] > 0.)
+  res += x[i];
+else
+  res = 64.;
+  return res;
+}
+
+double y[16] = { 1., 1., 1., 1., 0., 1., 1., 1.,
+ 1., 1., 1., 1., 1., 1., 1., 1. };
+int main ()
+{
+  check_vect ();
+  if (foo (y, y, 16) != 64. + 11.)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 3095ff5ab6b..a64b5082bd1 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4163,15 +4163,19 @@ pop:
 
   FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi])
{
-   /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
-  op1 twice (once as definition, once as else) in the same operation.
-  Allow this.  */
+ /* In case of a COND_OP (mask, op1, op2, op1) reduction we should
+have op1 twice (once as definition, once as else) in the same
+operation.  Enforce this.  */
  if (cond_fn_p && op_use_stmt == use_stmt)
{
  gcall *call = as_a (use_stmt);
  unsigned else_pos
= internal_fn_else_index (internal_fn (op.code));
-
+ if (gimple_call_arg (call, else_pos) != op.ops[opi])
+   {
+ fail = true;
+ break;
+   }
  for (unsigned int j = 0; j < gimple_call_num_args (call); ++j)
{
  if (j == else_pos)
-- 
2.43.0


[PATCH] i386: Support APX NF and NDD for imul/mul

2024-07-01 Thread kong lingling
Add some missing APX NF and NDD support for imul and mul.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.

Ok for trunk?

gcc/ChangeLog:

* config/i386/i386.md (*imulhizu): Added APX
NF support.
(*imulhizu): New define_insn.
(*mulsi3_1_zext): Ditto.
(*mul3_1): Ditto.
(*mulqihi3_1): Ditto.
(*mul3_1): Added APX NDD support.
(*mulv4): Ditto.
(*mulvhi4): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-ndd.c: Add test for imul ndd.
---
 gcc/config/i386/i386.md | 98 +
 gcc/testsuite/gcc.target/i386/apx-ndd.c |  8 ++
 2 files changed, 61 insertions(+), 45 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fd48e764469..c1f29fee412 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6488,8 +6488,8 @@
 (define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64")

 (define_subst "nf_subst"
-  [(set (match_operand:SWI 0)
-   (match_operand:SWI 1))]
+  [(set (match_operand:SWIDWI 0)
+   (match_operand:SWIDWI 1))]
   ""
   [(set (match_dup 0)
(match_dup 1))
@@ -10028,24 +10028,26 @@
 ;; On BDVER1, all HI MULs use DoublePath

 (define_insn "*mul3_1"
-  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
+  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r,r")
(mult:SWIM248
- (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SWIM248 2 "" "K,,r")))]
+ (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0,r")
+ (match_operand:SWIM248 2 "" "K,,r,r")))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))
&& "
   "@
imul{}\t{%2, %1, %0|%0, %1, %2}
imul{}\t{%2, %1, %0|%0, %1, %2}
-   imul{}\t{%2, %0|%0, %2}"
+   imul{}\t{%2, %0|%0, %2}
+   imul{}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "imul")
-   (set_attr "prefix_0f" "0,0,1")
+   (set_attr "prefix_0f" "0,0,1,1")
+   (set_attr "isa" "*,*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
  (const_string "vector")
   (eq_attr "alternative" "1")
  (const_string "vector")
-  (and (eq_attr "alternative" "2")
+  (and (eq_attr "alternative" "2,3")
(ior (match_test "mode == HImode")
 (match_operand 1 "memory_operand")))
  (const_string "vector")]
@@ -10063,33 +10065,34 @@
(const_string "direct")))
(set_attr "mode" "")])

-(define_insn "*imulhizu"
+(define_insn "*imulhizu"
   [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
(zero_extend:SWI48x
  (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
-  (match_operand:HI 2 "immediate_operand" "K,n"
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_ZU"
+  (match_operand:HI 2 "immediate_operand" "K,n"]
+  "TARGET_APX_ZU && "
   "@
-   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
-   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
+   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
+   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
   [(set_attr "type" "imul")
(set_attr "mode" "HI")])

-(define_insn "*mulsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+(define_insn "*mulsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(zero_extend:DI
- (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
-  (match_operand:SI 2 "x86_64_general_operand"
"K,e,BMr"
-   (clobber (reg:CC FLAGS_REG))]
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0,r")
+  (match_operand:SI 2 "x86_64_general_operand"
"K,e,BMr,BMr"]
   "TARGET_64BIT
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && "
   "@
-   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
-   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
-   imul{l}\t{%2, %k0|%k0, %2}"
+   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+   imul{l}\t{%2, %k0|%k0, %2}
+   imul{l}\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "imul")
-   (set_attr "prefix_0f" "0,0,1")
+   (set_attr "prefix_0f" "0,0,1,1")
+   (set_attr "isa" "*,*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
  (const_string "vector")
@@ -10158,30 +10161,32 @@
   [(set (reg:CCO FLAGS_REG)
(eq:CCO (mult:
   (sign_extend:
- (match_operand:SWI48 1 "nonimmediate_operand"
"%rm,0"))
+ (match_operand:SWI48 1 "nonimmediate_operand"
"%rm,0,r"))
   (sign_extend:
- (match_operand:SWI48 2 "x86_64_sext_operand"
"We,mr")))
+ (match_operand:SWI48 2 "x86_64_sext_operand"
"We,mr,mr")))
(sign_extend:
   (mult:SWI48 (match_dup 1) (match_dup 2)
-   (set (match

[PATCH] libffi: Fix 32-bit SPARC structure passing [PR115681]

2024-07-01 Thread Rainer Orth
The libffi.closures/single_entry_structs2.c test FAILs on 32-bit SPARC:

FAIL: libffi.closures/single_entry_structs2.c -W -Wall -Wno-psabi -O0 execution 
test

The issue has been reported, analyzed and fixed upstream:

Several tests FAIL on 32-bit Solaris/SPARC
https://github.com/libffi/libffi/issues/841

Therefore this patch imports the fix into the GCC tree.

Tested on sparc-sun-solaris2.11.  Ok for trunk?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-07-01  Rainer Orth  

libffi:
PR libffi/115681
* src/sparc/ffi.c (ffi_call_int): Copy structure arguments to
maintain call-by-value semantics.

# HG changeset patch
# Parent  31401e58fa469c62c2278e38d35817fab3407e97
libffi: Fix 32-bit SPARC structure passing

diff --git a/libffi/src/sparc/ffi.c b/libffi/src/sparc/ffi.c
--- a/libffi/src/sparc/ffi.c
+++ b/libffi/src/sparc/ffi.c
@@ -286,6 +286,8 @@ ffi_call_int (ffi_cif *cif, void (*fn)(v
 	  void **avalue, void *closure)
 {
   size_t bytes = cif->bytes;
+  size_t i, nargs = cif->nargs;
+  ffi_type **arg_types = cif->arg_types;
 
   FFI_ASSERT (cif->abi == FFI_V8);
 
@@ -295,6 +297,20 @@ ffi_call_int (ffi_cif *cif, void (*fn)(v
   && (cif->flags & SPARC_FLAG_RET_MASK) == SPARC_RET_STRUCT)
 bytes += FFI_ALIGN (cif->rtype->size, 8);
 
+  /* If we have any structure arguments, make a copy so we are passing
+ by value.  */
+  for (i = 0; i < nargs; i++)
+{
+  ffi_type *at = arg_types[i];
+  int size = at->size;
+  if (at->type == FFI_TYPE_STRUCT)
+{
+  char *argcopy = alloca (size);
+  memcpy (argcopy, avalue[i], size);
+  avalue[i] = argcopy;
+}
+}
+
   ffi_call_v8(cif, fn, rvalue, avalue, -bytes, closure);
 }
 


Re: [PATCH] libffi: Fix 32-bit SPARC structure passing [PR115681]

2024-07-01 Thread Eric Botcazou
> The issue has been reported, analyzed and fixed upstream:
> 
>   Several tests FAIL on 32-bit Solaris/SPARC
>   https://github.com/libffi/libffi/issues/841
> 
> Therefore this patch imports the fix into the GCC tree.
> 
> Tested on sparc-sun-solaris2.11.  Ok for trunk?

Sure, thanks!

-- 
Eric Botcazou




[PATCH v1] RISC-V: Implement the .SAT_TRUNC for scalar

2024-07-01 Thread pan2 . li
From: Pan Li 

This patch would like to implement the simple .SAT_TRUNC pattern
in the riscv backend. Aka:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {\
bool overflow = x > (WT)(NT)(-1);  \
return ((NT)x) | (NT)-overflow;\
  }

DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  _Bool overflow;
  unsigned char _1;
  unsigned char _2;
  unsigned char _3;
  uint8_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  overflow_5 = x_4(D) > 255;
  _1 = (unsigned char) x_4(D);
  _2 = (unsigned char) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;succ:   EXIT

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  uint8_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;succ:   EXIT

}

The following test suites pass with this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

* config/riscv/iterators.md (TARGET_64BIT): Add new iterator
and related attr(s).
* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
func decl for expanding ustrunc
* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
impl to expand ustrunc.
* config/riscv/riscv.md (ustrunc2): Add
new pattern ustrunc2.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_arith_data.h: New test.
* gcc.target/riscv/sat_u_trunc-1.c: New test.
* gcc.target/riscv/sat_u_trunc-2.c: New test.
* gcc.target/riscv/sat_u_trunc-3.c: New test.
* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
* gcc.target/riscv/scalar_sat_unary.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/iterators.md | 10 
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv.cc | 40 +
 gcc/config/riscv/riscv.md | 10 
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 16 ++
 .../gcc.target/riscv/sat_arith_data.h | 56 +++
 .../gcc.target/riscv/sat_u_trunc-1.c  | 17 ++
 .../gcc.target/riscv/sat_u_trunc-2.c  | 20 +++
 .../gcc.target/riscv/sat_u_trunc-3.c  | 19 +++
 .../gcc.target/riscv/sat_u_trunc-run-1.c  | 16 ++
 .../gcc.target/riscv/sat_u_trunc-run-2.c  | 16 ++
 .../gcc.target/riscv/sat_u_trunc-run-3.c  | 16 ++
 .../gcc.target/riscv/scalar_sat_unary.h   | 22 
 13 files changed, 259 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_arith_data.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 20745faa55e..5e2216fdafb 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -65,6 +65,16 @@ (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")])
 ;; Iterator for hardware-supported integer modes.
 (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_NARROW [HI SI (DI "TARGET_64BIT")])
+
+(define_mode_attr ANYI_NARROWED [
+  (HI "QI") (SI "HI") (DI "SI")
+])
+
+(define_mode_attr anyi_narrowed [
+  (HI "qi") (SI "hi") (DI "si")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a8b76173fa0..61a22a187df 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -135,6 +135,7 @@ riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
*invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/

[PATCH 2/3] s390: Enable vcond_mask for 128-bit ops

2024-07-01 Thread Stefan Schulze Frielinghaus
In preparation of dropping vcond{,u,eq} optabs
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654690.html
enable 128-bit operands for vcond_mask---including integer as well as
floating point.

This partially fixes PR115519 w.r.t. the
autovec-long-double-signaling-*.c tests.

gcc/ChangeLog:

* config/s390/vector.md: Enable vcond_mask for 128-bit ops.
---
 Bootstrapped and regtested on s390.  Ok for mainline?

 gcc/config/s390/vector.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 032ec44542c..0e57dd1650c 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -756,12 +756,12 @@
 })
 
 (define_expand "vcond_mask_"
-  [(set (match_operand:V 0 "register_operand" "")
-   (if_then_else:V
+  [(set (match_operand:VT 0 "register_operand" "")
+   (if_then_else:VT
 (eq (match_operand: 3 "register_operand" "")
 (match_dup 4))
-(match_operand:V 2 "register_operand" "")
-(match_operand:V 1 "register_operand" "")))]
+(match_operand:VT 2 "register_operand" "")
+(match_operand:VT 1 "register_operand" "")))]
   "TARGET_VX"
   "operands[4] = CONST0_RTX (mode);")
 
-- 
2.45.2



[PATCH] doc: Document -fasm as the opposite of -fno-asm

2024-07-01 Thread Alejandro Colomar
gcc/ChangeLog:

* doc/invoke.texi: Document -fasm.

Signed-off-by: Alejandro Colomar 
---
 gcc/doc/invoke.texi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 30c4b002d1f..2d55f2715b3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -198,7 +198,7 @@ in the following sections.
 @item C Language Options
 @xref{C Dialect Options,,Options Controlling C Dialect}.
 @gccoptlist{-ansi  -std=@var{standard}  -aux-info @var{filename}
--fno-asm
+-f@r{[}no-@r{]}asm
 -fno-builtin  -fno-builtin-@var{function}  -fcond-mismatch
 -ffreestanding  -fgimple  -fgnu-tm  -fgnu89-inline  -fhosted
 -flax-vector-conversions  -fms-extensions
@@ -2600,8 +2600,8 @@ comments, after the declaration.
 
 @opindex fno-asm
 @opindex fasm
-@item -fno-asm
-Do not recognize @code{asm}, @code{inline} or @code{typeof} as a
+@item -f@r{[}no-@r{]}asm
+Do (or do not) recognize @code{asm}, @code{inline} or @code{typeof} as a
 keyword, so that code can use these words as identifiers.  You can use
 the keywords @code{__asm__}, @code{__inline__} and @code{__typeof__}
 instead.  In C, @option{-ansi} implies @option{-fno-asm}.
-- 
2.45.2



signature.asc
Description: PGP signature


[PATCH] GCC: Enable very long gcc command line options - PR111527

2024-07-01 Thread Deepthi . Hemraj
From: Deepthi Hemraj 

For excessively long environment variables (i.e. >128KB), store the
arguments in a temporary file and collect them back together in
collect2.

This commit fixes the COLLECT_GCC_OPTIONS issue:
GCC should not limit the length of command line passed to collect2
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111527

The Linux kernel has the following limits on shell commands:
I.  Total number of bytes used to specify arguments must be under 128KB.
II. Each environment variable passed to an executable must be under 128 KiB

In order to circumvent these limitations, many build tools support
response-files, i.e. files that contain the arguments for the executed
command. These are typically passed using @ syntax.

1: In gcc, the command line arguments passed to subprocesses are
controlled by 'spec-files'. The default spec for cc1plus includes the
spec of CPP (C-preprocessor). Instead of passing the include directory as
path to a response file to the CPP, gcc expanded it and passed them as
a long string. The same problem was in the assembler (as).
https://gcc.gnu.org/onlinedocs/gcc/Spec-Files.html

2: Gcc uses the COLLECT_GCC_OPTIONS environment variable to transfer the
expanded command line to collect2. With many options, this exceeds the limit II.

GCC : Added new Testcase for PR111527

PR111527-2.c : If the command line argument is less than 128kb, gcc should use
   COLLECT_GCC_OPTIONS to communicate and compile fine.
PR111527-3.c : If the command line argument is in the range of 128kb to 2mb,
   gcc should copy the arguments to a file and use FILE_GCC_OPTIONS
   to communicate and compile fine.
PR111527-4.c : If the command line argument is greater than 2mb, gcc should
   fail the compile and report an error. (Expected FAIL)

Signed-off-by: Topi Kuutela 
Signed-off-by: Sunil Dora 
---
 gcc/collect2.cc   | 38 +--
 gcc/gcc.cc| 36 --
 gcc/testsuite/gcc.dg/longcmd/longcmd.exp  | 16 
 gcc/testsuite/gcc.dg/longcmd/pr111527-1.c | 46 +++
 gcc/testsuite/gcc.dg/longcmd/pr111527-2.c | 12 ++
 gcc/testsuite/gcc.dg/longcmd/pr111527-3.c | 13 +++
 gcc/testsuite/gcc.dg/longcmd/pr111527-4.c | 11 ++
 7 files changed, 166 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/longcmd.exp
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-1.c
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-2.c
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-3.c
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-4.c

Signed-off-by: Deepthi Hemraj 
---
 gcc/collect2.cc   | 38 +--
 gcc/gcc.cc| 36 --
 gcc/testsuite/gcc.dg/longcmd/longcmd.exp  | 16 
 gcc/testsuite/gcc.dg/longcmd/pr111527-1.c | 46 +++
 gcc/testsuite/gcc.dg/longcmd/pr111527-2.c | 12 ++
 gcc/testsuite/gcc.dg/longcmd/pr111527-3.c | 13 +++
 gcc/testsuite/gcc.dg/longcmd/pr111527-4.c | 11 ++
 7 files changed, 166 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/longcmd.exp
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-1.c
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-2.c
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-3.c
 create mode 100644 gcc/testsuite/gcc.dg/longcmd/pr111527-4.c

diff --git a/gcc/collect2.cc b/gcc/collect2.cc
index 902014a9cc1..c45a13075d5 100644
--- a/gcc/collect2.cc
+++ b/gcc/collect2.cc
@@ -375,6 +375,38 @@ typedef int scanfilter;
same format, and begins at __DTOR_LIST__.  */
 
 static void scan_prog_file (const char *, scanpass, scanfilter);
+char* getenv_extended(const char* var_name)
+{
+  int file_size;
+  char* buf = NULL;
+
+  char* string = getenv(var_name);
+  if (!string)
+{
+  char* string = getenv("FILE_GCC_OPTIONS");
+  FILE *fptr;
+  fptr = fopen(string, "r");
+  if (fptr == NULL)
+   return(0);
+  /* Copy contents from temporary file to buffer */
+  if (fseek(fptr, 0, SEEK_END) == -1)
+   return(0);
+  file_size = ftell(fptr);
+  rewind(fptr);
+  buf = (char *)xmalloc(file_size + 1);
+  if (buf == NULL)
+   return(0);
+  if (fread((void *) buf, file_size, 1, fptr) <= 0)
+   {
+ free(buf);
+ fatal_error (input_location, "fread failed");
+ return(0);
+   }
+  buf[file_size] = '\0';
+  return buf;
+}
+  return string;
+}
 
 
 /* Delete tempfiles and exit function.  */
@@ -1004,7 +1036,7 @@ main (int argc, char **argv)
 /* Now pick up any flags we want early from COLLECT_GCC_OPTIONS
The LTO options are passed here as are other options that might
be unsuitable for ld (e.g. -save-temps).  */
-p = getenv ("COLLECT_GCC_OPTIONS");
+p = getenv_extended ("COLLECT_GCC_OPTIONS");
 while (p && *p)
   {
 

[PING][PATCH] [alpha] adjust MEM alignment for block move [PR115459] (was: Re: [PATCH v2] [PR100106] Reject unaligned subregs when strict alignment is required)

2024-07-01 Thread Maciej W. Rozycki
On Thu, 13 Jun 2024, Alexandre Oliva wrote:

> Before issuing loads or stores for a block move, adjust the MEM
> alignments if analysis of the addresses enabled the inference of
> stricter alignment.  This ensures that the MEMs are sufficiently
> aligned for the corresponding insns, which avoids trouble in case of
> e.g. substitutions into SUBREGs.

 Ping for:

,
.

  Maciej


Re: [PATCH] doc: Document -fasm as the opposite of -fno-asm

2024-07-01 Thread Jakub Jelinek
On Mon, Jul 01, 2024 at 11:37:40AM +0200, Alejandro Colomar wrote:
> gcc/ChangeLog:
> 
>   * doc/invoke.texi: Document -fasm.

Why?  We have almost 1300 options which accept the negative forms
and we don't document any of them this way, the manual explicitly states
that:

Many options have long names starting with @samp{-f} or with
@samp{-W}---for example,
@option{-fmove-loop-invariants}, @option{-Wformat} and so on.  Most of
these have both positive and negative forms; the negative form of
@option{-ffoo} is @option{-fno-foo}.  This manual documents
only one of these two forms, whichever one is not the default.

> Signed-off-by: Alejandro Colomar 
> ---
>  gcc/doc/invoke.texi | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 30c4b002d1f..2d55f2715b3 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -198,7 +198,7 @@ in the following sections.
>  @item C Language Options
>  @xref{C Dialect Options,,Options Controlling C Dialect}.
>  @gccoptlist{-ansi  -std=@var{standard}  -aux-info @var{filename}
> --fno-asm
> +-f@r{[}no-@r{]}asm
>  -fno-builtin  -fno-builtin-@var{function}  -fcond-mismatch
>  -ffreestanding  -fgimple  -fgnu-tm  -fgnu89-inline  -fhosted
>  -flax-vector-conversions  -fms-extensions
> @@ -2600,8 +2600,8 @@ comments, after the declaration.
>  
>  @opindex fno-asm
>  @opindex fasm
> -@item -fno-asm
> -Do not recognize @code{asm}, @code{inline} or @code{typeof} as a
> +@item -f@r{[}no-@r{]}asm
> +Do (or do not) recognize @code{asm}, @code{inline} or @code{typeof} as a
>  keyword, so that code can use these words as identifiers.  You can use
>  the keywords @code{__asm__}, @code{__inline__} and @code{__typeof__}
>  instead.  In C, @option{-ansi} implies @option{-fno-asm}.
> -- 
> 2.45.2
> 



Jakub



[PING][PATCH 1/1] ada: Make the names of uninstalled cross-gnattools consistent across builds

2024-07-01 Thread Maciej W. Rozycki
On Tue, 18 Jun 2024, Maciej W. Rozycki wrote:

> Fix the problem by moving the renaming of gnattools to a separate 'make' 
> recipe, pasted into a new 'gnattools-cross-mv' target and the existing 
> legacy 'cross-gnattools' target.  Then invoke the new target explicitly 
> from the 'gnattools-cross' recipe in gnattools/.

 Ping for:
,
.

  Maciej


Re: [PATCH] doc: Document -fasm as the opposite of -fno-asm

2024-07-01 Thread Alejandro Colomar
On Mon, Jul 01, 2024 at 12:40:45PM GMT, Jakub Jelinek wrote:
> On Mon, Jul 01, 2024 at 11:37:40AM +0200, Alejandro Colomar wrote:
> > gcc/ChangeLog:
> > 
> > * doc/invoke.texi: Document -fasm.
> 
> Why?  We have almost 1300 options which accept the negative forms
> and we don't document any of them this way, the manual explicitly states
> that:
> 
> Many options have long names starting with @samp{-f} or with
> @samp{-W}---for example,
> @option{-fmove-loop-invariants}, @option{-Wformat} and so on.  Most of
> these have both positive and negative forms; the negative form of
> @option{-ffoo} is @option{-fno-foo}.  This manual documents
> only one of these two forms, whichever one is not the default.

Ahh; hadn't seen that.  Thanks!

> 
> > Signed-off-by: Alejandro Colomar 
> > ---
> >  gcc/doc/invoke.texi | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index 30c4b002d1f..2d55f2715b3 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -198,7 +198,7 @@ in the following sections.
> >  @item C Language Options
> >  @xref{C Dialect Options,,Options Controlling C Dialect}.
> >  @gccoptlist{-ansi  -std=@var{standard}  -aux-info @var{filename}
> > --fno-asm
> > +-f@r{[}no-@r{]}asm
> >  -fno-builtin  -fno-builtin-@var{function}  -fcond-mismatch
> >  -ffreestanding  -fgimple  -fgnu-tm  -fgnu89-inline  -fhosted
> >  -flax-vector-conversions  -fms-extensions
> > @@ -2600,8 +2600,8 @@ comments, after the declaration.
> >  
> >  @opindex fno-asm
> >  @opindex fasm
> > -@item -fno-asm
> > -Do not recognize @code{asm}, @code{inline} or @code{typeof} as a
> > +@item -f@r{[}no-@r{]}asm
> > +Do (or do not) recognize @code{asm}, @code{inline} or @code{typeof} as a
> >  keyword, so that code can use these words as identifiers.  You can use
> >  the keywords @code{__asm__}, @code{__inline__} and @code{__typeof__}
> >  instead.  In C, @option{-ansi} implies @option{-fno-asm}.
> > -- 
> > 2.45.2
> > 
> 
> 
> 
>   Jakub
> 

-- 



signature.asc
Description: PGP signature


[testsuite,applied] ad testsuite/52641

2024-07-01 Thread Georg-Johann Lay

Applied some fixes / skips to test cases.

Johann

PR testsuite/52641
gcc/testsuite/
* gcc.dg/analyzer/pr109577.c: Use __SIZE_TYPE__ instead of 
"unsigned long".
* gcc.dg/analyzer/pr93032-mztools-signed-char.c: Requires 
int32plus.
* gcc.dg/analyzer/pr93032-mztools-unsigned-char.c: Requires 
int32plus.

* gcc.dg/analyzer/putenv-1.c: Skip on avr.
* gcc.dg/torture/type-generic-1.c: Skip on avr.


diff --git a/gcc/testsuite/gcc.dg/analyzer/pr109577.c 
b/gcc/testsuite/gcc.dg/analyzer/pr109577.c

index 74d1629f3c7..18417818b54 100644
--- a/gcc/testsuite/gcc.dg/analyzer/pr109577.c
+++ b/gcc/testsuite/gcc.dg/analyzer/pr109577.c
@@ -2,18 +2,18 @@
Therefore this test has been duplicated as
c-c++-common/analyzer/pr109577-noexcept.c  */

-void *malloc (unsigned long);
+void *malloc (__SIZE_TYPE__);

 double *
-unsafe (unsigned long n)
+unsafe (__SIZE_TYPE__ n)
 {
   return (double *) malloc (n * sizeof (double));
 }

 double *
-safer (unsigned long n)
+safer (__SIZE_TYPE__ n)
 {
-  unsigned long nbytes;
+  __SIZE_TYPE__ nbytes;
   if (__builtin_mul_overflow (n, sizeof (double), &nbytes))
 return 0;
   return (double *) malloc (nbytes); /* Exceptions enabled cause a 
leak here. */
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-signed-char.c 
b/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-signed-char.c

index 45599e228b8..7a7320fca2b 100644
--- a/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-signed-char.c
+++ b/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-signed-char.c
@@ -5,6 +5,7 @@

 /* { dg-do "compile" } */
 /* { dg-additional-options "-fsigned-char" } */
+/* { dg-require-effective-target int32plus } */

 /* TODO (PR analyzer/112528): remove need for this.  */
 /* { dg-additional-options "--param 
analyzer-max-enodes-per-program-point=40 --param 
analyzer-bb-explosion-factor=10" } */
diff --git 
a/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-unsigned-char.c 
b/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-unsigned-char.c

index a59fc49c2b3..10832757127 100644
--- a/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-unsigned-char.c
+++ b/gcc/testsuite/gcc.dg/analyzer/pr93032-mztools-unsigned-char.c
@@ -5,6 +5,7 @@

 /* { dg-do "compile" } */
 /* { dg-additional-options "-funsigned-char" } */
+/* { dg-require-effective-target int32plus } */

 /* TODO (PR analyzer/112528): remove need for this.  */
 /* { dg-additional-options "--param 
analyzer-max-enodes-per-program-point=40 --param 
analyzer-bb-explosion-factor=10" } */
diff --git a/gcc/testsuite/gcc.dg/analyzer/putenv-1.c 
b/gcc/testsuite/gcc.dg/analyzer/putenv-1.c

index 5c4e08c68df..2be52f05033 100644
--- a/gcc/testsuite/gcc.dg/analyzer/putenv-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/putenv-1.c
@@ -1,5 +1,6 @@
 /* { dg-additional-options "-Wno-analyzer-null-argument" } */
 /* { dg-require-effective-target alloca } */
+/* { dg-skip-if "has no putenv" { "avr-*-*" } } */

 #include 
 #include 
diff --git a/gcc/testsuite/gcc.dg/torture/type-generic-1.c 
b/gcc/testsuite/gcc.dg/torture/type-generic-1.c

index b2aacd933f8..1dd0534a5fb 100644
--- a/gcc/testsuite/gcc.dg/torture/type-generic-1.c
+++ b/gcc/testsuite/gcc.dg/torture/type-generic-1.c
@@ -4,6 +4,7 @@
 /* { dg-do run } */
 /* { dg-require-effective-target inf } */
 /* { dg-skip-if "No subnormal support" { csky-*-* } { "-mhard-float" } 
} */

+/* { dg-skip-if "Not fully IEEE" { "avr-*-*" } } */
 /* { dg-options "-DUNSAFE" { target tic6x*-*-* visium-*-* nvptx-*-* } } */
 /* { dg-add-options ieee } */



[patch,avr,applied] PR88236, PR115726: Fix __memx code in the presence of hard regs.

2024-07-01 Thread Georg-Johann Lay

Applied this patch to fix code when the destination
register overlaps with a hard register used by insn
xload_A resp. xload8qi_A (PR115726).

Also fixed PR88236 in one go because that PR is very
similar in its outcome, and it's not possible to discriminate
in a test case which is which, resp. when only one PR is fixed,
test case will still fail.

There are still tests that fail due to ICE

FAIL: gcc.target/avr/torture/pr88236-pr115726.c   -O3 -g  (internal 
compiler error: in add_dwarf_attr, at dwarf2out.cc:4515)


but that's a long standing bug not related to the PRs addressed
by this patch.

Johann

--

AVR: target/88236, target/115726 - Fix __memx code generation.

PR target/88236
PR target/115726
gcc/
* config/avr/avr.md (mov) [avr_mem_memx_p]: Expand in such a
way that the destination does not overlap with any hard register
clobbered / used by xload8qi_A resp. xload_A.
* config/avr/avr.cc (avr_out_xload): Avoid early-clobber
situation for Z by executing just one load when the output register
overlaps with Z.
gcc/testsuite/
* gcc.target/avr/torture/pr88236-pr115726.c: New test.
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 61c325f2497..a110af62cd5 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -4686,7 +4686,13 @@ avr_out_xload (rtx_insn * /*insn*/, rtx *op, int *plen)
   xop[2] = lpm_addr_reg_rtx;
   xop[3] = AVR_HAVE_LPMX ? op[0] : lpm_reg_rtx;
 
-  avr_asm_len (AVR_HAVE_LPMX ? "lpm %3,%a2" : "lpm", xop, plen, -1);
+  if (plen)
+*plen = 0;
+
+  if (reg_overlap_mentioned_p (xop[3], lpm_addr_reg_rtx))
+avr_asm_len ("sbrs %1,7", xop, plen, 1);
+
+  avr_asm_len (AVR_HAVE_LPMX ? "lpm %3,%a2" : "lpm", xop, plen, 1);
 
   avr_asm_len ("sbrc %1,7" CR_TAB
 	   "ld %3,%a2", xop, plen, 2);
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 75d35d5e14b..dabf4c0fc5a 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -735,12 +735,26 @@ (define_expand "mov"
 if (!REG_P (addr))
   src = replace_equiv_address (src, copy_to_mode_reg (PSImode, addr));
 
+rtx dest2 = reg_overlap_mentioned_p (dest, lpm_addr_reg_rtx)
+  ? gen_reg_rtx (mode)
+  : dest;
+
 if (!avr_xload_libgcc_p (mode))
   // No  here because gen_xload8_A only iterates over ALL1.
   // insn-emit does not depend on the mode, it's all about operands.
-  emit_insn (gen_xload8qi_A (dest, src));
+  emit_insn (gen_xload8qi_A (dest2, src));
 else
-  emit_insn (gen_xload_A (dest, src));
+  {
+rtx reg_22 = gen_rtx_REG (mode, REG_22);
+if (reg_overlap_mentioned_p (dest2, reg_22)
+|| reg_overlap_mentioned_p (dest2, all_regs_rtx[REG_21]))
+  dest2 = gen_reg_rtx (mode);
+
+emit_insn (gen_xload_A (dest2, src));
+  }
+
+if (dest2 != dest)
+  emit_move_insn (dest, dest2);
 
 DONE;
   }
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr88236-pr115726.c b/gcc/testsuite/gcc.target/avr/torture/pr88236-pr115726.c
new file mode 100644
index 000..9fd5fd3b5f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr88236-pr115726.c
@@ -0,0 +1,115 @@
+/* { dg-do run { target { ! avr_tiny } } } */
+/* { dg-additional-options "-std=gnu99" } */
+
+const __flash char fvals8[] = { 1, 2, 3 };
+char rvals8[] = { 0, 2, 4 };
+
+const __flash int fvals16[] = { 1, 2, 3 };
+int rvals16[] = { 0, 2, 4 };
+
+__attribute__((noinline, noclone))
+char xload8_r30 (const __memx char *pc)
+{
+  register char c __asm ("r30");
+  c = *pc;
+  __asm (";;" : "+r" (c));
+  return c;
+}
+
+__attribute__((noinline, noclone))
+int xload16_r30 (const __memx int *pc)
+{
+  register int c __asm ("r30");
+  c = *pc;
+  __asm (";;" : "+r" (c));
+  return c;
+}
+
+__attribute__((noinline, noclone))
+char xload8_r22 (const __memx char *pc)
+{
+  register char c __asm ("r22");
+  c = *pc;
+  __asm (";;" : "+r" (c));
+  return c;
+}
+
+__attribute__((noinline, noclone))
+int xload16_r22 (const __memx int *pc)
+{
+  register int c __asm ("r22");
+  c = *pc;
+  __asm (";;" : "+r" (c));
+  return c;
+}
+
+__attribute__((noinline, noclone))
+int xload16_r20 (const __memx int *pc)
+{
+  register int c __asm ("r20");
+  c = *pc;
+  __asm (";;" : "+r" (c));
+  return c;
+}
+
+void test8 (void)
+{
+  char c;
+  for (int i = 0; i < 3; ++i)
+{
+  c = xload8_r30 (fvals8 + i);
+  if (c != 1 + i)
+	__builtin_exit (__LINE__);
+
+  c = xload8_r22 (fvals8 + i);
+  if (c != 1 + i)
+	__builtin_exit (__LINE__);
+
+  c = xload8_r30 (rvals8 + i);
+  if (c != 2 * i)
+	__builtin_exit (__LINE__);
+
+  c = xload8_r22 (rvals8 + i);
+  if (c != 2 * i)
+	__builtin_exit (__LINE__);
+}
+}
+
+void test16 (void)
+{
+  int c;
+  for (int i = 0; i < 3; ++i)
+{
+  c = xload16_r30 (fvals16 + i);
+  if (c != 1 + i)
+	__builtin_exit (__LINE__);

Re: nvptx vs. [PATCH] Add a late-combine pass [PR106594]

2024-07-01 Thread Thomas Schwinge
Hi Richard!

On 2024-06-28T17:48:30+0100, Richard Sandiford  
wrote:
> Richard Sandiford  writes:
>> Thomas Schwinge  writes:
>>> On 2024-06-27T23:20:18+0200, I wrote:
 On 2024-06-27T22:27:21+0200, I wrote:
> On 2024-06-27T18:49:17+0200, I wrote:
>> On 2023-10-24T19:49:10+0100, Richard Sandiford 
>>  wrote:
>>> This patch adds a combine pass that runs late in the pipeline.
>
> [After sending, I realized I replied to a previous thread of this work.]
>
>> I've been looking a bit through recent nvptx target code generation
>> changes for GCC target libraries, and thought I'd also share here my
>> findings for the "late-combine" changes in isolation, for nvptx target.
>> 
>> First the unexpected thing:
>
> So much for "unexpected thing" -- next level of unexpected here...
> Appreciated if anyone feels like helping me find my way through this, but
> I totally understand if you've got other things to do.

 OK, I found something already.  (Unexpectedly quickly...)  ;-)

>> there are a few cases where we now see unused
>> registers get declared
>>>
 But in fact, for both cases
>>>
>>> Now tested: 's%both%all'.  :-)
>>>
 the unexpected difference goes away if after
 'pass_late_combine' I inject a 'pass_fast_rtl_dce'.  That's normally run
 as part of 'PUSH_INSERT_PASSES_WITHIN (pass_postreload)' -- but that's
 all not active for nvptx target given '!reload_completed', given nvptx is
 'targetm.no_register_allocation'.  Maybe we need to enable a few more
 passes, or is there anything in 'pass_late_combine' to change, so that we
 don't run into this?  Does it inadvertently mark registers live or
 something like that?
>>>
>>> Basically, is 'pass_late_combine' potentially doing things that depend
>>> on later clean-up?  (..., or shouldn't it be doing these things in the
>>> first place?)
>>
>> It's possible that late-combine could expose dead code, but I imagine
>> it's a niche case.
>>
>> I had a look at the nvptx logs from my comparison, and the cases in
>> which I saw this seemed to be those where late-combine doesn't find
>> anything to do.  Does that match your examples?  Specifically,
>> the effect should be the same with -fdbg-cnt=late_combine:0-0
>>
>> I think what's happening is that:
>>
>> - combine exposes dead code
>>
>> - ce2 previously ran df_analyze with DF_LR_RUN_DCE set, and so cleared
>>   up the dead code
>>
>> - late-combine instead runs df_analyze without that flag (since late-combine
>>   itself doesn't really care whether dead code is present)
>>
>> - if late-combine doesn't do anything, ce2's df_analyze call has nothing
>>   to do, and skips even the DCE
>>
>> The easiest fix would be to add:
>>
>>   df_set_flags (DF_LR_RUN_DCE);
>>
>> before df_analyze in late-combine.cc, so that it behaves like ce2.
>> But the arrangement feels wrong.  I would have expected DF_LR_RUN_DCE
>> to depend on whether df_analyze had been called since the last DCE pass
>> (whether DF_LR_RUN_DCE or a full DCE).
>
> I'm testing the attached patch to do that.  I'll submit it properly if
> testing passes, but it seems to fix the extra-register problem for me.

> Give fast DCE a separate dirty flag

Thanks, and yes, your analysis makes sense to me (to the extent that I
only superficially understand these parts of GCC) -- and I confirm that
your proposed change to "Give fast DCE a separate dirty flag" does
address the issue for nvptx target.


Grüße
 Thomas


> Thomas pointed out that we sometimes failed to eliminate some dead code
> (specifically clobbers of otherwise unused registers) on nvptx when
> late-combine is enabled.  This happens because:
>
> - combine is able to optimise the function in a way that exposes dead code.
>   This leaves the df information in a "dirty" state.
>
> - late_combine calls df_analyze without DF_LR_RUN_DCE set.
>   This updates the df information and clears the "dirty" state.
>
> - late_combine doesn't find any extra optimisations, and so leaves
>   the df information up-to-date.
>
> - if_after_combine (ce2) calls df_analyze with DF_LR_RUN_DCE set.
>   Because the df information is already up-to-date, fast DCE is
>   not run.
>
> The upshot is that running late-combine has the effect of suppressing
> a DCE opportunity that would have been noticed without late_combine.
>
> I think this shows that we should track the state of the DCE separately
> from the LR problem.  Every pass updates the latter, but not all passes
> update the former.
>
> gcc/
>   * df.h (DF_LR_DCE): New df_problem_id.
>   (df_lr_dce): New macro.
>   * df-core.cc (rest_of_handle_df_finish): Check for a null free_fun.
>   * df-problems.cc (df_lr_finalize): Split out fast DCE handling to...
>   (df_lr_dce_finalize): ...this new function.
>   (problem_LR_DCE): New df_problem.
>   (df_lr_add_problem): Register LR_DCE rather than LR itself.
>   * dce.cc (fast_dce): Clear 

WIP Move 'pass_fast_rtl_dce' from 'pass_postreload' into 'pass_late_compilation' (was: nvptx vs. [PATCH] Add a late-combine pass [PR106594])

2024-07-01 Thread Thomas Schwinge
Hi!

On 2024-06-28T00:41:54+0200, I wrote:
> On 2024-06-27T23:20:18+0200, I wrote:
>> On 2024-06-27T22:27:21+0200, I wrote:
>>> On 2024-06-27T18:49:17+0200, I wrote:
 On 2023-10-24T19:49:10+0100, Richard Sandiford  
 wrote:
> This patch adds a combine pass that runs late in the pipeline.
>>>
>>> [After sending, I realized I replied to a previous thread of this work.]
>>>
 I've been looking a bit through recent nvptx target code generation
 changes for GCC target libraries, and thought I'd also share here my
 findings for the "late-combine" changes in isolation, for nvptx target.
 
 First the unexpected thing:
>>>
>>> So much for "unexpected thing" -- next level of unexpected here...
>>> Appreciated if anyone feels like helping me find my way through this, but
>>> I totally understand if you've got other things to do.
>>
>> OK, I found something already.  (Unexpectedly quickly...)  ;-)
>>
 there are a few cases where we now see unused
 registers get declared
>
>> But in fact, for both cases
>
> Now tested: 's%both%all'.  :-)
>
>> the unexpected difference goes away if after
>> 'pass_late_combine' I inject a 'pass_fast_rtl_dce'.

The following will be unnecessary assuming that Richard's proposed
"Give fast DCE a separate dirty flag" gets accepted, but may still be
useful if we follow through with the idea to enable (parts of)
'pass_postreload' for nvptx (as discussed with Roger), so, for later:

>> The following makes these two cases work, but evidently needs a lot more
>> analysis: a lot of other passes are enabled that may be anything between
>> beneficial and harmful for 'targetm.no_register_allocation'/nvptx.
>>
>> --- gcc/passes.cc
>> +++ gcc/passes.cc
>> @@ -676,17 +676,17 @@ const pass_data pass_data_postreload =
>>  class pass_postreload : public rtl_opt_pass
>>  {
>>  public:
>>pass_postreload (gcc::context *ctxt)
>>  : rtl_opt_pass (pass_data_postreload, ctxt)
>>{}
>>  
>>/* opt_pass methods: */
>> -  bool gate (function *) final override { return reload_completed; }
>> +  bool gate (function *) final override { return reload_completed || 
>> targetm.no_register_allocation; }
>> --- gcc/regcprop.cc
>> +++ gcc/regcprop.cc
>> @@ -1305,17 +1305,17 @@ class pass_cprop_hardreg : public rtl_opt_pass
>>  public:
>>pass_cprop_hardreg (gcc::context *ctxt)
>>  : rtl_opt_pass (pass_data_cprop_hardreg, ctxt)
>>{}
>>  
>>/* opt_pass methods: */
>>bool gate (function *) final override
>>  {
>> -  return (optimize > 0 && (flag_cprop_registers));
>> +  return (optimize > 0 && flag_cprop_registers && 
>> !targetm.no_register_allocation);
>>  }
>
> Also, that quickly ICEs; more '[...] && !targetm.no_register_allocation'
> are needed elsewhere, at least.
>
> The following simpler thing, however, does work; move 'pass_fast_rtl_dce'
> out of 'pass_postreload':
>
> --- gcc/passes.cc
> +++ gcc/passes.cc
> @@ -677,14 +677,15 @@ class pass_postreload : public rtl_opt_pass
>  {
>  public:
>pass_postreload (gcc::context *ctxt)
>  : rtl_opt_pass (pass_data_postreload, ctxt)
>{}
>  
>/* opt_pass methods: */
> +  opt_pass * clone () final override { return new pass_postreload 
> (m_ctxt); }
>bool gate (function *) final override { return reload_completed; }
>  
>  }; // class pass_postreload
> --- gcc/passes.def
> +++ gcc/passes.def
> @@ -529,7 +529,10 @@ along with GCC; see the file COPYING3.  If not see
>   NEXT_PASS (pass_regrename);
>   NEXT_PASS (pass_fold_mem_offsets);
>   NEXT_PASS (pass_cprop_hardreg);
> - NEXT_PASS (pass_fast_rtl_dce);
> +  POP_INSERT_PASSES ()
> +  NEXT_PASS (pass_fast_rtl_dce);
> +  NEXT_PASS (pass_postreload);
> +  PUSH_INSERT_PASSES_WITHIN (pass_postreload)
>   NEXT_PASS (pass_reorder_blocks);
>   NEXT_PASS (pass_leaf_regs);
>   NEXT_PASS (pass_split_before_sched2);
>
> This (only) cleans up "the mess that 'pass_late_combine' created"; no
> further changes in GCC target libraries for nvptx.  (For avoidance of
> doubt: "mess" is a great exaggeration here.)

But that then disturbs non-nvptx targets; see (prerequisite)

"Handle 'NUM' in 'PUSH_INSERT_PASSES_WITHIN'" for why.

Then, see the attached -- just for later, for now --
"WIP Move 'pass_fast_rtl_dce' from 'pass_postreload' into 
'pass_late_compilation'"
for how to make this work properly.  (This also puts back
'pass_fast_rtl_dce' into 'pass_late_compilation' instead of running it
unconditionally, in order to not change any behavior in that regard.)


Grüße
 Thomas


>>> But: should we expect '-fno-late-combine-instructions' vs.
>>> '-flate-combine-instructions' to behave in the sa

Re: [PATCH v1 3/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 3

2024-07-01 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-07-01 09:35
To: gcc-patches
CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li
Subject: [PATCH v1 3/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM 
form 3
From: Pan Li 
 
This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 3.  Aka:
 
Form 3:
  #define DEF_SAT_U_ADD_IMM_FMT_3(T)   \
  T __attribute__((noinline))  \
  sat_u_add_imm_##T##_fmt_3 (T x)  \
  {\
T ret; \
return __builtin_add_overflow (x, 8, &ret) ? -1 : ret; \
  }
 
DEF_SAT_U_ADD_IMM_FMT_3(uint64_t)
 
The below test is passed for this patch.
* The rv64gcv regression test.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-10.c: New test.
* gcc.target/riscv/sat_u_add_imm-11.c: New test.
* gcc.target/riscv/sat_u_add_imm-12.c: New test.
* gcc.target/riscv/sat_u_add_imm-9.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-10.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-11.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-12.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-9.c: New test.
 
Signed-off-by: Pan Li 
---
gcc/testsuite/gcc.target/riscv/sat_arith.h| 11 +
.../gcc.target/riscv/sat_u_add_imm-10.c   | 21 +
.../gcc.target/riscv/sat_u_add_imm-11.c   | 18 
.../gcc.target/riscv/sat_u_add_imm-12.c   | 17 +++
.../gcc.target/riscv/sat_u_add_imm-9.c| 19 
.../gcc.target/riscv/sat_u_add_imm-run-10.c   | 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-11.c   | 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-12.c   | 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-9.c| 46 +++
9 files changed, 270 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-9.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-10.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-11.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-12.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-9.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index d94f0fd602c..83b294db476 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -74,12 +74,23 @@ sat_u_add_imm##IMM##_##T##_fmt_2 (T x)  \
   return (T)(x + IMM) < x ? -1 : (x + IMM); \
}
+#define DEF_SAT_U_ADD_IMM_FMT_3(T, IMM)\
+T __attribute__((noinline))\
+sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \
+{  \
+  T ret;   \
+  return __builtin_add_overflow (x, IMM, &ret) ? -1 : ret; \
+}
+
#define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
#define RUN_SAT_U_ADD_IMM_FMT_2(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
+#define RUN_SAT_U_ADD_IMM_FMT_3(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_3(x) != expect) __builtin_abort ()
+
/**/
/* Saturation Sub (Unsigned and Signed)   */
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c
new file mode 100644
index 000..24cdd267cca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm3_uint16_t_fmt_3:
+** addi\s+[atx][0-9]+,\s*a0,\s*3
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_3(uint16_t, 3)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c
new file mode 100644
index 000..f

Re: [PATCH v1 2/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 2

2024-07-01 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-07-01 09:35
To: gcc-patches
CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li
Subject: [PATCH v1 2/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM 
form 2
From: Pan Li 
 
This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 2.  Aka:
 
Form 2:
  #define DEF_SAT_U_ADD_IMM_FMT_2(T)  \
  T __attribute__((noinline)) \
  sat_u_add_imm_##T##_fmt_2 (T x) \
  {   \
return (T)(x + 9) < x ? -1 : (x + 9); \
  }
 
DEF_SAT_U_ADD_IMM_FMT_2(uint64_t)
 
The below test is passed for this patch.
* The rv64gcv regression test.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-5.c: New test.
* gcc.target/riscv/sat_u_add_imm-6.c: New test.
* gcc.target/riscv/sat_u_add_imm-7.c: New test.
* gcc.target/riscv/sat_u_add_imm-8.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-5.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-6.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-7.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-8.c: New test.
 
Signed-off-by: Pan Li 
---
gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 
.../gcc.target/riscv/sat_u_add_imm-5.c| 19 
.../gcc.target/riscv/sat_u_add_imm-6.c| 21 +
.../gcc.target/riscv/sat_u_add_imm-7.c| 18 
.../gcc.target/riscv/sat_u_add_imm-8.c| 17 +++
.../gcc.target/riscv/sat_u_add_imm-run-5.c| 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-6.c| 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-7.c| 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-8.c| 46 +++
9 files changed, 269 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-8.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-8.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 4ec4ec36cc1..d94f0fd602c 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -67,9 +67,19 @@ sat_u_add_imm##IMM##_##T##_fmt_1 (T x)   \
   return (T)(x + IMM) >= x ? (x + IMM) : -1; \
}
+#define DEF_SAT_U_ADD_IMM_FMT_2(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_add_imm##IMM##_##T##_fmt_2 (T x)  \
+{   \
+  return (T)(x + IMM) < x ? -1 : (x + IMM); \
+}
+
#define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
+#define RUN_SAT_U_ADD_IMM_FMT_2(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
+
/**/
/* Saturation Sub (Unsigned and Signed)   */
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c
new file mode 100644
index 000..19b502db6c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm9_uint8_t_fmt_2:
+** addi\s+[atx][0-9]+,\s*a0,\s*9
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_2(uint8_t, 9)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c
new file mode 100644
index 000..0317370b67e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm3_uint16_t_fmt_2:
+** addi\s+[atx][0-9]+,\s*a0,\s*3
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** slt

Re: [PATCH v1 4/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 4

2024-07-01 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-07-01 09:35
To: gcc-patches
CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li
Subject: [PATCH v1 4/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM 
form 4
From: Pan Li 
 
This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 4.  Aka:
 
Form 4:
  #define DEF_SAT_U_ADD_IMM_FMT_4(T)\
  T __attribute__((noinline))   \
  sat_u_add_imm_##T##_fmt_4 (T x)   \
  { \
T ret;  \
return __builtin_add_overflow (x, 9, &ret) == 0 ? ret : -1; \
  }
 
DEF_SAT_U_ADD_IMM_FMT_4(uint64_t)
 
The below test is passed for this patch.
* The rv64gcv regression test.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-13.c: New test.
* gcc.target/riscv/sat_u_add_imm-14.c: New test.
* gcc.target/riscv/sat_u_add_imm-15.c: New test.
* gcc.target/riscv/sat_u_add_imm-16.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-13.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-14.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-15.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-16.c: New test.
 
Signed-off-by: Pan Li 
---
gcc/testsuite/gcc.target/riscv/sat_arith.h| 11 +
.../gcc.target/riscv/sat_u_add_imm-13.c   | 19 
.../gcc.target/riscv/sat_u_add_imm-14.c   | 21 +
.../gcc.target/riscv/sat_u_add_imm-15.c   | 18 
.../gcc.target/riscv/sat_u_add_imm-16.c   | 17 +++
.../gcc.target/riscv/sat_u_add_imm-run-13.c   | 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-14.c   | 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-15.c   | 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-16.c   | 46 +++
9 files changed, 270 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-15.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-16.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-13.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-14.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-15.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-16.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 83b294db476..75442c94dc1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -82,6 +82,14 @@ sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \
   return __builtin_add_overflow (x, IMM, &ret) ? -1 : ret; \
}
+#define DEF_SAT_U_ADD_IMM_FMT_4(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_add_imm##IMM##_##T##_fmt_4 (T x)  \
+{   \
+  T ret;\
+  return __builtin_add_overflow (x, IMM, &ret) == 0 ? ret : -1; \
+}
+
#define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
@@ -91,6 +99,9 @@ sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \
#define RUN_SAT_U_ADD_IMM_FMT_3(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_3(x) != expect) __builtin_abort ()
+#define RUN_SAT_U_ADD_IMM_FMT_4(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
+
/**/
/* Saturation Sub (Unsigned and Signed)   */
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c
new file mode 100644
index 000..a3b2679233c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm9_uint8_t_fmt_4:
+** addi\s+[atx][0-9]+,\s*a0,\s*9
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_4(uint8_t, 9)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add

Re: [PATCH v1 1/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 1

2024-07-01 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-07-01 09:35
To: gcc-patches
CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li
Subject: [PATCH v1 1/4] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM 
form 1
From: Pan Li 
 
This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 1.  Aka:
 
Form 1:
  #define DEF_SAT_U_ADD_IMM_FMT_1(T)   \
  T __attribute__((noinline))  \
  sat_u_add_imm_##T##_fmt_1 (T x)  \
  {\
return (T)(x + 9) >= x ? (x + 9) : -1; \
  }
 
DEF_SAT_U_ADD_IMM_FMT_1(uint64_t)
 
The below test is passed for this patch.
* The rv64gcv regression test.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-1.c: New test.
* gcc.target/riscv/sat_u_add_imm-2.c: New test.
* gcc.target/riscv/sat_u_add_imm-3.c: New test.
* gcc.target/riscv/sat_u_add_imm-4.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-1.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-2.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-3.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-4.c: New test.
 
Signed-off-by: Pan Li 
---
gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 
.../gcc.target/riscv/sat_u_add_imm-1.c| 19 
.../gcc.target/riscv/sat_u_add_imm-2.c| 21 +
.../gcc.target/riscv/sat_u_add_imm-3.c| 18 
.../gcc.target/riscv/sat_u_add_imm-4.c| 17 +++
.../gcc.target/riscv/sat_u_add_imm-run-1.c| 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-2.c| 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-3.c| 46 +++
.../gcc.target/riscv/sat_u_add_imm-run-4.c| 46 +++
9 files changed, 269 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-run-4.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 0c2e44af718..4ec4ec36cc1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -60,6 +60,16 @@ sat_u_add_##T##_fmt_6 (T x, T y)\
#define RUN_SAT_U_ADD_FMT_5(T, x, y) sat_u_add_##T##_fmt_5(x, y)
#define RUN_SAT_U_ADD_FMT_6(T, x, y) sat_u_add_##T##_fmt_6(x, y)
+#define DEF_SAT_U_ADD_IMM_FMT_1(T, IMM)  \
+T __attribute__((noinline))  \
+sat_u_add_imm##IMM##_##T##_fmt_1 (T x)   \
+{\
+  return (T)(x + IMM) >= x ? (x + IMM) : -1; \
+}
+
+#define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
+
/**/
/* Saturation Sub (Unsigned and Signed)   */
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c
new file mode 100644
index 000..14e9b7595a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm9_uint8_t_fmt_1:
+** addi\s+[atx][0-9]+,\s*a0,\s*9
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_1(uint8_t, 9)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c
new file mode 100644
index 000..c1a3c6ff21d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm3_uint16_t_fmt_1:
+** addi\s+[atx][0-9]+,\s*a0,\s*3
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0

[PATCH] Give fast DCE a separate dirty flag

2024-07-01 Thread Richard Sandiford
Thomas pointed out that we sometimes failed to eliminate some dead code
(specifically clobbers of otherwise unused registers) on nvptx when
late-combine is enabled.  This happens because:

- combine is able to optimise the function in a way that exposes dead code.
  This leaves the df information in a "dirty" state.

- late_combine calls df_analyze without DF_LR_RUN_DCE set.
  This updates the df information and clears the "dirty" state.

- late_combine doesn't find any extra optimisations, and so leaves
  the df information up-to-date.

- if_after_combine (ce2) calls df_analyze with DF_LR_RUN_DCE set.
  Because the df information is already up-to-date, fast DCE is
  not run.

The upshot is that running late-combine has the effect of suppressing
a DCE opportunity that would have been noticed without late-combine.

I think this shows that we should track the state of the DCE separately
from the LR problem.  Every pass updates the latter, but not all passes
update the former.

Bootstrapped & regression-tested on aarch64-linux-gnu.  Thomas also
confirms that it fixes the nvptx problem.  OK to install?

Richard


gcc/
* df.h (DF_LR_DCE): New df_problem_id.
(df_lr_dce): New macro.
* df-core.cc (rest_of_handle_df_finish): Check for a null free_fun.
* df-problems.cc (df_lr_finalize): Split out fast DCE handling to...
(df_lr_dce_finalize): ...this new function.
(problem_LR_DCE): New df_problem.
(df_lr_add_problem): Register LR_DCE rather than LR itself.
* dce.cc (fast_dce): Clear df_lr_dce->solutions_dirty.
---
 gcc/dce.cc |  3 ++
 gcc/df-core.cc |  3 +-
 gcc/df-problems.cc | 96 --
 gcc/df.h   |  2 +
 4 files changed, 74 insertions(+), 30 deletions(-)

diff --git a/gcc/dce.cc b/gcc/dce.cc
index be1a2a87732..04e8d98818d 100644
--- a/gcc/dce.cc
+++ b/gcc/dce.cc
@@ -1182,6 +1182,9 @@ fast_dce (bool word_level)
   BITMAP_FREE (processed);
   BITMAP_FREE (redo_out);
   BITMAP_FREE (all_blocks);
+
+  /* Both forms of DCE should make further DCE unnecessary.  */
+  df_lr_dce->solutions_dirty = false;
 }
 
 
diff --git a/gcc/df-core.cc b/gcc/df-core.cc
index b0e8a88d433..8fd778a8618 100644
--- a/gcc/df-core.cc
+++ b/gcc/df-core.cc
@@ -806,7 +806,8 @@ rest_of_handle_df_finish (void)
   for (i = 0; i < df->num_problems_defined; i++)
 {
   struct dataflow *dflow = df->problems_in_order[i];
-  dflow->problem->free_fun ();
+  if (dflow->problem->free_fun)
+   dflow->problem->free_fun ();
 }
 
   free (df->postorder);
diff --git a/gcc/df-problems.cc b/gcc/df-problems.cc
index 88ee0dd67fc..bfd24bd1e86 100644
--- a/gcc/df-problems.cc
+++ b/gcc/df-problems.cc
@@ -1054,37 +1054,10 @@ df_lr_transfer_function (int bb_index)
 }
 
 
-/* Run the fast dce as a side effect of building LR.  */
-
 static void
-df_lr_finalize (bitmap all_blocks)
+df_lr_finalize (bitmap)
 {
   df_lr->solutions_dirty = false;
-  if (df->changeable_flags & DF_LR_RUN_DCE)
-{
-  run_fast_df_dce ();
-
-  /* If dce deletes some instructions, we need to recompute the lr
-solution before proceeding further.  The problem is that fast
-dce is a pessimestic dataflow algorithm.  In the case where
-it deletes a statement S inside of a loop, the uses inside of
-S may not be deleted from the dataflow solution because they
-were carried around the loop.  While it is conservatively
-correct to leave these extra bits, the standards of df
-require that we maintain the best possible (least fixed
-point) solution.  The only way to do that is to redo the
-iteration from the beginning.  See PR35805 for an
-example.  */
-  if (df_lr->solutions_dirty)
-   {
- df_clear_flags (DF_LR_RUN_DCE);
- df_lr_alloc (all_blocks);
- df_lr_local_compute (all_blocks);
- df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks);
- df_lr_finalize (all_blocks);
- df_set_flags (DF_LR_RUN_DCE);
-   }
-}
 }
 
 
@@ -1266,6 +1239,69 @@ static const struct df_problem problem_LR =
   false   /* Reset blocks on dropping out of 
blocks_to_analyze.  */
 };
 
+/* Run the fast DCE after building LR.  This is a separate problem so that
+   the "dirty" flag is only cleared after a DCE pass is actually run.  */
+
+static void
+df_lr_dce_finalize (bitmap all_blocks)
+{
+  if (!(df->changeable_flags & DF_LR_RUN_DCE))
+return;
+
+  /* Also clears df_lr_dce->solutions_dirty.  */
+  run_fast_df_dce ();
+
+  /* If dce deletes some instructions, we need to recompute the lr
+ solution before proceeding further.  The problem is that fast
+ dce is a pessimestic dataflow algorithm.  In the case where
+ it deletes a statement S inside of a loop, the uses inside of
+ S may not be deleted from the dataflow solution because they
+ were carried around the loop.  While it

[PATCH 1/3] s390: Emulate vec_cmp{eq,gt,gtu} for 128-bit integers

2024-07-01 Thread Stefan Schulze Frielinghaus
Mode iterator V_HW enables V1TI for target VXE which means
vec_cmpv1tiv1ti becomes available which leads to an ICE since there is
no corresponding insn.

Fixed by emulating comparisons and enabling mode V1TI unconditionally
for V_HW.  For the sake of symmetry, I also added TI mode to V_HW since
TF mode is already included.  As a consequence the consumers of V_HW
vec_{splat,slb,sld,sldw,sldb,srdb,srab,srb,test_mask_int,test_mask}
also become available for 128-bit integers.

This fixes gcc.c-torture/execute/pr105613.c and gcc.dg/pr106063.c.

gcc/ChangeLog:

* config/s390/vector.md (V_HW): Enable V1TI unconditionally and
add TI.
(vec_cmpu): Add 128-bit integer
variants.
(*vec_cmpeq_nocc_emu): Emulate operation.
(*vec_cmpgt_nocc_emu): Emulate operation.
(*vec_cmpgtu_nocc_emu): Emulate operation.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/vec-cmp-emu-1.c: New test.
* gcc.target/s390/vector/vec-cmp-emu-2.c: New test.
* gcc.target/s390/vector/vec-cmp-emu-3.c: New test.
---
 Bootstrapped and regtested on s390.  Ok for mainline and GCC 14?

 gcc/config/s390/vector.md | 113 --
 .../gcc.target/s390/vector/vec-cmp-emu-1.c|  35 ++
 .../gcc.target/s390/vector/vec-cmp-emu-2.c|  18 +++
 .../gcc.target/s390/vector/vec-cmp-emu-3.c|  17 +++
 4 files changed, 171 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-3.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 40de0c75a7c..032ec44542c 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -30,7 +30,7 @@
 ; V_HW2 is for having two iterators expanding independently e.g. vcond.
 ; It's similar to V_HW, but not fully identical: V1TI is not included, because
 ; there are no 128-bit compares.
-(define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI (V1TI "TARGET_VXE") V2DF
+(define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI V1TI TI V2DF
 (V4SF "TARGET_VXE") (V1TF "TARGET_VXE")
 (TF "TARGET_VXE")])
 (define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")
@@ -50,6 +50,7 @@
 (define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
 (define_mode_iterator VI_HW_HS  [V8HI  V4SI])
 (define_mode_iterator VI_HW_QH  [V16QI V8HI])
+(define_mode_iterator VI_HW_T   [V1TI TI])
 
 ; Directly supported vector modes with a certain number of elements
 (define_mode_iterator V_HW_2   [V2DI V2DF])
@@ -151,7 +152,7 @@
(V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI 
"V8HI")
(V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
(V1DI "V1DI") (V2DI "V2DI")
-   (V1TI "V1TI")
+   (V1TI "V1TI") (TI "V1TI")
(V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI")
(V1DF "V1DI") (V2DF "V2DI")
(V1TF "V1TI") (TF "V1TI")])
@@ -160,7 +161,7 @@
(V1HI "v1hi") (V2HI "v2hi") (V4HI "v4hi") (V8HI 
"v8hi")
(V1SI "v1si") (V2SI "v2si") (V4SI "v4si")
(V1DI "v1di") (V2DI "v2di")
-   (V1TI "v1ti")
+   (V1TI "v1ti") (TI "v1ti")
(V1SF "v1si") (V2SF "v2si") (V4SF "v4si")
(V1DF "v1di") (V2DF "v2di")
(V1TF "v1ti") (TF   "v1ti")])
@@ -1956,11 +1957,11 @@
   DONE;
 })
 
-(define_expand "vec_cmpu"
-  [(set (match_operand:VI_HW0 "register_operand" "")
-   (match_operator:VI_HW   1 ""
- [(match_operand:VI_HW 2 "register_operand" "")
-  (match_operand:VI_HW 3 "register_operand" "")]))]
+(define_expand "vec_cmpu"
+  [(set (match_operand:VIT_HW0 "register_operand" "")
+   (match_operator:VIT_HW   1 ""
+ [(match_operand:VIT_HW 2 "register_operand" "")
+  (match_operand:VIT_HW 3 "register_operand" "")]))]
   "TARGET_VX"
 {
   s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], 
operands[3]);
@@ -1975,6 +1976,94 @@
   "vc\t%v2,%v0,%v1"
   [(set_attr "op_type" "VRR")])
 
+(define_insn_and_split "*vec_cmpeq_nocc_emu"
+  [(set (match_operand:VI_HW_T 0 "register_operand" "=v")
+   (eq:VI_HW_T (match_operand:VI_HW_T 1 "register_operand"  "v")
+   (match_operand:VI_HW_T 2 "register_operand"  "v")))]
+  "TARGET_VX"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 3)
+   (eq:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+   (vec_select:V2DI (match_dup 3) (parallel [(const_int 1) (const_int 
0)])))
+   (set (match_dup 3)
+   (and:V2DI (match_dup 3) (match_dup 4)))
+   (set (m

Ping^3 [PATCH-1v3] Value Range: Add range op for builtin isinf

2024-07-01 Thread HAO CHEN GUI
Hi,
  Gently ping it.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653096.html

Thanks
Gui Haochen

在 2024/6/24 9:40, HAO CHEN GUI 写道:
> Hi,
>   Gently ping it.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653096.html
> 
> Thanks
> Gui Haochen
> 
> 在 2024/6/20 14:56, HAO CHEN GUI 写道:
>> Hi,
>>   Gently ping it.
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653096.html
>>
>> Thanks
>> Gui Haochen
>>
>> 在 2024/5/30 10:46, HAO CHEN GUI 写道:
>>> Hi,
>>>   The builtin isinf is not folded at the front end if the corresponding optab
>>> exists. This causes range evaluation to fail on targets that have
>>> optab_isinf. For instance, range-sincos.c will fail on targets that
>>> have optab_isinf, as it calls builtin_isinf.
>>>
>>>   This patch fixed the problem by adding range op for builtin isinf.
>>>
>>>   Compared with previous version, the main change is to set the range to
>>> 1 if it's infinite number otherwise to 0.
>>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652219.html
>>>
>>>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
>>> regressions. Is it OK for the trunk?
>>>
>>> Thanks
>>> Gui Haochen
>>>
>>>
>>> ChangeLog
>>> Value Range: Add range op for builtin isinf
>>>
>>> The builtin isinf is not folded at front end if the corresponding optab
>>> exists.  So the range op for isinf is needed for value range analysis.
>>> This patch adds range op for builtin isinf.
>>>
>>> gcc/
>>> * gimple-range-op.cc (class cfn_isinf): New.
>>> (op_cfn_isinf): New variables.
>>> (gimple_range_op_handler::maybe_builtin_call): Handle
>>> CASE_FLT_FN (BUILT_IN_ISINF).
>>>
>>> gcc/testsuite/
>>> * gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c: New test.
>>>
>>> patch.diff
>>> diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
>>> index 55dfbb23ce2..4e60a42eaac 100644
>>> --- a/gcc/gimple-range-op.cc
>>> +++ b/gcc/gimple-range-op.cc
>>> @@ -1175,6 +1175,63 @@ private:
>>>bool m_is_pos;
>>>  } op_cfn_goacc_dim_size (false), op_cfn_goacc_dim_pos (true);
>>>
>>> +// Implement range operator for CFN_BUILT_IN_ISINF
>>> +class cfn_isinf : public range_operator
>>> +{
>>> +public:
>>> +  using range_operator::fold_range;
>>> +  using range_operator::op1_range;
>>> +  virtual bool fold_range (irange &r, tree type, const frange &op1,
>>> +  const irange &, relation_trio) const override
>>> +  {
>>> +if (op1.undefined_p ())
>>> +  return false;
>>> +
>>> +if (op1.known_isinf ())
>>> +  {
>>> +   wide_int one = wi::one (TYPE_PRECISION (type));
>>> +   r.set (type, one, one);
>>> +   return true;
>>> +  }
>>> +
>>> +if (op1.known_isnan ()
>>> +   || (!real_isinf (&op1.lower_bound ())
>>> +   && !real_isinf (&op1.upper_bound ())))
>>> +  {
>>> +   r.set_zero (type);
>>> +   return true;
>>> +  }
>>> +
>>> +r.set_varying (type);
>>> +return true;
>>> +  }
>>> +  virtual bool op1_range (frange &r, tree type, const irange &lhs,
>>> + const frange &, relation_trio) const override
>>> +  {
>>> +if (lhs.undefined_p ())
>>> +  return false;
>>> +
>>> +if (lhs.zero_p ())
>>> +  {
>>> +   nan_state nan (true);
>>> +   r.set (type, real_min_representable (type),
>>> +  real_max_representable (type), nan);
>>> +   return true;
>>> +  }
>>> +
>>> +if (!range_includes_zero_p (lhs))
>>> +  {
>>> +   // The range is [-INF,-INF][+INF,+INF], but it can't be represented.
>>> +   // Set range to [-INF,+INF]
>>> +   r.set_varying (type);
>>> +   r.clear_nan ();
>>> +   return true;
>>> +  }
>>> +
>>> +r.set_varying (type);
>>> +return true;
>>> +  }
>>> +} op_cfn_isinf;
>>>
>>>  // Implement range operator for CFN_BUILT_IN_
>>>  class cfn_parity : public range_operator
>>> @@ -1268,6 +1325,11 @@ gimple_range_op_handler::maybe_builtin_call ()
>>>m_operator = &op_cfn_signbit;
>>>break;
>>>
>>> +CASE_FLT_FN (BUILT_IN_ISINF):
>>> +  m_op1 = gimple_call_arg (call, 0);
>>> +  m_operator = &op_cfn_isinf;
>>> +  break;
>>> +
>>>  CASE_CFN_COPYSIGN_ALL:
>>>m_op1 = gimple_call_arg (call, 0);
>>>m_op2 = gimple_call_arg (call, 1);
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c 
>>> b/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c
>>> new file mode 100644
>>> index 000..468f1bcf5c7
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c
>>> @@ -0,0 +1,44 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2 -fdump-tree-evrp" } */
>>> +
>>> +#include 
>>> +void link_error();
>>> +
>>> +void
>>> +test1 (double x)
>>> +{
>>> +  if (x > __DBL_MAX__ && !__builtin_isinf (x))
>>> +link_error ();
>>> +  if (x < -__DBL_MAX__ && !__builtin_isinf (x))
>>> +link_error ();
>>> +}
>>> +
>>> +void
>>> +test2 (float x)
>>> +{
>>> +  if (x > __FLT_MAX__ && !__builtin_isinf (x))
>>> +link_error ();
>>> +  if (x < -__FLT_MAX__ && !__builtin_isinf (x))
>>> +link_error ()

[x86 PATCH] Add additional variant of bswaphisi2_lowpart peephole2.

2024-07-01 Thread Roger Sayle

This patch adds an additional variation of the peephole2 used to convert
bswaphisi2_lowpart into rotlhi3_1_slp, which converts xchgb %ah,%al into
rotw if the flags register isn't live.  The motivating example is:

void ext(int x);
void foo(int x)
{
  ext((x&~0xffff)|((x>>8)&0xff)|((x&0xff)<<8));
}

where GCC with -O2 currently produces:

foo:movl%edi, %eax
rolw$8, %ax
movl%eax, %edi
jmp ext

The issue is that the original xchgb (bswaphisi2_lowpart) can only be
performed in "Q" registers that allow the %?h register to be used, so
reload generates the above two movl.  However, it's later in peephole2
where we see that CC_FLAGS can be clobbered, so we can use a rotate word,
which is more forgiving with register allocations.  With the additional
peephole2 proposed here, we now generate:

foo:rolw$8, %di
jmp ext


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2024-07-01  Roger Sayle  

gcc/ChangeLog
* config/i386/i386.md (bswaphisi2_lowpart peephole2): New
peephole2 variant to eliminate register shuffling.

gcc/testsuite/ChangeLog
* gcc.target/i386/xchg-4.c: New test case.


Thanks again,
Roger
--

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b6ccb1e..9bc0eb7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -21459,6 +21459,30 @@
  (clobber (reg:CC FLAGS_REG))])]
   "operands[0] = gen_lowpart (HImode, operands[0]);")
 
+;; Variant of above peephole2 to improve register allocation.
+(define_peephole2
+  [(set (match_operand:SI 0 "general_reg_operand")
+(match_operand:SI 1 "register_operand"))
+   (set (match_dup 0)
+   (ior:SI (and:SI (match_dup 0)
+   (const_int -65536))
+   (lshiftrt:SI (bswap:SI (match_dup 0))
+(const_int 16))))
+   (set (match_operand:SI 2 "general_reg_operand") (match_dup 0))]
+  "!(TARGET_USE_XCHGB ||
+ TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && peep2_regno_dead_p (0, FLAGS_REG)
+   && peep2_reg_dead_p(3, operands[0])"
+  [(parallel
+[(set (strict_low_part (match_dup 3))
+ (rotate:HI (match_dup 3) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+  if (!rtx_equal_p (operands[1], operands[2]))
+emit_move_insn (operands[2], operands[1]);
+  operands[3] = gen_lowpart (HImode, operands[2]);
+})
+
 (define_expand "paritydi2"
   [(set (match_operand:DI 0 "register_operand")
(parity:DI (match_operand:DI 1 "register_operand")))]
diff --git a/gcc/testsuite/gcc.target/i386/xchg-4.c 
b/gcc/testsuite/gcc.target/i386/xchg-4.c
new file mode 100644
index 000..de099e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/xchg-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+void ext(int x);
+void foo(int x) 
+{
+ext((x&~0xffff)|((x>>8)&0xff)|((x&0xff)<<8));
+}
+
+/* { dg-final { scan-assembler "rolw" } } */
+/* { dg-final { scan-assembler-not "mov" } } */


Re: [PATCH] RISC-V: use fclass insns to implement isfinite and isnormal builtins

2024-07-01 Thread HAO CHEN GUI
The problem should be fixed once my value range patches are accepted.
[PATCH-1v3] Value Range: Add range op for builtin isinf
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653096.html
[PATCH-2v4] Value Range: Add range op for builtin isfinite
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653094.html
[PATCH-3v2] Value Range: Add range op for builtin isnormal
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653095.html



在 2024/6/29 9:35, Vineet Gupta 写道:
> 
> 
> On 6/28/24 17:53, Vineet Gupta wrote:
>> Currently isfinite and isnormal use float compare instructions with fp
>> flags save/restored around them. Our perf team complained this could be
>> costly in uarch. RV Base ISA already has FCLASS.{d,s,h} instruction to
>> do FP compares w/o disturbing FP exception flags.
>>
>> Coincidentally, upstream got support for the corresponding optabs
>> just a few days back. All that is needed is to wire these up in the
>> backend.
>>
>> I was also hoping to get __builtin_inf() done but unfortunately it
>> requires a little more rtl foo/bar to implement a tri-modal return.
>>
>> Currently going thru CI testing.
> 
> My local testing spotted one additional failure.
> 
> FAIL: g++.dg/opt/pr107569.C  -std=gnu++20  scan-tree-dump-times vrp1
> "return 1;" 2
> 
> The reason being
> 
> bool
> bar (double x)
> {
>   [[assume (std::isfinite (x))]];
>   return std::isfinite (x);
> }
> 
> generating the new seq
> 
> .LFB4:
>     fclass.d    a0,fa0
>     andi    a0,a0,126
>     snez    a0,a0
>     ret
> 
> vs.
> 
>     li    a0,1
>     ret
> 
> I have a hunch this requires the pending value range patch from Hao Chen
> GUI.
> 
> Thx,
> -Vineet
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653094.html


Ping^3 [PATCH-3v2] Value Range: Add range op for builtin isnormal

2024-07-01 Thread HAO CHEN GUI
Hi,
  Gently ping it.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653095.html

Thanks
Gui Haochen

在 2024/6/24 9:41, HAO CHEN GUI 写道:
> Hi,
>   Gently ping it.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653095.html
> 
> Thanks
> Gui Haochen
> 
> 在 2024/6/20 14:58, HAO CHEN GUI 写道:
>> Hi,
>>   Gently ping it.
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653095.html
>>
>> Thanks
>> Gui Haochen
>>
>> 在 2024/5/30 10:46, HAO CHEN GUI 写道:
>>> Hi,
>>>   This patch adds the range op for builtin isnormal. It also adds two
>>> helper functions in frange to detect the range of normal floating-point
>>> numbers and the range of subnormal numbers or zero.
>>>
>>>   Compared to previous version, the main change is to set the range to
>>> 1 if it's normal number otherwise to 0.
>>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652221.html
>>>
>>>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
>>> regressions. Is it OK for the trunk?
>>>
>>> Thanks
>>> Gui Haochen
>>>
>>> ChangeLog
>>> Value Range: Add range op for builtin isnormal
>>>
>>> The former patch adds optab for builtin isnormal. Thus builtin isnormal
>>> might not be folded at front end.  So the range op for isnormal is needed
>>> for value range analysis.  This patch adds range op for builtin isnormal.
>>>
>>> gcc/
>>> * gimple-range-op.cc (class cfn_isnormal): New.
>>> (op_cfn_isnormal): New variables.
>>> (gimple_range_op_handler::maybe_builtin_call): Handle
>>> CFN_BUILT_IN_ISNORMAL.
>>> * value-range.h (class frange): Declare known_isnormal and
>>> known_isdenormal_or_zero.
>>> (frange::known_isnormal): Define.
>>> (frange::known_isdenormal_or_zero): Define.
>>>
>>> gcc/testsuite/
>>> * gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c: New test.
>>>
>>> patch.diff
>>> diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
>>> index 5ec5c828fa4..6787f532f11 100644
>>> --- a/gcc/gimple-range-op.cc
>>> +++ b/gcc/gimple-range-op.cc
>>> @@ -1289,6 +1289,61 @@ public:
>>>}
>>>  } op_cfn_isfinite;
>>>
>>> +//Implement range operator for CFN_BUILT_IN_ISNORMAL
>>> +class cfn_isnormal :  public range_operator
>>> +{
>>> +public:
>>> +  using range_operator::fold_range;
>>> +  using range_operator::op1_range;
>>> +  virtual bool fold_range (irange &r, tree type, const frange &op1,
>>> +  const irange &, relation_trio) const override
>>> +  {
>>> +if (op1.undefined_p ())
>>> +  return false;
>>> +
>>> +if (op1.known_isnormal ())
>>> +  {
>>> +   wide_int one = wi::one (TYPE_PRECISION (type));
>>> +   r.set (type, one, one);
>>> +   return true;
>>> +  }
>>> +
>>> +if (op1.known_isnan ()
>>> +   || op1.known_isinf ()
>>> +   || op1.known_isdenormal_or_zero ())
>>> +  {
>>> +   r.set_zero (type);
>>> +   return true;
>>> +  }
>>> +
>>> +r.set_varying (type);
>>> +return true;
>>> +  }
>>> +  virtual bool op1_range (frange &r, tree type, const irange &lhs,
>>> + const frange &, relation_trio) const override
>>> +  {
>>> +if (lhs.undefined_p ())
>>> +  return false;
>>> +
>>> +if (lhs.zero_p ())
>>> +  {
>>> +   r.set_varying (type);
>>> +   return true;
>>> +  }
>>> +
>>> +if (!range_includes_zero_p (lhs))
>>> +  {
>>> +   nan_state nan (false);
>>> +   r.set (type, real_min_representable (type),
>>> +  real_max_representable (type), nan);
>>> +   return true;
>>> +  }
>>> +
>>> +r.set_varying (type);
>>> +return true;
>>> +  }
>>> +} op_cfn_isnormal;
>>> +
>>>  // Implement range operator for CFN_BUILT_IN_
>>>  class cfn_parity : public range_operator
>>>  {
>>> @@ -1391,6 +1446,11 @@ gimple_range_op_handler::maybe_builtin_call ()
>>>m_operator = &op_cfn_isfinite;
>>>break;
>>>
>>> +case CFN_BUILT_IN_ISNORMAL:
>>> +  m_op1 = gimple_call_arg (call, 0);
>>> +  m_operator = &op_cfn_isnormal;
>>> +  break;
>>> +
>>>  CASE_CFN_COPYSIGN_ALL:
>>>m_op1 = gimple_call_arg (call, 0);
>>>m_op2 = gimple_call_arg (call, 1);
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c 
>>> b/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c
>>> new file mode 100644
>>> index 000..c4df4d839b0
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c
>>> @@ -0,0 +1,37 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2 -fdump-tree-evrp" } */
>>> +
>>> +#include 
>>> +void link_error();
>>> +
>>> +void test1 (double x)
>>> +{
>>> +  if (x < __DBL_MAX__ && x > __DBL_MIN__ && !__builtin_isnormal (x))
>>> +link_error ();
>>> +
>>> +  if (x < -__DBL_MIN__ && x > -__DBL_MAX__ && !__builtin_isnormal (x))
>>> +link_error ();
>>> +}
>>> +
>>> +void test2 (float x)
>>> +{
>>> +  if (x < __FLT_MAX__ && x > __FLT_MIN__ && !__builtin_isnormal (x))
>>> +link_error ();
>>> +
>>> +  if (x < -__FLT_MIN__ && x > - __FLT_MAX__ && !__builtin_isnormal (x))
>>> +link_error ();
>>> +}
>>> +
>>> +void test3 (double x)
>

Ping^3 [PATCH-2v4] Value Range: Add range op for builtin isfinite

2024-07-01 Thread HAO CHEN GUI
Hi,
  Gently ping it.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653094.html

Thanks
Gui Haochen

在 2024/6/24 9:41, HAO CHEN GUI 写道:
> Hi,
>   Gently ping it.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653094.html
> 
> Thanks
> Gui Haochen
> 
> 在 2024/6/20 14:57, HAO CHEN GUI 写道:
>> Hi,
>>   Gently ping it.
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653094.html
>>
>> Thanks
>> Gui Haochen
>>
>> 在 2024/5/30 10:46, HAO CHEN GUI 写道:
>>> Hi,
>>>   This patch adds the range op for builtin isfinite.
>>>
>>>   Compared to previous version, the main change is to set the range to
>>> 1 if it's finite number otherwise to 0.
>>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652220.html
>>>
>>>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
>>> regressions. Is it OK for the trunk?
>>>
>>> Thanks
>>> Gui Haochen
>>>
>>> ChangeLog
>>> Value Range: Add range op for builtin isfinite
>>>
>>> The former patch adds optab for builtin isfinite. Thus builtin isfinite
>>> might not be folded at front end.  So the range op for isfinite is needed
>>> for value range analysis.  This patch adds range op for builtin isfinite.
>>>
>>> gcc/
>>> * gimple-range-op.cc (class cfn_isfinite): New.
>>> (op_cfn_isfinite): New variables.
>>> (gimple_range_op_handler::maybe_builtin_call): Handle
>>> CFN_BUILT_IN_ISFINITE.
>>>
>>> gcc/testsuite/
>>> * gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c: New test.
>>>
>>> patch.diff
>>> diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
>>> index 4e60a42eaac..5ec5c828fa4 100644
>>> --- a/gcc/gimple-range-op.cc
>>> +++ b/gcc/gimple-range-op.cc
>>> @@ -1233,6 +1233,62 @@ public:
>>>}
>>>  } op_cfn_isinf;
>>>
>>> +//Implement range operator for CFN_BUILT_IN_ISFINITE
>>> +class cfn_isfinite : public range_operator
>>> +{
>>> +public:
>>> +  using range_operator::fold_range;
>>> +  using range_operator::op1_range;
>>> +  virtual bool fold_range (irange &r, tree type, const frange &op1,
>>> +  const irange &, relation_trio) const override
>>> +  {
>>> +if (op1.undefined_p ())
>>> +  return false;
>>> +
>>> +if (op1.known_isfinite ())
>>> +  {
>>> +   wide_int one = wi::one (TYPE_PRECISION (type));
>>> +   r.set (type, one, one);
>>> +   return true;
>>> +  }
>>> +
>>> +if (op1.known_isnan ()
>>> +   || op1.known_isinf ())
>>> +  {
>>> +   r.set_zero (type);
>>> +   return true;
>>> +  }
>>> +
>>> +r.set_varying (type);
>>> +return true;
>>> +  }
>>> +  virtual bool op1_range (frange &r, tree type, const irange &lhs,
>>> + const frange &, relation_trio) const override
>>> +  {
>>> +if (lhs.undefined_p ())
>>> +  return false;
>>> +
>>> +if (lhs.zero_p ())
>>> +  {
>>> +   // The range is [-INF,-INF][+INF,+INF] NAN, but it can't be represented.
>>> +   // Set range to varying
>>> +   r.set_varying (type);
>>> +   return true;
>>> +  }
>>> +
>>> +if (!range_includes_zero_p (lhs))
>>> +  {
>>> +   nan_state nan (false);
>>> +   r.set (type, real_min_representable (type),
>>> +  real_max_representable (type), nan);
>>> +   return true;
>>> +  }
>>> +
>>> +r.set_varying (type);
>>> +return true;
>>> +  }
>>> +} op_cfn_isfinite;
>>> +
>>>  // Implement range operator for CFN_BUILT_IN_
>>>  class cfn_parity : public range_operator
>>>  {
>>> @@ -1330,6 +1386,11 @@ gimple_range_op_handler::maybe_builtin_call ()
>>>m_operator = &op_cfn_isinf;
>>>break;
>>>
>>> +case CFN_BUILT_IN_ISFINITE:
>>> +  m_op1 = gimple_call_arg (call, 0);
>>> +  m_operator = &op_cfn_isfinite;
>>> +  break;
>>> +
>>>  CASE_CFN_COPYSIGN_ALL:
>>>m_op1 = gimple_call_arg (call, 0);
>>>m_op2 = gimple_call_arg (call, 1);
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c 
>>> b/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c
>>> new file mode 100644
>>> index 000..f5dce0a0486
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c
>>> @@ -0,0 +1,31 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2 -fdump-tree-evrp" } */
>>> +
>>> +#include 
>>> +void link_error();
>>> +
>>> +void test1 (double x)
>>> +{
>>> +  if (x < __DBL_MAX__ && x > -__DBL_MAX__ && !__builtin_isfinite (x))
>>> +link_error ();
>>> +}
>>> +
>>> +void test2 (float x)
>>> +{
>>> +  if (x < __FLT_MAX__ && x > -__FLT_MAX__ && !__builtin_isfinite (x))
>>> +link_error ();
>>> +}
>>> +
>>> +void test3 (double x)
>>> +{
>>> +  if (__builtin_isfinite (x) && __builtin_isinf (x))
>>> +link_error ();
>>> +}
>>> +
>>> +void test4 (float x)
>>> +{
>>> +  if (__builtin_isfinite (x) && __builtin_isinf (x))
>>> +link_error ();
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */


Re: [PATCH] RISC-V: use fclass insns to implement isfinite and isnormal builtins

2024-07-01 Thread Jeff Law




On 6/30/24 6:46 PM, Vineet Gupta wrote:



On 6/30/24 06:59, Jeff Law wrote:

Any ideas on how I can keep this and then adjust rest of patterns.

Yea.  Drop the "SImode" references from the RTL template of the
expander.   Then you'll need to verify the modes in the C fragment that
generates code.  You'd want to test the mode of operand0 and the mode of
the UNSPEC.  If they aren't word_mode, then FAIL.


Testing specifically for word_mode doesn't work for int operand.

Oh, yea.  If it's always a CONST_INT, then it'll always be VOIDmode.





This works because those expanders are allowed to use FAIL.  Some
expanders aren't allowed to do that (they're supposed to be documented
appropriately in the internals manual).

In the matching define_insns, you can use X and adjust their names.


Sorry I'm still not sure how to use X in the define insn. It seems we
have to skip mode in match_operand specification there as well.
I'll post v2 and take it from there.
It's the  expander I was most focused on.   For the fclass case I'd 
indirect through an expander, i.e., have an expander without a mode, like 
I've suggested for isfinite/isnormal, then match it with a define_insn 
that accepts X.


The define_insn's name can have an additional  suffix in that 
case to distinguish between the SI and DI variants.


jeff


Ping^2 [PATCHv2, rs6000] Optimize vector construction with two vector doubleword loads [PR103568]

2024-07-01 Thread HAO CHEN GUI
Hi,
 Gently ping it.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653180.html

Thanks
Gui Haochen


在 2024/6/20 15:01, HAO CHEN GUI 写道:
> Hi,
>  Gently ping it.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653180.html
> 
> Thanks
> Gui Haochen
> 
> 在 2024/5/31 11:25, HAO CHEN GUI 写道:
>> Hi,
>>   This patch optimizes vector construction with two vector doubleword loads.
>> It generates an optimal insn sequence as "xxlor" has lower latency than
>> "mtvsrdd" on Power10.
>>
>>   Compared with previous version, the main change is to use "isa" attribute
>> to guard "lxsd" and "lxsdx".
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653103.html
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no
>> regressions. OK for the trunk?
>>
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> rs6000: Optimize vector construction with two vector doubleword loads
>>
>> When constructing a vector by two doublewords from memory, originally it
>> does
>>  ld 10,0(3)
>>  ld 9,0(4)
>>  mtvsrdd 34,9,10
>>
>> An optimal sequence on Power10 should be
>>  lxsd 0,0(4)
>>  lxvrdx 1,0,3
>>  xxlor 34,1,32
>>
>> This patch does this optimization by insn combine and split.
>>
>> gcc/
>>  PR target/103568
>>  * config/rs6000/vsx.md (vsx_ld_lowpart_zero_): New insn
>>  pattern.
>>  (vsx_ld_highpart_zero_): New insn pattern.
>>  (vsx_concat_mem_): New insn_and_split pattern.
>>
>> gcc/testsuite/
>>  PR target/103568
>>  * gcc.target/powerpc/pr103568.c: New test.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index f135fa079bd..f9a2a260e89 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -1395,6 +1395,27 @@ (define_insn "vsx_ld_elemrev_v2di"
>>"lxvd2x %x0,%y1"
>>[(set_attr "type" "vecload")])
>>
>> +(define_insn "vsx_ld_lowpart_zero_"
>> +  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa")
>> +(vec_concat:VSX_D
>> +  (match_operand: 1 "memory_operand" "wY,Z")
>> +  (match_operand: 2 "zero_constant" "j,j")))]
>> +  ""
>> +  "@
>> +   lxsd %0,%1
>> +   lxsdx %x0,%y1"
>> +  [(set_attr "type" "vecload,vecload")
>> +   (set_attr "isa" "p9v,p7v")])
>> +
>> +(define_insn "vsx_ld_highpart_zero_"
>> +  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
>> +(vec_concat:VSX_D
>> +  (match_operand: 1 "zero_constant" "j")
>> +  (match_operand: 2 "memory_operand" "Z")))]
>> +  "TARGET_POWER10"
>> +  "lxvrdx %x0,%y2"
>> +  [(set_attr "type" "vecload")])
>> +
>>  (define_insn "vsx_ld_elemrev_v1ti"
>>[(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
>>  (vec_select:V1TI
>> @@ -3063,6 +3084,26 @@ (define_insn "vsx_concat_"
>>  }
>>[(set_attr "type" "vecperm,vecmove")])
>>
>> +(define_insn_and_split "vsx_concat_mem_"
>> +  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa")
>> +(vec_concat:VSX_D
>> +  (match_operand: 1 "memory_operand" "wY,Z")
>> +  (match_operand: 2 "memory_operand" "Z,Z")))]
>> +  "TARGET_POWER10 && can_create_pseudo_p ()"
>> +  "#"
>> +  "&& 1"
>> +  [(const_int 0)]
>> +{
>> +  rtx tmp1 = gen_reg_rtx (mode);
>> +  rtx tmp2 = gen_reg_rtx (mode);
>> +  emit_insn (gen_vsx_ld_highpart_zero_ (tmp1, CONST0_RTX 
>> (mode),
>> +  operands[1]));
>> +  emit_insn (gen_vsx_ld_lowpart_zero_ (tmp2, operands[2],
>> + CONST0_RTX (mode)));
>> +  emit_insn (gen_ior3 (operands[0], tmp1, tmp2));
>> +  DONE;
>> +})
>> +
>>  ;; Combiner patterns to allow creating XXPERMDI's to access either double
>>  ;; word element in a vector register.
>>  (define_insn "*vsx_concat__1"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103568.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr103568.c
>> new file mode 100644
>> index 000..b2a06fb2162
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr103568.c
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>> +
>> +vector double test (double *a, double *b)
>> +{
>> +  return (vector double) {*a, *b};
>> +}
>> +
>> +vector long long test1 (long long *a, long long *b)
>> +{
>> +  return (vector long long) {*a, *b};
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mlxsd} 2 } } */
>> +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 2 } } */
>> +/* { dg-final { scan-assembler-times {\mxxlor\M} 2 } } */
>> +


[PATCH 3/3] s390: Drop vcond{,u} expanders

2024-07-01 Thread Stefan Schulze Frielinghaus
Optabs vcond{,u} will be removed for GCC 15.  Since regtest shows no
fallout, drop the expanders now.

gcc/ChangeLog:

PR target/114189
* config/s390/vector.md (V_HW2): Remove.
(vcond): Remove.
(vcondu): Remove.
---
 Bootstrapped and regtested on s390.  Ok for mainline?

 gcc/config/s390/vector.md | 35 ---
 1 file changed, 35 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 0e57dd1650c..1caf732d1f9 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -27,14 +27,9 @@
V2SF V4SF V1DF V2DF V1TF V1TI TI])
 
 ; All modes directly supported by the hardware having full vector reg size
-; V_HW2 is for having two iterators expanding independently e.g. vcond.
-; It's similar to V_HW, but not fully identical: V1TI is not included, because
-; there are no 128-bit compares.
 (define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI V1TI TI V2DF
 (V4SF "TARGET_VXE") (V1TF "TARGET_VXE")
 (TF "TARGET_VXE")])
-(define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")
-(V1TF "TARGET_VXE") (TF "TARGET_VXE")])
 
 (define_mode_iterator VT_HW_HSDT [V8HI V4SI V4SF V2DI V2DF V1TI V1TF TI TF])
 (define_mode_iterator V_HW_HSD [V8HI V4SI (V4SF "TARGET_VXE") V2DI V2DF])
@@ -725,36 +720,6 @@
 }
 })
 
-(define_expand "vcond"
-  [(set (match_operand:V_HW 0 "register_operand" "")
-   (if_then_else:V_HW
-(match_operator 3 "vcond_comparison_operator"
-[(match_operand:V_HW2 4 "register_operand" "")
- (match_operand:V_HW2 5 "nonmemory_operand" "")])
-(match_operand:V_HW 1 "nonmemory_operand" "")
-(match_operand:V_HW 2 "nonmemory_operand" "")))]
-  "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS 
(mode)"
-{
-  s390_expand_vcond (operands[0], operands[1], operands[2],
-GET_CODE (operands[3]), operands[4], operands[5]);
-  DONE;
-})
-
-(define_expand "vcondu"
-  [(set (match_operand:V_HW 0 "register_operand" "")
-   (if_then_else:V_HW
-(match_operator 3 "comparison_operator"
-[(match_operand:V_HW2 4 "register_operand" "")
- (match_operand:V_HW2 5 "nonmemory_operand" "")])
-(match_operand:V_HW 1 "nonmemory_operand" "")
-(match_operand:V_HW 2 "nonmemory_operand" "")))]
-  "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS 
(mode)"
-{
-  s390_expand_vcond (operands[0], operands[1], operands[2],
-GET_CODE (operands[3]), operands[4], operands[5]);
-  DONE;
-})
-
 (define_expand "vcond_mask_"
   [(set (match_operand:VT 0 "register_operand" "")
(if_then_else:VT
-- 
2.45.2



[PATCH 0/3] Prepare and drop vcond expanders

2024-07-01 Thread Stefan Schulze Frielinghaus
This drops the vcond expanders.  The first patch
"s390: Emulate vec_cmp{eq,gt,gtu} for 128-bit integers" is somewhat
independent of the other two, since we already run into ICEs.  However,
after removing the vcond expanders the testsuite shows one additional
fallout without this patch, which is why I would like to make sure that
this patch lands first and have included it in this series.

Stefan Schulze Frielinghaus (3):
  s390: Emulate vec_cmp{eq,gt,gtu} for 128-bit integers
  s390: Enable vcond_mask for 128-bit ops
  s390: Drop vcond{,u} expanders

 gcc/config/s390/vector.md | 156 --
 .../gcc.target/s390/vector/vec-cmp-emu-1.c|  35 
 .../gcc.target/s390/vector/vec-cmp-emu-2.c|  18 ++
 .../gcc.target/s390/vector/vec-cmp-emu-3.c|  17 ++
 4 files changed, 175 insertions(+), 51 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-3.c

-- 
2.45.2



[PATCH V6 1/2] split complicate 64bit constant to memory

2024-07-01 Thread Jiufu Guo
Hi,

Sometimes, a complicated constant is built via 3(or more)
instructions.  Generally speaking, it would not be as fast
as loading it from the constant pool (as the discussions in
PR63281):
"ld" is one instruction.  If we consider the "address/toc" adjustment,
we may count it as 2 instructions.  And "pld" may need fewer
cycles.

As is known, because this patch loads the constant from memory,
this functionality may affect cache misses.
Still, IMHO, this patch does the right thing.

Compare with the previous version:
This version:
1. added a parameter to control how complicated a constant must be
before it is put into the constant pool.
2. updated the test cases so that they keep the original test points.

Bootstrap & regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

PR target/63281

gcc/ChangeLog:

* config/rs6000/rs6000.cc (rs6000_emit_set_const): Split constant to
memory for -m64.
* config/rs6000/rs6000.opt (rs6000-min-insns-constant-in-pool): New
parameter.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/const_anchors.c: Test final-rtl.
* gcc.target/powerpc/parall_5insn_const.c: Add option
--param=rs6000-min-insns-constant-in-pool=5 to keep the original test.
* gcc.target/powerpc/pr106550.c: Likewise.
* gcc.target/powerpc/pr106550_1.c: Likewise.
* gcc.target/powerpc/pr93012.c: Likewise.
* gcc.target/powerpc/pr87870.c: Update instruction counts.
* gcc.target/powerpc/pr63281.c: New test.


---
 gcc/config/rs6000/rs6000.cc   | 19 +++
 gcc/config/rs6000/rs6000.opt  |  5 +
 .../gcc.target/powerpc/const_anchors.c|  5 +++--
 .../gcc.target/powerpc/parall_5insn_const.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr106550.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr106550_1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr63281.c| 11 +++
 gcc/testsuite/gcc.target/powerpc/pr87870.c|  5 -
 gcc/testsuite/gcc.target/powerpc/pr93012.c|  2 +-
 9 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 2046a831938..ec384e87868 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10240,6 +10240,25 @@ rs6000_emit_set_const (rtx dest, rtx source)
  c = sext_hwi (c, 32);
  emit_move_insn (lo, GEN_INT (c));
}
+
+  /* Use base_reg_operand to avoid spliting "r0=xxx" to "r0=[r0+off]"
+after RA when reusing the DEST register to build the value.  */
+  else if ((can_create_pseudo_p () || base_reg_operand (dest, mode))
+  && num_insns_constant (source, mode)
+   > rs6000_min_insns_constant_in_pool
+  && TARGET_64BIT)
+   {
+ rtx sym = force_const_mem (mode, source);
+ if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
+ && use_toc_relative_ref (XEXP (sym, 0), mode))
+   {
+ rtx toc = create_TOC_reference (XEXP (sym, 0), dest);
+ sym = gen_const_mem (mode, toc);
+ set_mem_alias_set (sym, get_TOC_alias_set ());
+   }
+
+ emit_move_insn (dest, sym);
+   }
   else
rs6000_emit_set_long_const (dest, c);
   break;
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index e8ca70340df..a1c0d1e89c5 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -679,3 +679,8 @@ default value is 4.
 Target Undocumented Joined UInteger Var(rs6000_vect_unroll_reduc_threshold) 
Init(1) Param
 When reduction factor computed for a loop exceeds the threshold specified by
 this parameter, prefer to unroll this loop.  The default value is 1.
+
+-param=rs6000-min-insns-constant-in-pool=
+Target Undocumented Joined UInteger Var(rs6000_min_insns_constant_in_pool) 
Init(2) IntegerRange(2, 5) Param
+The minimum instruction number of building a constant to force loading it from
+the constant pool.
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c 
b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
index 542e2674b12..682e773d506 100644
--- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c
+++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target has_arch_ppc64 } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
 
 #define C1 0x2351847027482577ULL
 #define C2 0x2351847027482578ULL
@@ -17,4 +17,5 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long 
b)
 *a++ = C2;
 }
 
-/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */
+/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c 
b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
index e3a9a7264cf..e39479bbf86 100644
--- a/gcc

[RFC/PATCH] isel: Fold more in gimple_expand_vec_cond_expr with andc/iorc

2024-07-01 Thread Kewen.Lin
Hi,

As PR115659 shows, assuming c = x CMP y, there are some
folding chances for patterns r = c ? 0/z : z/-1:
  - For r = c ? 0 : z, it can be folded into r = ~c & z.
  - For r = c ? z : -1, it can be folded into r = ~c | z.

But BIT_AND/BIT_IOR applied to one BIT_NOT operand is a
compound operation.  I'm not sure whether every target with
vector capability has a single vector instruction for it;
if not, it's arguable whether it always beats vector
selection (e.g. the vector constant gets hoisted or combined
and selection has the same latency as a normal logical operation).
So IMHO we probably need to query the target with new optabs.
So this patch introduces new optabs andc and iorc and their
corresponding internal functions BIT_{ANDC,IORC} (looking
for suggestions for naming the optabs and ifns).  If a target
defines such optabs for vector modes, it means the target
supports these hardware insns and they should be no worse than
vector selection.  btw, the rs6000 changes are meant to
give an example for a target supporting andc/iorc.

Does this sound reasonable?

BR,
Kewen
-

PR tree-optimization/115659

gcc/ChangeLog:

* config/rs6000/rs6000-builtins.def: Update some bif expanders by
replacing orc3 with iorc3.
* config/rs6000/rs6000-string.cc (expand_cmp_vec_sequence): Update gen
function by replacing orc3 with iorc3.
* config/rs6000/rs6000.md (orc3): Rename to ...
(iorc3): ... this.
* doc/md.texi: Document andcm3 and iorcm3.
* gimple-isel.cc (gimple_expand_vec_cond_expr): Add more foldings for
patterns x CMP y ? 0 : z and x CMP y ? z : -1.
* internal-fn.def (BIT_ANDC): New internal function.
(BIT_IORC): Likewise.
* optabs.def (andc, iorc): New optab.
---
 gcc/config/rs6000/rs6000-builtins.def | 24 
 gcc/config/rs6000/rs6000-string.cc|  2 +-
 gcc/config/rs6000/rs6000.md   |  2 +-
 gcc/doc/md.texi   | 10 ++
 gcc/gimple-isel.cc| 24 
 gcc/internal-fn.def   |  4 
 gcc/optabs.def|  2 ++
 7 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 3bc7fed6956..736890fe6cb 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2147,40 +2147,40 @@
 NEG_V2DI negv2di2 {}

   const vsc __builtin_altivec_orc_v16qi (vsc, vsc);
-ORC_V16QI orcv16qi3 {}
+ORC_V16QI iorcv16qi3 {}

   const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc);
-ORC_V16QI_UNS orcv16qi3 {}
+ORC_V16QI_UNS iorcv16qi3 {}

   const vsq __builtin_altivec_orc_v1ti (vsq, vsq);
-ORC_V1TI orcv1ti3 {}
+ORC_V1TI iorcv1ti3 {}

   const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq);
-ORC_V1TI_UNS orcv1ti3 {}
+ORC_V1TI_UNS iorcv1ti3 {}

   const vd __builtin_altivec_orc_v2df (vd, vd);
-ORC_V2DF orcv2df3 {}
+ORC_V2DF iorcv2df3 {}

   const vsll __builtin_altivec_orc_v2di (vsll, vsll);
-ORC_V2DI orcv2di3 {}
+ORC_V2DI iorcv2di3 {}

   const vull __builtin_altivec_orc_v2di_uns (vull, vull);
-ORC_V2DI_UNS orcv2di3 {}
+ORC_V2DI_UNS iorcv2di3 {}

   const vf __builtin_altivec_orc_v4sf (vf, vf);
-ORC_V4SF orcv4sf3 {}
+ORC_V4SF iorcv4sf3 {}

   const vsi __builtin_altivec_orc_v4si (vsi, vsi);
-ORC_V4SI orcv4si3 {}
+ORC_V4SI iorcv4si3 {}

   const vui __builtin_altivec_orc_v4si_uns (vui, vui);
-ORC_V4SI_UNS orcv4si3 {}
+ORC_V4SI_UNS iorcv4si3 {}

   const vss __builtin_altivec_orc_v8hi (vss, vss);
-ORC_V8HI orcv8hi3 {}
+ORC_V8HI iorcv8hi3 {}

   const vus __builtin_altivec_orc_v8hi_uns (vus, vus);
-ORC_V8HI_UNS orcv8hi3 {}
+ORC_V8HI_UNS iorcv8hi3 {}

   const vsc __builtin_altivec_vclzb (vsc);
 VCLZB clzv16qi2 {}
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 917f5572a6d..c4c62e8e2f9 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -743,7 +743,7 @@ expand_cmp_vec_sequence (unsigned HOST_WIDE_INT 
bytes_to_compare,
  rtx cmp_combined = gen_reg_rtx (load_mode);
  emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
  emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
- emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
+ emit_insn (gen_iorcv16qi3 (vec_result, cmp_zero, cmp_res));
  emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result, 
zero_reg));
}
}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a5d20594789..276a5c9cf2d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7324,7 +7324,7 @@ (define_expand "nand3"

 ;; The canonical form is to have the negated element first, so we need to
 ;; reverse arguments.
-(define_expand "orc3"
+(define_expand 

[PATCH] isel: Fold more in gimple_expand_vec_cond_expr [PR115659]

2024-07-01 Thread Kewen.Lin
Hi,

As PR115659 shows, assuming c = x CMP y, there are some
folding chances for patterns r = c ? -1/z : z/0.

For r = c ? -1 : z, it can be folded into:
  - r = c | z (with ior_optab supported)
  - or r = c ? c : z

while for r = c ?  z : 0, it can be folded into:
  - r = c & z (with and_optab supported)
  - or r = c ? z : c

This patch is to teach ISEL to take care of them and also
remove the redundant gsi_replace as the caller of function
gimple_expand_vec_cond_expr will handle it.

Bootstrapped and regtested on x86_64-redhat-linux and
powerpc64{,le}-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-
PR tree-optimization/115659

gcc/ChangeLog:

* gimple-isel.cc (gimple_expand_vec_cond_expr): Add more foldings for
patterns x CMP y ? -1 : z and x CMP y ? z : 0.
---
 gcc/gimple-isel.cc | 48 +++---
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index 54c1801038b..71af1a8cd97 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -240,16 +240,50 @@ gimple_expand_vec_cond_expr (struct function *fun, 
gimple_stmt_iterator *gsi,
can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
 tcode);

- /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
  if (can_compute_op0
- && integer_minus_onep (op1)
- && integer_zerop (op2)
  && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
{
- tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
- gassign *new_stmt = gimple_build_assign (lhs, conv_op);
- gsi_replace (gsi, new_stmt, true);
- return new_stmt;
+ /* Assuming c = x CMP y.  */
+ bool op1_minus_onep = integer_minus_onep (op1);
+ bool op2_zerop = integer_zerop (op2);
+ tree vtype = TREE_TYPE (lhs);
+ machine_mode vmode = TYPE_MODE (vtype);
+ /* Try to fold r = c ? -1 : 0 to r = c.  */
+ if (op1_minus_onep && op2_zerop)
+   {
+ tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
+ return gimple_build_assign (lhs, conv_op);
+   }
+ /* Try to fold r = c ? -1 : z to r = c | z, or
+r = c ? c : z.  */
+ if (op1_minus_onep)
+   {
+ tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
+ tree new_op0 = make_ssa_name (vtype);
+ gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
+ gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
+ if (optab_handler (ior_optab, vmode) != CODE_FOR_nothing)
+   /* r = c | z */
+   return gimple_build_assign (lhs, BIT_IOR_EXPR, new_op0,
+   op2);
+ /* r = c ? c : z */
+ op1 = new_op0;
+   }
+ /* Try to fold r = c ? z : 0 to r = c & z, or
+r = c ? z : c.  */
+ else if (op2_zerop)
+   {
+ tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
+ tree new_op0 = make_ssa_name (vtype);
+ gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
+ gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
+ if (optab_handler (and_optab, vmode) != CODE_FOR_nothing)
+   /* r = c | z */
+   return gimple_build_assign (lhs, BIT_AND_EXPR, new_op0,
+   op1);
+ /* r = c ? z : c */
+ op2 = new_op0;
+   }
}

  /* When the compare has EH we do not want to forward it when
--
2.43.0


Re: [PATCH] isel: Fold more in gimple_expand_vec_cond_expr [PR115659]

2024-07-01 Thread Richard Biener
On Mon, Jul 1, 2024 at 8:16 AM Kewen.Lin  wrote:
>
> Hi,
>
> As PR115659 shows, assuming c = x CMP y, there are some
> folding chances for patterns r = c ? -1/z : z/0.
>
> For r = c ? -1 : z, it can be folded into:
>   - r = c | z (with ior_optab supported)
>   - or r = c ? c : z
>
> while for r = c ?  z : 0, it can be foled into:
>   - r = c & z (with and_optab supported)
>   - or r = c ? z : c
>
> This patch is to teach ISEL to take care of them and also
> remove the redundant gsi_replace as the caller of function
> gimple_expand_vec_cond_expr will handle it.

Yeah, not the nicest API ...

> Bootstrapped and regtested on x86_64-redhat-linux and
> powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?

Minor nit below

> BR,
> Kewen
> -
> PR tree-optimization/115659
>
> gcc/ChangeLog:
>
> * gimple-isel.cc (gimple_expand_vec_cond_expr): Add more foldings for
> patterns x CMP y ? -1 : z and x CMP y ? z : 0.
> ---
>  gcc/gimple-isel.cc | 48 +++---
>  1 file changed, 41 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index 54c1801038b..71af1a8cd97 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -240,16 +240,50 @@ gimple_expand_vec_cond_expr (struct function *fun, 
> gimple_stmt_iterator *gsi,
> can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
>  tcode);
>
> - /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
>   if (can_compute_op0
> - && integer_minus_onep (op1)
> - && integer_zerop (op2)
>   && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
> {
> - tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
> - gassign *new_stmt = gimple_build_assign (lhs, conv_op);
> - gsi_replace (gsi, new_stmt, true);
> - return new_stmt;
> + /* Assuming c = x CMP y.  */
> + bool op1_minus_onep = integer_minus_onep (op1);
> + bool op2_zerop = integer_zerop (op2);
> + tree vtype = TREE_TYPE (lhs);
> + machine_mode vmode = TYPE_MODE (vtype);
> + /* Try to fold r = c ? -1 : 0 to r = c.  */
> + if (op1_minus_onep && op2_zerop)
> +   {
> + tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
> + return gimple_build_assign (lhs, conv_op);
> +   }
> + /* Try to fold r = c ? -1 : z to r = c | z, or
> +r = c ? c : z.  */
> + if (op1_minus_onep)
> +   {
> + tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
> + tree new_op0 = make_ssa_name (vtype);
> + gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
> + gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
> + if (optab_handler (ior_optab, vmode) != CODE_FOR_nothing)
> +   /* r = c | z */
> +   return gimple_build_assign (lhs, BIT_IOR_EXPR, new_op0,
> +   op2);
> + /* r = c ? c : z */
> + op1 = new_op0;

maybe better call it new_op1 then?  Or new_op.

> +   }
> + /* Try to fold r = c ? z : 0 to r = c & z, or
> +r = c ? z : c.  */
> + else if (op2_zerop)
> +   {
> + tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
> + tree new_op0 = make_ssa_name (vtype);
> + gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
> + gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
> + if (optab_handler (and_optab, vmode) != CODE_FOR_nothing)
> +   /* r = c | z */
> +   return gimple_build_assign (lhs, BIT_AND_EXPR, new_op0,
> +   op1);
> + /* r = c ? z : c */
> + op2 = new_op0;

Likewise (new_op2 or also new_op).

OK with that nit fixed.

Thanks,
Richard.

> +   }
> }
>
>   /* When the compare has EH we do not want to forward it when
> --
> 2.43.0


Re: [RFC/PATCH] isel: Fold more in gimple_expand_vec_cond_expr with andc/iorc

2024-07-01 Thread Richard Biener
On Mon, Jul 1, 2024 at 8:17 AM Kewen.Lin  wrote:
>
> Hi,
>
> As PR115659 shows, assuming c = x CMP y, there are some
> folding chances for patterns r = c ? 0/z : z/-1:
>   - For r = c ? 0 : z, it can be folded into r = ~c & z.
>   - For r = c ? z : -1, it can be folded into r = ~c | z.
>
> But BIT_AND/BIT_IOR applied on one BIT_NOT operand is a
> compound operation, I'm not sure if each target with
> vector capability have a single vector instruction for it,
> if no, it's arguable to consider it always beats vector
> selection (like vector constant gets hoisted or combined
> and selection has same latency as normal logical operation).
> So IMHO we probably need to query target with new optabs.
> So this patch is to introduce new optabs andc, iorc and its
> corresponding internal functions BIT_{ANDC,IORC} (looking
> for suggestion for naming optabs and ifns), and if targets
> defines such optabs for vector modes, it means targets
> support these hardware insns and should be not worse than
> vector selection.  btw, the rs6000 changes are meant to
> give an example for a target supporting andc/iorc.
>
> Does this sound reasonable?

I think it's reasonable to have andc - there are quite some CPUs
that have this op on GPRs as well I think, called andn (but I don't
want to get into bike-shedding).  A corresponding iorc is then
a natural extension (likewise xorc).  AVX512 has a very powerful
vector ternlog (but no scalar andn).

I was surprised to not see an existing optab for andn.

So OK from my side in case there are no negative comments or
bikeshedding on the name.  I can't approve the rs6000 changes
though.

Thanks,
Richard.

> BR,
> Kewen
> -
>
> PR tree-optimzation/115659
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000-builtins.def: Update some bif expanders by
> replacing orc3 with iorc3.
> * config/rs6000/rs6000-string.cc (expand_cmp_vec_sequence): Update gen
> function by replacing orc3 with iorc3.
> * config/rs6000/rs6000.md (orc3): Rename to ...
> (iorc3): ... this.
> * doc/md.texi: Document andcm3 and iorcm3.
> * gimple-isel.cc (gimple_expand_vec_cond_expr): Add more foldings for
> patterns x CMP y ? 0 : z and x CMP y ? z : -1.
> * internal-fn.def (BIT_ANDC): New internal function.
> (BIT_IORC): Likewise.
> * optabs.def (andc, iorc): New optab.
> ---
>  gcc/config/rs6000/rs6000-builtins.def | 24 
>  gcc/config/rs6000/rs6000-string.cc|  2 +-
>  gcc/config/rs6000/rs6000.md   |  2 +-
>  gcc/doc/md.texi   | 10 ++
>  gcc/gimple-isel.cc| 24 
>  gcc/internal-fn.def   |  4 
>  gcc/optabs.def|  2 ++
>  7 files changed, 54 insertions(+), 14 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 3bc7fed6956..736890fe6cb 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2147,40 +2147,40 @@
>  NEG_V2DI negv2di2 {}
>
>const vsc __builtin_altivec_orc_v16qi (vsc, vsc);
> -ORC_V16QI orcv16qi3 {}
> +ORC_V16QI iorcv16qi3 {}
>
>const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc);
> -ORC_V16QI_UNS orcv16qi3 {}
> +ORC_V16QI_UNS iorcv16qi3 {}
>
>const vsq __builtin_altivec_orc_v1ti (vsq, vsq);
> -ORC_V1TI orcv1ti3 {}
> +ORC_V1TI iorcv1ti3 {}
>
>const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq);
> -ORC_V1TI_UNS orcv1ti3 {}
> +ORC_V1TI_UNS iorcv1ti3 {}
>
>const vd __builtin_altivec_orc_v2df (vd, vd);
> -ORC_V2DF orcv2df3 {}
> +ORC_V2DF iorcv2df3 {}
>
>const vsll __builtin_altivec_orc_v2di (vsll, vsll);
> -ORC_V2DI orcv2di3 {}
> +ORC_V2DI iorcv2di3 {}
>
>const vull __builtin_altivec_orc_v2di_uns (vull, vull);
> -ORC_V2DI_UNS orcv2di3 {}
> +ORC_V2DI_UNS iorcv2di3 {}
>
>const vf __builtin_altivec_orc_v4sf (vf, vf);
> -ORC_V4SF orcv4sf3 {}
> +ORC_V4SF iorcv4sf3 {}
>
>const vsi __builtin_altivec_orc_v4si (vsi, vsi);
> -ORC_V4SI orcv4si3 {}
> +ORC_V4SI iorcv4si3 {}
>
>const vui __builtin_altivec_orc_v4si_uns (vui, vui);
> -ORC_V4SI_UNS orcv4si3 {}
> +ORC_V4SI_UNS iorcv4si3 {}
>
>const vss __builtin_altivec_orc_v8hi (vss, vss);
> -ORC_V8HI orcv8hi3 {}
> +ORC_V8HI iorcv8hi3 {}
>
>const vus __builtin_altivec_orc_v8hi_uns (vus, vus);
> -ORC_V8HI_UNS orcv8hi3 {}
> +ORC_V8HI_UNS iorcv8hi3 {}
>
>const vsc __builtin_altivec_vclzb (vsc);
>  VCLZB clzv16qi2 {}
> diff --git a/gcc/config/rs6000/rs6000-string.cc 
> b/gcc/config/rs6000/rs6000-string.cc
> index 917f5572a6d..c4c62e8e2f9 100644
> --- a/gcc/config/rs6000/rs6000-string.cc
> +++ b/gcc/config/rs6000/rs6000-string.cc
> @@ -743,7 +743,7 @@ expand_cmp_vec_sequence (unsigned HOST_WIDE_INT 
> bytes_to_compare,
>   rtx cmp_combined = gen_reg_rtx (load_m

[PATCH V6 2/2] split complicate 64bit constant to memory for -m32 -mpowerpc64

2024-07-01 Thread Jiufu Guo
Hi,

For "-m32 -mpowerpc64", it is also OK to use fewer instructions (p?ld)
to load a 64-bit constant from memory.  So, splitting a complicated 64-bit
constant to the constant pool should also work for this case.

Compared with the previous version:
This version uses the new parameter to control what kind of complicated
constants should be put into memory.

Bootstrap and regtest pass on ppc64{,le}.
Also no regression for "-m32 -mpowerpc64" variation on ppc64.
Is this ok for trunk?

BR,
Jeff(Jiufu) Guo

gcc/ChangeLog:

* config/rs6000/rs6000.cc (rs6000_emit_set_const): Split constant to
pool for "-m32 -mpowerpc64".

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr63281.c: Allow checking -m32.

---
 gcc/config/rs6000/rs6000.cc| 21 +++--
 gcc/testsuite/gcc.target/powerpc/pr63281.c |  2 +-
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index ec384e87868..c785fb20b1b 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10245,8 +10245,7 @@ rs6000_emit_set_const (rtx dest, rtx source)
 after RA when reusing the DEST register to build the value.  */
   else if ((can_create_pseudo_p () || base_reg_operand (dest, mode))
   && num_insns_constant (source, mode)
-   > rs6000_min_insns_constant_in_pool
-  && TARGET_64BIT)
+   > rs6000_min_insns_constant_in_pool)
{
  rtx sym = force_const_mem (mode, source);
  if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
@@ -10256,6 +10255,24 @@ rs6000_emit_set_const (rtx dest, rtx source)
  sym = gen_const_mem (mode, toc);
  set_mem_alias_set (sym, get_TOC_alias_set ());
}
+ else if (TARGET_32BIT)
+   {
+ /* After RA, reuse 'DEST' reg.  */
+ rtx addr = can_create_pseudo_p ()
+  ? gen_reg_rtx (Pmode)
+  : gen_rtx_REG (Pmode, REGNO (dest));
+ rtx sym_ref = XEXP (sym, 0);
+ if (flag_pic)
+   emit_move_insn (addr, sym_ref);
+ else
+   {
+ emit_move_insn (addr, gen_rtx_HIGH (Pmode, sym_ref));
+ emit_move_insn (addr, gen_rtx_LO_SUM (Pmode, addr, sym_ref));
+   }
+ rtx mem = gen_rtx_MEM (mode, addr);
+ MEM_COPY_ATTRIBUTES (mem, sym);
+ sym = mem;
+   }
 
  emit_move_insn (dest, sym);
}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c 
b/gcc/testsuite/gcc.target/powerpc/pr63281.c
index 9763a7181fc..16a93b78606 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr63281.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c
@@ -7,5 +7,5 @@ foo (unsigned long long *a)
   *a++ = 0x2351847027482577ULL;
 }
 
-/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */
+/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { has_arch_ppc64 } 
} } } */
 
-- 
2.45.2



Publication in Journal of Scopus(Q2)

2024-07-01 Thread Shen Zhang
   *|MC:SUBJECT|* p{ margin:10px 0; padding:0; } table{ 
border-collapse:collapse; } h1,h2,h3,h4,h5,h6{ display:block; margin:0; 
padding:0; } img,a img{ border:0; height:auto; outline:none; 
text-decoration:none; } body,#bodyTable,#bodyCell{ height:100%; margin:0; 
padding:0; width:100%; } .mcnPreviewText{ display:none !important; } #outlook 
a{ padding:0; } img{ -ms-interpolation-mode:bicubic; } table{ 
mso-table-lspace:0pt; mso-table-rspace:0pt; } .ReadMsgBody{ width:100%; } 
.ExternalClass{ width:100%; } p,a,li,td,blockquote{ 
mso-line-height-rule:exactly; } a[href^=tel],a[href^=sms]{ color:inherit; 
cursor:default; text-decoration:none; } p,a,li,td,body,table,blockquote{ 
-ms-text-size-adjust:100%; -webkit-text-size-adjust:100%; } 
.ExternalClass,.ExternalClass p,.ExternalClass td,.ExternalClass 
div,.ExternalClass span,.ExternalClass font{ line-height:100%; } 
a[x-apple-data-detectors]{ color:inherit !important; text-decoration:none 
!important; font-size:inherit !important; font-family:inherit !important; 
font-weight:inherit !important; line-height:inherit !important; } #bodyCell{ 
padding:10px; } .templateContainer{ max-width:600px !important; } a.mcnButton{ 
display:block; } .mcnImage,.mcnRetinaImage{ vertical-align:bottom; } 
.mcnTextContent{ word-break:break-word; } .mcnTextContent img{ height:auto 
!important; } .mcnDividerBlock{ table-layout:fixed !important; } /* @tab Page 
@section Background Style @tip Set the background color and top border for your 
email. You may want to choose colors that match your company's branding. */ 
body,#bodyTable{ /*@editable*/background-color:#FAFAFA; } /* @tab Page @section 
Background Style @tip Set the background color and top border for your email. 
You may want to choose colors that match your company's branding. */ #bodyCell{ 
/*@editable*/border-top:0; } /* @tab Page @section Email Border @tip Set the 
border for your email. */ .templateContainer{ /*@editable*/border:0; } /* @tab 
Page @section Heading 1 @tip Set the styling for all first-level headings in 
your emails. These should be the largest of your headings. @style heading 1 */ 
h1{ /*@editable*/color:#202020; /*@editable*/font-family:Helvetica; 
/*@editable*/font-size:26px; /*@editable*/font-style:normal; 
/*@editable*/font-weight:bold; /*@editable*/line-height:125%; 
/*@editable*/letter-spacing:normal; /*@editable*/text-align:left; } /* @tab 
Page @section Heading 2 @tip Set the styling for all second-level headings in 
your emails. @style heading 2 */ h2{ /*@editable*/color:#202020; 
/*@editable*/font-family:Helvetica; /*@editable*/font-size:22px; 
/*@editable*/font-style:normal; /*@editable*/font-weight:bold; 
/*@editable*/line-height:125%; /*@editable*/letter-spacing:normal; 
/*@editable*/text-align:left; } /* @tab Page @section Heading 3 @tip Set the 
styling for all third-level headings in your emails. @style heading 3 */ h3{ 
/*@editable*/color:#202020; /*@editable*/font-family:Helvetica; 
/*@editable*/font-size:20px; /*@editable*/font-style:normal; 
/*@editable*/font-weight:bold; /*@editable*/line-height:125%; 
/*@editable*/letter-spacing:normal; /*@editable*/text-align:left; } /* @tab 
Page @section Heading 4 @tip Set the styling for all fourth-level headings in 
your emails. These should be the smallest of your headings. @style heading 4 */ 
h4{ /*@editable*/color:#202020; /*@editable*/font-family:Helvetica; 
/*@editable*/font-size:18px; /*@editable*/font-style:normal; 
/*@editable*/font-weight:bold; /*@editable*/line-height:125%; 
/*@editable*/letter-spacing:normal; /*@editable*/text-align:left; } /* @tab 
Preheader @section Preheader Style @tip Set the background color and borders 
for your email's preheader area. */ #templatePreheader{ 
/*@editable*/background-color:#fafafa; /*@editable*/background-image:none; 
/*@editable*/background-repeat:no-repeat; 
/*@editable*/background-position:center; /*@editable*/background-size:cover; 
/*@editable*/border-top:0; /*@editable*/border-bottom:0; 
/*@editable*/padding-top:0px; /*@editable*/padding-bottom:0px; } /* @tab 
Preheader @section Preheader Text @tip Set the styling for your email's 
preheader text. Choose a size and color that is easy to read. */ 
#templatePreheader .mcnTextContent,#templatePreheader .mcnTextContent p{ 
/*@editable*/color:#656565; /*@editable*/font-family:Helvetica; 
/*@editable*/font-size:12px; /*@editable*/line-height:150%; 
/*@editable*/text-align:left; } /* @tab Preheader @section Preheader Link @tip 
Set the styling for your email's preheader links. Choose a color that helps 
them stand out from your text. */ #templatePreheader .mcnTextContent 
a,#templatePreheader .mcnTextContent p a{ /*@editable*/color:#656565; 
/*@editable*/font-weight:normal; /*@editable*/text-decoration:underline; } /* 
@tab Header @section Header Style @tip Set the background color and borders for 
your email's header area. */ #templateHeader{ 
/*@editable*/background-color:#FF; /*@editable*/background-image:none; 
/*@editable*/backgro

[PING] Re: Updated musttail patchkit

2024-07-01 Thread Andi Kleen
Andi Kleen  writes:

I wanted to ping this patch kit to add musttail support for C/C++,
to enable future python versions and other users and keep up with clang. 

https://gcc.gnu.org/pipermail/gcc-patches/2024-June/thread.html#655447

It unfortunately touches various different parts of the compiler.
All the previous feedback has been addressed, except for
- cannot make it a warning because that would defeat the purpose
- cannot move all of the checking to expand time (would be a whole
scale rewrite of the whole mechanism)

These are RTL level:
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655448.html
(got some feedback from the two Richards and Jakub earlier)
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655450.html
(got some feedback from Andrew)

C++:
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655449.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655451.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655453.html
(C++, already approved)

C:
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655452.html
(C, got some feedback from Joseph, but never got finally approved) 

https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655455.html

Unreviewed patches, touching both tree-ssa-tailcall and calls.c expand:
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655454.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655457.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655456.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655458.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/655459.html

Thanks,
-Andi

> - Fix problems with encoding musttail in tree structure (Thanks Jakub and 
> Jason)
> - Fixes a miscompilation that would break bootstrap with 
> --enable-checking=release
> - Avoids a 0.8% compile time penalty at -O0 for the new musttail pass by 
> using a cfun flag
> that is discovered by tree-cfg
> - Enables translation of musttail error messages
> - Further improves error reporting, avoiding "other reasons" error messages
> for various cases and reporting the correct error in others.
> - Adjusted the test suite to powerpc sibcall limitations
> - Addressed C++ review feedback
> - Improves dump file output
> - Improves the documentation
> - Some random cleanups
> - Rebased on trunk
>
> Tested full bootstrap on x86_64-linux and powerpc64le-linux, as well
> as a x86_64 LTO profiled bootstrap and some x86_64 testing with
> --enable-checking=release.


Re: [PATCH] Fix test errors introduced with fix for PR115157

2024-07-01 Thread Thiago Jung Bauermann
Hello Martin,

Martin Uecker  writes:

> This should fix the test failures introduced by the fix for PR115157.
>
> Tested on x86_64 and also tested with -m32.
>
>
> Fix test errors introduced with fix for PR115157.
> 
> Fix tests introduced when fixing PR115157 that assume 
> sizeof(enum)==sizeof(int)
> by adding the flag -fno-short-enums.
> 
> gcc/testsuite/Changelog:
> * gcc.dg/enum-alias-1.c: Add flag.
> * gcc.dg/enum-alias-2.c: Add flag.
> * gcc.dg/enum-alias-3.c: Add flag.
> * gcc.dg/enum-alias-4.c: Add flag.

Thank you for the patch! It fixes the execution test failures, but
unfortunately the tests still fail with excess errors due to an
unexpected linker warning:

spawn -ignore SIGHUP 
/home/tcwg-build/workspace/tcwg_gnu_0/abe/builds/destdir/x86_64-pc-linux-gnu/bin/arm-eabi-gcc
 
/home/tcwg-build/workspace/tcwg_gnu_0/abe/snapshots/gcc.git~master/gcc/testsuite/gcc.dg/enum-alias-1.c
 -fdiagnostics-plain-output -O2 -fno-short-enums -specs=rdimon.specs -lm -o 
./enum-alias-1.exe
/home/tcwg-build/workspace/tcwg_gnu_0/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/arm-eabi/15.0.0/../../../../arm-eabi/bin/ld:
 warning: /tmp/ccP9AJZd.o uses 32-bit enums yet the output is to use 
variable-size enums; use of enum values across objects may fail
⋮
FAIL: gcc.dg/enum-alias-1.c (test for excess errors)
Excess errors:
/home/tcwg-build/workspace/tcwg_gnu_0/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/arm-eabi/15.0.0/../../../../arm-eabi/bin/ld:
 warning: /tmp/ccP9AJZd.o uses 32-bit enums yet the output is to use 
variable-size enums; use of enum values across objects may fail

The same happens with gcc.dg/enum-alias-[234].c.

-- 
Thiago


Re: gcc: docs: Fix documentation of two hooks

2024-07-01 Thread Matthew Malcomson

Ping plus some extra people on Cc since I wasn't sure who to ask for review.
(Adding maintainers for `middle-end` plus Richard S).

N.b. I'd update the cover-letter to also mention that no existing 
implementation of `function_attribute_inlinable_p` uses "the current 
function" in any way.



On 4/8/24 11:34, Matthew Malcomson wrote:

The `function_attribute_inlinable_p` hook documentation described it
returning the value if it is OK to inline the provided fndecl into "the
current function".  AFAICS This hook is only called when
`current_function_decl` is the same as the `fndecl` argument that the
hook is given, hence asking whether `fndecl` can be inlined into "the
current function" doesn't make sense.
Update the documentation to match this understanding.

The `unspec_may_trap_p` documentation mentioned applying to either
`unspec` or `unspec_volatile`.  AFAICS this hook is only used for
`unspec` codes since c84a808e493a, so I removed the mention of
`unspec_volatile`.

gcc/ChangeLog:

* doc/tm.texi (function_attribute_inlinable_p,
unspec_may_trap_p): Update documentation.
* target.def (function_attribute_inlinable_p,
unspec_may_trap_p): Update documentation.

--
N.b. not entirely sure who to ask for review, went with docs maintainers, but
if that's incorrect please do redirect me.

### Attachment also inlined for ease of reply###


diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 
c8b8b126b2424b6552f824ba42ac329cfaf84d84..f0051f0ae1e9444d5d585135c90a68ca760c2fbd
 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10752,10 +10752,10 @@ attribute handlers.  So far this only affects the 
@var{noinit} and
  
  @deftypefn {Target Hook} bool TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P (const_tree @var{fndecl})

  @cindex inlining
-This target hook returns @code{true} if it is OK to inline @var{fndecl}
-into the current function, despite its having target-specific
-attributes, @code{false} otherwise.  By default, if a function has a
-target specific attribute attached to it, it will not be inlined.
+This target hook returns @code{false} if the target-specific attributes on
+@var{fndecl} always block it getting inlined, @code{true} otherwise.  By
+default, if a function has a target specific attribute attached to it, it
+will not be inlined.
  @end deftypefn
  
  @deftypefn {Target Hook} bool TARGET_OPTION_VALID_ATTRIBUTE_P (tree @var{fndecl}, tree @var{name}, tree @var{args}, int @var{flags})

@@ -12245,12 +12245,10 @@ allocation.
  @end deftypefn
  
  @deftypefn {Target Hook} int TARGET_UNSPEC_MAY_TRAP_P (const_rtx @var{x}, unsigned @var{flags})

-This target hook returns nonzero if @var{x}, an @code{unspec} or
-@code{unspec_volatile} operation, might cause a trap.  Targets can use
-this hook to enhance precision of analysis for @code{unspec} and
-@code{unspec_volatile} operations.  You may call @code{may_trap_p_1}
-to analyze inner elements of @var{x} in which case @var{flags} should be
-passed along.
+This target hook returns nonzero if @var{x}, an @code{unspec} might cause
+a trap.  Targets can use this hook to enhance precision of analysis for
+@code{unspec} operations.  You may call @code{may_trap_p_1} to analyze inner
+elements of @var{x} in which case @var{flags} should be passed along.
  @end deftypefn
  
  @deftypefn {Target Hook} void TARGET_SET_CURRENT_FUNCTION (tree @var{decl})

diff --git a/gcc/target.def b/gcc/target.def
index 
fdad7bbc93e2ad8aea30336d5cd4af67801e9c74..2b2a6c11807eff228788fae1cd1370e8971fbf3e
 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2314,10 +2314,10 @@ attribute handlers.  So far this only affects the 
@var{noinit} and\n\
  DEFHOOK
  (function_attribute_inlinable_p,
   "@cindex inlining\n\
-This target hook returns @code{true} if it is OK to inline @var{fndecl}\n\
-into the current function, despite its having target-specific\n\
-attributes, @code{false} otherwise.  By default, if a function has a\n\
-target specific attribute attached to it, it will not be inlined.",
+This target hook returns @code{false} if the target-specific attributes on\n\
+@var{fndecl} always block it getting inlined, @code{true} otherwise.  By\n\
+default, if a function has a target specific attribute attached to it, it\n\
+will not be inlined.",
   bool, (const_tree fndecl),
   hook_bool_const_tree_false)
  
@@ -4057,12 +4057,10 @@ allocation.",

 FLAGS has the same meaning as in rtlanal.cc: may_trap_p_1.  */
  DEFHOOK
  (unspec_may_trap_p,
- "This target hook returns nonzero if @var{x}, an @code{unspec} or\n\
-@code{unspec_volatile} operation, might cause a trap.  Targets can use\n\
-this hook to enhance precision of analysis for @code{unspec} and\n\
-@code{unspec_volatile} operations.  You may call @code{may_trap_p_1}\n\
-to analyze inner elements of @var{x} in which case @var{flags} should be\n\
-passed along.",
+ "This target hook returns nonzero if @var{x}, an @code{unspec} might cause\n\
+a

Re: [PATCH] c++: DR2627, Bit-fields and narrowing conversions [PR94058]

2024-07-01 Thread Jason Merrill

On 6/28/24 7:00 PM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


-- >8 --
This DR (https://cplusplus.github.io/CWG/issues/2627.html) says that
even if we are converting from an integer type or unscoped enumeration type
to an integer type that cannot represent all the values of the original
type, it's not narrowing if "the source is a bit-field whose width w is
less than that of its type (or, for an enumeration type, its underlying
type) and the target type can represent all the values of a hypothetical
extended integer type with width w and with the same signedness as the
original type".

DR 2627
PR c++/94058
PR c++/104392

gcc/cp/ChangeLog:

* typeck2.cc (check_narrowing): Don't warn if the conversion isn't
narrowing as per DR 2627.

gcc/testsuite/ChangeLog:

* g++.dg/DRs/dr2627.C: New test.
* g++.dg/cpp0x/Wnarrowing22.C: New test.
* g++.dg/cpp2a/spaceship-narrowing1.C: New test.
* g++.dg/cpp2a/spaceship-narrowing2.C: New test.
---
  gcc/cp/typeck2.cc | 12 +
  gcc/testsuite/g++.dg/DRs/dr2627.C | 13 +
  gcc/testsuite/g++.dg/cpp0x/Wnarrowing22.C | 49 +++
  .../g++.dg/cpp2a/spaceship-narrowing1.C   | 34 +
  .../g++.dg/cpp2a/spaceship-narrowing2.C   | 26 ++
  5 files changed, 134 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/DRs/dr2627.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wnarrowing22.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/spaceship-narrowing1.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/spaceship-narrowing2.C

diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index 7782f38da43..30a6fbe95c9 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1012,6 +1012,18 @@ check_narrowing (tree type, tree init, tsubst_flags_t 
complain,
if (TREE_CODE (ftype) == ENUMERAL_TYPE)
/* Check for narrowing based on the values of the enumeration. */
ftype = ENUM_UNDERLYING_TYPE (ftype);
+  /* Undo convert_bitfield_to_declared_type (STRIP_NOPS isn't enough).  */
+  tree op = init;
+  while (CONVERT_EXPR_P (op))
+   op = TREE_OPERAND (op, 0);
+  /* Core 2627 says that we shouldn't warn when "the source is a bit-field
+whose width w is less than that of its type (or, for an enumeration
+type, its underlying type) and the target type can represent all the
+values of a hypothetical extended integer type with width w and with
+the same signedness as the original type".  */
+  if (is_bitfield_expr_with_lowered_type (op)
+ && TYPE_PRECISION (TREE_TYPE (op)) < TYPE_PRECISION (ftype))
+   ftype = TREE_TYPE (op);
if ((tree_int_cst_lt (TYPE_MAX_VALUE (type),
TYPE_MAX_VALUE (ftype))
   || tree_int_cst_lt (TYPE_MIN_VALUE (ftype),
diff --git a/gcc/testsuite/g++.dg/DRs/dr2627.C 
b/gcc/testsuite/g++.dg/DRs/dr2627.C
new file mode 100644
index 000..fe7f28613ca
--- /dev/null
+++ b/gcc/testsuite/g++.dg/DRs/dr2627.C
@@ -0,0 +1,13 @@
+// DR 2627 - Bit-fields and narrowing conversions
+// { dg-do compile { target c++20 } }
+
+#include <compare>
+
+struct C {
+  long long i : 8;
+};
+
+void f() {
+  C x{1}, y{2};
+  x.i <=> y.i;
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/Wnarrowing22.C 
b/gcc/testsuite/g++.dg/cpp0x/Wnarrowing22.C
new file mode 100644
index 000..dd30451a7cc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/Wnarrowing22.C
@@ -0,0 +1,49 @@
+// DR 2627 - Bit-fields and narrowing conversions
+// PR c++/94058
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wno-error=narrowing" }
+
+using int64_t = __INT64_TYPE__;
+using int32_t = __INT32_TYPE__;
+
+struct A {
+  int64_t i1 : __CHAR_BIT__;
+  int64_t i2 : sizeof (int32_t) * __CHAR_BIT__ - 1;
+  int64_t i3 : sizeof (int32_t) * __CHAR_BIT__;
+  int64_t i4 : sizeof (int32_t) * __CHAR_BIT__ + 1;
+  int64_t i5 : sizeof (int64_t) * __CHAR_BIT__ - 1;
+  int64_t i6 : sizeof (int64_t) * __CHAR_BIT__;
+} a;
+
+int32_t i1{a.i1};
+int32_t i2{a.i2};
+int32_t i3{a.i3};
+int32_t i4{a.i4}; // { dg-warning "narrowing conversion" }
+int32_t i5{a.i5}; // { dg-warning "narrowing conversion" }
+int32_t i6{a.i6}; // { dg-warning "narrowing conversion" }
+
+struct B {
+  bool b1 : sizeof (bool) * __CHAR_BIT__;
+  bool b2 : sizeof (bool);
+} b;
+
+signed char b1{b.b1};
+signed char b2{b.b2};
+
+enum E : int64_t { E1 };
+
+struct C {
+  E e1 : __CHAR_BIT__;
+  E e2 : sizeof (int32_t) * __CHAR_BIT__ - 1;
+  E e3 : sizeof (int32_t) * __CHAR_BIT__;
+  E e4 : sizeof (int32_t) * __CHAR_BIT__ + 1;
+  E e5 : sizeof (int64_t) * __CHAR_BIT__ - 1;
+  E e6 : sizeof (int64_t) * __CHAR_BIT__;
+} c;
+
+int32_t e1{c.e1};
+int32_t e2{c.e2};
+int32_t e3{c.e3};
+int32_t e4{c.e4}; // { dg-warning "narrowing conversion" }
+int32_t e5{c.e5}; // { dg-warning "narrowing conversion" }
+int32_t e6{c.e6}; // { dg-warning "narrowing conversio

Re: [PATCH] c++: Relax too strict assert in stabilize_expr [PR111160]

2024-07-01 Thread Jason Merrill

On 6/26/24 3:00 PM, Simon Martin wrote:

The case in the ticket is an ICE on invalid due to an assert in stabilize_expr,
but the underlying issue can actually trigger on this *valid* code:

=== cut here ===
struct TheClass {
   TheClass() {}
   TheClass(volatile TheClass& t) {}
   TheClass operator=(volatile TheClass& t) volatile { return t; }
};
void the_func() {
   volatile TheClass x, y, z;
   (false ? x : y) = z;
}
=== cut here ===

The problem is that stabilize_expr asserts that it returns an expression
without TREE_SIDE_EFFECTS, which can't be if the involved type is volatile.

This patch relaxes the assert to accept having TREE_THIS_VOLATILE on the
returned expression.

Successfully tested on x86_64-pc-linux-gnu.


OK.


PR c++/111160

gcc/cp/ChangeLog:

* tree.cc (stabilize_expr): Stabilized expressions can have
TREE_SIDE_EFFECTS if they're volatile.

gcc/testsuite/ChangeLog:

* g++.dg/overload/error8.C: New test.
* g++.dg/overload/volatile2.C: New test.

---
  gcc/cp/tree.cc|  2 +-
  gcc/testsuite/g++.dg/overload/error8.C|  9 +
  gcc/testsuite/g++.dg/overload/volatile2.C | 12 
  3 files changed, 22 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/overload/error8.C
  create mode 100644 gcc/testsuite/g++.dg/overload/volatile2.C

diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index 28648c14c6d..dfd4a3a948b 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -5969,7 +5969,7 @@ stabilize_expr (tree exp, tree* initp)
  }
*initp = init_expr;
  
-  gcc_assert (!TREE_SIDE_EFFECTS (exp));

+  gcc_assert (!TREE_SIDE_EFFECTS (exp) || TREE_THIS_VOLATILE (exp));
return exp;
  }
  
diff --git a/gcc/testsuite/g++.dg/overload/error8.C b/gcc/testsuite/g++.dg/overload/error8.C

new file mode 100644
index 000..a7e745860e0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/overload/error8.C
@@ -0,0 +1,9 @@
+// PR c++/111160
+// { dg-do compile { target c++11 } }
+
+class TheClass {}; // { dg-error "discards|bind|discards|bind" }
+void the_func() {
+  TheClass x;
+  volatile TheClass y;
+  (false ? x : x) = y; // { dg-error "ambiguous|ambiguous" }
+}
diff --git a/gcc/testsuite/g++.dg/overload/volatile2.C 
b/gcc/testsuite/g++.dg/overload/volatile2.C
new file mode 100644
index 000..9f27357aed6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/overload/volatile2.C
@@ -0,0 +1,12 @@
+// PR c++/111160
+// { dg-do compile { target c++11 } }
+
+struct TheClass {
+  TheClass() {}
+  TheClass(volatile TheClass& t) {}
+  TheClass operator=(volatile TheClass& t) volatile { return t; }
+};
+void the_func() {
+  volatile TheClass x, y, z;
+  (false ? x : y) = z;
+}




Re: [PATCH] c++: ICE with computed gotos [PR115469]

2024-07-01 Thread Jason Merrill

On 6/26/24 6:04 PM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This is a low-prio crash on invalid code where we ICE on a VAR_DECL
with erroneous type.  I thought I'd try to avoid putting such decls
into ->names and ->names_in_scope but that sounds riskier than the
following cleanup.

PR c++/115469

gcc/cp/ChangeLog:

* decl.cc (decl_with_nontrivial_dtor_p): New.


This name doesn't suggest non-static variable to me.  Maybe 
automatic_var_with...?


While we're at it, we should also avoid complaining about thread-local 
by checking decl_storage_duration == dk_auto, since [stmt.dcl]/2 is 
specifically about automatic.



(poplevel_named_label_1): Use it.
(check_goto_1): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/label17.C: New test.
---
  gcc/cp/decl.cc | 19 +++
  gcc/testsuite/g++.dg/ext/label17.C | 18 ++
  2 files changed, 33 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/ext/label17.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 03deb1493a4..e5696079c28 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -514,6 +514,19 @@ level_for_consteval_if (cp_binding_level *b)
  && IF_STMT_CONSTEVAL_P (b->this_entity));
  }
  
+/* True if T is a non-static VAR_DECL that has a non-trivial destructor.  */

+
+static bool
+decl_with_nontrivial_dtor_p (const_tree t)
+{
+  if (error_operand_p (t))
+return false;
+
+  return (VAR_P (t)
+ && !TREE_STATIC (t)
+ && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (t)));
+}
+
  /* Update data for defined and undefined labels when leaving a scope.  */
  
  int

@@ -575,8 +588,7 @@ poplevel_named_label_1 (named_label_entry **slot, 
cp_binding_level *bl)
if (bl->kind == sk_catch)
  vec_safe_push (cg, get_identifier ("catch"));
for (tree d = use->names_in_scope; d; d = DECL_CHAIN (d))
- if (TREE_CODE (d) == VAR_DECL && !TREE_STATIC (d)
- && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (d)))
+ if (decl_with_nontrivial_dtor_p (d))
vec_safe_push (cg, d);
  }
  
@@ -4003,8 +4015,7 @@ check_goto_1 (named_label_entry *ent, bool computed)

  tree end = b == level ? names : NULL_TREE;
  for (tree d = b->names; d != end; d = DECL_CHAIN (d))
{
- if (TREE_CODE (d) == VAR_DECL && !TREE_STATIC (d)
- && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (d)))
+ if (decl_with_nontrivial_dtor_p (d))
{
  if (!identified)
{
diff --git a/gcc/testsuite/g++.dg/ext/label17.C 
b/gcc/testsuite/g++.dg/ext/label17.C
new file mode 100644
index 000..076ef1f798e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/label17.C
@@ -0,0 +1,18 @@
+// PR c++/115469
+// { dg-do compile { target indirect_jumps } }
+// { dg-options "" }
+
+void
+fn1 ()
+{
+  b = &&c;// { dg-error "not declared|not defined" }
+  goto *0;
+}
+
+void
+fn2 ()
+{
+c:
+  b = &&c;  // { dg-error "not declared" }
+  goto *0;
+}

base-commit: 0731985920cdeeeb028f03ddb8a7f035565c1594




Re: [PATCH] build: Fix "make install" for MinGW

2024-07-01 Thread Jason Merrill

On 6/30/24 5:09 PM, Lewis Hyatt wrote:

Hello-

I noticed this while trying to test another patch on Windows (using the
MSYS2 environment). Tested that it fixes the issue for x86_64-w64-mingw32
and doesn't affect anything for x86_64-pc-linux-gnu. It looks like the same
fix for C was applied back in r11-702. OK? Thanks...


OK.


-Lewis

-- >8 --

Since r8-4925, the "make install" recipe generates a path which can start
with "//", causing problems for some Windows environments. Fix by removing
the redundant slash.
---
  gcc/cp/Make-lang.in | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in
index 026cf8d7088..e792ea4ddf3 100644
--- a/gcc/cp/Make-lang.in
+++ b/gcc/cp/Make-lang.in
@@ -344,7 +344,7 @@ c++.install-plugin: installdirs
  # Install import library.
  ifeq ($(plugin_implib),yes)
$(mkinstalldirs) $(DESTDIR)$(plugin_resourcesdir)
-   $(INSTALL_DATA) cc1plus$(exeext).a 
$(DESTDIR)/$(plugin_resourcesdir)/cc1plus$(exeext).a
+   $(INSTALL_DATA) cc1plus$(exeext).a 
$(DESTDIR)$(plugin_resourcesdir)/cc1plus$(exeext).a
  endif
  
  c++.uninstall:






Re: [x86 PATCH] Add additional variant of bswaphisi2_lowpart peephole2.

2024-07-01 Thread Uros Bizjak
On Mon, Jul 1, 2024 at 3:20 PM Roger Sayle  wrote:
>
>
> This patch adds an additional variation of the peephole2 used to convert
> bswaphisi2_lowpart into rotlhi3_1_slp, which converts xchgb %ah,%al into
> rotw if the flags register isn't live.  The motivating example is:
>
> void ext(int x);
> void foo(int x)
> {
>   ext((x&~0xffff)|((x>>8)&0xff)|((x&0xff)<<8));
> }
>
> where GCC with -O2 currently produces:
>
> foo:movl%edi, %eax
> rolw$8, %ax
> movl%eax, %edi
> jmp ext
>
> The issue is that the original xchgb (bswaphisi2_lowpart) can only be
> performed in "Q" registers that allow the %?h register to be used, so
> reload generates the above two movl.  However, it's later in peephole2
> where we see that CC_FLAGS can be clobbered, so we can use a rotate word,
> which is more forgiving with register allocations.  With the additional
> peephole2 proposed here, we now generate:
>
> foo:rolw$8, %di
> jmp ext
>
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2024-07-01  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386.md (bswaphisi2_lowpart peephole2): New
> peephole2 variant to eliminate register shuffling.
>
> gcc/testsuite/ChangeLog
> * gcc.target/i386/xchg-4.c: New test case.

OK.

Thanks,
Uros.

>
>
> Thanks again,
> Roger
> --
>


Re: [PATCH] c++: unresolved overload with comma op [PR115430]

2024-07-01 Thread Jason Merrill

On 6/26/24 11:42 AM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


-- >8 --
This works:

   template<typename T>
   int Func(T);
   typedef int (*funcptrtype)(int);
   funcptrtype fp0 = &Func<int>;

but this doesn't:

   funcptrtype fp2 = (0, &Func<int>);

because we only call resolve_nondeduced_context on the LHS (via
convert_to_void) but not on the RHS, so cp_build_compound_expr's
type_unknown_p check issues an error.

PR c++/115430

gcc/cp/ChangeLog:

* typeck.cc (cp_build_compound_expr): Call resolve_nondeduced_context
on RHS.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept41.C: Remove dg-error.
* g++.dg/overload/addr3.C: New test.
---
  gcc/cp/typeck.cc|  4 +++-
  gcc/testsuite/g++.dg/cpp0x/noexcept41.C |  2 +-
  gcc/testsuite/g++.dg/overload/addr3.C   | 24 
  3 files changed, 28 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/overload/addr3.C

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 50f48768a95..55ee867d329 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -8157,6 +8157,8 @@ cp_build_compound_expr (tree lhs, tree rhs, 
tsubst_flags_t complain)
return rhs;
  }
  
+  rhs = resolve_nondeduced_context (rhs, complain);

+
if (type_unknown_p (rhs))
  {
if (complain & tf_error)
@@ -8164,7 +8166,7 @@ cp_build_compound_expr (tree lhs, tree rhs, 
tsubst_flags_t complain)
  "no context to resolve type of %qE", rhs);
return error_mark_node;
  }
-
+
tree ret = build2 (COMPOUND_EXPR, TREE_TYPE (rhs), lhs, rhs);
if (eptype)
  ret = build1 (EXCESS_PRECISION_EXPR, eptype, ret);
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept41.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept41.C
index 4cd3d8d7854..7c65cebb618 100644
--- a/gcc/testsuite/g++.dg/cpp0x/noexcept41.C
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept41.C
@@ -9,4 +9,4 @@ template  struct a {
  };
  template  auto f(d &&, c &&) -> decltype(declval);
  struct e {};
-static_assert((e{}, declval>),""); // { dg-error "no context to resolve 
type" }
+static_assert((e{}, declval>),"");
diff --git a/gcc/testsuite/g++.dg/overload/addr3.C 
b/gcc/testsuite/g++.dg/overload/addr3.C
new file mode 100644
index 000..b203326de32
--- /dev/null
+++ b/gcc/testsuite/g++.dg/overload/addr3.C
@@ -0,0 +1,24 @@
+// PR c++/115430
+// { dg-do compile }
+
+template<typename T>
+int Func(T);
+typedef int (*funcptrtype)(int);
+funcptrtype fp0 = &Func<int>;
+funcptrtype fp1 = +&Func<int>;
+funcptrtype fp2 = (0, &Func<int>);
+funcptrtype fp3 = (0, +&Func<int>);
+funcptrtype fp4 = (0, 1, &Func<int>);
+
+template
+void
+g ()
+{
+  funcptrtype fp5 = (0, &Func);
+}
+
+void
+f ()
+{
+  g();
+}

base-commit: 47b68cda2c4afe32e84c5f18da0196c39e5e0edf




Re: gcc: docs: Fix documentation of two hooks

2024-07-01 Thread Sandra Loosemore

On 7/1/24 11:39, Matthew Malcomson wrote:
Ping plus some extra people on Cc since I wasn't sure who to ask for 
review.

(Adding maintainers for `middle-end` plus Richard S).



gcc/ChangeLog:

* doc/tm.texi (function_attribute_inlinable_p,
unspec_may_trap_p): Update documentation.
* target.def (function_attribute_inlinable_p,
unspec_may_trap_p): Update documentation.



I have no particular knowledge of these target hooks, but doc/tm.texi is 
a generated file that is created by sucking the doc strings in 
target.def into the skeleton in doc/tm.texi.in.  You should not be 
editing it directly to update for changed target hooks, just 
regenerating it.  If that's what you already did, your ChangeLog should 
just indicate that you did so instead of mentioning specific things you 
changed, like:


* doc/tm.texi: Regenerated.

-Sandra


[PATCH 1/2]middle-end: fix wide_int_constant_multiple_p when VAL and DIV are 0. [PR114932]

2024-07-01 Thread Tamar Christina
Hi All,

wide_int_constant_multiple_p checks, for two tree expressions a and b, whether
there is a multiplier c such that a == b * c.

However, the code currently concludes that no such c exists when a = 0 and
b = 0, which is of course wrong.

This fixes it and also fixes the comment.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu -m32, -m64 and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/114932
* tree-affine.cc (wide_int_constant_multiple_p): Support 0 and 0 being
multiples.

---
diff --git a/gcc/tree-affine.cc b/gcc/tree-affine.cc
index 
d6309c4390362b680f0aa97a41fac3281ade66fd..bfea0fe826a6affa0ace154e3ca38c9ef632fcba
 100644
--- a/gcc/tree-affine.cc
+++ b/gcc/tree-affine.cc
@@ -880,11 +880,10 @@ free_affine_expand_cache (hash_map **cache)
   *cache = NULL;
 }
 
-/* If VAL != CST * DIV for any constant CST, returns false.
-   Otherwise, if *MULT_SET is true, additionally compares CST and MULT,
-   and if they are different, returns false.  Finally, if neither of these
-   two cases occur, true is returned, and CST is stored to MULT and MULT_SET
-   is set to true.  */
+/* If VAL == CST * DIV for any constant CST, returns true.
+   and if *MULT_SET is true, additionally compares CST and MULT
+   and if they are different, returns false.  If true is returned, CST is
+   stored to MULT and MULT_SET is set to true.  */
 
 static bool
 wide_int_constant_multiple_p (const poly_widest_int &val,
@@ -895,6 +894,12 @@ wide_int_constant_multiple_p (const poly_widest_int &val,
 
   if (known_eq (val, 0))
 {
+  if (maybe_eq (div, 0))
+   {
+ *mult = 1;
+ return true;
+   }
+
   if (*mult_set && maybe_ne (*mult, 0))
return false;
   *mult_set = true;




-- 
diff --git a/gcc/tree-affine.cc b/gcc/tree-affine.cc
index d6309c4390362b680f0aa97a41fac3281ade66fd..bfea0fe826a6affa0ace154e3ca38c9ef632fcba 100644
--- a/gcc/tree-affine.cc
+++ b/gcc/tree-affine.cc
@@ -880,11 +880,10 @@ free_affine_expand_cache (hash_map **cache)
   *cache = NULL;
 }
 
-/* If VAL != CST * DIV for any constant CST, returns false.
-   Otherwise, if *MULT_SET is true, additionally compares CST and MULT,
-   and if they are different, returns false.  Finally, if neither of these
-   two cases occur, true is returned, and CST is stored to MULT and MULT_SET
-   is set to true.  */
+/* If VAL == CST * DIV for any constant CST, returns true.
+   and if *MULT_SET is true, additionally compares CST and MULT
+   and if they are different, returns false.  If true is returned, CST is
+   stored to MULT and MULT_SET is set to true.  */
 
 static bool
 wide_int_constant_multiple_p (const poly_widest_int &val,
@@ -895,6 +894,12 @@ wide_int_constant_multiple_p (const poly_widest_int &val,
 
   if (known_eq (val, 0))
 {
+  if (maybe_eq (div, 0))
+	{
+	  *mult = 1;
+	  return true;
+	}
+
   if (*mult_set && maybe_ne (*mult, 0))
 	return false;
   *mult_set = true;





[PATCH 2/2]middle-end: replace constant_multiple_of with aff_combination_constant_multiple_p [PR114932]

2024-07-01 Thread Tamar Christina
Hi All,

The current implementation of constant_multiple_of is doing a more limited
version of aff_combination_constant_multiple_p.

The only non-debug usage of constant_multiple_of will proceed with the values
as affine trees.  There is scope for further optimization here, namely I believe
that if constant_multiple_of returns the aff_tree after the conversion then
get_computation_aff_1 can use it instead of manually creating the aff_tree.

However I think it makes sense to first commit this smaller change and then
incrementally change things.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu -m32, -m64 and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/114932
* tree-ssa-loop-ivopts.cc (constant_multiple_of): Use
aff_combination_constant_multiple_p instead.

---
diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index 
7cae5bdefea3648ddde238a357af527a934a569e..c3218a3e8eedbb8d0a7f14c01eeb069cb6024c29
 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -2146,65 +2146,15 @@ idx_record_use (tree base, tree *idx,
 static bool
 constant_multiple_of (tree top, tree bot, widest_int *mul)
 {
-  tree mby;
-  enum tree_code code;
-  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
-  widest_int res, p0, p1;
-
-  STRIP_NOPS (top);
-  STRIP_NOPS (bot);
-
-  if (operand_equal_p (top, bot, 0))
-{
-  *mul = 1;
-  return true;
-}
-
-  code = TREE_CODE (top);
-  switch (code)
-{
-case MULT_EXPR:
-  mby = TREE_OPERAND (top, 1);
-  if (TREE_CODE (mby) != INTEGER_CST)
-   return false;
-
-  if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
-   return false;
-
-  *mul = wi::sext (res * wi::to_widest (mby), precision);
-  return true;
-
-case PLUS_EXPR:
-case MINUS_EXPR:
-  if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
- || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
-   return false;
-
-  if (code == MINUS_EXPR)
-   p1 = -p1;
-  *mul = wi::sext (p0 + p1, precision);
-  return true;
-
-case INTEGER_CST:
-  if (TREE_CODE (bot) != INTEGER_CST)
-   return false;
-
-  p0 = widest_int::from (wi::to_wide (top), SIGNED);
-  p1 = widest_int::from (wi::to_wide (bot), SIGNED);
-  if (p1 == 0)
-   return false;
-  *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
-  return res == 0;
-
-default:
-  if (POLY_INT_CST_P (top)
- && POLY_INT_CST_P (bot)
- && constant_multiple_p (wi::to_poly_widest (top),
- wi::to_poly_widest (bot), mul))
-   return true;
+  aff_tree aff_top, aff_bot;
+  tree_to_aff_combination (top, TREE_TYPE (top), &aff_top);
+  tree_to_aff_combination (bot, TREE_TYPE (bot), &aff_bot);
+  poly_widest_int poly_mul;
+  if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
+  && poly_mul.is_constant (mul))
+return true;
 
-  return false;
-}
+  return false;
 }
 
 /* Return true if memory reference REF with step STEP may be unaligned.  */




-- 
diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index 7cae5bdefea3648ddde238a357af527a934a569e..c3218a3e8eedbb8d0a7f14c01eeb069cb6024c29 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -2146,65 +2146,15 @@ idx_record_use (tree base, tree *idx,
 static bool
 constant_multiple_of (tree top, tree bot, widest_int *mul)
 {
-  tree mby;
-  enum tree_code code;
-  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
-  widest_int res, p0, p1;
-
-  STRIP_NOPS (top);
-  STRIP_NOPS (bot);
-
-  if (operand_equal_p (top, bot, 0))
-{
-  *mul = 1;
-  return true;
-}
-
-  code = TREE_CODE (top);
-  switch (code)
-{
-case MULT_EXPR:
-  mby = TREE_OPERAND (top, 1);
-  if (TREE_CODE (mby) != INTEGER_CST)
-	return false;
-
-  if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
-	return false;
-
-  *mul = wi::sext (res * wi::to_widest (mby), precision);
-  return true;
-
-case PLUS_EXPR:
-case MINUS_EXPR:
-  if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
-	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
-	return false;
-
-  if (code == MINUS_EXPR)
-	p1 = -p1;
-  *mul = wi::sext (p0 + p1, precision);
-  return true;
-
-case INTEGER_CST:
-  if (TREE_CODE (bot) != INTEGER_CST)
-	return false;
-
-  p0 = widest_int::from (wi::to_wide (top), SIGNED);
-  p1 = widest_int::from (wi::to_wide (bot), SIGNED);
-  if (p1 == 0)
-	return false;
-  *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
-  return res == 0;
-
-default:
-  if (POLY_INT_CST_P (top)
-	  && POLY_INT_CST_P (bot)
-	  && constant_multiple_p (wi::to_poly_widest (top),
-  wi::to_poly_widest (bot), mul))
-	return true;
+  aff_tree aff_to

Re: [RFC/PATCH] isel: Fold more in gimple_expand_vec_cond_expr with andc/iorc

2024-07-01 Thread Segher Boessenkool
On Mon, Jul 01, 2024 at 04:36:44PM +0200, Richard Biener wrote:
> On Mon, Jul 1, 2024 at 8:17 AM Kewen.Lin  wrote:
> > As PR115659 shows, assuming c = x CMP y, there are some
> > folding chances for patterns r = c ? 0/z : z/-1:
> >   - For r = c ? 0 : z, it can be folded into r = ~c & z.
> >   - For r = c ? z : -1, it can be folded into r = ~c | z.

(!c instead of ~c, right?)

> > But BIT_AND/BIT_IOR applied on one BIT_NOT operand is a
> > compound operation, I'm not sure if each target with
> > vector capability have a single vector instruction for it,
> > if no, it's arguable to consider it always beats vector
> > selection (like vector constant gets hoisted or combined
> > and selection has same latency as normal logical operation).
> > So IMHO we probably need to query target with new optabs.
> > So this patch is to introduce new optabs andc, iorc and its
> > corresponding internal functions BIT_{ANDC,IORC} (looking
> > for suggestion for naming optabs and ifns), and if targets
> > defines such optabs for vector modes, it means targets
> > support these hardware insns and should be not worse than
> > vector selection.  btw, the rs6000 changes are meant to
> > give an example for a target supporting andc/iorc.
> >
> > Does this sound reasonable?
> 
> I think it's reasonable to have andc - there are quite some CPUs
> that have this op on GPRs as well I think, called andn (but I don't
> want to get into bike-shedding).

The usual names are and for a & b, andc for a & ~b, andc1 for ~a & b,
andcc for ~a & ~b, and an "n" in front of everything to complement the
result.

> A corresponding iorc is then

Sure.  A full complement of *and* insns is equivalent to a full
complement of *or* insns, of course.

> a natural extension (likewise xorc).

xor and nxor (which is called "eqv" on powerpc) are all that can exist
of course :-)

> AVX512 has a very powerful
> vector ternlog (but no scalar andn).

We have that as well, "xxeval", a Power ISA v3.1 insn.  It just has a
full 8-bit logic table as part of the opcode.  But to fit that many bits
it is a prefixed insn.

For a long time (well, since the late 1990's) we have had a full complement of
two-op logic insns for vectors, and since VSX exists (2010 or so?) we have
had them for all 64 vector regs.  And it always was there for
integer regs, and for condition bits as well.

The two-op things are cheaper than the generic three-op thing, if that
is all that is needed.

> I was surprised to not see an existing optab for andn.

For most RTL stuff we can deal with it just fine using existing
define_insn etc. stuff.  I have no idea if any of this is harder in
Gimple?

> So OK from my side in case there are no negative comments or
> bikeshedding on the name.  I can't approve the rs6000 changes
> though.

But I can :-)  I'll reply to just that.  Thanks for handling this!


Segher


RE: [PATCH 1/2]middle-end: fix wide_int_constant_multiple_p when VAL and DIV are 0. [PR114932]

2024-07-01 Thread Tamar Christina
> -Original Message-
> From: Tamar Christina 
> Sent: Monday, July 1, 2024 9:14 PM
> To: gcc-patches@gcc.gnu.org
> Cc: nd ; rguent...@suse.de; j...@ventanamicro.com
> Subject: [PATCH 1/2]middle-end: fix wide_int_constant_multiple_p when VAL and
> DIV are 0. [PR114932]
> 
> Hi All,
> 
> wide_int_constant_multiple_p tries to check if for two tree expressions a and 
> b
> that there is a multiplier which makes a == b * c.
> 
> This code however seems to think that there's no c where a=0 and b=0 are equal
> which is of course wrong.
> 
> This fixes it and also fixes the comment.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-pc-linux-gnu -m32, -m64 and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/114932
>   * tree-affine.cc (wide_int_constant_multiple_p): Support 0 and 0 being
>   multiples.
> 
> ---
> diff --git a/gcc/tree-affine.cc b/gcc/tree-affine.cc
> index
> d6309c4390362b680f0aa97a41fac3281ade66fd..bfea0fe826a6affa0ace154e3ca
> 38c9ef632fcba 100644
> --- a/gcc/tree-affine.cc
> +++ b/gcc/tree-affine.cc
> @@ -880,11 +880,10 @@ free_affine_expand_cache (hash_map name_expansion *> **cache)
>*cache = NULL;
>  }
> 
> -/* If VAL != CST * DIV for any constant CST, returns false.
> -   Otherwise, if *MULT_SET is true, additionally compares CST and MULT,
> -   and if they are different, returns false.  Finally, if neither of these
> -   two cases occur, true is returned, and CST is stored to MULT and MULT_SET
> -   is set to true.  */
> +/* If VAL == CST * DIV for any constant CST, returns true.
> +   and if *MULT_SET is true, additionally compares CST and MULT
> +   and if they are different, returns false.  If true is returned, CST is
> +   stored to MULT and MULT_SET is set to true.  */
> 
>  static bool
>  wide_int_constant_multiple_p (const poly_widest_int &val,
> @@ -895,6 +894,12 @@ wide_int_constant_multiple_p (const poly_widest_int
> &val,
> 
>if (known_eq (val, 0))
>  {
> +  if (maybe_eq (div, 0))
> + {
> +   *mult = 1;
> +   return true;
> + }
> +

Note, I also tested known_eq here, and also no regression on what I can test.
I picked maybe_eq since that's what the lines after this one tests.

I'm not sure I fully understand why one tests known and the other maybe.  It
seems to me that both should test known.  But I tested both, so whichever one
is felt to be more correct I can commit if OK.

Thanks,
Tamar

>if (*mult_set && maybe_ne (*mult, 0))
>   return false;
>*mult_set = true;
> 
> 
> 
> 
> --


Re: [RFC/PATCH] isel: Fold more in gimple_expand_vec_cond_expr with andc/iorc

2024-07-01 Thread Segher Boessenkool
Hi!

On Mon, Jul 01, 2024 at 02:17:33PM +0800, Kewen.Lin wrote:
>   * config/rs6000/rs6000-builtins.def: Update some bif expanders by
>   replacing orc3 with iorc3.
>   * config/rs6000/rs6000-string.cc (expand_cmp_vec_sequence): Update gen
>   function by replacing orc3 with iorc3.
>   * config/rs6000/rs6000.md (orc3): Rename to ...
>   (iorc3): ... this.

Okido.  Okay for trunk and all backports you may want, thanks!  (For the
rs6000 parts ofc).

The name for the instruction patterns was the instruction mnemonic used,
but now it becomes the general RTL name for it.  That is fine, that is
what we do in many other places already.  It is clear what is meant no
matter what :-)


Segher


[PATCH v2] c++: ICE with computed gotos [PR115469]

2024-07-01 Thread Marek Polacek
On Mon, Jul 01, 2024 at 02:44:56PM -0400, Jason Merrill wrote:
> On 6/26/24 6:04 PM, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > This is a low-prio crash on invalid code where we ICE on a VAR_DECL
> > with erroneous type.  I thought I'd try to avoid putting such decls
> > into ->names and ->names_in_scope but that sounds riskier than the
> > following cleanup.
> > 
> > PR c++/115469
> > 
> > gcc/cp/ChangeLog:
> > 
> > * decl.cc (decl_with_nontrivial_dtor_p): New.
> 
> This name doesn't suggest non-static variable to me.  Maybe
> automatic_var_with...?

Sounds good.

> While we're at it, we should also avoid complaining about thread-local by
> checking decl_storage_duration == dk_auto, since [stmt.dcl]/2 is
> specifically about automatic.

Done here.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This is a low-prio crash on invalid code where we ICE on a VAR_DECL
with erroneous type.  I thought I'd try to avoid putting such decls
into ->names and ->names_in_scope but that sounds riskier than the
following cleanup.

PR c++/115469

gcc/cp/ChangeLog:

* decl.cc (automatic_var_with_nontrivial_dtor_p): New.
(poplevel_named_label_1): Use it.
(check_goto_1): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/label17.C: New test.
---
 gcc/cp/decl.cc | 21 +
 gcc/testsuite/g++.dg/ext/label17.C | 18 ++
 2 files changed, 35 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/label17.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 03deb1493a4..d439b04bfa7 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -514,6 +514,21 @@ level_for_consteval_if (cp_binding_level *b)
  && IF_STMT_CONSTEVAL_P (b->this_entity));
 }
 
+/* True if T is a non-static VAR_DECL that has a non-trivial destructor.
+   See [stmt.dcl]/2.  */
+
+static bool
+automatic_var_with_nontrivial_dtor_p (const_tree t)
+{
+  if (error_operand_p (t))
+return false;
+
+  return (VAR_P (t)
+ && !TREE_STATIC (t)
+ && decl_storage_duration (CONST_CAST_TREE (t)) == dk_auto
+ && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (t)));
+}
+
 /* Update data for defined and undefined labels when leaving a scope.  */
 
 int
@@ -575,8 +590,7 @@ poplevel_named_label_1 (named_label_entry **slot, 
cp_binding_level *bl)
if (bl->kind == sk_catch)
  vec_safe_push (cg, get_identifier ("catch"));
for (tree d = use->names_in_scope; d; d = DECL_CHAIN (d))
- if (TREE_CODE (d) == VAR_DECL && !TREE_STATIC (d)
- && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (d)))
+ if (automatic_var_with_nontrivial_dtor_p (d))
vec_safe_push (cg, d);
  }
 
@@ -4003,8 +4017,7 @@ check_goto_1 (named_label_entry *ent, bool computed)
  tree end = b == level ? names : NULL_TREE;
  for (tree d = b->names; d != end; d = DECL_CHAIN (d))
{
- if (TREE_CODE (d) == VAR_DECL && !TREE_STATIC (d)
- && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (d)))
+ if (automatic_var_with_nontrivial_dtor_p (d))
{
  if (!identified)
{
diff --git a/gcc/testsuite/g++.dg/ext/label17.C 
b/gcc/testsuite/g++.dg/ext/label17.C
new file mode 100644
index 000..076ef1f798e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/label17.C
@@ -0,0 +1,18 @@
+// PR c++/115469
+// { dg-do compile { target indirect_jumps } }
+// { dg-options "" }
+
+void
+fn1 ()
+{
+  b = &&c;// { dg-error "not declared|not defined" }
+  goto *0;
+}
+
+void
+fn2 ()
+{
+c:
+  b = &&c;  // { dg-error "not declared" }
+  goto *0;
+}

base-commit: c847dcf94499da62e5a28921b404e6e561645d99
-- 
2.45.2



[Patch, fortran] PR102689 - Segfault with RESHAPE of CLASS as actual argument

2024-07-01 Thread Paul Richard Thomas
Hi All,

This is one of those PRs where one thing led to another. I think that
the patch is pretty complete and, while apparently quite heavy, is more or
less self explanatory through comments and the ChangeLog.

The first testcase concentrates on reshape in various guises, while the
second deals with all the other affected transformational intrinsic
functions. In the first, most of the test statements are factored out into
their own subroutines in order to expose the code generated for each. This
was essential for the debugging but can be undone if preferred.

Regtests just fine - OK for mainline?

Paul


Change.Logs
Description: Binary data
diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 19d69aec9c0..3926b42fcd1 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -1301,10 +1301,13 @@ get_array_ref_dim_for_loop_dim (gfc_ss *ss, int loop_dim)
is a class expression.  */
 
 static tree
-get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype)
+get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype,
+			gfc_ss **fcnss)
 {
+  gfc_ss *loop_ss = ss->loop->ss;
   gfc_ss *lhs_ss;
   gfc_ss *rhs_ss;
+  gfc_ss *fcn_ss = NULL;
   tree tmp;
   tree tmp2;
   tree vptr;
@@ -1313,11 +1316,13 @@ get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype)
   bool unlimited_rhs = false;
   bool unlimited_lhs = false;
   bool rhs_function = false;
+  bool unlimited_arg1 = false;
   gfc_symbol *vtab;
+  tree cntnr = NULL_TREE;
 
   /* The second element in the loop chain contains the source for the
  temporary; ie. the rhs of the assignment.  */
-  rhs_ss = ss->loop->ss->loop_chain;
+  rhs_ss = loop_ss->loop_chain;
 
   if (rhs_ss != gfc_ss_terminator
   && rhs_ss->info
@@ -1335,19 +1340,49 @@ get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype)
 	rhs_function = true;
 }
 
+  /* Usually, ss points to the function. When the function call is an actual
+ argument, it is instead rhs_ss. */
+  *fcnss = fcn_ss = rhs_function ? rhs_ss : ss;
+
+  /* If this is a transformational function with a class result, the info
+ class_container field points to the class container of arg1.  */
+  if (rhs_class_expr != NULL_TREE
+  && fcn_ss->info && fcn_ss->info->expr
+  && fcn_ss->info->expr->expr_type == EXPR_FUNCTION
+  && fcn_ss->info->expr->value.function.isym
+  && fcn_ss->info->expr->value.function.isym->transformational)
+{
+  cntnr = ss->info->class_container;
+  unlimited_arg1
+	   = UNLIMITED_POLY (fcn_ss->info->expr->value.function.actual->expr);
+}
+
   /* For an assignment the lhs is the next element in the loop chain.
  If we have a class rhs, this had better be a class variable
- expression!  */
+ expression!  Otherwise, the class container from arg1 can be used
+ to set the vptr and len fields of the result class container.  */
   lhs_ss = rhs_ss->loop_chain;
-  if (lhs_ss != gfc_ss_terminator
-  && lhs_ss->info
-  && lhs_ss->info->expr
+  if (lhs_ss && lhs_ss != gfc_ss_terminator
+  && lhs_ss->info && lhs_ss->info->expr
   && lhs_ss->info->expr->expr_type ==EXPR_VARIABLE
   && lhs_ss->info->expr->ts.type == BT_CLASS)
 {
   tmp = lhs_ss->info->data.array.descriptor;
   unlimited_lhs = UNLIMITED_POLY (rhs_ss->info->expr);
 }
+  else if (cntnr != NULL_TREE)
+{
+  tmp = gfc_class_vptr_get (rhs_class_expr);
+  gfc_add_modify (pre, tmp, fold_convert (TREE_TYPE (tmp),
+	  gfc_class_vptr_get (cntnr)));
+  if (unlimited_rhs)
+	{
+	  tmp = gfc_class_len_get (rhs_class_expr);
+	  if (unlimited_arg1)
+	gfc_add_modify (pre, tmp, gfc_class_len_get (cntnr));
+	}
+  tmp = NULL_TREE;
+}
   else
 tmp = NULL_TREE;
 
@@ -1369,11 +1404,9 @@ get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype)
 gfc_class_vptr_get (rhs_class_expr)));
   if (unlimited_lhs)
 	{
+	  gcc_assert (unlimited_rhs);
 	  tmp = gfc_class_len_get (lhs_class_expr);
-	  if (unlimited_rhs)
-	tmp2 = gfc_class_len_get (rhs_class_expr);
-	  else
-	tmp2 = build_int_cst (TREE_TYPE (tmp), 0);
+	  tmp2 = gfc_class_len_get (rhs_class_expr);
 	  gfc_add_modify (pre, tmp, tmp2);
 	}
 
@@ -1383,7 +1416,7 @@ get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype)
 	  gfc_conv_descriptor_offset_set (pre, tmp, gfc_index_zero_node);
 	}
 }
-  else
+  else if (rhs_ss->info->data.array.descriptor)
{
   /* lhs is class and rhs is intrinsic or derived type.  */
   *eltype = TREE_TYPE (rhs_ss->info->data.array.descriptor);
@@ -1452,6 +1485,7 @@ gfc_trans_create_temp_array (stmtblock_t * pre, stmtblock_t * post, gfc_ss * ss,
   tree or_expr;
   tree elemsize;
   tree class_expr = NULL_TREE;
+  gfc_ss *fcn_ss = NULL;
   int n, dim, tmp_dim;
   int total_dim = 0;
 
@@ -1471,7 +1505,7 @@ gfc_trans_create_temp_array (stmtblock_t * pre, stmtblock_t * post, gfc_ss * ss,
  The descriptor can b

[PATCH 1/4] Small optimization for complex addition, real/imag parts the same

2024-07-01 Thread Andrew Pinski
This is just a small optimization for the case where the real and imag
parts are the same when lowering complex addition/subtraction. We only
need to do the addition once when the real and imag parts are the same (on
both sides of the operator). This gets done later on by FRE/PRE/DOM but
having it done sooner allows the cabs lowering to remove the sqrt and
just change it to a multiply by a constant.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* tree-complex.cc (expand_complex_addition): If both
operands have the same real and imag parts, only
do the addition once.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/complex-8.c: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/testsuite/gcc.dg/tree-ssa/complex-8.c | 12 
 gcc/tree-complex.cc   |  7 ++-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/complex-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/complex-8.c
new file mode 100644
index 000..a9636ff9e9a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-cplxlower1-raw" } */
+
+_Complex double f(double a, double c)
+{
+  _Complex double d = __builtin_complex (a, a);
+  d+=__builtin_complex(c, c);
+  return d;
+}
+
+/* There should only be one plus as (a+c) is still (a+c) */
+/* { dg-final { scan-tree-dump-times "plus_expr, " 1 "cplxlower1" } } */
diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index 8a879acffca..dfebec18ec3 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -984,7 +984,12 @@ expand_complex_addition (gimple_stmt_iterator *gsi, tree 
inner_type,
 case PAIR (VARYING, VARYING):
 general:
   rr = gimple_build (&stmts, loc, code, inner_type, ar, br);
-  ri = gimple_build (&stmts, loc, code, inner_type, ai, bi);
+  /* (a+ai) + (b+bi) -> (a+b)+(a+b)i
+ small optimization to remove one new statement. */
+  if (operand_equal_p (ar, ai) && operand_equal_p (br, bi))
+   ri = rr;
+  else
+   ri = gimple_build (&stmts, loc, code, inner_type, ai, bi);
   break;
 
 default:
-- 
2.43.0



[PATCH 0/4] Some improvements to complex lowering (cabs related)

2024-07-01 Thread Andrew Pinski
This patch series includes some improvements to complex lowering,
all related to cabs.
History of the cabs folding:
cabs folding was originally added in builtins.c (GCC 4.3,
r0-78875-gd1ad84c20452e6).
The expansion to `sqrt(r*r+i*i)` was added to the cse_sincos pass in GCC 4.7
(r0-109444-gd7e2a1c13835e7) to fix an LTO issue where `r*r` would be converted
into `pow(r, 2.0)` after the cse_sincos pass (which at the time also did pow
expansion).
The other cabs folding for the constant/same value optimization was moved
to match in GCC 6 (r6-4111-gabcc43f5323869). This series moves all possible
cabs expansion/simplifications to complex lowering, which already has a decent
lattice and folding, which allows `cabs(x+xI)`, `cabs(x+0.0I)` and
`cabs(0.0+xI)` to be detected.

Andrew Pinski (4):
  Small optimization for complex addition, real/imag parts the same
  Move cabs expansion from powcabs to complex lowering [PR115710]
  Add some optimizations to gimple_expand_builtin_cabs
  Rename expand_powcabs pass to expand_pow

 gcc/passes.def  |   2 +-
 gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c |   4 +-
 gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c  |  14 +++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c  |  13 +++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c  |  24 +
 gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c  |  16 +++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c  |  22 
 gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c  |  16 +++
 gcc/testsuite/gcc.dg/tree-ssa/complex-8.c   |  12 +++
 gcc/testsuite/gfortran.dg/vect/pr115710.f90 |  18 
 gcc/timevar.def |   2 +-
 gcc/tree-complex.cc | 109 +++-
 gcc/tree-pass.h |   2 +-
 gcc/tree-ssa-math-opts.cc   |  91 +++-
 14 files changed, 258 insertions(+), 87 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/complex-8.c
 create mode 100644 gcc/testsuite/gfortran.dg/vect/pr115710.f90

-- 
2.43.0


[PATCH 2/4] Move cabs expansion from powcabs to complex lowering [PR115710]

2024-07-01 Thread Andrew Pinski
Expanding cabs in powcabs might be too late as forwprop might
recombine the load from memory with the complex expr. Moving it
instead to complex lowering allows us to directly use the real/imag
components from the loads. This allows for vectorization too.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115710

gcc/ChangeLog:

* tree-complex.cc (init_dont_simulate_again): Handle CABS.
(gimple_expand_builtin_cabs): New function, moved mostly
from tree-ssa-math-opts.cc.
(expand_complex_operations_1): Call gimple_expand_builtin_cabs.
* tree-ssa-math-opts.cc (gimple_expand_builtin_cabs): Remove.
(build_and_insert_binop): Remove.
(pass_data_expand_powcabs): Update comment.
(pass_expand_powcabs::execute): Don't handle CABS.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/cabs-1.c: New test.
* gcc.dg/tree-ssa/cabs-2.c: New test.
* gfortran.dg/vect/pr115710.f90: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c  | 14 +
 gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c  | 13 
 gcc/testsuite/gfortran.dg/vect/pr115710.f90 | 18 ++
 gcc/tree-complex.cc | 68 +++-
 gcc/tree-ssa-math-opts.cc   | 70 +
 5 files changed, 113 insertions(+), 70 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c
 create mode 100644 gcc/testsuite/gfortran.dg/vect/pr115710.f90

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c
new file mode 100644
index 000..12ff6049e63
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target sqrt_insn } } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+/* { dg-add-options sqrt_insn } */
+
+
+double f(_Complex double a)
+{
+  a+= 1.0f;
+  return __builtin_cabs(a);
+}
+
+/* Check that cabs is expanded during complex lowering. */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump "__builtin_sqrt " "cplxlower1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c
new file mode 100644
index 000..efe3de90cba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower1" } */
+
+
+double f(_Complex double a)
+{
+  a+= 1.0f;
+  return __builtin_cabs(a);
+}
+
+/* Check that cabs is not expanded during complex lowering. */
+/* { dg-final { scan-tree-dump "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
diff --git a/gcc/testsuite/gfortran.dg/vect/pr115710.f90 
b/gcc/testsuite/gfortran.dg/vect/pr115710.f90
new file mode 100644
index 000..3749210ac80
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr115710.f90
@@ -0,0 +1,18 @@
+! { dg-do compile }
+! { dg-additional-options "-Ofast" }
+! { dg-require-effective-target vect_float }
+! { dg-require-effective-target vect_call_sqrtf }
+
+! { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } }
+! CABS expansion should allow for the vectorization to happen.
+
+subroutine foo(a,b,n)
+  complex(kind(1.0))::a(*)
+  real(kind(1.0))::b(*)
+  integer::i,n
+
+  do i=1,n
+ b(i)=abs(a(i))**2
+  end do
+
+end subroutine foo
diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index dfebec18ec3..d1276dc1c2f 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "target.h"
 #include "rtl.h"
 #include "tree.h"
 #include "gimple.h"
@@ -42,7 +43,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfganal.h"
 #include "gimple-fold.h"
 #include "diagnostic-core.h"
-
+#include "case-cfn-macros.h"
+#include "builtins.h"
+#include "optabs-tree.h"
 
 /* For each complex ssa name, a lattice value.  We're interested in finding
out whether a complex number is degenerate in some way, having only real
@@ -238,7 +241,18 @@ init_dont_simulate_again (void)
{
case GIMPLE_CALL:
  if (gimple_call_lhs (stmt))
-   sim_again_p = is_complex_reg (gimple_call_lhs (stmt));
+   {
+ sim_again_p = is_complex_reg (gimple_call_lhs (stmt));
+ switch (gimple_call_combined_fn (stmt))
+   {
+   CASE_CFN_CABS:
+ /* Expand cabs only if unsafe math and optimizing. */
+ if (optimize && flag_unsafe_math_optimizations)
+   saw_a_complex_op = true;
+ break;
+   default:;
+   }
+   }
  break;
 
 

[PATCH 3/4] Add some optimizations to gimple_expand_builtin_cabs

2024-07-01 Thread Andrew Pinski
While looking into the original folding code for cabs
(moved to match in r6-4111-gabcc43f5323869), I noticed that
`cabs(x+0i)` was optimized even without the need of sqrt.
I also noticed that now the code generation in this case
will be worse if the target has a sqrt. So let's implement
these small optimizations in gimple_expand_builtin_cabs.
Note `cabs(x+0i)` is done without unsafe math optimizations.
This is because the definition of `cabs(x+0i)` is `hypot(x, 0)`
and the definition in the standard says that it just returns `abs(x)`.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-complex.cc (gimple_expand_builtin_cabs): Add
`cabs(a+ai)`, `cabs(x+0i)` and `cabs(0+xi)` optimizations.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/cabs-3.c: New test.
* gcc.dg/tree-ssa/cabs-4.c: New test.
* gcc.dg/tree-ssa/cabs-5.c: New test.
* gcc.dg/tree-ssa/cabs-6.c: New test.

Signed-off-by: Andrew Pinski 

Fix optimizations

Signed-off-by: Andrew Pinski 
---
 gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c | 24 +++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c | 16 
 gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c | 22 ++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c | 16 
 gcc/tree-complex.cc| 56 +-
 5 files changed, 123 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c
new file mode 100644
index 000..976c0169131
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target sqrt_insn } } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+/* { dg-add-options sqrt_insn } */
+
+
+double f(double a, double c)
+{
+  _Complex double b = a;
+  b+= c;
+  return __builtin_cabs(b);
+}
+
+double f1(double a, double c)
+{
+  _Complex double b = __builtin_complex(0.0, a);
+  b+= __builtin_complex(0.0, c);
+  return __builtin_cabs(b);
+}
+
+/* Check that cabs is expanded during complex lowering. */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "cplxlower1" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c
new file mode 100644
index 000..00aa3c9d4e7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+
+double f(double a, double c)
+{
+  _Complex double d = __builtin_complex (a, a);
+  d+=__builtin_complex(1.0, 1.0);
+  return __builtin_cabs(d);
+}
+
+/* Check that cabs is expanded during complex lowering and there is no sqrt 
(since it is a constant). */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "cplxlower1" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c
new file mode 100644
index 000..dd794079921
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower1" } */
+
+double f(double a, double c)
+{
+  _Complex double b = a;
+  b+= c;
+  return __builtin_cabs(b);
+}
+
+double f1(double a, double c)
+{
+  _Complex double b = __builtin_complex(0.0, a);
+  b+= __builtin_complex(0.0, c);
+  return __builtin_cabs(b);
+}
+
+/* Check that cabs is expanded into ABS for both f and f1 during complex 
lowering. */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "cplxlower1" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c
new file mode 100644
index 000..bc88932449c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+
+double f(double a, double c)
+{
+  _Complex double d = __builtin_complex (a, 0.0);
+  d+=__builtin_complex(0.0, a);
+  return __builtin_cabs(d);
+}
+
+/* Check that cabs is expanded during complex lowering and there is no sqrt 
(since it is a constant). */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "cplxlower1" } } */
+
+
diff --git a/gcc/tree-complex.cc b/

[PATCH 4/4] Rename expand_powcabs pass to expand_pow

2024-07-01 Thread Andrew Pinski
Since cabs expansion was removed from this pass,
it would be good to rename it.

Bootstrapped and tested on x86_64-linux-gnu

gcc/ChangeLog:

* passes.def (expand_pow): Renamed from expand_powcabs.
* timevar.def (TV_TREE_POWCABS): Remove.
(TV_TREE_POW): Add.
* tree-pass.h (make_pass_expand_powcabs): Rename to ...
(make_pass_expand_pow): This.
* tree-ssa-math-opts.cc (class pass_expand_powcabs): Rename to ...
(class pass_expand_pow): This.
(pass_expand_powcabs::execute): Rename to ...
(pass_expand_pow::execute): This.
(make_pass_expand_powcabs): Rename to ...
(make_pass_expand_pow): This.

gcc/testsuite/ChangeLog:

* gcc.dg/pow-sqrt-synth-1.c: Update testcase for renamed pass.

Signed-off-by: Andrew Pinski 
---
 gcc/passes.def  |  2 +-
 gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c |  4 ++--
 gcc/timevar.def |  2 +-
 gcc/tree-pass.h |  2 +-
 gcc/tree-ssa-math-opts.cc   | 25 -
 5 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 13c9dc34ddf..b8c21b1e435 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -265,7 +265,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_ccp, true /* nonzero_p */);
   /* After CCP we rewrite no longer addressed locals into SSA
 form if possible.  */
-  NEXT_PASS (pass_expand_powcabs);
+  NEXT_PASS (pass_expand_pow);
   NEXT_PASS (pass_optimize_bswap);
   NEXT_PASS (pass_laddress);
   NEXT_PASS (pass_lim);
diff --git a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c 
b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c
index 484b29a8fc8..be81e43ad9a 100644
--- a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c
+++ b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target sqrt_insn } } */
-/* { dg-options "-fdump-tree-powcabs -Ofast --param max-pow-sqrt-depth=8" } */
+/* { dg-options "-fdump-tree-pow -Ofast --param max-pow-sqrt-depth=8" } */
 /* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target 
arm*-*-* } } */
 
 double
@@ -34,4 +34,4 @@ vecfoo (double *a)
 a[i] = __builtin_pow (a[i], 1.25);
 }
 
-/* { dg-final { scan-tree-dump-times "synthesizing" 7 "powcabs" } } */
+/* { dg-final { scan-tree-dump-times "synthesizing" 7 "pow" } } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 6fc36859138..0f9d2c0b032 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -223,7 +223,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch 
conversion")
 DEFTIMEVAR (TV_TREE_SWITCH_LOWERING,   "tree switch lowering")
 DEFTIMEVAR (TV_TREE_RECIP, "gimple CSE reciprocals")
 DEFTIMEVAR (TV_TREE_SINCOS   , "gimple CSE sin/cos")
-DEFTIMEVAR (TV_TREE_POWCABS   , "gimple expand pow/cabs")
+DEFTIMEVAR (TV_TREE_POW  , "gimple expand pow")
 DEFTIMEVAR (TV_TREE_WIDEN_MUL, "gimple widening/fma detection")
 DEFTIMEVAR (TV_TRANS_MEM , "transactional memory")
 DEFTIMEVAR (TV_TREE_STRLEN   , "tree strlen optimization")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 38902b1b01b..9843d189d27 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -451,7 +451,7 @@ extern gimple_opt_pass *make_pass_early_warn_uninitialized 
(gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
-extern gimple_opt_pass *make_pass_expand_powcabs (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_expand_pow (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 71f896a9790..a35caf5f058 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -2272,17 +2272,16 @@ make_pass_cse_sincos (gcc::context *ctxt)
   return new pass_cse_sincos (ctxt);
 }
 
-/* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
-   Note the name is powcabs but cabs expansion was moved to the lower complex
-   pass.  */
+/* Expand powi(x,n) into an optimal number of multiplies, when n is a
+   constant.  */
 namespace {
 
-const pass_data pass_data_expand_powcabs =
+const pass_data pass_data_expand_pow =
 {
   GIMPLE_PASS, /* type */
-  "powcabs", /* name */
+  "pow", /* name */
   OPTGROUP_NONE, /* optinfo_flags */
-  TV_TREE_POWCABS, /* tv_id */
+  TV_TREE_POW, /* tv_id */
   PROP_ssa, /* properties_required */
   PROP_gimple_opt_math, /* properties_provided */
   0, /* properties_destroyed */
@@ -2290,11 +2289,11 @@ const pass_data pass_data_expand_powcabs =
   TODO_up

[r15-1758 Regression] FAIL: g++.dg/cpp2a/spaceship-narrowing1.C -std=c++20 (test for excess errors) on Linux/x86_64

2024-07-01 Thread haochen.jiang
On Linux/x86_64,

52d71b6b1f0f465a6cf064f61b22fc99453ec132 is the first bad commit
commit 52d71b6b1f0f465a6cf064f61b22fc99453ec132
Author: Marek Polacek 
Date:   Fri Jun 28 17:51:19 2024 -0400

c++: DR2627, Bit-fields and narrowing conversions [PR94058]

caused

FAIL: g++.dg/cpp2a/spaceship-narrowing1.C  -std=c++20 (test for excess errors)

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r15-1758/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=g++.dg/cpp2a/spaceship-narrowing1.C 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=g++.dg/cpp2a/spaceship-narrowing1.C 
--target_board='unix{-m32\ -march=cascadelake}'"

(Please do not reply to this email; for questions about this report, contact me 
at haochen dot jiang at intel.com.)
(If you run into cascadelake-related problems, disabling AVX512F on the command 
line might help.)
(However, please make sure that there are no potential problems with AVX512.)


Re: [PATCH v2] c++: ICE with computed gotos [PR115469]

2024-07-01 Thread Jason Merrill

On 7/1/24 5:09 PM, Marek Polacek wrote:

On Mon, Jul 01, 2024 at 02:44:56PM -0400, Jason Merrill wrote:

On 6/26/24 6:04 PM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This is a low-prio crash on invalid code where we ICE on a VAR_DECL
with erroneous type.  I thought I'd try to avoid putting such decls
into ->names and ->names_in_scope but that sounds riskier than the
following cleanup.

PR c++/115469

gcc/cp/ChangeLog:

* decl.cc (decl_with_nontrivial_dtor_p): New.


This name doesn't suggest non-static variable to me.  Maybe
automatic_var_with...?


Sounds good.


While we're at it, we should also avoid complaining about thread-local by
checking decl_storage_duration == dk_auto, since [stmt.dcl]/2 is
specifically about automatic.


Done here.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This is a low-prio crash on invalid code where we ICE on a VAR_DECL
with erroneous type.  I thought I'd try to avoid putting such decls
into ->names and ->names_in_scope but that sounds riskier than the
following cleanup.

PR c++/115469

gcc/cp/ChangeLog:

* decl.cc (automatic_var_with_nontrivial_dtor_p): New.
(poplevel_named_label_1): Use it.
(check_goto_1): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/label17.C: New test.
---
  gcc/cp/decl.cc | 21 +
  gcc/testsuite/g++.dg/ext/label17.C | 18 ++
  2 files changed, 35 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/ext/label17.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 03deb1493a4..d439b04bfa7 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -514,6 +514,21 @@ level_for_consteval_if (cp_binding_level *b)
  && IF_STMT_CONSTEVAL_P (b->this_entity));
  }
  
+/* True if T is a non-static VAR_DECL that has a non-trivial destructor.

+   See [stmt.dcl]/2.  */
+
+static bool
+automatic_var_with_nontrivial_dtor_p (const_tree t)
+{
+  if (error_operand_p (t))
+return false;
+
+  return (VAR_P (t)
+ && !TREE_STATIC (t)


Checking TREE_STATIC is redundant with checking decl_storage_duration. 
OK without the above line.



+ && decl_storage_duration (CONST_CAST_TREE (t)) == dk_auto
+ && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (t)));
+}
+
  /* Update data for defined and undefined labels when leaving a scope.  */
  
  int

@@ -575,8 +590,7 @@ poplevel_named_label_1 (named_label_entry **slot, 
cp_binding_level *bl)
if (bl->kind == sk_catch)
  vec_safe_push (cg, get_identifier ("catch"));
for (tree d = use->names_in_scope; d; d = DECL_CHAIN (d))
- if (TREE_CODE (d) == VAR_DECL && !TREE_STATIC (d)
- && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (d)))
+ if (automatic_var_with_nontrivial_dtor_p (d))
vec_safe_push (cg, d);
  }
  
@@ -4003,8 +4017,7 @@ check_goto_1 (named_label_entry *ent, bool computed)

  tree end = b == level ? names : NULL_TREE;
  for (tree d = b->names; d != end; d = DECL_CHAIN (d))
{
- if (TREE_CODE (d) == VAR_DECL && !TREE_STATIC (d)
- && TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (d)))
+ if (automatic_var_with_nontrivial_dtor_p (d))
{
  if (!identified)
{
diff --git a/gcc/testsuite/g++.dg/ext/label17.C 
b/gcc/testsuite/g++.dg/ext/label17.C
new file mode 100644
index 000..076ef1f798e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/label17.C
@@ -0,0 +1,18 @@
+// PR c++/115469
+// { dg-do compile { target indirect_jumps } }
+// { dg-options "" }
+
+void
+fn1 ()
+{
+  b = &&c;// { dg-error "not declared|not defined" }
+  goto *0;
+}
+
+void
+fn2 ()
+{
+c:
+  b = &&c;  // { dg-error "not declared" }
+  goto *0;
+}

base-commit: c847dcf94499da62e5a28921b404e6e561645d99




[pushed] testsuite: fix spaceship-narrowing1.C

2024-07-01 Thread Marek Polacek
Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --
I made sure that Wnarrowing22.C works fine on ILP32, but apparently
I didn't verify that spaceship-narrowing1.C works there as well.  :(

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/spaceship-narrowing1.C: Use __INT64_TYPE__.
---
 gcc/testsuite/g++.dg/cpp2a/spaceship-narrowing1.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/cpp2a/spaceship-narrowing1.C 
b/gcc/testsuite/g++.dg/cpp2a/spaceship-narrowing1.C
index 7769f950bed..9f2ff3ceae4 100644
--- a/gcc/testsuite/g++.dg/cpp2a/spaceship-narrowing1.C
+++ b/gcc/testsuite/g++.dg/cpp2a/spaceship-narrowing1.C
@@ -16,7 +16,7 @@ constexpr strong_ordering strong_ordering::greater = 1;
 }
 
 struct A {
-  long i : 48;
+  __INT64_TYPE__ i : 48;
   auto operator <=> (const A&) const = default;
 };
 

base-commit: c847dcf94499da62e5a28921b404e6e561645d99
-- 
2.45.2



[PATCH v1] Match: Allow more types truncation for .SAT_TRUNC

2024-07-01 Thread pan2 . li
From: Pan Li 

.SAT_TRUNC has an input type and an output type, i.e. it converts from
itype to otype, where sizeof (otype) < sizeof (itype).  The previous
patch only allowed sizeof (otype) == sizeof (itype) / 2, but 1/4 and
1/8 truncations exist as well.

This patch supports truncation to any otype with
sizeof (otype) < sizeof (itype).  The following truncations are
covered.

* uint64_t => uint8_t
* uint64_t => uint16_t
* uint64_t => uint32_t
* uint32_t => uint8_t
* uint32_t => uint16_t
* uint16_t => uint8_t

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The rv64gcv build with glibc.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

* match.pd: Allow truncation to any otype narrower than itype.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 7fff7b5f9fe..f708f4622bd 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3239,16 +3239,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_trunc @0)
  (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
(convert @0))
- (with {
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0)))
+ (with
+  {
unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
unsigned otype_precision = TYPE_PRECISION (type);
-   wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision);
+   wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
wide_int int_cst = wi::to_wide (@1, itype_precision);
   }
-  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
-   && TYPE_UNSIGNED (TREE_TYPE (@0))
-   && otype_precision < itype_precision
-   && wi::eq_p (trunc_max, int_cst)
+  (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
 
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
-- 
2.34.1



Re: [PATCH] i386: Support APX NF and NDD for imul/mul

2024-07-01 Thread Hongtao Liu
On Mon, Jul 1, 2024 at 4:51 PM kong lingling  wrote:
>
> Add some missing APX NF and NDD support for imul and mul.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
>
> Ok for trunk?
Ok.
>
>
> gcc/ChangeLog:
>
> * config/i386/i386.md (*imulhizu): Added APX
> NF support.
> (*imulhizu): New define_insn.
> (*mulsi3_1_zext): Ditto.
> (*mul3_1): Ditto.
> (*mulqihi3_1): Ditto.
> (*mul3_1): Added APX NDD support.
> (*mulv4): Ditto.
> (*mulvhi4): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/apx-ndd.c: Add test for imul ndd.
> ---
>  gcc/config/i386/i386.md | 98 +
>  gcc/testsuite/gcc.target/i386/apx-ndd.c |  8 ++
>  2 files changed, 61 insertions(+), 45 deletions(-)
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index fd48e764469..c1f29fee412 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6488,8 +6488,8 @@
>  (define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64")
>
>  (define_subst "nf_subst"
> -  [(set (match_operand:SWI 0)
> -   (match_operand:SWI 1))]
> +  [(set (match_operand:SWIDWI 0)
> +   (match_operand:SWIDWI 1))]
>""
>[(set (match_dup 0)
> (match_dup 1))
> @@ -10028,24 +10028,26 @@
>  ;; On BDVER1, all HI MULs use DoublePath
>
>  (define_insn "*mul3_1"
> -  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
> +  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r,r")
> (mult:SWIM248
> - (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
> - (match_operand:SWIM248 2 "" "K,,r")))]
> + (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0,r")
> + (match_operand:SWIM248 2 "" "K,,r,r")))]
>"!(MEM_P (operands[1]) && MEM_P (operands[2]))
> && "
>"@
> imul{}\t{%2, %1, %0|%0, %1, %2}
> imul{}\t{%2, %1, %0|%0, %1, %2}
> -   imul{}\t{%2, %0|%0, %2}"
> +   imul{}\t{%2, %0|%0, %2}
> +   imul{}\t{%2, %1, %0|%0, %1, %2}"
>[(set_attr "type" "imul")
> -   (set_attr "prefix_0f" "0,0,1")
> +   (set_attr "prefix_0f" "0,0,1,1")
> +   (set_attr "isa" "*,*,*,apx_ndd")
> (set (attr "athlon_decode")
> (cond [(eq_attr "cpu" "athlon")
>   (const_string "vector")
>(eq_attr "alternative" "1")
>   (const_string "vector")
> -  (and (eq_attr "alternative" "2")
> +  (and (eq_attr "alternative" "2,3")
> (ior (match_test "mode == HImode")
>  (match_operand 1 "memory_operand")))
>   (const_string "vector")]
> @@ -10063,33 +10065,34 @@
> (const_string "direct")))
> (set_attr "mode" "")])
>
> -(define_insn "*imulhizu"
> +(define_insn "*imulhizu"
>[(set (match_operand:SWI48x 0 "register_operand" "=r,r")
> (zero_extend:SWI48x
>   (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
> -  (match_operand:HI 2 "immediate_operand" "K,n"
> -   (clobber (reg:CC FLAGS_REG))]
> -  "TARGET_APX_ZU"
> +  (match_operand:HI 2 "immediate_operand" "K,n"]
> +  "TARGET_APX_ZU && "
>"@
> -   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
> -   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
> +   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
> +   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
>[(set_attr "type" "imul")
> (set_attr "mode" "HI")])
>
> -(define_insn "*mulsi3_1_zext"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
> +(define_insn "*mulsi3_1_zext"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
> (zero_extend:DI
> - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
> -  (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"
> -   (clobber (reg:CC FLAGS_REG))]
> + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0,r")
> +  (match_operand:SI 2 "x86_64_general_operand" 
> "K,e,BMr,BMr"]
>"TARGET_64BIT
> -   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> +   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
> +   && "
>"@
> -   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> -   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> -   imul{l}\t{%2, %k0|%k0, %2}"
> +   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> +   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> +   imul{l}\t{%2, %k0|%k0, %2}
> +   imul{l}\t{%2, %1, %k0|%k0, %1, %2}"
>[(set_attr "type" "imul")
> -   (set_attr "prefix_0f" "0,0,1")
> +   (set_attr "prefix_0f" "0,0,1,1")
> +   (set_attr "isa" "*,*,*,apx_ndd")
> (set (attr "athlon_decode")
> (cond [(eq_attr "cpu" "athlon")
>   (const_string "vector")
> @@ -10158,30 +10161,32 @@
>[(set (reg:CCO FLAGS_REG)
> (eq:CCO (mult:
>(sign_extend:
> - (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
> + (match_operand:SWI48 1 "nonimmediate_operan

[PATCH] x86: Update branch hint for Redwood Cove.

2024-07-01 Thread liuhongt
From: "H.J. Lu" 

According to Intel® 64 and IA-32 Architectures Optimization Reference
Manual[1], Branch Hint is updated for Redwood Cove.

cut from [1]-
Starting with the Redwood Cove microarchitecture, if the predictor has
no stored information about a branch, the branch has the Intel® SSE2
branch taken hint (i.e., instruction prefix 3EH), When the codec
decodes the branch, it flips the branch’s prediction from not-taken to
taken. It then flushes the pipeline in front of it and steers this
pipeline to fetch the taken path of the branch.
cut end -

With -mtune-ctrl=branch_prediction_hints, always generate a branch hint for
conditional branches.  This tune is disabled by default.

[1] 
https://www.intel.com/content/www/us/en/content-details/821612/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/

* config/i386/i386.cc (ix86_print_operand): Always generate
branch hint for conditional branches.
---
 gcc/config/i386/i386.cc | 24 +---
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 1f71ed04be6..9992b9d6186 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14050,25 +14050,11 @@ ix86_print_operand (FILE *file, rtx x, int code)
int pred_val = profile_probability::from_reg_br_prob_note
 (XINT (x, 0)).to_reg_br_prob_base ();
 
-   if (pred_val < REG_BR_PROB_BASE * 45 / 100
-   || pred_val > REG_BR_PROB_BASE * 55 / 100)
- {
-   bool taken = pred_val > REG_BR_PROB_BASE / 2;
-   bool cputaken
- = final_forward_branch_p (current_output_insn) == 0;
-
-   /* Emit hints only in the case default branch prediction
-  heuristics would fail.  */
-   if (taken != cputaken)
- {
-   /* We use 3e (DS) prefix for taken branches and
-  2e (CS) prefix for not taken branches.  */
-   if (taken)
- fputs ("ds ; ", file);
-   else
- fputs ("cs ; ", file);
- }
- }
+   bool taken = pred_val > REG_BR_PROB_BASE / 2;
+   /* We use 3e (DS) prefix for taken branches and
+  2e (CS) prefix for not taken branches.  */
+   if (taken)
+ fputs ("ds ; ", file);
  }
return;
  }
-- 
2.31.1



[PATCH 1/2] LoongArch: Fix explicit-relocs-{extreme-, }tls-desc.c tests.

2024-07-01 Thread Lulu Cheng
After r15-1579, ADD and LD/ST pairs are merged into LDX/STX, which
causes these two tests to fail.  To make sure these two tests pass,
add the compilation option '-fno-late-combine-instructions'.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c:
Add compilation options '-fno-late-combine-instructions'.
* gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise.
---
 .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
index 3797556e1e6..e9eb0d6f703 100644
--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-mcmodel=extreme" } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-mcmodel=extreme -fno-late-combine-instructions" } */
 
 __thread int a __attribute__((visibility("hidden")));
 extern __thread int b __attribute__((visibility("default")));
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
index f6690309156..fed478458a3 100644
--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-fno-late-combine-instructions" } */
 
 __thread int a __attribute__((visibility("hidden")));
 extern __thread int b __attribute__((visibility("default")));
-- 
2.39.3



[PATCH 2/2] LoongArch: Define loongarch_insn_cost and set the cost of movcf2gr and movgr2cf.

2024-07-01 Thread Lulu Cheng
The following two FAIL items have been fixed:

FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler 
movcf2fr\\t\$f[0-9]+,\$fcc
FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler 
movfr2gr.s\\t\$r4

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_insn_cost):
New function.
(TARGET_INSN_COST): New macro.
---
 gcc/config/loongarch/loongarch.cc | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 0fb547e00f4..cf21c365605 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4372,6 +4372,34 @@ loongarch_address_cost (rtx addr, machine_mode mode,
   return loongarch_address_insns (addr, mode, false);
 }
 
+/* Implement TARGET_INSN_COST.  */
+
+static int
+loongarch_insn_cost (rtx_insn *insn, bool speed)
+{
+  rtx x = PATTERN (insn);
+  int cost = pattern_cost (x, speed);
+
+  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
+  if (TARGET_uARCH_LA464 && GET_CODE (x) == SET
+  && GET_MODE (XEXP (x, 0)) == FCCmode)
+{
+  rtx dest, src;
+  dest = XEXP (x, 0);
+  src = XEXP (x, 1);
+
+  if (REG_P (dest) && REG_P (src))
+   {
+ if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
+   cost = COSTS_N_INSNS (7);
+ else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
+   cost = COSTS_N_INSNS (15);
+   }
+}
+  return cost;
+}
+
+
 /* Return one word of double-word value OP, taking into account the fixed
endianness of certain registers.  HIGH_P is true to select the high part,
false to select the low part.  */
@@ -11105,6 +11133,8 @@ loongarch_asm_code_end (void)
 #define TARGET_RTX_COSTS loongarch_rtx_costs
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST loongarch_address_cost
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST loongarch_insn_cost
 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   loongarch_builtin_vectorization_cost
-- 
2.39.3



[PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue

2024-07-01 Thread Hongyu Wang
Hi,

According to the APX spec, pushp/popp pairs must be matched; otherwise
the PPX hint cannot take effect, which causes a performance loss.

In ix86_expand_epilogue, several optimizations may cause the epilogue
to use mov to restore the regs.  Check whether PPX was applied in the
prologue and, if so, prevent the use of mov/leave in the epilogue.

Bootstrapped/regtested on x86_64-pc-linux-gnu.

Ok for trunk?

gcc/ChangeLog:

* config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
(ix86_emit_save_regs): Emit ppx only when
TARGET_APX_PPX && !crtl->calls_eh_return.
(ix86_expand_epilogue): Don't restore reg using mov when
apx_ppx_used flag is true.
* config/i386/i386.h (struct machine_frame_state):
Add apx_ppx_used flag.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-ppx-2.c: New test.
* gcc.target/i386/apx-ppx-3.c: Likewise.
---
 gcc/config/i386/i386.cc   | 13 +
 gcc/config/i386/i386.h|  4 
 gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++
 gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++
 4 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index bd7411190af..99def8d4a77 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
 {
   int regno;
   rtx_insn *insn;
+  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
 
   if (!TARGET_APX_PUSH2POP2
   || !ix86_can_use_push2pop2 ()
@@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
  {
insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-   TARGET_APX_PPX));
+   use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
  }
 }
@@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
  regno_list[0]),
 gen_rtx_REG (word_mode,
  regno_list[1]),
-TARGET_APX_PPX));
+use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
 
@@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
else
  {
insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-   TARGET_APX_PPX));
+   use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
aligned = true;
  }
@@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
{
  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
   regno_list[0]),
- TARGET_APX_PPX));
+ use_ppx));
  RTX_FRAME_RELATED_P (insn) = 1;
}
 }
@@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
   if (!frame.save_regs_using_mov)
{
  ix86_emit_save_regs ();
+ m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
  int_registers_saved = true;
  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
}
@@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
   /* SEH requires the use of pops to identify the epilogue.  */
   else if (TARGET_SEH)
 restore_regs_via_mov = false;
+  /* If we already save reg with pushp, don't use move at epilogue.  */
+  else if (m->fs.apx_ppx_used)
+restore_regs_via_mov = false;
   /* If we're only restoring one register and sp cannot be used then
  using a move instruction to restore the register since it's
  less work than reloading sp and popping the register.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 147b12cd014..0c5292e1d64 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
  The flags realigned and sp_realigned are mutually exclusive.  */
   BOOL_BITFIELD sp_realigned : 1;
 
+  /* When APX_PPX used in prologue, force epilogue to emit
+  popp instead of move and leave.  */
+  BOOL_BITFIELD apx_ppx_used : 1;
+
   /* If sp_realigned is set, this is the last valid offset from the CFA
  that can be used for access with the frame pointer.  */
   HOST_WIDE_INT sp_realigned_fp_last;
diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c 
b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
new file mode 100644
index

[PATCH] sparc: define SPARC_LONG_DOUBLE_TYPE_SIZE for vxworks [PR115739]

2024-07-01 Thread Kewen.Lin
Hi,

Commit r15-1594 removed the define of LONG_DOUBLE_TYPE_SIZE in
sparc.cc, based on the assumption that each OS has its
own define (see the comments in sparc.h), but this exposes an
issue on vxworks, which lacks the define.

We could bring back the default SPARC_LONG_DOUBLE_TYPE_SIZE in
sparc.cc, but according to the comments in sparc.h, I think
it's better to define this in vxworks.h.  By the way, I also went
through all the sparc supported triples; vxworks is the only
one that is missing this define.

Built well with cross build --target=sparc-wrs-vxworks,
is it ok for trunk?

BR,
Kewen
-

PR target/115739

gcc/ChangeLog:

* config/sparc/vxworks.h (SPARC_LONG_DOUBLE_TYPE_SIZE): New define.
---
 gcc/config/sparc/vxworks.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/sparc/vxworks.h b/gcc/config/sparc/vxworks.h
index c1a9310fb3f..4cdb3b1685d 100644
--- a/gcc/config/sparc/vxworks.h
+++ b/gcc/config/sparc/vxworks.h
@@ -62,3 +62,7 @@ along with GCC; see the file COPYING3.  If not see
 /* This platform supports the probing method of stack checking (RTP mode).
8K is reserved in the stack to propagate exceptions in case of overflow.  */
 #define STACK_CHECK_PROTECT 8192
+
+/* SPARC_LONG_DOUBLE_TYPE_SIZE should be defined per OS.  */
+#undef SPARC_LONG_DOUBLE_TYPE_SIZE
+#define SPARC_LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
--
2.43.0


Re: [PATCH 2/2] LoongArch: Define loongarch_insn_cost and set the cost of movcf2gr and movgr2cf.

2024-07-01 Thread Xi Ruoyao
On Tue, 2024-07-02 at 11:22 +0800, Lulu Cheng wrote:
> +static int
> +loongarch_insn_cost (rtx_insn *insn, bool speed)
> +{
> +  rtx x = PATTERN (insn);
> +  int cost = pattern_cost (x, speed);
> +
> +  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
> +  if (TARGET_uARCH_LA464 && GET_CODE (x) == SET
> +  && GET_MODE (XEXP (x, 0)) == FCCmode)
> +    {
> +  rtx dest, src;
> +  dest = XEXP (x, 0);
> +  src = XEXP (x, 1);
> +
> +  if (REG_P (dest) && REG_P (src))
> + {
> +   if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
> +     cost = COSTS_N_INSNS (7);

cost = loongarch_cost->movcf2gr;

> +   else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
> +     cost = COSTS_N_INSNS (15);

cost = loongarch_cost->movgr2cf;

Then we don't need to check TARGET_uARCH_LA464.

> + }
> +    }
> +  return cost;
> +}

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH 2/2] LoongArch: Define loongarch_insn_cost and set the cost of movcf2gr and movgr2cf.

2024-07-01 Thread Lulu Cheng



在 2024/7/2 上午11:50, Xi Ruoyao 写道:

On Tue, 2024-07-02 at 11:22 +0800, Lulu Cheng wrote:

+static int
+loongarch_insn_cost (rtx_insn *insn, bool speed)
+{
+  rtx x = PATTERN (insn);
+  int cost = pattern_cost (x, speed);
+
+  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
+  if (TARGET_uARCH_LA464 && GET_CODE (x) == SET
+  && GET_MODE (XEXP (x, 0)) == FCCmode)
+    {
+  rtx dest, src;
+  dest = XEXP (x, 0);
+  src = XEXP (x, 1);
+
+  if (REG_P (dest) && REG_P (src))
+   {
+     if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
+       cost = COSTS_N_INSNS (7);

cost = loongarch_cost->movcf2gr;


+     else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
+       cost = COSTS_N_INSNS (15);

cost = loongarch_cost->movgr2cf;

Then we don't need to check TARGET_uARCH_LA464.


Ok! I'll merge it after the revisions.

Thanks.




+   }
+    }
+  return cost;
+}




[PATCH v2] RISC-V: Implement the .SAT_TRUNC for scalar

2024-07-01 Thread pan2 . li
From: Pan Li 

Update in v2:
Rebase the upstream.

Log in v1:
This patch implements the simple .SAT_TRUNC pattern
in the riscv backend, i.e.:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {\
bool overflow = x > (WT)(NT)(-1);  \
return ((NT)x) | (NT)-overflow;\
  }

DEF_SAT_U_TRUC_FMT_1(uint8_t, uint16_t)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  _Bool overflow;
  unsigned char _1;
  unsigned char _2;
  unsigned char _3;
  uint8_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  overflow_5 = x_4(D) > 255;
  _1 = (unsigned char) x_4(D);
  _2 = (unsigned char) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;succ:   EXIT

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  uint8_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;succ:   EXIT

}

The test suites below passed for this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

* config/riscv/iterators.md (TARGET_64BIT): Add new iterator
and related attr(s).
* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
func decl for expanding ustrunc
* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
impl to expand ustrunc.
* config/riscv/riscv.md (ustrunc<mode><anyi_narrowed>2): Add
new pattern ustrunc<mode><anyi_narrowed>2.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_arith_data.h: New test.
* gcc.target/riscv/sat_u_trunc-1.c: New test.
* gcc.target/riscv/sat_u_trunc-2.c: New test.
* gcc.target/riscv/sat_u_trunc-3.c: New test.
* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
* gcc.target/riscv/scalar_sat_unary.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/iterators.md | 10 
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv.cc | 40 +
 gcc/config/riscv/riscv.md | 10 
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 16 ++
 .../gcc.target/riscv/sat_arith_data.h | 56 +++
 .../gcc.target/riscv/sat_u_trunc-1.c  | 17 ++
 .../gcc.target/riscv/sat_u_trunc-2.c  | 20 +++
 .../gcc.target/riscv/sat_u_trunc-3.c  | 19 +++
 .../gcc.target/riscv/sat_u_trunc-run-1.c  | 16 ++
 .../gcc.target/riscv/sat_u_trunc-run-2.c  | 16 ++
 .../gcc.target/riscv/sat_u_trunc-run-3.c  | 16 ++
 .../gcc.target/riscv/scalar_sat_unary.h   | 22 
 13 files changed, 259 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_arith_data.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 20745faa55e..5e2216fdafb 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -65,6 +65,16 @@ (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")])
 ;; Iterator for hardware-supported integer modes.
 (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_NARROW [HI SI (DI "TARGET_64BIT")])
+
+(define_mode_attr ANYI_NARROWED [
+  (HI "QI") (SI "HI") (DI "SI")
+])
+
+(define_mode_attr anyi_narrowed [
+  (HI "qi") (SI "hi") (DI "si")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a8b76173fa0..61a22a187df 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -135,6 +135,7 @@ riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
*invert_ptr = 0);
diff