[x86 PATCH] PR target/116275: Handle STV of *extenddi2_doubleword_highpart

2024-08-11 Thread Roger Sayle

This patch resolves PR target/116275, a recent ICE-on-valid regression on
-m32 caused by my recent change to enable STV of DImode arithmetic right
shift on non-AVX512VL targets.  The oversight is that the i386 backend
contains an *extenddi2_doubleword_highpart instruction (whose pattern
is an arithmetic right shift of a left shift) that optimizes the case where
sign-extension need only update the highpart word of a DImode value when
generating 32-bit code (!TARGET_64BIT).  STV accepts this pattern as a
candidate, as there are patterns to handle this form of extension on SSE
using AVX512VL instructions (and previously ASHIFTRT was only allowed on
AVX512VL).  Now that ASHIFTRT is a candidate on non-AVX512VL targets, we
either need to check that the first operand is a register, or as done
below provide the define_insn_and_split that provides a non-AVX512VL
implementation of *extendv2di_highpart_stv.

The new testcase only ICEed with -m32, so this test could be limited to
target ia32, but there's no harm also running this test on -m64 to
provide a little extra test coverage.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2024-08-11  Roger Sayle  

gcc/ChangeLog
PR target/116275
* config/i386/i386.md (*extendv2di2_highpart_stv_noavx512vl): New
define_insn_and_split to handle the STV conversion of the DImode
pattern *extenddi2_doubleword_highpart.

gcc/testsuite/ChangeLog
PR target/116275
* g++.target/i386/pr116275.C: New test case.


Roger
--

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index db7789c..1a6188f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17393,6 +17393,24 @@
(ashift:V2DI (match_dup 1) (match_dup 2)))
(set (match_dup 0)
(ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
+
+;; Without AVX512VL, split this instruction before reload.
+(define_insn_and_split "*extendv2di2_highpart_stv_noavx512vl"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+   (ashiftrt:V2DI
+ (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
+  (match_operand:QI 2 "const_int_operand"))
+ (match_operand:QI 3 "const_int_operand")))]
+  "!TARGET_AVX512VL
+   && INTVAL (operands[2]) == INTVAL (operands[3])
+   && UINTVAL (operands[2]) < 32
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (ashift:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+   (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
 
 ;; Rotate instructions
 
diff --git a/gcc/testsuite/g++.target/i386/pr116275.C 
b/gcc/testsuite/g++.target/i386/pr116275.C
new file mode 100644
index 000..69c5b5a
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr116275.C
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -std=c++11" } */
+
+struct SymbolDesc push_back(SymbolDesc);
+struct SymbolDesc {
+  long long ELFLocalSymIdx;
+};
+struct Expected {
+  long long &operator*();
+};
+void SymbolizableObjectFileaddSymbol() {
+  Expected SymbolAddressOrErr;
+  long long SymbolAddress = *SymbolAddressOrErr << 8 >> 8;
+  push_back({SymbolAddress});
+}


[PATCH v2] RISC-V: Make sure high bits of usadd operands is clean for HI/QI [PR116278]

2024-08-11 Thread pan2 . li
From: Pan Li 

For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the
high bits of the Xmode register may be all 1s, which is not expected.
For example, consider the code below.

signed char b[1];
unsigned short c;
signed char *d = b;
int main() {
  b[0] = -40;
  c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9;
  __builtin_printf("%d\n", c);
}

After expanding we have:

;; _6 = .SAT_ADD (_3, 9);
(insn 8 7 9 (set (reg:DI 143)
(high:DI (symbol_ref:DI ("d") [flags 0x86]  )))
 (nil))
(insn 9 8 10 (set (reg/f:DI 142)
(mem/f/c:DI (lo_sum:DI (reg:DI 143)
(symbol_ref:DI ("d") [flags 0x86]  )) [1 d+0 S8 
A64]))
 (nil))
(insn 10 9 11 (set (reg:HI 144 [ _3 ])
(sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) 
"test.c":7:10 -1
 (nil))

The conversion from signed char to unsigned short produces the sign_extend
rtl shown above, which finally becomes the lb insn below:

lb      a1,0(a5)   // a1 is -40, aka 0xffffffffffffffd8
lui     a0,0x1a
addi    a5,a1,9
slli    a5,a5,0x30
srli    a5,a5,0x30 // a5 is 65505
sltu    a1,a5,a1   // compare 65505 and 0xffffffffffffffd8 => TRUE

The sltu tries to compare 65505 and 0xffffffffffffffd8 here, but we
actually want to compare 65505 and 65496 (0xffd8).  Thus we need to
clean up the high bits to ensure this.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.

PR target/116278

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new
func impl to zero extend rtx.
(riscv_expand_usadd): Leverage above func to cleanup operands
and sum.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 19 ++-
 .../gcc.target/riscv/pr116278-run-1.c | 16 
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5fe4273beb7..cfdb3d82972 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11564,6 +11564,23 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
+/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
+   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
+   the new zero extended Xmode rtx will be returned.
+   Or the gen_lowpart rtx of Xmode will be returned.  */
+
+static rtx
+riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
+{
+  if (mode != HImode && mode != QImode)
+return gen_lowpart (Xmode, x);
+
+  rtx xmode_reg = gen_reg_rtx (Xmode);
+  riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+
+  return xmode_reg;
+}
+
 /* Implements the unsigned saturation add standard name usadd for int mode.
 
z = SAT_ADD(x, y).
@@ -11580,7 +11597,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   machine_mode mode = GET_MODE (dest);
   rtx xmode_sum = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
-  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
   rtx xmode_y = gen_lowpart (Xmode, y);
   rtx xmode_dest = gen_reg_rtx (Xmode);
 
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
new file mode 100644
index 000..f6268e290ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+signed char b[1];
+int c;
+signed char *d = b;
+
+int main() {
+  b[0] = -40;
+  c = ({
+(unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6;
+  }) + 9;
+
+  if (c != 65505)
+__builtin_abort ();
+}
-- 
2.43.0



[avr,patch,applied] avr: Don't document -mlra

2024-08-11 Thread Georg-Johann Lay

This patch sets the Undocumented flag for -mlra because
it is likely just a transient option and only experimental.

Johann

--

AVR: -mlra is not documented in TEXI.

gcc/
* config/avr/avr.opt (mlra): Set Undocumented flag.

diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 947489eddf0..444ed7edb26 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -19,7 +19,7 @@
 ; .

 mlra
-Target Var(avr_lra_p) UInteger Init(0) Optimization
+Target Var(avr_lra_p) UInteger Init(0) Optimization Undocumented
 Usa LRA for reload instead of the old reload framework.  This option 
is experimental, and it may be removed in future versions of the compiler.


 mcall-prologues


[patch,avr,applied]: Use lra_in_progress resp. reload_in_progress depending on -mlra

2024-08-11 Thread Georg-Johann Lay

This patch adds function avr.cc::ra_in_progress() that returns
lra_in_progress resp. reload_in_progress depending on avr_lra_p.
Currently, direct use of ra_in_progress() is only made with -mlog=.

Johann

--


AVR: Add function avr.cc::ra_in_progress().

It returns lra_in_progress resp. reload_in_progress depending on 
avr_lra_p.

Currently, direct use of ra_in_progress() is only made with -mlog=.

gcc/
* config/avr/avr.cc (ra_in_progress): New static function.
(avr_legitimate_address_p, avr_addr_space_legitimate_address_p)
(extra_constraint_Q): Use it with -mlog=.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 27df4bc4146..8d59a6babed 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -330,6 +330,13 @@ avr_adiw_reg_p (rtx reg)
 }


+static bool
+ra_in_progress ()
+{
+  return avr_lra_p ? lra_in_progress : reload_in_progress;
+}
+
+
 namespace {

 static const pass_data avr_pass_data_recompute_notes =
@@ -3560,8 +3567,8 @@ avr_legitimate_address_p (machine_mode mode, rtx 
x, bool strict)

   if (avr_log.legitimate_address_p)
 {
   avr_edump ("\n%?: ret=%d, mode=%m strict=%d "
-"reload_completed=%d reload_in_progress=%d %s:",
-ok, mode, strict, reload_completed, reload_in_progress,
+"reload_completed=%d ra_in_progress=%d %s:",
+ok, mode, strict, reload_completed, ra_in_progress (),
 reg_renumber ? "(reg_renumber)" : "");

   if (GET_CODE (x) == PLUS
@@ -13973,8 +13980,8 @@ extra_constraint_Q (rtx x)
|| xx == arg_pointer_rtx);

   if (avr_log.constraints)
-   avr_edump ("\n%?=%d reload_completed=%d reload_in_progress=%d\n 
%r\n",

-  ok, reload_completed, reload_in_progress, x);
+   avr_edump ("\n%?=%d reload_completed=%d ra_in_progress=%d\n %r\n",
+  ok, reload_completed, ra_in_progress (), x);
 }

   return ok;
@@ -15038,8 +15045,8 @@ avr_addr_space_legitimate_address_p 
(machine_mode mode, rtx x, bool strict,

   if (avr_log.legitimate_address_p)
 {
   avr_edump ("\n%?: ret=%b, mode=%m strict=%d "
-"reload_completed=%d reload_in_progress=%d %s:",
-ok, mode, strict, reload_completed, reload_in_progress,
+"reload_completed=%d ra_in_progress=%d %s:",
+ok, mode, strict, reload_completed, ra_in_progress (),
 reg_renumber ? "(reg_renumber)" : "");

   if (GET_CODE (x) == PLUS


Re: [x86 PATCH] PR target/116275: Handle STV of *extenddi2_doubleword_highpart

2024-08-11 Thread Uros Bizjak
On Sun, Aug 11, 2024 at 12:16 PM Roger Sayle  wrote:
>
>
> This patch resolves PR target/116275, a recent ICE-on-valid regression on
> -m32 caused by my recent change to enable STV of DImode arithmeric right
> shift on non-AVX512VL targets.  The oversight is that the i386 backend
> contains an *extenddi2_doubleword_highpart instruction (whose pattern
> is an arithmetic right shift of a left shift) that optimizes the case where
> sign-extension need only update the highpart word of a DImode value when
> generating 32-bit code (!TARGET_64BIT).  STV accepts this pattern as a
> candidate, as there are patterns to handle this form of extension on SSE
> using AVX512VL instructions (and previously ASHIFTRT was only allowed on
> AVX512VL).  Now that ASHIFTRT is a candidate on non-AVX512vL targets, we
> either need to check that the first operand is a register, or as done
> below provide the define_insn_and_split that provides a non-AVX512VL
> implementation of *extendv2di_highpart_stv.
>
> The new testcase only ICEed with -m32, so this test could be limited to
> target ia32, but there's no harm also running this test on -m64 to
> provide a little extra test coverage.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2024-08-11  Roger Sayle  
>
> gcc/ChangeLog
> PR target/116275
> * config/i386/i386.md (*extendv2di2_highpart_stv_noavx512vl): New
> define_insn_and_split to handle the STV conversion of the DImode
> pattern *extenddi2_doubleword_highpart.
>
> gcc/testsuite/ChangeLog
> PR target/116275
> * g++.target/i386/pr116275.C: New test case.

+  [(set (match_dup 0)
+ (ashift:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+ (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])

Since this pattern is split before reload, you can perhaps introduce a
new V2DI temporary register and use it to output from the first RTX.
This will ease the job of RA a tiny bit.

OK with or without the above suggestion.

Thanks,
Uros.


Re: [PATCH v5] Target-independent store forwarding avoidance.

2024-08-11 Thread Jeff Law




On 8/9/24 8:58 AM, Manolis Tsamis wrote:

This pass detects cases of expensive store forwarding and tries to avoid them
by reordering the stores and using suitable bit insertion sequences.
For example it can transform this:

[ ... ]


Trying to bootstrap (enabled by default) on  68k:


../../../gcc/gcc/avoid-store-forwarding.cc: In function 'rtx_insn* 
{anonymous}::generate_bit_insert_sequence(store_fwd_info*, rtx, machine_mode)':
../../../gcc/gcc/avoid-store-forwarding.cc:152:44: error: unused parameter 
'load_inner_mode' [-Werror=unused-parameter]
  152 |   machine_mode load_inner_mode)
  |   ~^~~




Re: [PATCH v6] c++: Fix constrained auto deduction in templ spec scopes [PR114915]

2024-08-11 Thread Seyed Sajad Kahani
Hi.

Based on your comments here, and the discussions in another thread
https://gcc.gnu.org/pipermail/gcc-patches/2024-July/657703.html
I have made another patch to resolve this issue (as well as another
bug) that I will be sending right now.


On Thu, 2024-07-18 at 22:16 -0400, Jason Merrill wrote:
> On 7/18/24 12:03 PM, Seyed Sajad Kahani wrote:
> > When deducing auto for `adc_return_type`, `adc_variable_type`, and
> > `adc_decomp_type` contexts (at the usage time), we try to resolve
> > the outermost
> > template arguments to be used for satisfaction. This is done by one
> > of the
> > following, depending on the scope:
> > 
> > 1. Checking the `DECL_TEMPLATE_INFO` of the current function scope
> > and
> > extracting DECL_TI_ARGS from it for function scope deductions
> > (pt.cc:31236).
> > 2. Checking the `DECL_TEMPLATE_INFO` of the declaration (alongside
> > with other
> > conditions) for non-function scope variable declaration deductions
> > (decl.cc:8527).
> > 
> > Then, we do not retrieve the deeper layers of the template
> > arguments; instead,
> > we fill the missing levels with dummy levels (pt.cc:31260).
> > 
> > The problem (that is shown in PR114915) is that we do not consider
> > the case
> > where the deduction happens in a template specialization scope. In
> > this case,
> > the type is not dependent on the outermost template arguments
> > (which are
> > the specialization arguments). Yet, we still resolve the outermost
> > template
> > arguments, and then the number of layers in the template arguments
> > exceeds the
> > number of levels in the type. This causes the missing levels to be
> > negative.
> > This leads to the rejection of valid code and ICEs (like segfault)
> > in the
> > release mode. In the debug mode, it is possible to show as an
> > assertion failure
> > (when creating a tree_vec with a negative size).
> > 
> > This patch resolves PR114915 by replacing the logic that fills in
> > the
> > missing levels in do_auto_deduction in cp/pt.cc.
> > The new approach now trims targs if the depth of targs is deeper
> > than desired
> > (this will only happen in specific contexts), and still fills targs
> > with empty
> > layers if it has fewer depths than expected.
> 
> I would prefer to set outer_targs correctly in the first place, where
> it's currently set a few lines above.  And to factor that out so
> other 
> callers can use it as well instead of DECL_TI_ARGS.
> 
> It seems like we want something close to outer_template_args, but it 
> doesn't currently handle function scope decls or full
> specializations.
> 
> Jason
> 



[PATCH, committed] Fortran: silence Wmaybe-uninitialized warnings for LTO build [PR116221]

2024-08-11 Thread Harald Anlauf
Dear all,

I've pushed the attached simple patch for initialization of local variables
to silence warnings for LTO builds after Sam James' confirmation as

https://gcc.gnu.org/g:2b23a444bcf7eb67cb04b431d8fd4fa6f65222de

Thanks,
Harald

From 2b23a444bcf7eb67cb04b431d8fd4fa6f65222de Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Sun, 11 Aug 2024 20:31:13 +0200
Subject: [PATCH] Fortran: silence Wmaybe-uninitialized warnings for LTO build
 [PR116221]

	PR fortran/116221

gcc/fortran/ChangeLog:

	* intrinsic.cc (gfc_get_intrinsic_sub_symbol): Initialize variable.
	* symbol.cc (gfc_get_ha_symbol): Likewise.
---
 gcc/fortran/intrinsic.cc | 2 +-
 gcc/fortran/symbol.cc| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc
index 40f4c4f4b0b..62c349da7f6 100644
--- a/gcc/fortran/intrinsic.cc
+++ b/gcc/fortran/intrinsic.cc
@@ -131,7 +131,7 @@ gfc_type_abi_kind (bt type, int kind)
 gfc_symbol *
 gfc_get_intrinsic_sub_symbol (const char *name)
 {
-  gfc_symbol *sym;
+  gfc_symbol *sym = NULL;

   gfc_get_symbol (name, gfc_intrinsic_namespace, &sym);
   sym->attr.always_explicit = 1;
diff --git a/gcc/fortran/symbol.cc b/gcc/fortran/symbol.cc
index b5143d9f790..a8b623dd92a 100644
--- a/gcc/fortran/symbol.cc
+++ b/gcc/fortran/symbol.cc
@@ -3599,7 +3599,7 @@ int
 gfc_get_ha_symbol (const char *name, gfc_symbol **result)
 {
   int i;
-  gfc_symtree *st;
+  gfc_symtree *st = NULL;

   i = gfc_get_ha_sym_tree (name, &st);

--
2.35.3



Re: [PATCH] gm2: add missing debug output guard

2024-08-11 Thread Gaius Mulley
Wilken Gottwalt  writes:

> On Sat, 10 Aug 2024 13:43:33 +0200 (CEST)
> Gerald Pfeifer  wrote:
>
>> On Tue, 23 Jul 2024, Gaius Mulley wrote:
>> >> gcc/gm2:
>> >>   * gm2-libs-iso/MemStream.mod: Guard debug output.
>> > many thanks!
>> 
>> I noticed this has not been pushed yet and believe Wilken does not have 
>> write/push access - so just pushed this on his behalf.
>> 
>> Wilken, it may be good to note this as part of patch submission.
>> 
>> Gerald
>
> Thank for pushing this. But now I'm a bit confused. Why should I have 
> write/push
> access to official repos? All I do is sending in patches like I do on the 
> LKML.
> I also sent in two more patches to fix some Modula-2 libs warnings, which make
> building with most warnings enabled (-W -Wall -Wextra) a bit annoying. Gerald,
> should I cc you in the next patches I will send in?
>
> greetings,
> Wilken

Ah I think the error is mine - I forgot to push the patch after okaying
it,

regards,
Gaius


Re: [PATCH] gm2: add missing debug output guard

2024-08-11 Thread Gerald Pfeifer
On Sun, 11 Aug 2024, Wilken Gottwalt wrote:
>> I noticed this has not been pushed yet and believe Wilken does not have 
>> write/push access - so just pushed this on his behalf.
>> 
>> Wilken, it may be good to note this as part of patch submission.
> Thank for pushing this. But now I'm a bit confused. Why should I have 
> write/push access to official repos?

First time contributors and occasional contributors indeed don't have push 
access to the official repos (and I did not mean to imply they should).

Rather what I meant was that it can be helpful noting this in a patch 
submission, something like "(I don't have write/push access.)", to make 
reviewers/approvers aware of the extra step.

> Gerald, should I cc you in the next patches I will send in?

If Gaius is happy to push your patches, that'd be best. I merely helped 
out here. :-) (If necessary, yes, happy to help again.)

Gerald


[PATCH] c++: Fix constrained auto deduction templ parms resolution [PR114915, PR115030]

2024-08-11 Thread Seyed Sajad Kahani
When deducing auto for `adc_return_type`, `adc_variable_type`, and
`adc_decomp_type` contexts (at the usage time), we try to resolve the outermost
template arguments to be used for satisfaction. This is done by one of the
following, depending on the scope:

1. Checking the `DECL_TEMPLATE_INFO` of the current function scope and
extracting `DECL_TI_ARGS` from it for function scope deductions (pt.cc:31236).
2. Checking the `DECL_TEMPLATE_INFO` of the declaration (alongside with other
conditions) for non-function scope variable declaration deductions
(decl.cc:8527).

Note that `DECL_TI_ARGS` for partial and explicit specializations will yield the
arguments with respect to the most_general_template, which is the primary
template. This can lead to rejection of valid code or acceptance of invalid code
(PR115030) in a partial specialization context. For an explicitly specialized
case, due to the mismatch between the desired depth and the actual depth of
args, it can lead to ICEs (PR114915) where we intend to fill the missing levels
with dummy levels (pt.cc:31260), while the missing levels are negative.

This patch resolves PR114915 and PR115030 by replacing the logic of extracting
args for the declaration in those two places with `outer_template_args`.
`outer_template_args` is an existing function that was used in limited
contexts to do so. Now, it is extended to handle partial and explicit
specializations and lambda functions as well. A few inevitable changes are
also made to the signature of some functions, relaxing `const_tree` to `tree`.

PR c++/114915
PR c++/115030

gcc/cp/ChangeLog:

* constraint.cc (maybe_substitute_reqs_for): Relax the argument type to
be compatible with outer_template_args.
* cp-tree.h (outer_template_args): Relax the argument type and add an
optional argument.
(maybe_substitute_reqs_for): Relax the argument type to be compatible
with outer_template_args.
* decl.cc (cp_finish_decl): Replace the logic of extracting args with
outer_template_args.
* pt.cc (outer_template_args): Handle partial and explicit
specializations and lambda functions.
(do_auto_deduction): Replace the logic of extracting args with
outer_template_args.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-placeholder14.C: New test.
* g++.dg/cpp2a/concepts-placeholder15.C: New test.
* g++.dg/cpp2a/concepts-placeholder16.C: New test.
* g++.dg/cpp2a/concepts-placeholder17.C: New test.
---
 gcc/cp/constraint.cc  |  2 +-
 gcc/cp/cp-tree.h  |  4 +-
 gcc/cp/decl.cc|  4 +-
 gcc/cp/pt.cc  | 71 ---
 .../g++.dg/cpp2a/concepts-placeholder14.C | 19 +
 .../g++.dg/cpp2a/concepts-placeholder15.C | 26 +++
 .../g++.dg/cpp2a/concepts-placeholder16.C | 33 +
 .../g++.dg/cpp2a/concepts-placeholder17.C | 21 ++
 8 files changed, 150 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder14.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder15.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder16.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder17.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index ebf4255e5..a1c3962c4 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -1332,7 +1332,7 @@ remove_constraints (tree t)
for declaration matching.  */
 
 tree
-maybe_substitute_reqs_for (tree reqs, const_tree decl)
+maybe_substitute_reqs_for (tree reqs, tree decl)
 {
   if (reqs == NULL_TREE)
 return NULL_TREE;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 9a8c86591..2d6733f57 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7215,7 +7215,7 @@ extern tree maybe_set_retval_sentinel (void);
 extern tree template_parms_to_args (tree);
 extern tree template_parms_level_to_args   (tree);
 extern tree generic_targs_for  (tree);
-extern tree outer_template_args(const_tree);
+extern tree outer_template_args(tree, bool = true);
 
 /* in expr.cc */
 extern tree cplus_expand_constant  (tree);
@@ -8560,7 +8560,7 @@ extern void remove_constraints  (tree);
 extern tree current_template_constraints   (void);
 extern tree associate_classtype_constraints (tree);
 extern tree build_constraints   (tree, tree);
-extern tree maybe_substitute_reqs_for  (tree, const_tree);
+extern tree maybe_substitute_reqs_for  (tree, tree);
 extern tree get_trailing_function_requirements (tree);
 extern tree get_shorthand_constraints   (tree);
 
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index a139b293e..6b68d5f39 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -8557

Subject: [PATCH 0/8] Masked load else operand.

2024-08-11 Thread Robin Dapp
I figured it's easier to parse this as a series rather than one big
patch, in particular since target-specific code is involved.

This adds an else operand to masked-load operations in order to avoid
implicit dependencies on zeroed masked-out elements.  riscv does not
mandate zeroing for those but rather leaves them unspecified.

The general idea is to query the proper operand of the target's
respective optab for a supported else value.  If the supported value is
non-zero emit a cond_expr after the load in order to make the dependency
explicit and allow it to be optimized with the surrounding code.

In order to keep the fallout manageable the patch is, for now, restricted to
only emit cond_exprs during explicit masking in tree-ifcvt.  I have a local
version that emits a vec_cond_expr for each vector mask load but that would
cause several ripple effects further down the line.

Loop masking in vectorizer context is as before.  Also, the patch series only
considers element masking else values and no else value for length masking.

The backend changes are supposed to be more proof-of-concept than anything
and are surely not idiomatic.  x86's and aarch64's test suite results
are, however, unchanged.

Robin Dapp (8):
  docs: Document maskload else operand and behavior.
  ifn: Add else-operand handling.
  tree-ifcvt: Enforce zero else value after maskload.
  vect: Add maskload else value support.
  aarch64: Add masked-load else operands.
  gcn: Add else operand to masked loads.
  i386: Add else operand to masked loads.
  RISC-V: Add else operand to masked loads [PR115536].

 .../aarch64/aarch64-sve-builtins-base.cc  |  58 +++--
 gcc/config/aarch64/aarch64-sve-builtins.cc|   5 +
 gcc/config/aarch64/aarch64-sve-builtins.h |   1 +
 gcc/config/aarch64/aarch64-sve.md |  47 +++-
 gcc/config/aarch64/aarch64-sve2.md|   3 +-
 gcc/config/aarch64/predicates.md  |   4 +
 gcc/config/gcn/gcn-valu.md|   6 +-
 gcc/config/gcn/predicates.md  |   3 +
 gcc/config/i386/i386-expand.cc|  59 -
 gcc/config/i386/predicates.md |  15 ++
 gcc/config/i386/sse.md| 124 ++
 gcc/config/riscv/autovec.md   |  45 ++--
 gcc/config/riscv/predicates.md|   3 +
 gcc/config/riscv/riscv-v.cc   |  26 ++-
 gcc/doc/md.texi   |  60 +++--
 gcc/internal-fn.cc|  88 +--
 gcc/internal-fn.h |  11 +-
 gcc/optabs-query.cc   |  83 +--
 gcc/optabs-query.h|   3 +-
 gcc/optabs-tree.cc|  43 ++--
 gcc/optabs-tree.h |   8 +-
 .../gcc.target/riscv/rvv/autovec/pr115336.c   |  20 ++
 .../gcc.target/riscv/rvv/autovec/pr116059.c   |   9 +
 gcc/tree-if-conv.cc   |  78 +--
 gcc/tree-vect-data-refs.cc|  39 +++-
 gcc/tree-vect-patterns.cc |  17 +-
 gcc/tree-vect-slp.cc  |  22 +-
 gcc/tree-vect-stmts.cc| 218 ++
 gcc/tree-vectorizer.h |  11 +-
 29 files changed, 848 insertions(+), 261 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116059.c

-- 
2.45.2



[PATCH 1/8] docs: Document maskload else operand and behavior.

2024-08-11 Thread Robin Dapp
This patch amends the documentation for masked loads (maskload,
vec_mask_load_lanes, and mask_gather_load as well as their len
counterparts) with an else operand.

gcc/ChangeLog:

* doc/md.texi: Document masked load else operand.
---
 gcc/doc/md.texi | 60 +++--
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5dc0d55edd6..4047d8f58fe 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5017,8 +5017,9 @@ This pattern is not allowed to @code{FAIL}.
 @item @samp{vec_mask_load_lanes@var{m}@var{n}}
 Like @samp{vec_load_lanes@var{m}@var{n}}, but takes an additional
 mask operand (operand 2) that specifies which elements of the destination
-vectors should be loaded.  Other elements of the destination
-vectors are set to zero.  The operation is equivalent to:
+vectors should be loaded.  Operand 3 is an else operand similar to the one
+in @code{maskload}.
+The operation is equivalent to:
 
 @smallexample
 int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
@@ -5028,7 +5029,7 @@ for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
   operand0[i][j] = operand1[j * c + i];
   else
 for (i = 0; i < c; i++)
-  operand0[i][j] = 0;
+  operand0[i][j] = operand3;
 @end smallexample
 
 This pattern is not allowed to @code{FAIL}.
@@ -5036,16 +5037,20 @@ This pattern is not allowed to @code{FAIL}.
 @cindex @code{vec_mask_len_load_lanes@var{m}@var{n}} instruction pattern
 @item @samp{vec_mask_len_load_lanes@var{m}@var{n}}
 Like @samp{vec_load_lanes@var{m}@var{n}}, but takes an additional
-mask operand (operand 2), length operand (operand 3) as well as bias operand 
(operand 4)
-that specifies which elements of the destination vectors should be loaded.
-Other elements of the destination vectors are undefined.  The operation is 
equivalent to:
+mask operand (operand 2), length operand (operand 4) as well as bias operand
+(operand 5) that specifies which elements of the destination vectors should be
+loaded.  Operand 3 is an else operand similar to the one in @code{maskload}.
+Other elements of the destination vectors are undefined.  The operation
+is equivalent to:
 
 @smallexample
 int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
-for (j = 0; j < operand3 + operand4; j++)
-  if (operand2[j])
-for (i = 0; i < c; i++)
+for (j = 0; j < operand4 + operand5; j++)
+  for (i = 0; i < c; i++)
+if (operand2[j])
   operand0[i][j] = operand1[j * c + i];
+else
+  operand0[i][j] = operand3;
 @end smallexample
 
 This pattern is not allowed to @code{FAIL}.
@@ -5125,18 +5130,24 @@ address width.
 @cindex @code{mask_gather_load@var{m}@var{n}} instruction pattern
 @item @samp{mask_gather_load@var{m}@var{n}}
 Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand as
-operand 5.  Bit @var{i} of the mask is set if element @var{i}
+operand 5 and an else operand 6 similar to the one in @code{maskload}.
+Bit @var{i} of the mask is set if element @var{i}
 of the result should be loaded from memory and clear if element @var{i}
-of the result should be set to zero.
+of the result should be set to operand 6.
 
 @cindex @code{mask_len_gather_load@var{m}@var{n}} instruction pattern
 @item @samp{mask_len_gather_load@var{m}@var{n}}
-Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand 
(operand 5),
-a len operand (operand 6) as well as a bias operand (operand 7).  Similar to 
mask_len_load,
-the instruction loads at most (operand 6 + operand 7) elements from memory.
+Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand
+(operand 5) and an else operand (operand 6) similar to the one in
+@code{maskload} as well as a len operand (operand 7) and a bias operand
+(operand 8).
+
+Similar to mask_len_load the instruction loads at
+most (operand 7 + operand 8) elements from memory.
 Bit @var{i} of the mask is set if element @var{i} of the result should
-be loaded from memory and clear if element @var{i} of the result should be 
undefined.
-Mask elements @var{i} with @var{i} > (operand 6 + operand 7) are ignored.
+be loaded from memory and clear if element @var{i} of the result should
+be set to operand 6.
+Mask elements @var{i} with @var{i} > (operand 7 + operand 8) are ignored.
 
 @cindex @code{scatter_store@var{m}@var{n}} instruction pattern
 @item @samp{scatter_store@var{m}@var{n}}
@@ -5368,8 +5379,12 @@ Operands 4 and 5 have a target-dependent scalar integer 
mode.
 @cindex @code{maskload@var{m}@var{n}} instruction pattern
 @item @samp{maskload@var{m}@var{n}}
 Perform a masked load of vector from memory operand 1 of mode @var{m}
-into register operand 0.  Mask is provided in register operand 2 of
-mode @var{n}.
+into register operand 0.  The mask is provided in register operand 2 of
+mode @var{n}.  Operand 3 (the "else value") specifies which value is loaded
+when the mask is unset.  The predicate of operand 3 must only accept
+the else values that the targe

[PATCH 2/8] ifn: Add else-operand handling.

2024-08-11 Thread Robin Dapp
This patch adds else-operand handling to the internal functions.

gcc/ChangeLog:

* internal-fn.cc (add_mask_and_len_args): Rename...
(add_mask_else_and_len_args): ...to this and add else handling.
(expand_partial_load_optab_fn): Use adjusted function.
(expand_partial_store_optab_fn): Ditto.
(expand_scatter_store_optab_fn): Ditto.
(expand_gather_load_optab_fn): Ditto.
(internal_fn_len_index): Adjust for masked loads.
(internal_fn_else_index): Add masked loads.
---
 gcc/internal-fn.cc | 69 ++
 1 file changed, 58 insertions(+), 11 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 8a2e07f2f96..586978e8f3f 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -331,17 +331,18 @@ get_multi_vector_move (tree array_type, convert_optab 
optab)
   return convert_optab_handler (optab, imode, vmode);
 }
 
-/* Add mask and len arguments according to the STMT.  */
+/* Add mask, else, and len arguments according to the STMT.  */
 
 static unsigned int
-add_mask_and_len_args (expand_operand *ops, unsigned int opno, gcall *stmt)
+add_mask_else_and_len_args (expand_operand *ops, unsigned int opno, gcall 
*stmt)
 {
   internal_fn ifn = gimple_call_internal_fn (stmt);
   int len_index = internal_fn_len_index (ifn);
   /* BIAS is always consecutive next of LEN.  */
   int bias_index = len_index + 1;
   int mask_index = internal_fn_mask_index (ifn);
-  /* The order of arguments are always {len,bias,mask}.  */
+
+  /* The order of arguments is always {mask, else, len, bias}.  */
   if (mask_index >= 0)
 {
   tree mask = gimple_call_arg (stmt, mask_index);
@@ -362,6 +363,23 @@ add_mask_and_len_args (expand_operand *ops, unsigned int 
opno, gcall *stmt)
 
   create_input_operand (&ops[opno++], mask_rtx,
TYPE_MODE (TREE_TYPE (mask)));
+
+}
+
+  int els_index = internal_fn_else_index (ifn);
+  if (els_index >= 0)
+{
+  tree els = gimple_call_arg (stmt, els_index);
+  tree els_type = TREE_TYPE (els);
+  if (TREE_CODE (els) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (els)
+ && VAR_P (SSA_NAME_VAR (els)))
+   create_undefined_input_operand (&ops[opno++], TYPE_MODE (els_type));
+  else
+   {
+ rtx els_rtx = expand_normal (els);
+ create_input_operand (&ops[opno++], els_rtx, TYPE_MODE (els_type));
+   }
 }
   if (len_index >= 0)
 {
@@ -3014,7 +3032,7 @@ static void
 expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
optab)
 {
   int i = 0;
-  class expand_operand ops[5];
+  class expand_operand ops[6];
   tree type, lhs, rhs, maskt;
   rtx mem, target;
   insn_code icode;
@@ -3044,7 +3062,7 @@ expand_partial_load_optab_fn (internal_fn ifn, gcall 
*stmt, convert_optab optab)
   target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   create_call_lhs_operand (&ops[i++], target, TYPE_MODE (type));
   create_fixed_operand (&ops[i++], mem);
-  i = add_mask_and_len_args (ops, i, stmt);
+  i = add_mask_else_and_len_args (ops, i, stmt);
   expand_insn (icode, i, ops);
 
   assign_call_lhs (lhs, target, &ops[0]);
@@ -3090,7 +3108,7 @@ expand_partial_store_optab_fn (internal_fn ifn, gcall 
*stmt, convert_optab optab
   reg = expand_normal (rhs);
   create_fixed_operand (&ops[i++], mem);
   create_input_operand (&ops[i++], reg, TYPE_MODE (type));
-  i = add_mask_and_len_args (ops, i, stmt);
+  i = add_mask_else_and_len_args (ops, i, stmt);
   expand_insn (icode, i, ops);
 }
 
@@ -3676,7 +3694,7 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, 
direct_optab optab)
   create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], scale_int);
   create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
-  i = add_mask_and_len_args (ops, i, stmt);
+  i = add_mask_else_and_len_args (ops, i, stmt);
 
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
   TYPE_MODE (TREE_TYPE (offset)));
@@ -3705,7 +3723,7 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, 
direct_optab optab)
   create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], scale_int);
-  i = add_mask_and_len_args (ops, i, stmt);
+  i = add_mask_else_and_len_args (ops, i, stmt);
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
   TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
@@ -4590,6 +4608,18 @@ get_len_internal_fn (internal_fn fn)
   case IFN_COND_##NAME:
\
 return IFN_COND_LEN_##NAME;
 #include "internal-fn.def"
+default:
+  break;
+}
+
+  switch (fn)
+{
+case IFN_MASK_

[PATCH 5/8] aarch64: Add masked-load else operands.

2024-08-11 Thread Robin Dapp
This adds zero else operands to masked loads and their intrinsics.
I needed to adjust more than initially thought because we rely on
combine for several instructions and a change in a "base" pattern
needs to propagate to all those.

For lack of a better idea, I used a function call property to specify
whether a builtin needs an else operand or not.  Somebody with better
knowledge of the aarch64 target can surely improve that.
---
 .../aarch64/aarch64-sve-builtins-base.cc  | 58 ++-
 gcc/config/aarch64/aarch64-sve-builtins.cc|  5 ++
 gcc/config/aarch64/aarch64-sve-builtins.h |  1 +
 gcc/config/aarch64/aarch64-sve.md | 47 +--
 gcc/config/aarch64/aarch64-sve2.md|  3 +-
 gcc/config/aarch64/predicates.md  |  4 ++
 6 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index d55bee0b72f..131c822a2cd 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1459,7 +1459,7 @@ public:
   unsigned int
   call_properties (const function_instance &) const override
   {
-return CP_READ_MEMORY;
+return CP_READ_MEMORY | CP_HAS_ELSE;
   }
 
   gimple *
@@ -1474,11 +1474,12 @@ public:
 gimple_seq stmts = NULL;
 tree pred = f.convert_pred (stmts, vectype, 0);
 tree base = f.fold_contiguous_base (stmts, vectype);
+tree els = build_zero_cst (vectype);
 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
 
 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
-gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
- base, cookie, pred);
+gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
+ base, cookie, pred, els);
 gimple_call_set_lhs (new_call, f.lhs);
 return new_call;
   }
@@ -1488,10 +1489,16 @@ public:
   {
 insn_code icode;
 if (e.vectors_per_tuple () == 1)
-  icode = convert_optab_handler (maskload_optab,
-e.vector_mode (0), e.gp_mode (0));
+  {
+   icode = convert_optab_handler (maskload_optab,
+  e.vector_mode (0), e.gp_mode (0));
+   e.args.quick_push (CONST0_RTX (e.vector_mode (0)));
+  }
 else
-  icode = code_for_aarch64 (UNSPEC_LD1_COUNT, e.tuple_mode (0));
+  {
+   icode = code_for_aarch64 (UNSPEC_LD1_COUNT, e.tuple_mode (0));
+   e.args.quick_push (CONST0_RTX (e.tuple_mode (0)));
+  }
 return e.use_contiguous_load_insn (icode);
   }
 };
@@ -1502,12 +1509,20 @@ class svld1_extend_impl : public extending_load
 public:
   using extending_load::extending_load;
 
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+return CP_READ_MEMORY | CP_HAS_ELSE;
+  }
+
   rtx
   expand (function_expander &e) const override
   {
 insn_code icode = code_for_aarch64_load (UNSPEC_LD1_SVE, extend_rtx_code 
(),
 e.vector_mode (0),
 e.memory_vector_mode ());
+/* Add the else operand.  */
+e.args.quick_push (CONST0_RTX (e.vector_mode (1)));
 return e.use_contiguous_load_insn (icode);
   }
 };
@@ -1518,7 +1533,7 @@ public:
   unsigned int
   call_properties (const function_instance &) const override
   {
-return CP_READ_MEMORY;
+return CP_READ_MEMORY | CP_HAS_ELSE;
   }
 
   rtx
@@ -1527,6 +1542,8 @@ public:
 e.prepare_gather_address_operands (1);
 /* Put the predicate last, as required by mask_gather_load_optab.  */
 e.rotate_inputs_left (0, 5);
+/* Add the else operand.  */
+e.args.quick_push (CONST0_RTX (e.vector_mode (0)));
 machine_mode mem_mode = e.memory_vector_mode ();
 machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
 insn_code icode = convert_optab_handler (mask_gather_load_optab,
@@ -1550,6 +1567,8 @@ public:
 e.rotate_inputs_left (0, 5);
 /* Add a constant predicate for the extension rtx.  */
 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
+/* Add the else operand.  */
+e.args.quick_push (CONST0_RTX (e.vector_mode (1)));
 insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
e.vector_mode (0),
e.memory_vector_mode ());
@@ -1680,7 +1699,7 @@ public:
   unsigned int
   call_properties (const function_instance &) const override
   {
-return CP_READ_MEMORY;
+return CP_READ_MEMORY | CP_HAS_ELSE;
   }
 
   gimple *
@@ -1692,6 +1711,7 @@ public:
 /* Get the predicate and base pointer.  */
 gimple_seq stmts = NULL;
 tree pred = f.convert_pred (stmts, vectype, 0);
+tree els = build_zero_cst (vectype);
 tree base = f.fold_contiguous_base (stmts, v

[PATCH 4/8] vect: Add maskload else value support.

2024-08-11 Thread Robin Dapp
This patch adds an else operand to vectorized masked load calls.
The current implementation adds else-value arguments to the respective
target-querying functions that are used to supply the vectorizer with the
proper else value.

Right now, the only spot where a zero else value is actually enforced is
tree-ifcvt.  Loop masking and other instances of masked loads in the
vectorizer itself do not use vec_cond_exprs.

gcc/ChangeLog:

* internal-fn.cc (internal_gather_scatter_fn_supported_p): Add
else argument.
* internal-fn.h (internal_gather_scatter_fn_supported_p): Ditto.
(MASK_LOAD_ELSE_NONE): Define.
(MASK_LOAD_ELSE_ZERO): Ditto.
(MASK_LOAD_ELSE_M1): Ditto.
(MASK_LOAD_ELSE_UNDEFINED): Ditto.
* optabs-query.cc (supports_vec_convert_optab_p): Return icode.
(get_supported_else_val): Return supported else value for
optab's operand at index.
(supports_vec_gather_load_p): Add else argument.
(supports_vec_scatter_store_p): Ditto.
* optabs-query.h (supports_vec_gather_load_p): Ditto.
(get_supported_else_val): Ditto.
* optabs-tree.cc (target_supports_mask_load_store_p): Ditto.
(can_vec_mask_load_store_p): Ditto.
(target_supports_len_load_store_p): Ditto.
(get_len_load_store_mode): Ditto.
* optabs-tree.h (target_supports_mask_load_store_p): Ditto.
(can_vec_mask_load_store_p): Ditto.
* tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto.
(vect_gather_scatter_fn_p): Ditto.
(vect_check_gather_scatter): Ditto.
(vect_load_lanes_supported): Ditto.
* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
Ditto.
* tree-vect-slp.cc (vect_get_operand_map): Adjust indices for
else operand.
(vect_slp_analyze_node_operations): Skip undefined else operand.
* tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p):
Add else operand handling.
(vect_get_vec_defs_for_operand): Handle undefined else operand.
(check_load_store_for_partial_vectors): Add else argument.
(vect_truncate_gather_scatter_offset): Ditto.
(vect_use_strided_gather_scatters_p): Ditto.
(get_group_load_store_type): Ditto.
(get_load_store_type): Ditto.
(vect_get_mask_load_else): Ditto.
(vect_get_else_val_from_tree): Ditto.
(vect_build_one_gather_load_call): Add zero else operand.
(vectorizable_load): Use else operand.
* tree-vectorizer.h (vect_gather_scatter_fn_p): Add else
argument.
(vect_load_lanes_supported): Ditto.
(vect_get_mask_load_else): Ditto.
(vect_get_else_val_from_tree): Ditto.
---
 gcc/internal-fn.cc |  19 +++-
 gcc/internal-fn.h  |  11 +-
 gcc/optabs-query.cc|  83 +++---
 gcc/optabs-query.h |   3 +-
 gcc/optabs-tree.cc |  43 +---
 gcc/optabs-tree.h  |   8 +-
 gcc/tree-vect-data-refs.cc |  39 +--
 gcc/tree-vect-patterns.cc  |  17 ++-
 gcc/tree-vect-slp.cc   |  22 +++-
 gcc/tree-vect-stmts.cc | 218 +
 gcc/tree-vectorizer.h  |  11 +-
 11 files changed, 367 insertions(+), 107 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 586978e8f3f..2fc676e397c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4988,12 +4988,15 @@ internal_fn_stored_value_index (internal_fn fn)
or stored.  OFFSET_VECTOR_TYPE is the vector type that holds the
offset from the shared base address of each loaded or stored element.
SCALE is the amount by which these offsets should be multiplied
-   *after* they have been extended to address width.  */
+   *after* they have been extended to address width.
+   If the target supports the gather load the supported else value
+   will be written to the position ELSVAL points to if it is nonzero.  */
 
 bool
 internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
tree memory_element_type,
-   tree offset_vector_type, int scale)
+   tree offset_vector_type, int scale,
+   int *elsval)
 {
   if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
   TYPE_SIZE (memory_element_type)))
@@ -5006,9 +5009,15 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, 
tree vector_type,
   TYPE_MODE (offset_vector_type));
   int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
   bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type));
-  return (icode != CODE_FOR_nothing
- && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
- && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
+  bool ok = false;
+  ok = icode != CODE_FOR_nothing
+&

[PATCH 3/8] tree-ifcvt: Enforce zero else value after maskload.

2024-08-11 Thread Robin Dapp
When predicating a load we implicitly assume that the else value is
zero.  In order to formalize this, this patch queries the target for
its supported else operand and uses that for the maskload call.
Subsequently, if the else operand is nonzero, a cond_expr enforcing
a zero else value is emitted.

gcc/ChangeLog:

* tree-if-conv.cc (predicate_load_or_store): Enforce zero else
value.
(predicate_statements): Use sequence instead of statement.
---
 gcc/tree-if-conv.cc | 78 +++--
 1 file changed, 62 insertions(+), 16 deletions(-)

diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 57992b6deca..54cb9ef0ef1 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -2452,10 +2452,12 @@ mask_exists (int size, const vec &vec)
write and it needs to be predicated by MASK.  Return a statement
that does so.  */
 
-static gimple *
-predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
+static gimple_seq
+predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask,
+hash_set *ssa_names)
 {
-  gcall *new_stmt;
+  gimple_seq stmts = NULL;
+  gcall *call_stmt;
 
   tree lhs = gimple_assign_lhs (stmt);
   tree rhs = gimple_assign_rhs1 (stmt);
@@ -2471,21 +2473,59 @@ predicate_load_or_store (gimple_stmt_iterator *gsi, 
gassign *stmt, tree mask)
   ref);
   if (TREE_CODE (lhs) == SSA_NAME)
 {
-  new_stmt
-   = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
- ptr, mask);
-  gimple_call_set_lhs (new_stmt, lhs);
-  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+  /* Get the preferred vector mode and its corresponding mask for the
+masked load.  We need this to query the target's supported else
+operands.  */
+  machine_mode mode = TYPE_MODE (TREE_TYPE (addr));
+  scalar_mode smode = as_a  (mode);
+
+  machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
+  machine_mode mask_mode
+   = targetm.vectorize.get_mask_mode (vmode).require ();
+
+  int elsval;
+  internal_fn ifn;
+  target_supports_mask_load_store_p (vmode, mask_mode, true, &ifn, 
&elsval);
+  tree els = vect_get_mask_load_else (elsval, TREE_TYPE (lhs));
+
+  call_stmt
+   = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
+ ptr, mask, els);
+
+  /* Build the load call and, if the else value is nonzero,
+a COND_EXPR that enforces it.  */
+  tree loadlhs;
+  if (elsval == MASK_LOAD_ELSE_ZERO)
+   gimple_call_set_lhs (call_stmt, gimple_get_lhs (stmt));
+  else
+   {
+ loadlhs = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "_ifc_");
+ ssa_names->add (loadlhs);
+ gimple_call_set_lhs (call_stmt, loadlhs);
+   }
+  gimple_set_vuse (call_stmt, gimple_vuse (stmt));
+  gimple_seq_add_stmt (&stmts, call_stmt);
+
+  if (elsval != MASK_LOAD_ELSE_ZERO)
+   {
+ tree cond_rhs
+   = fold_build_cond_expr (TREE_TYPE (loadlhs), mask, loadlhs,
+   build_zero_cst (TREE_TYPE (loadlhs)));
+ gassign *cond_stmt
+   = gimple_build_assign (gimple_get_lhs (stmt), cond_rhs);
+ gimple_seq_add_stmt (&stmts, cond_stmt);
+   }
 }
   else
 {
-  new_stmt
+  call_stmt
= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
  mask, rhs);
-  gimple_move_vops (new_stmt, stmt);
+  gimple_move_vops (call_stmt, stmt);
+  gimple_seq_add_stmt (&stmts, call_stmt);
 }
-  gimple_call_set_nothrow (new_stmt, true);
-  return new_stmt;
+  gimple_call_set_nothrow (call_stmt, true);
+  return stmts;
 }
 
 /* STMT uses OP_LHS.  Check whether it is equivalent to:
@@ -2789,11 +2829,17 @@ predicate_statements (loop_p loop)
  vect_masks.safe_push (mask);
}
  if (gimple_assign_single_p (stmt))
-   new_stmt = predicate_load_or_store (&gsi, stmt, mask);
- else
-   new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
+   {
+ gimple_seq call_seq
+   = predicate_load_or_store (&gsi, stmt, mask, &ssa_names);
 
- gsi_replace (&gsi, new_stmt, true);
+ gsi_replace_with_seq (&gsi, call_seq, true);
+   }
+ else
+   {
+ new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
+ gsi_replace (&gsi, new_stmt, true);
+   }
}
  else if (((lhs = gimple_assign_lhs (stmt)), true)
   && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-- 
2.45.2



[PATCH 6/8] gcn: Add else operand to masked loads.

2024-08-11 Thread Robin Dapp
This patch adds a zero else operand to the masked loads.

gcc/ChangeLog:

* config/gcn/predicates.md (maskload_else_operand): New
predicate.
* config/gcn/gcn-valu.md: Use new predicate.
---
 gcc/config/gcn/gcn-valu.md   | 6 --
 gcc/config/gcn/predicates.md | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index b24cf9be32e..2344bc00ffc 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -4002,7 +4002,8 @@ (define_expand "while_ultsidi"
 (define_expand "maskloaddi"
   [(match_operand:V_MOV 0 "register_operand")
(match_operand:V_MOV 1 "memory_operand")
-   (match_operand 2 "")]
+   (match_operand 2 "")
+   (match_operand:V_MOV 3 "maskload_else_operand")]
   ""
   {
 rtx exec = force_reg (DImode, operands[2]);
@@ -4040,7 +4041,8 @@ (define_expand "mask_gather_load"
(match_operand: 2 "register_operand")
(match_operand 3 "immediate_operand")
(match_operand:SI 4 "gcn_alu_operand")
-   (match_operand:DI 5 "")]
+   (match_operand:DI 5 "")
+   (match_operand:V_MOV 6 "maskload_else_operand")]
   ""
   {
 rtx exec = force_reg (DImode, operands[5]);
diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md
index 3f59396a649..9bc806cf990 100644
--- a/gcc/config/gcn/predicates.md
+++ b/gcc/config/gcn/predicates.md
@@ -228,3 +228,6 @@ (define_predicate "ascending_zero_int_parallel"
   return gcn_stepped_zero_int_parallel_p (op, 1);
 })
 
+(define_predicate "maskload_else_operand"
+  (and (match_code "const_int,const_vector")
+   (match_test "op == CONST0_RTX (GET_MODE (op))")))
-- 
2.45.2



[PATCH 7/8] i386: Add else operand to masked loads.

2024-08-11 Thread Robin Dapp
This patch adds a zero else operand to masked loads, in particular the
masked gather load builtins that are used for gather vectorization.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
Add else-operand handling.
(ix86_expand_builtin): Ditto.
* config/i386/predicates.md (vcvtne2ps2bf_parallel): New
predicate.
(maskload_else_operand): Ditto.
* config/i386/sse.md: Use predicate.
---
 gcc/config/i386/i386-expand.cc |  59 +---
 gcc/config/i386/predicates.md  |  15 
 gcc/config/i386/sse.md | 124 -
 3 files changed, 142 insertions(+), 56 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index d9ad06264aa..b8505fe2c38 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12462,10 +12462,11 @@ ix86_expand_special_args_builtin (const struct 
builtin_description *d,
 {
   tree arg;
   rtx pat, op;
-  unsigned int i, nargs, arg_adjust, memory;
+  unsigned int i, nargs, arg_adjust, memory = -1;
   unsigned int constant = 100;
   bool aligned_mem = false;
-  rtx xops[4];
+  rtx xops[4] = {};
+  bool add_els = false;
   enum insn_code icode = d->icode;
   const struct insn_data_d *insn_p = &insn_data[icode];
   machine_mode tmode = insn_p->operand[0].mode;
@@ -12592,6 +12593,9 @@ ix86_expand_special_args_builtin (const struct 
builtin_description *d,
 case V4DI_FTYPE_PCV4DI_V4DI:
 case V4SI_FTYPE_PCV4SI_V4SI:
 case V2DI_FTYPE_PCV2DI_V2DI:
+  /* Two actual args but an additional else operand.  */
+  add_els = true;
+  /* Fallthru.  */
 case VOID_FTYPE_INT_INT64:
   nargs = 2;
   klass = load;
@@ -12864,6 +12868,12 @@ ix86_expand_special_args_builtin (const struct 
builtin_description *d,
   xops[i]= op;
 }
 
+  if (add_els)
+{
+  xops[i] = CONST0_RTX (GET_MODE (xops[0]));
+  nargs++;
+}
+
   switch (nargs)
 {
 case 0:
@@ -13113,10 +13123,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   size_t i;
   enum insn_code icode, icode2;
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
-  tree arg0, arg1, arg2, arg3, arg4;
-  rtx op0, op1, op2, op3, op4, pat, pat2, insn;
-  machine_mode mode0, mode1, mode2, mode3, mode4;
+  tree arg0, arg1, arg2, arg3, arg4, arg5;
+  rtx op0, op1, op2, op3, op4, op5, opels, pat, pat2, insn;
+  machine_mode mode0, mode1, mode2, mode3, mode4, mode5;
   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
+  bool has_else_op;
   HOST_WIDE_INT bisa, bisa2;
 
   /* For CPU builtins that can be folded, fold first and expand the fold.  */
@@ -14919,6 +14930,7 @@ rdseed_step:
   arg2 = CALL_EXPR_ARG (exp, 2);
   arg3 = CALL_EXPR_ARG (exp, 3);
   arg4 = CALL_EXPR_ARG (exp, 4);
+  has_else_op = call_expr_nargs (exp) == 6;
   op0 = expand_normal (arg0);
   op1 = expand_normal (arg1);
   op2 = expand_normal (arg2);
@@ -15021,10 +15033,38 @@ rdseed_step:
  op3 = copy_to_reg (op3);
  op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
}
-  if (!insn_data[icode].operand[5].predicate (op4, mode4))
+  /* The vectorizer only adds an else operand for real masks. */
+  if (has_else_op)
+   {
+ if (op4 != CONST0_RTX (GET_MODE (subtarget)))
+ {
+   error ("the else operand must be 0");
+   return const0_rtx;
+ }
+ else
+   {
+ arg5 = CALL_EXPR_ARG (exp, 5);
+ op5 = expand_normal (arg5);
+ /* Note the arg order is different from the operand order.  */
+ mode5 = insn_data[icode].operand[5].mode;
+ if (!insn_data[icode].operand[5].predicate (op5, mode5))
+   {
+ error ("the last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+   }
+   }
+ opels = op4;
+ op4 = op5;
+ mode4 = mode5;
+   }
+  else
{
-  error ("the last argument must be scale 1, 2, 4, 8");
-  return const0_rtx;
+ if (!insn_data[icode].operand[5].predicate (op4, mode4))
+   {
+ error ("the last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+   }
+ opels = CONST0_RTX (GET_MODE (subtarget));
}
 
   /* Optimize.  If mask is known to have all high bits set,
@@ -15095,7 +15135,8 @@ rdseed_step:
}
}
 
-  pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
+  pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels);
+
   if (! pat)
return const0_rtx;
   emit_insn (pat);
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 680594871de..aac7341aeab 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2332,3 +2332,18 @@ (define_predicate "apx_nd

[PATCH 8/8] RISC-V: Add else operand to masked loads [PR115336].

2024-08-11 Thread Robin Dapp
This patch adds else operands to masked loads.  Currently the default
else operand predicate accepts "undefined" (i.e. SCRATCH) as well as
all-ones values.

Note that this series introduces a large number of new RVV FAILs for
riscv.  All of them are due to us not being able to elide redundant
vec_cond_exprs.

PR 115336
PR 116059

gcc/ChangeLog:

* config/riscv/autovec.md: Add else operand.
* config/riscv/predicates.md (maskload_else_operand): New
predicate.
* config/riscv/riscv-v.cc (get_else_operand): Remove static.
(expand_load_store): Use get_else_operand and adjust index.
(expand_gather_scatter): Ditto.
(expand_lanes_load_store): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr115336.c: New test.
* gcc.target/riscv/rvv/autovec/pr116059.c: New test.
---
 gcc/config/riscv/autovec.md   | 45 +++
 gcc/config/riscv/predicates.md|  3 ++
 gcc/config/riscv/riscv-v.cc   | 26 +++
 .../gcc.target/riscv/rvv/autovec/pr115336.c   | 20 +
 .../gcc.target/riscv/rvv/autovec/pr116059.c   |  9 
 5 files changed, 76 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116059.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index d5793acc999..4111474309c 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -26,8 +26,9 @@ (define_expand "mask_len_load"
   [(match_operand:V 0 "register_operand")
(match_operand:V 1 "memory_operand")
(match_operand: 2 "vector_mask_operand")
-   (match_operand 3 "autovec_length_operand")
-   (match_operand 4 "const_0_operand")]
+   (match_operand:V 3 "maskload_else_operand")
+   (match_operand 4 "autovec_length_operand")
+   (match_operand 5 "const_0_operand")]
   "TARGET_VECTOR"
 {
   riscv_vector::expand_load_store (operands, true);
@@ -57,8 +58,9 @@ (define_expand 
"mask_len_gather_load"
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -72,8 +74,9 @@ (define_expand 
"mask_len_gather_load"
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -87,8 +90,9 @@ (define_expand 
"mask_len_gather_load"
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -102,8 +106,9 @@ (define_expand 
"mask_len_gather_load"
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -117,8 +122,9 @@ (define_expand 
"mask_len_gather_load"
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -132,8 +138,9 @@ (define_expand 
"mask_len_gather_load"
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gathe

Re: [PATCH] lra: emit caller-save register spills before call insn [PR116028]

2024-08-11 Thread Mark Wielaard
Hi,

On Fri, Aug 09, 2024 at 08:15:31PM -0700, Andrew Pinski wrote:
> I had been wondering the same until I looked into it earlier today.
> Linaro CI's does `--disable-bootstrap` and there was no extra
> testsuite failures with the patch.
> So Linaro CI's is not catching all the bugs that a developer would
> catch in the end. Because bootstrap is one of the normal requirements;
> though usually only on one target.

Sam pointed out the same about the gcc arm64 build on
builder.sourceware.org. We enabled full bootstrap and tests for that
architecture now.

fedora rawhide x86_64 (*), debian stable amd64, debian stable arm64,
fedora riscv and gentoo sparc.

https://builder.sourceware.org/buildbot/#/builders?tags=gcc-full

Note that these builds don't send any email atm (they probably should
on build failure), so you would have to check by hand after commit
(after a couple of hours). Test results are uploaded to bunsen.

Cheers,

Mark

(*) that one seems to lose a connection after a couple of hours
during the testsuite run though.


PING: [PATCH] x86: Update BB_HEAD when aligning BB_HEAD

2024-08-11 Thread H.J. Lu
On Thu, Aug 8, 2024 at 6:53 PM H.J. Lu  wrote:
>
> When we emit .p2align to align BB_HEAD, we must update BB_HEAD.  Otherwise
> ENDBR will be inserted at the wrong place.
>
> gcc/
>
> PR target/116174
> * config/i386/i386.cc (ix86_align_loops): Update BB_HEAD when
> aligning BB_HEAD
>
> gcc/testsuite/
>
> PR target/116174
> * gcc.target/i386/pr116174.c: New test.
>
> Signed-off-by: H.J. Lu 
> ---
>  gcc/config/i386/i386.cc  |  7 +--
>  gcc/testsuite/gcc.target/i386/pr116174.c | 12 
>  2 files changed, 17 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr116174.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 77c441893b4..ec6cc5e3548 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -23528,8 +23528,11 @@ ix86_align_loops ()
>
>   if (padding_p && detect_tight_loop_p)
> {
> - emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 
> (size)),
> -   GEN_INT (0)), label);
> + rtx_insn *align =
> +   emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 
> (size)),
> + GEN_INT (0)), label);
> + if (BB_HEAD (bb) == label)
> +   BB_HEAD (bb) = align;
>   /* End of function.  */
>   if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
> break;
> diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c 
> b/gcc/testsuite/gcc.target/i386/pr116174.c
> new file mode 100644
> index 000..8877d0b51af
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr116174.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target *-*-linux* } } */
> +/* { dg-options "-O2 -fcf-protection=branch" } */
> +
> +char *
> +foo (char *dest, const char *src)
> +{
> +  while ((*dest++ = *src++) != '\0')
> +/* nothing */;
> +  return --dest;
> +}
> +
> +/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */
> --
> 2.45.2
>

PING.

-- 
H.J.


[PATCH v8 0/3] c: Add __lengthof__ operator

2024-08-11 Thread Alejandro Colomar
Hi!

v8:

-  Reformat (simplify) change-log entries.
-  Improve wording of documentation.
-  Add link to LLVM issue in commit message.

I've added a GitHub issue in the LLVM project reporting about the
existence of this patch set:


Have a lovely night!
Alex

Alejandro Colomar (3):
  gcc/: Rename array_type_nelts() => array_type_nelts_minus_one()
  Merge definitions of array_type_nelts_top()
  c: Add __lengthof__ operator

 gcc/c-family/c-common.cc|  26 
 gcc/c-family/c-common.def   |   3 +
 gcc/c-family/c-common.h |   2 +
 gcc/c/c-decl.cc |  30 +++--
 gcc/c/c-fold.cc |   7 +-
 gcc/c/c-parser.cc   |  61 +++---
 gcc/c/c-tree.h  |   4 +
 gcc/c/c-typeck.cc   | 118 ++-
 gcc/config/aarch64/aarch64.cc   |   2 +-
 gcc/config/i386/i386.cc |   2 +-
 gcc/cp/cp-tree.h|   1 -
 gcc/cp/decl.cc  |   2 +-
 gcc/cp/init.cc  |   8 +-
 gcc/cp/lambda.cc|   3 +-
 gcc/cp/operators.def|   1 +
 gcc/cp/tree.cc  |  13 --
 gcc/doc/extend.texi |  31 +
 gcc/expr.cc |   8 +-
 gcc/fortran/trans-array.cc  |   2 +-
 gcc/fortran/trans-openmp.cc |   4 +-
 gcc/rust/backend/rust-tree.cc   |  13 --
 gcc/rust/backend/rust-tree.h|   2 -
 gcc/target.h|   3 +
 gcc/testsuite/gcc.dg/lengthof-compile.c | 115 ++
 gcc/testsuite/gcc.dg/lengthof-vla.c |  46 
 gcc/testsuite/gcc.dg/lengthof.c | 150 
 gcc/tree.cc |  17 ++-
 gcc/tree.h  |   3 +-
 28 files changed, 598 insertions(+), 79 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/lengthof-compile.c
 create mode 100644 gcc/testsuite/gcc.dg/lengthof-vla.c
 create mode 100644 gcc/testsuite/gcc.dg/lengthof.c

Range-diff against v7:
1:  8b68e250503 ! 1:  a6aa38c9013 gcc/: Rename array_type_nelts() => 
array_type_nelts_minus_one()
@@ Commit message
 
 gcc/ChangeLog:
 
-* tree.cc (array_type_nelts): Rename function ...
-(array_type_nelts_minus_one): ... to this name.  The old name
-was misleading.
-* tree.h (array_type_nelts): Rename function ...
-(array_type_nelts_minus_one): ... to this name.  The old name
-was misleading.
+* tree.cc (array_type_nelts, array_type_nelts_minus_one):
+* tree.h (array_type_nelts, array_type_nelts_minus_one):
 * expr.cc (count_type_elements):
-Rename array_type_nelts() => array_type_nelts_minus_one()
 * config/aarch64/aarch64.cc
-(pure_scalable_type_info::analyze_array): Likewise.
-* config/i386/i386.cc (ix86_canonical_va_list_type): Likewise.
+(pure_scalable_type_info::analyze_array):
+* config/i386/i386.cc (ix86_canonical_va_list_type):
+Rename array_type_nelts() => array_type_nelts_minus_one()
+The old name was misleading.
 
 gcc/c/ChangeLog:
 
 * c-decl.cc (one_element_array_type_p, get_parm_array_spec):
+* c-fold.cc (c_fold_array_ref):
 Rename array_type_nelts() => array_type_nelts_minus_one()
-* c-fold.cc (c_fold_array_ref): Likewise.
 
 gcc/cp/ChangeLog:
 
 * decl.cc (reshape_init_array):
+* init.cc
+(build_zero_init_1):
+(build_value_init_noctor):
+(build_vec_init):
+(build_delete):
+* lambda.cc (add_capture):
+* tree.cc (array_type_nelts_top):
 Rename array_type_nelts() => array_type_nelts_minus_one()
-* init.cc (build_zero_init_1): Likewise.
-(build_value_init_noctor): Likewise.
-(build_vec_init): Likewise.
-(build_delete): Likewise.
-* lambda.cc (add_capture): Likewise.
-* tree.cc (array_type_nelts_top): Likewise.
 
 gcc/fortran/ChangeLog:
 
 * trans-array.cc (structure_alloc_comps):
+* trans-openmp.cc
+(gfc_walk_alloc_comps):
+(gfc_omp_clause_linear_ctor):
 Rename array_type_nelts() => array_type_nelts_minus_one()
-* trans-openmp.cc (gfc_walk_alloc_comps): Likewise.
-(gfc_omp_clause_linear_ctor): Likewise.
 
 gcc/rust/ChangeLog:
 
2:  21433097103 ! 2:  43300a17e4a Merge definitions of

[PATCH v8 2/3] Merge definitions of array_type_nelts_top()

2024-08-11 Thread Alejandro Colomar
There were two identical definitions, and neither of them is available
where it is needed for implementing __lengthof__.  Merge them, and
provide the single definition in gcc/tree.{h,cc}, where it's available
for __lengthof__, which will be added in the following commit.

gcc/ChangeLog:

* tree.h (array_type_nelts_top):
* tree.cc (array_type_nelts_top):
Define function (moved from gcc/cp/).

gcc/cp/ChangeLog:

* cp-tree.h (array_type_nelts_top):
* tree.cc (array_type_nelts_top):
Remove function (move to gcc/).

gcc/rust/ChangeLog:

* backend/rust-tree.h (array_type_nelts_top):
* backend/rust-tree.cc (array_type_nelts_top):
Remove function.

Signed-off-by: Alejandro Colomar 
---
 gcc/cp/cp-tree.h  |  1 -
 gcc/cp/tree.cc| 13 -
 gcc/rust/backend/rust-tree.cc | 13 -
 gcc/rust/backend/rust-tree.h  |  2 --
 gcc/tree.cc   | 13 +
 gcc/tree.h|  1 +
 6 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index b1693051231..76d7bc34577 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8100,7 +8100,6 @@ extern tree build_exception_variant   (tree, 
tree);
 extern void fixup_deferred_exception_variants   (tree, tree);
 extern tree bind_template_template_parm(tree, tree);
 extern tree array_type_nelts_total (tree);
-extern tree array_type_nelts_top   (tree);
 extern bool array_of_unknown_bound_p   (const_tree);
 extern tree break_out_target_exprs (tree, bool = false);
 extern tree build_ctor_subob_ref   (tree, tree, tree);
diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index 040136c70ab..7d179491476 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -3079,19 +3079,6 @@ cxx_print_statistics (void)
 depth_reached);
 }
 
-/* Return, as an INTEGER_CST node, the number of elements for TYPE
-   (which is an ARRAY_TYPE).  This counts only elements of the top
-   array.  */
-
-tree
-array_type_nelts_top (tree type)
-{
-  return fold_build2_loc (input_location,
- PLUS_EXPR, sizetype,
- array_type_nelts_minus_one (type),
- size_one_node);
-}
-
 /* Return, as an INTEGER_CST node, the number of elements for TYPE
(which is an ARRAY_TYPE).  This one is a recursive count of all
ARRAY_TYPEs that are clumped together.  */
diff --git a/gcc/rust/backend/rust-tree.cc b/gcc/rust/backend/rust-tree.cc
index 8d32e5203ae..3dc6b076711 100644
--- a/gcc/rust/backend/rust-tree.cc
+++ b/gcc/rust/backend/rust-tree.cc
@@ -859,19 +859,6 @@ is_empty_class (tree type)
   return CLASSTYPE_EMPTY_P (type);
 }
 
-// forked from gcc/cp/tree.cc array_type_nelts_top
-
-/* Return, as an INTEGER_CST node, the number of elements for TYPE
-   (which is an ARRAY_TYPE).  This counts only elements of the top
-   array.  */
-
-tree
-array_type_nelts_top (tree type)
-{
-  return fold_build2_loc (input_location, PLUS_EXPR, sizetype,
- array_type_nelts_minus_one (type), size_one_node);
-}
-
 // forked from gcc/cp/tree.cc builtin_valid_in_constant_expr_p
 
 /* Test whether DECL is a builtin that may appear in a
diff --git a/gcc/rust/backend/rust-tree.h b/gcc/rust/backend/rust-tree.h
index 26c8b653ac6..e597c3ab81d 100644
--- a/gcc/rust/backend/rust-tree.h
+++ b/gcc/rust/backend/rust-tree.h
@@ -2993,8 +2993,6 @@ extern location_t rs_expr_location (const_tree);
 extern int
 is_empty_class (tree type);
 
-extern tree array_type_nelts_top (tree);
-
 extern bool
 is_really_empty_class (tree, bool);
 
diff --git a/gcc/tree.cc b/gcc/tree.cc
index ed0a766016a..cedf95cc222 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -3729,6 +3729,19 @@ array_type_nelts_minus_one (const_tree type)
  ? max
  : fold_build2 (MINUS_EXPR, TREE_TYPE (max), max, min));
 }
+
+/* Return, as an INTEGER_CST node, the number of elements for TYPE
+   (which is an ARRAY_TYPE).  This counts only elements of the top
+   array.  */
+
+tree
+array_type_nelts_top (tree type)
+{
+  return fold_build2_loc (input_location,
+ PLUS_EXPR, sizetype,
+ array_type_nelts_minus_one (type),
+ size_one_node);
+}
 
 /* If arg is static -- a reference to an object in static storage -- then
return the object.  This is not the same as the C meaning of `static'.
diff --git a/gcc/tree.h b/gcc/tree.h
index 69d40bb4f04..9061dafd027 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4922,6 +4922,7 @@ extern tree build_method_type (tree, tree);
 extern tree build_offset_type (tree, tree);
 extern tree build_complex_type (tree, bool named = false);
 extern tree array_type_nelts_minus_one (const_tree);
+extern tree array_type_nelts_top (tree);
 
 extern tree value_member (tree, tree);
 extern tree purpose_member (const_tree, tree);
-- 
2.45.2



signature.asc
Des

[PATCH v8 1/3] gcc/: Rename array_type_nelts() => array_type_nelts_minus_one()

2024-08-11 Thread Alejandro Colomar
The old name was misleading.

While at it, also rename some temporary variables that are used with
this function, for consistency.

Link: 
https://inbox.sourceware.org/gcc-patches/9fffd80-dca-2c7e-14b-6c9b509a7...@redhat.com/T/#m2f661c67c8f7b2c405c8c7fc3152dd85dc729120
Cc: Gabriel Ravier 
Cc: Martin Uecker 
Cc: Joseph Myers 
Cc: Xavier Del Campo Romero 
Cc: Jakub Jelinek 

gcc/ChangeLog:

* tree.cc (array_type_nelts, array_type_nelts_minus_one):
* tree.h (array_type_nelts, array_type_nelts_minus_one):
* expr.cc (count_type_elements):
* config/aarch64/aarch64.cc
(pure_scalable_type_info::analyze_array):
* config/i386/i386.cc (ix86_canonical_va_list_type):
Rename array_type_nelts() => array_type_nelts_minus_one()
The old name was misleading.

gcc/c/ChangeLog:

* c-decl.cc (one_element_array_type_p, get_parm_array_spec):
* c-fold.cc (c_fold_array_ref):
Rename array_type_nelts() => array_type_nelts_minus_one()

gcc/cp/ChangeLog:

* decl.cc (reshape_init_array):
* init.cc
(build_zero_init_1):
(build_value_init_noctor):
(build_vec_init):
(build_delete):
* lambda.cc (add_capture):
* tree.cc (array_type_nelts_top):
Rename array_type_nelts() => array_type_nelts_minus_one()

gcc/fortran/ChangeLog:

* trans-array.cc (structure_alloc_comps):
* trans-openmp.cc
(gfc_walk_alloc_comps):
(gfc_omp_clause_linear_ctor):
Rename array_type_nelts() => array_type_nelts_minus_one()

gcc/rust/ChangeLog:

* backend/rust-tree.cc (array_type_nelts_top):
Rename array_type_nelts() => array_type_nelts_minus_one()

Suggested-by: Richard Biener 
Signed-off-by: Alejandro Colomar 
---
 gcc/c/c-decl.cc   | 10 +-
 gcc/c/c-fold.cc   |  7 ---
 gcc/config/aarch64/aarch64.cc |  2 +-
 gcc/config/i386/i386.cc   |  2 +-
 gcc/cp/decl.cc|  2 +-
 gcc/cp/init.cc|  8 
 gcc/cp/lambda.cc  |  3 ++-
 gcc/cp/tree.cc|  2 +-
 gcc/expr.cc   |  8 
 gcc/fortran/trans-array.cc|  2 +-
 gcc/fortran/trans-openmp.cc   |  4 ++--
 gcc/rust/backend/rust-tree.cc |  2 +-
 gcc/tree.cc   |  4 ++--
 gcc/tree.h|  2 +-
 14 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 8cef8f2c289..e7c2783e724 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5309,7 +5309,7 @@ one_element_array_type_p (const_tree type)
 {
   if (TREE_CODE (type) != ARRAY_TYPE)
 return false;
-  return integer_zerop (array_type_nelts (type));
+  return integer_zerop (array_type_nelts_minus_one (type));
 }
 
 /* Determine whether TYPE is a zero-length array type "[0]".  */
@@ -6257,15 +6257,15 @@ get_parm_array_spec (const struct c_parm *parm, tree 
attrs)
  for (tree type = parm->specs->type; TREE_CODE (type) == ARRAY_TYPE;
   type = TREE_TYPE (type))
{
- tree nelts = array_type_nelts (type);
- if (error_operand_p (nelts))
+ tree nelts_minus_one = array_type_nelts_minus_one (type);
+ if (error_operand_p (nelts_minus_one))
return attrs;
- if (TREE_CODE (nelts) != INTEGER_CST)
+ if (TREE_CODE (nelts_minus_one) != INTEGER_CST)
{
  /* Each variable VLA bound is represented by the dollar
 sign.  */
  spec += "$";
- tpbnds = tree_cons (NULL_TREE, nelts, tpbnds);
+ tpbnds = tree_cons (NULL_TREE, nelts_minus_one, tpbnds);
}
}
  tpbnds = nreverse (tpbnds);
diff --git a/gcc/c/c-fold.cc b/gcc/c/c-fold.cc
index 57b67c74bd8..9ea174f79c4 100644
--- a/gcc/c/c-fold.cc
+++ b/gcc/c/c-fold.cc
@@ -73,11 +73,12 @@ c_fold_array_ref (tree type, tree ary, tree index)
   unsigned elem_nchars = (TYPE_PRECISION (elem_type)
  / TYPE_PRECISION (char_type_node));
   unsigned len = (unsigned) TREE_STRING_LENGTH (ary) / elem_nchars;
-  tree nelts = array_type_nelts (TREE_TYPE (ary));
+  tree nelts_minus_one = array_type_nelts_minus_one (TREE_TYPE (ary));
   bool dummy1 = true, dummy2 = true;
-  nelts = c_fully_fold_internal (nelts, true, &dummy1, &dummy2, false, false);
+  nelts_minus_one = c_fully_fold_internal (nelts_minus_one, true, &dummy1,
+  &dummy2, false, false);
   unsigned HOST_WIDE_INT i = tree_to_uhwi (index);
-  if (!tree_int_cst_le (index, nelts)
+  if (!tree_int_cst_le (index, nelts_minus_one)
   || i >= len
   || i + elem_nchars > len)
 return NULL_TREE;
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2ac5a22c848..a757796afcf 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1083,7 +1083,7 @@ pu

[PATCH v8 3/3] c: Add __lengthof__ operator

2024-08-11 Thread Alejandro Colomar
This operator is similar to sizeof but can only be applied to an array,
and returns its length (number of elements).

FUTURE DIRECTIONS:

-  We should make it work with array parameters to functions,
   and somehow magically return the length designator of the array,
   regardless of it being really a pointer.

-  Fix support for [0].

Cc: Joseph Myers 
Cc: Gabriel Ravier 
Cc: Jakub Jelinek 
Cc: Kees Cook 
Cc: Qing Zhao 
Cc: Jens Gustedt 
Cc: David Brown 
Cc: Florian Weimer 
Cc: Andreas Schwab 
Cc: Timm Baeder 

gcc/ChangeLog:

* doc/extend.texi: Document __lengthof__ operator.
* target.h (enum type_context_kind): Add __lengthof__ operator.

gcc/c-family/ChangeLog:

* c-common.h:
* c-common.def:
* c-common.cc (c_lengthof_type): Add __lengthof__ operator.

gcc/c/ChangeLog:

* c-tree.h
(c_expr_lengthof_expr, c_expr_lengthof_type):
* c-decl.cc
(start_struct, finish_struct):
(start_enum, finish_enum):
* c-parser.cc
(c_parser_sizeof_expression):
(c_parser_lengthof_expression):
(c_parser_sizeof_or_lengthof_expression):
(c_parser_unary_expression):
* c-typeck.cc
(build_external_ref):
(record_maybe_used_decl, pop_maybe_used):
(is_top_array_vla):
(c_expr_lengthof_expr, c_expr_lengthof_type):
Add __lengthof__ operator.

gcc/cp/ChangeLog:

* operators.def: Add __lengthof__ operator.

gcc/testsuite/ChangeLog:

* gcc.dg/lengthof-compile.c:
* gcc.dg/lengthof-vla.c:
* gcc.dg/lengthof.c: Add tests for __lengthof__ operator.

Link: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2529.pdf
Link: https://inbox.sourceware.org/gcc/m8s4oqy--...@tutanota.com/T/
Link: https://github.com/llvm/llvm-project/issues/102836
Suggested-by: Xavier Del Campo Romero 
Co-developed-by: Martin Uecker 
Signed-off-by: Alejandro Colomar 
---
 gcc/c-family/c-common.cc|  26 
 gcc/c-family/c-common.def   |   3 +
 gcc/c-family/c-common.h |   2 +
 gcc/c/c-decl.cc |  20 +++-
 gcc/c/c-parser.cc   |  61 +++---
 gcc/c/c-tree.h  |   4 +
 gcc/c/c-typeck.cc   | 118 ++-
 gcc/cp/operators.def|   1 +
 gcc/doc/extend.texi |  31 +
 gcc/target.h|   3 +
 gcc/testsuite/gcc.dg/lengthof-compile.c | 115 ++
 gcc/testsuite/gcc.dg/lengthof-vla.c |  46 
 gcc/testsuite/gcc.dg/lengthof.c | 150 
 13 files changed, 556 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/lengthof-compile.c
 create mode 100644 gcc/testsuite/gcc.dg/lengthof-vla.c
 create mode 100644 gcc/testsuite/gcc.dg/lengthof.c

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index e7e371fd26f..9f5feb83345 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -465,6 +465,7 @@ const struct c_common_resword c_common_reswords[] =
   { "__inline",RID_INLINE, 0 },
   { "__inline__",  RID_INLINE, 0 },
   { "__label__",   RID_LABEL,  0 },
+  { "__lengthof__",RID_LENGTHOF, 0 },
   { "__null",  RID_NULL,   0 },
   { "__real",  RID_REALPART,   0 },
   { "__real__",RID_REALPART,   0 },
@@ -4070,6 +4071,31 @@ c_alignof_expr (location_t loc, tree expr)
 
   return fold_convert_loc (loc, size_type_node, t);
 }
+
+/* Implement the lengthof keyword: Return the length of an array,
+   that is, the number of elements in the array.  */
+
+tree
+c_lengthof_type (location_t loc, tree type)
+{
+  enum tree_code type_code;
+
+  type_code = TREE_CODE (type);
+  if (type_code != ARRAY_TYPE)
+{
+  error_at (loc, "invalid application of % to type %qT", type);
+  return error_mark_node;
+}
+  if (!COMPLETE_TYPE_P (type))
+{
+  error_at (loc,
+   "invalid application of % to incomplete type %qT",
+   type);
+  return error_mark_node;
+}
+
+  return array_type_nelts_top (type);
+}
 
 /* Handle C and C++ default attributes.  */
 
diff --git a/gcc/c-family/c-common.def b/gcc/c-family/c-common.def
index 5de96e5d4a8..6d162f67104 100644
--- a/gcc/c-family/c-common.def
+++ b/gcc/c-family/c-common.def
@@ -50,6 +50,9 @@ DEFTREECODE (EXCESS_PRECISION_EXPR, "excess_precision_expr", 
tcc_expression, 1)
number.  */
 DEFTREECODE (USERDEF_LITERAL, "userdef_literal", tcc_exceptional, 3)
 
+/* Represents a 'lengthof' expression.  */
+DEFTREECODE (LENGTHOF_EXPR, "lengthof_expr", tcc_expression, 1)
+
 /* Represents a 'sizeof' expression during C++ template expansion,
or for the purpose of -Wsizeof-pointer-memaccess warning.  */
 DEFTREECODE (SIZEOF_EXPR, "sizeof_expr", tcc_expression, 1)
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 2510ee4dbc9..d2e7d7e8c40 100644

Re:[pushed] [PATCH v2] LoongArch: Use iorn and andn standard pattern names.

2024-08-11 Thread Lulu Cheng

 Pushed to r15-2877.

在 2024/8/2 上午9:19, Lulu Cheng 写道:

r15-1890 introduced the new optabs iorc and andc, and their corresponding
internal functions BIT_{ANDC,IORC}, for targets that define such optabs
for vector modes.  In r15-2258, iorc and andc were renamed to
iorn and andn.
So we rename the andn and iorn implementation patterns to the standard
pattern names.

---
v1 -> v2:
- Fixed bugs with the [x]vandn implementation in the previous
  version.
- Add testcases.




gcc/ChangeLog:

* config/loongarch/lasx.md (xvandn3): Rename to ...
(andn3): This.
(xvorn3): Rename to ...
(iorn3): This.
* config/loongarch/loongarch-builtins.cc
(CODE_FOR_lsx_vandn_v): Defined as the modified name.
(CODE_FOR_lsx_vorn_v): Likewise.
(CODE_FOR_lasx_xvandn_v): Likewise.
(CODE_FOR_lasx_xvorn_v): Likewise.
(loongarch_expand_builtin_insn): When the builtin function to be
called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the
two operands.
* config/loongarch/loongarch.md (n): Rename to ...
(n3): This.
* config/loongarch/lsx.md (vandn3): Rename to ...
(andn3): This.
(vorn3): Rename to ...
(iorn3): This.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/lasx-andn-iorn.c: New test.
* gcc.target/loongarch/lsx-andn-iorn.c: New test.
---
  gcc/config/loongarch/lasx.md  | 10 +++
  gcc/config/loongarch/loongarch-builtins.cc| 10 ---
  gcc/config/loongarch/loongarch.md |  8 +++---
  gcc/config/loongarch/lsx.md   | 10 +++
  .../gcc.target/loongarch/lasx-andn-iorn.c | 11 
  .../gcc.target/loongarch/lsx-andn-iorn.c  | 28 +++
  6 files changed, 59 insertions(+), 18 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 7bd61f8ed5b..ca523880683 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2716,12 +2716,12 @@ (define_insn "lasx_vext2xv_d_b"
 (set_attr "mode" "V4DI")])
  
  ;; Extend loongson-sx to loongson-asx.

-(define_insn "xvandn3"
+(define_insn "andn3"
[(set (match_operand:LASX 0 "register_operand" "=f")
-   (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f"))
-   (match_operand:LASX 2 "register_operand" "f")))]
+   (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f"))
+   (match_operand:LASX 1 "register_operand" "f")))]
"ISA_HAS_LASX"
-  "xvandn.v\t%u0,%u1,%u2"
+  "xvandn.v\t%u0,%u2,%u1"
[(set_attr "type" "simd_logic")
 (set_attr "mode" "")])
  
@@ -4637,7 +4637,7 @@ (define_insn "lasx_xvssrlrn__"

[(set_attr "type" "simd_int_arith")
 (set_attr "mode" "")])
  
-(define_insn "xvorn3"

+(define_insn "iorn3"
[(set (match_operand:ILASX 0 "register_operand" "=f")
(ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f"))
   (match_operand:ILASX 1 "register_operand" "f")))]
diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index fbe46833c9b..cf92770de30 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
  #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du
  #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s
  #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d
-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3
-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3
+#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3
+#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3
  #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2
  #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2
  #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2
@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
  #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si
  #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di
  
-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3

-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3
+#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3
+#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3
  #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2
  #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2
  #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2
@@ -2858,6 +2858,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, 
unsigned int nops,
  case CODE_FOR_lsx_vpickod_b:
  case CODE_FOR_lsx_vpickod_h:
  case CODE_FOR_lsx_vpickod_w:
+case CODE_FOR_lsx_vandn_v:
  case CODE_FOR_lasx_xvilvh_b:
  case CODE_FOR_lasx_xvilvh_h:
  case CODE_FOR_lasx_xvilvh_w:
@@ -2878,6 +2879,7 @

Re: [pushed][PATCH v1 1/2] LoongArch: Drop vcond{,u} expanders.

2024-08-11 Thread Lulu Cheng

Pushed to r15-2878.

在 2024/8/8 下午2:47, Lulu Cheng 写道:

Optabs vcond{,u} will be removed for GCC 15.  Since regtest shows no
fallout, drop the expanders now.

gcc/ChangeLog:

PR target/114189
* config/loongarch/lasx.md (vcondu): Delete.
(vcond): Likewise.
* config/loongarch/lsx.md (vcondu): Likewise.
(vcond): Likewise.
---
  gcc/config/loongarch/lasx.md | 37 
  gcc/config/loongarch/lsx.md  | 31 --
  2 files changed, 68 deletions(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 7bd61f8ed5b..4087c4b5349 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -165,9 +165,6 @@ (define_c_enum "unspec" [
  ;; All vector modes with 256 bits.
  (define_mode_iterator LASX [V4DF V8SF V4DI V8SI V16HI V32QI])
  
-;; Same as LASX.  Used by vcond to iterate two modes.

-(define_mode_iterator LASX_2 [V4DF V8SF V4DI V8SI V16HI V32QI])
-
  ;; Only used for splitting insert_d and copy_{u,s}.d.
  (define_mode_iterator LASX_D [V4DI V4DF])
  
@@ -762,40 +759,6 @@ (define_expand "vec_perm"

 DONE;
  })
  
-;; FIXME: 256??

-(define_expand "vcondu"
-  [(match_operand:LASX 0 "register_operand")
-   (match_operand:LASX 1 "reg_or_m1_operand")
-   (match_operand:LASX 2 "reg_or_0_operand")
-   (match_operator 3 ""
-[(match_operand:ILASX 4 "register_operand")
- (match_operand:ILASX 5 "register_operand")])]
-  "ISA_HAS_LASX
-   && (GET_MODE_NUNITS (mode)
-   == GET_MODE_NUNITS (mode))"
-{
-  loongarch_expand_vec_cond_expr (mode, mode,
- operands);
-  DONE;
-})
-
-;; FIXME: 256??
-(define_expand "vcond"
-  [(match_operand:LASX 0 "register_operand")
-   (match_operand:LASX 1 "reg_or_m1_operand")
-   (match_operand:LASX 2 "reg_or_0_operand")
-   (match_operator 3 ""
- [(match_operand:LASX_2 4 "register_operand")
-  (match_operand:LASX_2 5 "register_operand")])]
-  "ISA_HAS_LASX
-   && (GET_MODE_NUNITS (mode)
-   == GET_MODE_NUNITS (mode))"
-{
-  loongarch_expand_vec_cond_expr (mode, mode,
- operands);
-  DONE;
-})
-
  ;; Same as vcond_
  (define_expand "vcond_mask_"
[(match_operand:LASX 0 "register_operand")
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 454cda47876..222a5afe5b2 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -186,9 +186,6 @@ (define_mode_attr VD2MODE
  ;; All vector modes with 128 bits.
  (define_mode_iterator LSX  [V2DF V4SF V2DI V4SI V8HI V16QI])
  
-;; Same as LSX.  Used by vcond to iterate two modes.

-(define_mode_iterator LSX_2[V2DF V4SF V2DI V4SI V8HI V16QI])
-
  ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d.
  (define_mode_iterator LSX_D[V2DI V2DF])
  
@@ -533,34 +530,6 @@ (define_expand "vec_cmpu"

DONE;
  })
  
-(define_expand "vcondu"

-  [(match_operand:LSX 0 "register_operand")
-   (match_operand:LSX 1 "reg_or_m1_operand")
-   (match_operand:LSX 2 "reg_or_0_operand")
-   (match_operator 3 ""
- [(match_operand:ILSX 4 "register_operand")
-  (match_operand:ILSX 5 "register_operand")])]
-  "ISA_HAS_LSX
-   && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))"
-{
-  loongarch_expand_vec_cond_expr (mode, mode, operands);
-  DONE;
-})
-
-(define_expand "vcond"
-  [(match_operand:LSX 0 "register_operand")
-   (match_operand:LSX 1 "reg_or_m1_operand")
-   (match_operand:LSX 2 "reg_or_0_operand")
-   (match_operator 3 ""
- [(match_operand:LSX_2 4 "register_operand")
-  (match_operand:LSX_2 5 "register_operand")])]
-  "ISA_HAS_LSX
-   && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))"
-{
-  loongarch_expand_vec_cond_expr (mode, mode, operands);
-  DONE;
-})
-
  (define_expand "vcond_mask_"
[(match_operand:LSX 0 "register_operand")
 (match_operand:LSX 1 "reg_or_m1_operand")




Re: [pushed][PATCH v1 2/2] LoongArch: Provide ashr lshr and ashl RTL pattern for vectors.

2024-08-11 Thread Lulu Cheng

Pushed to r15-2879.

在 2024/8/8 下午2:47, Lulu Cheng 写道:

We support vashr, vlshr and vashl.  However, r15-1638 added support for
optimizing x < 0 ? -1 : 0 into (signed) x >> 31 and x < 0 ? 1 : 0 into
(unsigned) x >> 31.  To support this optimization, vector ashr, lshr and
ashl need to be implemented.

gcc/ChangeLog:

* config/loongarch/loongarch.md (insn): Added rotatert rotr pairs.
* config/loongarch/simd.md (rotr3): Rename to ...
(3): This.

gcc/testsuite/ChangeLog:

* g++.target/loongarch/vect-ashr-lshr.C: New test.
---
  gcc/config/loongarch/loongarch.md |   1 +
  gcc/config/loongarch/simd.md  |  13 +-
  .../g++.target/loongarch/vect-ashr-lshr.C | 147 ++
  3 files changed, 155 insertions(+), 6 deletions(-)
  create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index ee0310f2bd6..1f105cbf891 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -559,6 +559,7 @@ (define_code_attr optab [(ashift "ashl")
  (define_code_attr insn [(ashift "sll")
(ashiftrt "sra")
(lshiftrt "srl")
+   (rotatert "rotr")
(ior "or")
(xor "xor")
(and "and")
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 00ff2823a4e..45ea114220e 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -306,14 +306,15 @@ (define_expand "rotl3"
  operands[4] = gen_reg_rtx (mode);
});
  
-;; vrotri.{b/h/w/d}

+;; v{rotr/sll/sra/srl}i.{b/h/w/d}
  
-(define_insn "rotr3"

+(define_insn "3"
[(set (match_operand:IVEC 0 "register_operand" "=f")
-   (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
-  (match_operand:SI 2 "const__operand")))]
-  ""
-  "vrotri.\t%0,%1,%2";
+   (shift_w:IVEC
+ (match_operand:IVEC 1 "register_operand" "f")
+ (match_operand:SI 2 "const__operand")))]
+  "ISA_HAS_LSX"
+  "vi.\t%0,%1,%2"
[(set_attr "type" "simd_int_arith")
 (set_attr "mode" "")])
  
diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C

new file mode 100644
index 000..bcef985fae2
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
@@ -0,0 +1,147 @@
+/* { dg-do compile } */
+/* { dg-options "-mlasx -O2" } */
+/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */
+/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */
+/* { dg-final { scan-assembler-times "vsrli.w" 2 } } */
+/* { dg-final { scan-assembler-times "vsrli.d" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */
+
+typedef signed char v16qi __attribute__((vector_size(16)));
+typedef signed char v32qi __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v16qi
+foo (v16qi a)
+{
+  v16qi const1_op = __extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+  v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v32qi
+foo2 (v32qi a)
+{
+  v32qi const1_op = 
__extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+  v32qi const0_op = 
__extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v8hi
+foo3 (v8hi a)
+{
+  v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1};
+  v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v16hi
+foo4 (v16hi a)
+{
+  v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+  v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4si
+foo5 (v4si a)
+{
+  v4si const1_op = __extension__(v4si){1,1,1,1};
+  v4si const0_op = __extension__(v4si){0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v8si
+foo6 (v8si a)
+{
+  v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1};
+  v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v2di
+foo7 (v2di a)
+{
+  v2di const1_op = __extension__(v2di){1,1};
+  v2di const0_op = __extension__(v2di){0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4di
+foo8 (v4di a)
+{
+  v4di const1_

Re: [PATCH/RFC] LRA: Don't emit move for substituted CONSTANT_P operand [PR116170]

2024-08-11 Thread Kewen.Lin
Hi Vladimir,

on 2024/8/10 01:47, Vladimir Makarov wrote:
> 
> On 8/9/24 05:49, Kewen.Lin wrote:
>> Hi,
>>
>> Commit r15-2084 exposes one ICE in LRA.  Firstly, before
>> r15-2084 KFmode has 126 bit precision while V1TImode has 128
>> bit precision, so the subreg (subreg:V1TI (reg:KF 131) 0) is
>> paradoxical_subreg_p, which stops some passes from doing
>> some optimization.  After r15-2084, KFmode has the same mode
>> precision as V1TImode, passes are able to optimize more, but
>> it causes this ICE in LRA as described below:
>>
>> For insn 106 (set (mem:V1TI ...) (subreg:V1TI (reg:KF 133) 0)),
>> which matches pattern
>>
>> (define_insn "*vsx_le_perm_store_"
>>    [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
>>  (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
>>    "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
>>     && !altivec_indexed_or_indirect_operand (operands[0], mode)"
>>    "@
>>     #
>>     #"
>>    [(set_attr "type" "vecstore,store")
>>     (set_attr "length" "12,8")
>>     (set_attr "isa" ",*")])
>> LRA makes equivalence substitution on r133 with const double
>> (const_double:KF 0.0), selects alternative 0 and fixes up
>> operand 1 for constraint "wa", because operand 1 is OP_INOUT,
>> so it considers assigning back to it as well, that is:
>>
>>    lra_emit_move (type == OP_INOUT ? copy_rtx (old) : old, new_reg);
>>
>> But because old has been changed to const_double in equivalence
>> substitution, the move is actually assigning to const_double,
>> which is invalid and causes an ICE.
>>
>> Considering reg:KF 133 is equivalent with (const_double:KF 0.0)
>> even though this operand is OP_INOUT, IMHO there should not be
>> any following uses of reg:KF 133, otherwise it doesn't have the
>> chance to be equivalent to (const_double:KF 0.0).  From this
>> perspective, I think we can guard the lra_emit_move with
>> nonimmediate_operand to exclude such case.
>>
>> Does it sound reasonable?
> Yes.
>> btw, I also tried with disallowing equivalence substitution with
>> CONSTANT_P value if the corresponding operand is OP_INOUT or
>> OP_OUT, it can also fix this issue, but with more thinking it
>> seems not necessary to stop such substitution if we can handle it
>> later as above.
>>
>> Bootstrapped and regtested on x86_64-redhat-linux and
>> powerpc64{,le}-linux-gnu.
>>
> Thank you for the good explanation of the problem.  The patch is ok for me.  
> It would be nice to add a comment before `nonimmediate_operand` that `old` 
> can be an equivalent constant and we chose insn alternative before the 
> equivalent substitution.

Thanks for your comments!!  If I read the code right, the function chooses
insn alternative after equivalence substitution, the related code is:

4207 if (lra_dump_file != NULL)
4208   {
4209 fprintf (lra_dump_file,
4210  "Changing pseudo %d in operand %i of insn %u on 
equiv ",
4211  REGNO (old), i, INSN_UID (curr_insn));
4212 dump_value_slim (lra_dump_file, subst, 1);
4213 fprintf (lra_dump_file, "\n");
4214   }
4215 op_change_p = change_p = true;
4216   }
4217 if (simplify_operand_subreg (i, GET_MODE (old)) || op_change_p)
4218   {
4219 change_p = true;
4220 lra_update_dup (curr_id, i);
4221   }

4411   fprintf (lra_dump_file, "  Choosing alt %d in insn %u:",
4412goal_alt_number, INSN_UID (curr_insn));
4413   print_curr_insn_alt (goal_alt_number);

so I just added a comment as you suggested but stripping "and ...":

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 92b343fa99a..f355c6c6168 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4742,7 +4742,9 @@ curr_insn_transform (bool check_only_p)
 }
   *loc = new_reg;
   if (type != OP_IN
-  && find_reg_note (curr_insn, REG_UNUSED, old) == NULL_RTX)
+  && find_reg_note (curr_insn, REG_UNUSED, old) == NULL_RTX
+  /* OLD can be an equivalent constant here.  */
+  && nonimmediate_operand (old, GET_MODE (old)))
 {
   start_sequence ();
   lra_emit_move (type == OP_INOUT ? copy_rtx (old) : old, new_reg);

Does it look good to you?  Or did I miss something here?

Thanks again!

BR,
Kewen

> 
> Thank you for fixing the PR.
> 
>> PR rtl-optimization/116170
>>
>> gcc/ChangeLog:
>>
>> * lra-constraints.cc (curr_insn_transform): Don't emit move back to
>> old operand if it's not a nonimmediate_operand.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/powerpc/pr116170.c: New test.
>> ---
>>   gcc/lra-constraints.cc  |  3 ++-
>>   gcc/testsuite/gcc.target/powerpc/pr116170.c | 18 ++
>>   2 files changed, 20 insertions(+), 1 deletion(-)
>>   create mode 100644 gcc/testsuite/gcc.target/powerpc/

Re: PING: [PATCH] x86: Update BB_HEAD when aligning BB_HEAD

2024-08-11 Thread Hongtao Liu
On Mon, Aug 12, 2024 at 6:59 AM H.J. Lu  wrote:
>
> On Thu, Aug 8, 2024 at 6:53 PM H.J. Lu  wrote:
> >
> > When we emit .p2align to align BB_HEAD, we must update BB_HEAD.  Otherwise
> > ENDBR will be inserted at the wrong place.
> >
> > gcc/
> >
> > PR target/116174
> > * config/i386/i386.cc (ix86_align_loops): Update BB_HEAD when
> > aligning BB_HEAD
> >
> > gcc/testsuite/
> >
> > PR target/116174
> > * gcc.target/i386/pr116174.c: New test.
> >
> > Signed-off-by: H.J. Lu 
> > ---
> >  gcc/config/i386/i386.cc  |  7 +--
> >  gcc/testsuite/gcc.target/i386/pr116174.c | 12 
> >  2 files changed, 17 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr116174.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index 77c441893b4..ec6cc5e3548 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -23528,8 +23528,11 @@ ix86_align_loops ()
> >
> >   if (padding_p && detect_tight_loop_p)
> > {
> > - emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 
> > (size)),
> > -   GEN_INT (0)), label);
> > + rtx_insn *align =
> > +   emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 
> > (size)),
> > + GEN_INT (0)), label);
> > + if (BB_HEAD (bb) == label)
> > +   BB_HEAD (bb) = align;
Are there any assumptions that BB_HEAD must be a note or label?
Maybe we should move ix86_align_loops into a separate pass and insert
the pass just before pass_final.

> >   /* End of function.  */
> >   if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
> > break;
> > diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c 
> > b/gcc/testsuite/gcc.target/i386/pr116174.c
> > new file mode 100644
> > index 000..8877d0b51af
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr116174.c
> > @@ -0,0 +1,12 @@
> > +/* { dg-do compile { target *-*-linux* } } */
> > +/* { dg-options "-O2 -fcf-protection=branch" } */
> > +
> > +char *
> > +foo (char *dest, const char *src)
> > +{
> > +  while ((*dest++ = *src++) != '\0')
> > +/* nothing */;
> > +  return --dest;
> > +}
> > +
> > +/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */
> > --
> > 2.45.2
> >
>
> PING.
>
> --
> H.J.



-- 
BR,
Hongtao


RE: [PATCH] [x86] Mention _Float16 and __bf16 changes in GCC14.

2024-08-11 Thread Liu, Hongtao



> -Original Message-
> From: Gerald Pfeifer 
> Sent: Saturday, August 10, 2024 6:33 PM
> To: Liu, Hongtao 
> Cc: gcc-patches@gcc.gnu.org; crazy...@gmail.com; hjl.to...@gmail.com
> Subject: Re: [PATCH] [x86] Mention _Float16 and __bf16 changes in GCC14.
> 
> On Wed, 31 Jul 2024, liuhongt wrote:
> > +   The _Float16 and __bf16 type are
> supported
> > +independent of SSE2. W/o SSE2, these types are storage-only, compiler
> will
> > +issue an error when they're used in conversion, unary operation,
> > +binary operation, parameter passing or value return.
> 
> "types" (plural)
> "independently"
> "Without" (spelt out)
> "the compiler"
> 
> And personally I would use an Oxford comma, so "..., or value return".
> 
> > +instead of __FLT16_MAX__(or other similar Macros).
> 
> "macros" (lowercase)
> 
> 
> --- a/htdocs/gcc-14/porting_to.html
> +++ b/htdocs/gcc-14/porting_to.html
> 
> I don't think we need this in porting_to.html as well; the release notes are
> sufficient.
> 
> 
> This patch is okay with the changes above. I see this is already
> committed. Can you please make them as follow-up? Or should I?
Could you help refine the wording?  Many thanks for that.
> 
> Thanks,
> Gerald



Re: [PATCH] rs6000: Add TARGET_P10_VECTOR for Power10 vector insns [PR116266]

2024-08-11 Thread Kewen.Lin
Hi Segher & Peter,

Thanks for your comments!!

on 2024/8/10 05:43, Segher Boessenkool wrote:
> On Fri, Aug 09, 2024 at 03:50:50PM -0500, Peter Bergner wrote:
>> On 8/9/24 12:54 PM, Segher Boessenkool wrote:
 --- a/gcc/config/rs6000/altivec.md
 +++ b/gcc/config/rs6000/altivec.md
 @@ -623,7 +623,7 @@ (define_insn "altivec_eqv1ti"
[(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
   (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v")
(match_operand:V1TI 2 "altivec_register_operand" "v")))]
 -  "TARGET_POWER10"
 +  "TARGET_P10_VECTOR"
"vcmpequq %0,%1,%2"
[(set_attr "type" "veccmpfx")])
>>>
>>> This very first one is incorrect, already.  This is a Vector insn
>>> (it needs MSR[VEC]=1), not a VSX insn (for which MSR[VSX]=1 is needed).
>>>
>>> We test TARGET_ALTIVEC for that, not TARGET_VSX.
>>
>> I guess you are correct that *_VECTOR is not specific enough because
>> yeah, we could have -mcpu=power10 -maltivec -mno-vsx so we'd need two
>> macros, TARGET_P10_ALTIVEC and TARGET_P10_VSX rather than one catch-all.
> 
> The instructions are part of the Vector Facility.  Not the Vector-Scalar
> Extension Facility.  There is a difference, and the two are gated by
> different MSR bits.  This is *fundamental*.
> 
> Yes, often both are enabled.  Often *everything* is enabled.  In the
> compiler we cannot rely on the happy case often.

I agree that the Vector Facility and the Vector-Scalar Extension Facility
are different.  The proposed TARGET_P10_VECTOR followed the existing
TARGET_P[89]_VECTOR usage, which guards both VMX and VSX insns on
Power[89] (i.e. without VSX, P8/9 VMX insns are not supported even with
-maltivec).  I took this to be intentional, the implication being that the
VMX units are actually VSX units.  But according to Segher's comments, we
want to separate them, which I think means we have to rework the current
TARGET_P[89]_VECTOR support first.

> 
>>> In general, we want to get rid of TARGET_Pxxx_VECTOR, not introduce new
>>> stuff like it!
>>
>> I'm fine with the TARGET_P10_* macro, since it's more readable than saying
>> TARGET_POWER10 && TARGET_ALTIVEC && TARGET_VSX, especially when we use the
>> negated version.

Yes, TARGET_P[89]_VECTOR means TARGET_POWER[89] && TARGET_VSX (vsx implies
altivec should be set).

> 
> It is not more readable *at all*.  What does it even mean?  Previous
> similar macros (TARGET_P8_VECTOR) meant that various VSX instructions
> new in ISA 2.07 were enabled, *or* that some vector insns (either VMX or
> VSX, it never was clear which) were enabled, and we were compiling for
> 2.07 or later.  It meant the former, but was often understood as meaning
> the latter.  It was a *mess*.  We should not make a bigger mess.

IIUC, we want to split TARGET_P[89]_VECTOR into TARGET_P[89]_ALTIVEC and
TARGET_P[89]_VSX (or just TARGET_POWER[89] && TARGET_VSX or TARGET_ALTIVEC)
according to the context (VMX or VSX), and we need to split power[89]-vector
bif stanzas, isa attributes etc.

One difference with this change is that previously users specified -mno-vsx
to disable all vector insns (both VMX and VSX) on Power[89]; now they should
use -mno-altivec for that purpose.  I think it's better as it matches the
behavior on Power7?

> 
> Convenience macros are fine, but it should be clear what they MEAN!
> Clear to the uninitiated.  Obvious, self-explanatory.  Not having two
> disparate meanings, both "obvious"!
> 
> 
> Segher

BR,
Kewen



Re: PING: [PATCH] x86: Update BB_HEAD when aligning BB_HEAD

2024-08-11 Thread H.J. Lu
On Sun, Aug 11, 2024 at 6:52 PM Hongtao Liu  wrote:
>
> On Mon, Aug 12, 2024 at 6:59 AM H.J. Lu  wrote:
> >
> > On Thu, Aug 8, 2024 at 6:53 PM H.J. Lu  wrote:
> > >
> > > When we emit .p2align to align BB_HEAD, we must update BB_HEAD.  Otherwise
> > > ENDBR will be inserted at the wrong place.
> > >
> > > gcc/
> > >
> > > PR target/116174
> > > * config/i386/i386.cc (ix86_align_loops): Update BB_HEAD when
> > > aligning BB_HEAD
> > >
> > > gcc/testsuite/
> > >
> > > PR target/116174
> > > * gcc.target/i386/pr116174.c: New test.
> > >
> > > Signed-off-by: H.J. Lu 
> > > ---
> > >  gcc/config/i386/i386.cc  |  7 +--
> > >  gcc/testsuite/gcc.target/i386/pr116174.c | 12 
> > >  2 files changed, 17 insertions(+), 2 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr116174.c
> > >
> > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > index 77c441893b4..ec6cc5e3548 100644
> > > --- a/gcc/config/i386/i386.cc
> > > +++ b/gcc/config/i386/i386.cc
> > > @@ -23528,8 +23528,11 @@ ix86_align_loops ()
> > >
> > >   if (padding_p && detect_tight_loop_p)
> > > {
> > > - emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 
> > > (size)),
> > > -   GEN_INT (0)), label);
> > > + rtx_insn *align =
> > > +   emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 
> > > (size)),
> > > + GEN_INT (0)), 
> > > label);
> > > + if (BB_HEAD (bb) == label)
> > > +   BB_HEAD (bb) = align;
> Are there any assumptions that BB_HEAD must be a note or label?

I don't know.  But LABEL may be BB_HEAD.

> Maybe we should move ix86_align_loops into a separate pass and insert
> the pass just before pass_final.
>
> > >   /* End of function.  */
> > >   if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
> > > break;
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c 
> > > b/gcc/testsuite/gcc.target/i386/pr116174.c
> > > new file mode 100644
> > > index 000..8877d0b51af
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr116174.c
> > > @@ -0,0 +1,12 @@
> > > +/* { dg-do compile { target *-*-linux* } } */
> > > +/* { dg-options "-O2 -fcf-protection=branch" } */
> > > +
> > > +char *
> > > +foo (char *dest, const char *src)
> > > +{
> > > +  while ((*dest++ = *src++) != '\0')
> > > +/* nothing */;
> > > +  return --dest;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } 
> > > */
> > > --
> > > 2.45.2
> > >
> >
> > PING.
> >
> > --
> > H.J.
>
>
>
> --
> BR,
> Hongtao



-- 
H.J.


[PATCH v2] ASAN: call initialize_sanitizer_builtins for hwasan [PR115205]

2024-08-11 Thread Andrew Pinski
Sometimes initialize_sanitizer_builtins is not called before emitting
the asan builtins with hwasan. In the case of the bug report, there
was a path with the fortran front-end where it was not called.
So let's call it in asan_instrument before calling transform_statements
and from hwasan_finish_file.

Built and tested for aarch64-linux-gnu with no regressions.

Changes since v1:
* v2: Also add a call to initialize_sanitizer_builtins in hwasan_finish_file.

gcc/ChangeLog:

PR sanitizer/115205
* asan.cc (asan_instrument): Call initialize_sanitizer_builtins
for hwasan.
(hwasan_finish_file): Likewise.

Signed-off-by: Andrew Pinski 
---
 gcc/asan.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/asan.cc b/gcc/asan.cc
index 9e0f51b1477..5f262d54a3a 100644
--- a/gcc/asan.cc
+++ b/gcc/asan.cc
@@ -4276,6 +4276,7 @@ asan_instrument (void)
 {
   if (hwasan_sanitize_p ())
 {
+  initialize_sanitizer_builtins ();
   transform_statements ();
   return 0;
 }
@@ -4694,6 +4695,8 @@ hwasan_finish_file (void)
   if (flag_sanitize & SANITIZE_KERNEL_HWADDRESS)
 return;
 
+  initialize_sanitizer_builtins ();
+
   /* Avoid instrumenting code in the hwasan constructors/destructors.  */
   flag_sanitize &= ~SANITIZE_HWADDRESS;
   int priority = MAX_RESERVED_INIT_PRIORITY - 1;
-- 
2.43.0



[PATCH] c++/modules: Merge default arguments [PR99274]

2024-08-11 Thread Nathaniel Shead
Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

I tried to implement a remapping of the slots for TARGET_EXPRs for the
FIXME but I wasn't able to work out how to do so effectively.  Given
that I doubt this will be a common issue, I felt it was probably easiest
to leave it for now and focus on other issues in the meantime; thoughts?

The other thing to note is that most of this function just has a single
error message always indicated by a 'goto mismatch;' but I felt that it
seemed reasonable to provide more specific error messages where we can.
But given that in the long term we probably want to replace this
function with an appropriately enhanced 'duplicate_decls' anyway maybe
it's not worth worrying about; this patch is still useful in the
meantime if only for the testcases, I hope.

-- >8 --

When merging a newly imported declaration with an existing declaration
we don't currently propagate new default arguments, which causes issues
when modularising header units.  This patch adds logic to propagate
default arguments to existing declarations on import, and error if the
defaults do not match.

PR c++/99274

gcc/cp/ChangeLog:

* module.cc (trees_in::is_matching_decl): Merge default
arguments.

gcc/testsuite/ChangeLog:

* g++.dg/modules/default-arg-1_a.H: New test.
* g++.dg/modules/default-arg-1_b.C: New test.
* g++.dg/modules/default-arg-2_a.H: New test.
* g++.dg/modules/default-arg-2_b.C: New test.
* g++.dg/modules/default-arg-3.h: New test.
* g++.dg/modules/default-arg-3_a.H: New test.
* g++.dg/modules/default-arg-3_b.C: New test.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/module.cc  | 62 ++-
 .../g++.dg/modules/default-arg-1_a.H  | 17 +
 .../g++.dg/modules/default-arg-1_b.C  | 26 
 .../g++.dg/modules/default-arg-2_a.H  | 17 +
 .../g++.dg/modules/default-arg-2_b.C  | 28 +
 gcc/testsuite/g++.dg/modules/default-arg-3.h  | 13 
 .../g++.dg/modules/default-arg-3_a.H  |  5 ++
 .../g++.dg/modules/default-arg-3_b.C  |  6 ++
 8 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/default-arg-1_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/default-arg-1_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/default-arg-2_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/default-arg-2_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/default-arg-3.h
 create mode 100644 gcc/testsuite/g++.dg/modules/default-arg-3_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/default-arg-3_b.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index f4d137b13a1..87f34bac578 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -11551,8 +11551,6 @@ trees_in::is_matching_decl (tree existing, tree decl, 
bool is_typedef)
 
  if (!same_type_p (TREE_VALUE (d_args), TREE_VALUE (e_args)))
goto mismatch;
-
- // FIXME: Check default values
}
 
   /* If EXISTING has an undeduced or uninstantiated exception
@@ -11690,7 +11688,65 @@ trees_in::is_matching_decl (tree existing, tree decl, 
bool is_typedef)
   if (!DECL_EXTERNAL (d_inner))
 DECL_EXTERNAL (e_inner) = false;
 
-  // FIXME: Check default tmpl and fn parms here
+  if (TREE_CODE (decl) == TEMPLATE_DECL)
+{
+  /* Merge default template arguments.  */
+  tree d_parms = DECL_INNERMOST_TEMPLATE_PARMS (decl);
+  tree e_parms = DECL_INNERMOST_TEMPLATE_PARMS (existing);
+  gcc_checking_assert (TREE_VEC_LENGTH (d_parms)
+  == TREE_VEC_LENGTH (e_parms));
+  for (int i = 0; i < TREE_VEC_LENGTH (d_parms); ++i)
+   {
+ tree d_default = TREE_PURPOSE (TREE_VEC_ELT (d_parms, i));
+ tree& e_default = TREE_PURPOSE (TREE_VEC_ELT (e_parms, i));
+ if (e_default == NULL_TREE)
+   e_default = d_default;
+ else if (d_default != NULL_TREE
+  && !cp_tree_equal (d_default, e_default))
+   {
+ auto_diagnostic_group d;
+ tree d_parm = TREE_VALUE (TREE_VEC_ELT (d_parms, i));
+ tree e_parm = TREE_VALUE (TREE_VEC_ELT (e_parms, i));
+ error_at (DECL_SOURCE_LOCATION (d_parm),
+   "conflicting default argument for %#qD", d_parm);
+ inform (DECL_SOURCE_LOCATION (e_parm),
+ "existing default declared here");
+ return false;
+   }
+   }
+}
+
+  if (TREE_CODE (d_inner) == FUNCTION_DECL)
+{
+  /* Merge default function arguments.  */
+  tree d_parm = FUNCTION_FIRST_USER_PARMTYPE (d_inner);
+  tree e_parm = FUNCTION_FIRST_USER_PARMTYPE (e_inner);
+  int i = 0;
+  for (; d_parm && d_parm != void_list_node;
+  d_parm = TREE_CHAIN (d_parm), e_parm = TREE_CHAIN (e_parm), ++i)
+   {
+ tree d_default = TREE_PURPOSE (d_parm);
+ t