Re: [pushed][PATCH] LoongArch: Enable vcond_mask_mn expanders for SF/DF modes.

2023-10-26 Thread chenglulu

Pushed to r14-4939.

在 2023/10/23 下午5:46, Jiahao Xu 写道:

If the vcond_mask patterns don't support fp modes, the vector
FP comparison instructions will not be generated.

gcc/ChangeLog:

 * config/loongarch/lasx.md
(vcond_mask_): Change to
(vcond_mask_): this.
* config/loongarch/lsx.md
(vcond_mask_): Change to
(vcond_mask_): this.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Ditto.
* gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: Ditto.
* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Ditto.

Change-Id: If9716f356c0b83748a208235e835feb402b5c78f

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 442fda24606..ba2c5eec7d0 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -906,15 +906,15 @@ (define_expand "vcond"
  })
  
  ;; Same as vcond_

-(define_expand "vcond_mask_"
-  [(match_operand:ILASX 0 "register_operand")
-   (match_operand:ILASX 1 "reg_or_m1_operand")
-   (match_operand:ILASX 2 "reg_or_0_operand")
-   (match_operand:ILASX 3 "register_operand")]
+(define_expand "vcond_mask_"
+  [(match_operand:LASX 0 "register_operand")
+   (match_operand:LASX 1 "reg_or_m1_operand")
+   (match_operand:LASX 2 "reg_or_0_operand")
+   (match_operand: 3 "register_operand")]
"ISA_HAS_LASX"
  {
-  loongarch_expand_vec_cond_mask_expr (mode,
- mode, operands);
+  loongarch_expand_vec_cond_mask_expr (mode,
+mode, operands);
DONE;
  })
  
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md

index b4e92ae9c54..7e77ac4ad6a 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -644,15 +644,15 @@ (define_expand "vcond"
DONE;
  })
  
-(define_expand "vcond_mask_"

-  [(match_operand:ILSX 0 "register_operand")
-   (match_operand:ILSX 1 "reg_or_m1_operand")
-   (match_operand:ILSX 2 "reg_or_0_operand")
-   (match_operand:ILSX 3 "register_operand")]
+(define_expand "vcond_mask_"
+  [(match_operand:LSX 0 "register_operand")
+   (match_operand:LSX 1 "reg_or_m1_operand")
+   (match_operand:LSX 2 "reg_or_0_operand")
+   (match_operand: 3 "register_operand")]
"ISA_HAS_LSX"
  {
-  loongarch_expand_vec_cond_mask_expr (mode,
- mode, operands);
+  loongarch_expand_vec_cond_mask_expr (mode,
+  mode, operands);
DONE;
  })
  
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c

new file mode 100644
index 000..ee9cb1a1fa7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model 
-mlasx" } */
+
+#include <stdint.h>
+
+#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)   \
+  void __attribute__ ((noinline, noclone)) \
+  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
+  DATA_TYPE *__restrict__ x,   \
+  DATA_TYPE *__restrict__ y,   \
+  CMP_TYPE *__restrict__ a,\
+  CMP_TYPE *__restrict__ b,\
+  int n)   \
+  {\
+for (int i = 0; i < n; i++) \
+  {\
+   DATA_TYPE xval = x[i], yval = y[i]; \
+   CMP_TYPE aval = a[i], bval = b[i];  \
+   r[i] = aval COND bval ? xval : yval;\
+  }\
+  }
+
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)  \
+  T (int8_t, int8_t, COND, SUFFIX) \
+  T (int16_t, int16_t, COND, SUFFIX)   \
+  T (int32_t, int32_t, COND, SUFFIX)   \
+  T (int64_t, int64_t, COND, SUFFIX)   \
+  T (float, int32_t, COND, SUFFIX##_float) \
+  T (double, int64_t, COND, SUFFIX##_double)
+
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)\
+  T (uint8_t, uint8_t, COND, SUFFIX)   \
+  T (uint16_t, uint16_t, COND, SUFFIX) \
+  T (uint32_t, uint32_t, COND, SUFFIX) \
+  T (uint64_t, uint64_t, COND, SUFFIX) \
+  T (float, uint32_t, COND, SUFFIX##_float)\
+  T (double, uint64_t, COND, SUFFIX##_double)
+
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \
+  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)   \
+  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
+
+#define TEST_VAR_ALL(T)\
+  TEST_COND_VAR_ALL (T, 

[PATCH] Darwin: Handle the fPIE option specially.

2023-10-26 Thread Iain Sandoe
Testing the fhardened patch prompted checking of what the Xcode tools
actually emit for -fPIE.  This patch makes the GCC port follow the same
pattern.  Tested on x86_64, i686-darwin, pushed to trunk, thanks,
Iain

--- 8< ---

For Darwin, PIE requires PIC codegen, but otherwise is only a link-time
change. For almost all Darwin, we do not report __PIE__; the exception is
32bit X86 and from Darwin12 to 17 only (32 bit is no longer supported
after Darwin17).

gcc/ChangeLog:

* config/darwin.cc (darwin_override_options): Handle fPIE.

Signed-off-by: Iain Sandoe 
---
 gcc/config/darwin.cc | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index a80b6caf95a..621a94d74a2 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -3325,6 +3325,8 @@ darwin_override_options (void)
 {
   if (strverscmp (darwin_macosx_version_min, "10.14") >= 0)
generating_for_darwin_version = 18;
+  else if (strverscmp (darwin_macosx_version_min, "10.8") >= 0)
+   generating_for_darwin_version = 12;
   else if (strverscmp (darwin_macosx_version_min, "10.7") >= 0)
generating_for_darwin_version = 11;
   else if (strverscmp (darwin_macosx_version_min, "10.6") >= 0)
@@ -3495,8 +3497,17 @@ darwin_override_options (void)
   && dwarf_debuginfo_p ())
 flag_var_tracking_uninit = flag_var_tracking;
 
-  /* Final check on PCI options; for Darwin these are not dependent on the PIE
- ones, although PIE does require PIC to support it.  */
+  if (OPTION_SET_P (flag_pie) && flag_pie)
+{
+  /* This is a little complicated, to match Xcode tools.
+For Darwin, PIE requires PIC codegen, but otherwise is only a link-
+time change.  For almost all Darwin, we do not report __PIE__; the
+exception is Darwin12-17 and for 32b only.  */
+  flag_pie = generating_for_darwin_version >= 12 && !TARGET_64BIT ? 2 : 0;
+  flag_pic = 2; /* We always set this.  */
+}
+
+  /* Final check on PIC options.  */
   if (MACHO_DYNAMIC_NO_PIC_P)
 {
   if (flag_pic)
-- 
2.39.2 (Apple Git-143)



RE: Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread Li, Pan2
Just apply v2 version for RV32 with spike riscv-sim for confirmation.

This patch only increased 2 popcount run failures as well as 2 dump failures, 
and the mask_gather_load_run-11.c is PASS within spike.

Pan

-Original Message-
From: juzhe.zh...@rivai.ai  
Sent: Thursday, October 26, 2023 9:27 AM
To: Patrick O'Neill ; gcc-patches 

Cc: kito.cheng ; Kito.cheng ; 
jeffreyalaw ; Robin Dapp 
Subject: Re: Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV 
auto-vectorization

I think it's QEMU issue:

line 15: 1520161 Aborted                 (core dumped) 
QEMU_CPU="$(march-to-cpu-opt --get-riscv-tag $1)" qemu-riscv$xlen -r 5.10 
"${qemu_args[@]}" -L ${RISC_V_SYSROOT} "$@"
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
execution test

I use SPIKE works fine. This is my SPIKE configuration

spike \
    --isa=rv64gcv_zvfh_zfh \
    --misaligned \
    ${PK_PATH}/pk${xlen} "$@"



juzhe.zh...@rivai.ai
 
From: Patrick O'Neill
Date: 2023-10-26 09:22
To: juzhe.zh...@rivai.ai; gcc-patches
CC: kito.cheng; Kito.cheng; jeffreyalaw; Robin Dapp
Subject: Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

On 10/25/23 17:49, juzhe.zh...@rivai.ai wrote:
FAIL: gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c -O3 -ftree-vectorize 
--param riscv-autovec-lmul=dynamic  scan-assembler e32,m4
FAIL: gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c -O3 -ftree-vectorize 
--param riscv-autovec-lmul=dynamic  scan-assembler e32,m8

These 2 FAILs are bogus. Testcases need to be adapted, I notice I didn't 
include this in this patch.

FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test

These 2 already exist on the trunk for RV32.

FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
execution test 
This FAIL for RV64 is odd. I don't have it.  Could you share me the debug log ?
rv64gcv debug log:

Executing on host: 
/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/xgcc 
-B/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/  
/scratch/tc-testing/tc-avl/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c
  -march=rv64gcv -mabi=lp64d -mcmodel=medlow   -fdiagnostics-plain-output   
-ftree-vectorize -O3 --param riscv-autovec-preference=fixed-vlmax --param 
riscv-autovec-lmul=m8 -fno-vect-cost-model -ffast-math -mcmodel=medany  -lm 
 -o ./mask_gather_load_run-11.exe    (timeout = 600)
spawn -ignore SIGHUP 
/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/xgcc 
-B/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/ 
/scratch/tc-testing/tc-avl/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c
 -march=rv64gcv -mabi=lp64d -mcmodel=medlow -fdiagnostics-plain-output 
-ftree-vectorize -O3 --param riscv-autovec-preference=fixed-vlmax --param 
riscv-autovec-lmul=m8 -fno-vect-cost-model -ffast-math -mcmodel=medany -lm -o 
./mask_gather_load_run-11.exe
PASS: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
(test for excess errors)
spawn riscv64-unknown-linux-gnu-run ./mask_gather_load_run-11.exe
mask_gather_load_run-11.exe: 
/scratch/tc-testing/tc-avl/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c:98:
 main: Assertion `dest_uint16_t_uint8_t[i * 2] == dest2_uint16_t_uint8_t[i * 
2]' failed.
/scratch/tc-testing/tc-avl/build-rv64gcv/../scripts/wrapper/qemu/riscv64-unknown-linux-gnu-run:
 line 15: 1520161 Aborted (core dumped) 
QEMU_CPU="$(march-to-cpu-opt --get-riscv-tag $1)" qemu-riscv$xlen -r 5.10 
"${qemu_args[@]}" -L ${RISC_V_SYSROOT} "$@"
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
execution test

rv32gcv debug log:

Executing on host: 
/scratch/tc-testing/tc-avl/build-rv32gcv/build-gcc-linux-stage2/gcc/xgcc 
-B/scratch/tc-testing/tc-avl/build-rv32gcv/build-gcc-linux-stage2/gcc/  
/scratch/tc-testing/tc-avl/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c
  -march=rv32gcv -mabi=ilp32d -mcmodel=medlow   -fdiagnostics-plain-output   
-ftree-vectorize -O3 --param riscv-autovec-preference=fixed-vlmax --param 
riscv-autovec-lmul=m8 -fno-vect-cost-model -ffast-math -mcmodel=medany  -lm 
 -o ./mask_gather_load_run-11.exe    (timeout = 600)
spawn -ignore SIGHUP 
/scratch/tc-testing/tc-avl/build-rv32gcv/build-gcc-linux-stage2/gcc/xgcc 
-B/scratch/tc-testing/tc-avl/build-rv32gcv/build-gcc-linux-stage2/gcc/ 
/scratch/tc-testing/tc-avl/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c
 -march=rv32gcv -mabi=ilp32d -mcmodel=medlow -fdiagnostics-plain-output 
-ftree-vectorize -O3 --param riscv-autovec-preference=fixed-vlmax --param 
riscv-autovec-lmul=m8 -fno-vect-c

Re: RE: [PATCH] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread juzhe.zh...@rivai.ai
Yes. I just checked again.

Before this patch:
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test


After this patch:
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test

Increased FAILS are LMUL = M4. I have analyzed the codegen. Looks reasonable.

Moreover, When I removed 'popcount_64' and test, all passed no matter apply 
this patch or not.

I think it is because popcount64 is buggy in RV32; this patch triggers an LMUL = 4
bug that already existed — we were just lucky before.

So I suggest this patch should go ahead and ignore popcount issue for now. (I 
will send V3 with fixing dump FAILs).

I am not familiar  with popcount, Robin. Any suggestions?


juzhe.zh...@rivai.ai
 
From: Li, Pan2
Date: 2023-10-26 15:33
To: juzhe.zh...@rivai.ai; Patrick O'Neill; gcc-patches
CC: kito.cheng; Kito.cheng; jeffreyalaw; Robin Dapp
Subject: RE: Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV 
auto-vectorization
Just apply v2 version for RV32 with spike riscv-sim for confirmation.
 
This patch only increased 2 popcount run failures as well as 2 dump failures, 
and the mask_gather_load_run-11.c is PASS within spike.
 
Pan
 
-Original Message-
From: juzhe.zh...@rivai.ai 
Sent: Thursday, October 26, 2023 9:27 AM
To: Patrick O'Neill ; gcc-patches 

Cc: kito.cheng ; Kito.cheng ; 
jeffreyalaw ; Robin Dapp 
Subject: Re: Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV 
auto-vectorization
 
I think it's QEMU issue:
 
line 15: 1520161 Aborted                 (core dumped) 
QEMU_CPU="$(march-to-cpu-opt --get-riscv-tag $1)" qemu-riscv$xlen -r 5.10 
"${qemu_args[@]}" -L ${RISC_V_SYSROOT} "$@"
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
execution test
 
I use SPIKE works fine. This is my SPIKE configuration
 
spike \
    --isa=rv64gcv_zvfh_zfh \
    --misaligned \
    ${PK_PATH}/pk${xlen} "$@"
 
 
 
juzhe.zh...@rivai.ai
 
From: Patrick O'Neill
Date: 2023-10-26 09:22
To: juzhe.zh...@rivai.ai; gcc-patches
CC: kito.cheng; Kito.cheng; jeffreyalaw; Robin Dapp
Subject: Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV auto-vectorization
 
On 10/25/23 17:49, juzhe.zh...@rivai.ai wrote:
FAIL: gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c -O3 -ftree-vectorize 
--param riscv-autovec-lmul=dynamic  scan-assembler e32,m4
FAIL: gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c -O3 -ftree-vectorize 
--param riscv-autovec-lmul=dynamic  scan-assembler e32,m8
 
These 2 FAILs are bogus. Testcases need to be adapted, I notice I didn't 
include this in this patch.
 
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c execution test
 
These 2 already exist on the trunk for RV32.
 
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c 
execution test 
This FAIL for RV64 is odd. I don't have it.  Could you share me the debug log ?
rv64gcv debug log:
 
Executing on host: 
/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/xgcc 
-B/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/  
/scratch/tc-testing/tc-avl/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c
  -march=rv64gcv -mabi=lp64d -mcmodel=medlow   -fdiagnostics-plain-output   
-ftree-vectorize -O3 --param riscv-autovec-preference=fixed-vlmax --param 
riscv-autovec-lmul=m8 -fno-vect-cost-model -ffast-math -mcmodel=medany  -lm 
 -o ./mask_gather_load_run-11.exe    (timeout = 600)
spawn -ignore SIGHUP 
/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/xgcc 
-B/scratch/tc-testing/tc-avl/build-rv64gcv/build-gcc-linux-stage2/gcc/ 
/scratch/tc-testing/tc-avl/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load_run-11.c
 -march=rv64gcv -mabi=lp64d -mcmodel=medlow -fdiagnostics-plain-output 
-ftree-vectorize -O3 --param riscv-autovec-preference=fixed-vlmax --param 
riscv-autovec-lmul=m8 -fno-vect-cost-model -ffast-math -mcmodel=medany -lm -o 
./mask_gather_load_run-11.exe
PASS:

Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread Robin Dapp


> Increased FAILS are LMUL = M4. I have analyzed the codegen. Looks
> reasonable.
> 
> Moreover, When I removed 'popcount_64' and test, all passed no matter
> apply this patch or not.
> 
> I think it is because popcount64 is buggy in RV32, this patch trigger
> LMUL = 4 bug already existed that we were lucky.
> 
> So I suggest this patch should go ahead and ignore popcount issue for
> now. (I will send V3 with fixing dump FAILs).
> 
> I am not familiar  with popcount, Robin. Any suggestions?
Yeah, agree.  popcount_64 might be wrong and it's unlikely that your
patch causes it.  Will have a look.

Regards
 Robin


Re: [PATCH V2] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread Kito Cheng
LGTM, Thanks, it's really awesome - the implementation is simpler than
I expected, it's another great improvement for RISC-V GCC!

Just make sure Patrick gives a green light on the testing before
committing the patch :)




On Wed, Oct 25, 2023 at 8:05 PM Juzhe-Zhong  wrote:
>
> This patch addresses the redundant AVL/VL toggling in RVV partial 
> auto-vectorization
> which is a known issue for a long time and I finally find the time to address 
> it.
>
> Consider a simple vector addition operation:
>
> https://godbolt.org/z/7hfGfEjW3
>
> void
> foo (int *__restrict a,
>  int *__restrict b,
>  int *__restrict n)
> {
>   for (int i = 0; i < n; i++)
>   a[i] = a[i] + b[i];
> }
>
> Optimized IR:
>
> Loop body:
>   _38 = .SELECT_VL (ivtmp_36, POLY_INT_CST [4, 4]);  
> -> vsetvli a5,a2,e8,mf4,ta,ma
>   ...
>   vect__4.8_27 = .MASK_LEN_LOAD (vectp_a.6_29, 32B, { -1, ... }, _38, 0);
> -> vle32.v v2,0(a0)
>   vect__6.11_20 = .MASK_LEN_LOAD (vectp_b.9_25, 32B, { -1, ... }, _38, 0);   
> -> vle32.v v1,0(a1)
>   vect__7.12_19 = vect__6.11_20 + vect__4.8_27;  
> -> vsetvli a6,zero,e32,m1,ta,ma + vadd.vv v1,v1,v2
>   .MASK_LEN_STORE (vectp_a.13_11, 32B, { -1, ... }, _38, 0, vect__7.12_19);  
> -> vsetvli zero,a5,e32,m1,ta,ma + vse32.v v1,0(a4)
>
> We can see 2 redundant vsetvls inside the loop body due to AVL/VL toggling.
> The AVL/VL toggling is because we are missing LEN information in simple 
> PLUS_EXPR GIMPLE assignment:
>
> vect__7.12_19 = vect__6.11_20 + vect__4.8_27;
>
> GCC apply partial predicate load/store and un-predicated full vector 
> operation on partial vectorization.
> Such flow are used by all other targets like ARM SVE (RVV also uses such 
> flow):
>
> ARM SVE:
>
> .L3:
> ld1wz30.s, p7/z, [x0, x3, lsl 2]   -> predicated load
> ld1wz31.s, p7/z, [x1, x3, lsl 2]   -> predicated load
> add z31.s, z31.s, z30.s-> un-predicated add
> st1wz31.s, p7, [x0, x3, lsl 2] -> predicated store
>
> Such vectorization flow causes AVL/VL toggling on RVV so we need AVL 
> propagation PASS for it.
>
> Also, It's very unlikely that we can apply predicated operations on all 
> vectorization for following reasons:
>
> 1. It's very heavy workload to support them on all vectorization and we don't 
> see any benefits if we can handle that on targets backend.
> 2. Changing Loop vectorizer for it will make code base ugly and hard to 
> maintain.
> 3. We will need so many patterns for all operations. Not only COND_LEN_ADD, 
> COND_LEN_SUB, 
>We also need COND_LEN_EXTEND, , COND_LEN_CEIL, ... .. over 100+ 
> patterns, unreasonable number of patterns.
>
> To conclude, we prefer un-predicated operations here, and design a nice and 
> clean AVL propagation PASS for it to elide the redundant vsetvls
> due to AVL/VL toggling.
>
> The second question is that why we separate a PASS called AVL propagation. 
> Why not optimize it in VSETVL PASS (We definitetly can optimize AVL in VSETVL 
> PASS)
>
> Frankly, I was planning to address such issue in VSETVL PASS that's why we 
> recently refactored VSETVL PASS. However, I changed my mind recently after 
> several
> experiments and tries.
>
> The reasons as follows:
>
> 1. For code base management and maintainience. Current VSETVL PASS is 
> complicated enough and aleady has enough aggressive and fancy optimizations 
> which
>turns out it can always generate optimal codegen in most of the cases. 
> It's not a good idea keep adding more features into VSETVL PASS to make VSETVL
>  PASS become heavy and heavy again, then we will need to refactor it 
> again in the future.
>  Actuall, the VSETVL PASS is very stable and optimal after the recent 
> refactoring. Hopefully, we should not change VSETVL PASS any more except the 
> minor
>  fixes.
>
> 2. vsetvl insertion (VSETVL PASS does this thing) and AVL propagation are 2 
> different things,  I don't think we should fuse them into same PASS.
>
> 3. VSETVL PASS is an post-RA PASS, wheras AVL propagtion should be done 
> before RA which can reduce register allocation.
>
> 4. This patch's AVL propagation PASS only does AVL propagation for RVV 
> partial auto-vectorization situations.
>This patch's codes are only hundreds lines which is very managable and can 
> be very easily extended features and enhancements.
>  We can easily extend and enhance more AVL propagation in a clean and 
> separate PASS in the future. (If we do it on VSETVL PASS, we will complicate
>  VSETVL PASS again which is already so complicated.)
>
> Here is an example to demonstrate more:
>
> https://godbolt.org/z/bE86sv3q5
>
> void foo2 (int *__restrict a,
>   int *__restrict b,
>   int *__restrict c,
>   int *__restrict a2,
>   int *__restrict b2,
>   int *__restrict c2,
>   int *__restrict a3,
>   int *__restrict b3,
>   int *__re

[Ready to commit V3] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread Juzhe-Zhong
This patch addresses the redundant AVL/VL toggling in RVV partial 
auto-vectorization
which is a known issue for a long time and I finally find the time to address 
it.

Consider a simple vector addition operation:

https://godbolt.org/z/7hfGfEjW3

void
foo (int *__restrict a,
 int *__restrict b,
 int *__restrict n)
{
  for (int i = 0; i < n; i++)
  a[i] = a[i] + b[i];
}

Optimized IR:

Loop body:
  _38 = .SELECT_VL (ivtmp_36, POLY_INT_CST [4, 4]);  -> 
vsetvli a5,a2,e8,mf4,ta,ma
  ...
  vect__4.8_27 = .MASK_LEN_LOAD (vectp_a.6_29, 32B, { -1, ... }, _38, 0);-> 
vle32.v v2,0(a0)
  vect__6.11_20 = .MASK_LEN_LOAD (vectp_b.9_25, 32B, { -1, ... }, _38, 0);   -> 
vle32.v v1,0(a1)
  vect__7.12_19 = vect__6.11_20 + vect__4.8_27;  -> 
vsetvli a6,zero,e32,m1,ta,ma + vadd.vv v1,v1,v2
  .MASK_LEN_STORE (vectp_a.13_11, 32B, { -1, ... }, _38, 0, vect__7.12_19);  -> 
vsetvli zero,a5,e32,m1,ta,ma + vse32.v v1,0(a4)

We can see 2 redundant vsetvls inside the loop body due to AVL/VL toggling.
The AVL/VL toggling is because we are missing LEN information in simple 
PLUS_EXPR GIMPLE assignment:

vect__7.12_19 = vect__6.11_20 + vect__4.8_27;

GCC applies partial predicated load/store and un-predicated full vector operations
for partial vectorization.
Such a flow is used by all other targets like ARM SVE (RVV also uses such a flow):

ARM SVE:
   
.L3:
ld1wz30.s, p7/z, [x0, x3, lsl 2]   -> predicated load
ld1wz31.s, p7/z, [x1, x3, lsl 2]   -> predicated load
add z31.s, z31.s, z30.s-> un-predicated add
st1wz31.s, p7, [x0, x3, lsl 2] -> predicated store

Such vectorization flow causes AVL/VL toggling on RVV so we need AVL 
propagation PASS for it.

Also, It's very unlikely that we can apply predicated operations on all 
vectorization for following reasons:

1. It is a very heavy workload to support them for all vectorization, and we don't
see any benefit over handling it in the target's backend.
2. Changing Loop vectorizer for it will make code base ugly and hard to 
maintain.
3. We will need so many patterns for all operations. Not only COND_LEN_ADD, 
COND_LEN_SUB, 
   We also need COND_LEN_EXTEND, , COND_LEN_CEIL, ... .. over 100+ 
patterns, unreasonable number of patterns.

To conclude, we prefer un-predicated operations here, and design a nice and 
clean AVL propagation PASS for it to elide the redundant vsetvls
due to AVL/VL toggling.

The second question is why we separate out a PASS called AVL propagation. Why
not optimize it in the VSETVL PASS? (We definitely can optimize AVL in the VSETVL PASS.)

Frankly, I was planning to address such issue in VSETVL PASS that's why we 
recently refactored VSETVL PASS. However, I changed my mind recently after 
several
experiments and tries.

The reasons as follows:

1. For code base management and maintenance. The current VSETVL PASS is 
complicated enough and already has enough aggressive and fancy optimizations, 
which
   turns out to always generate optimal codegen in most of the cases. It's 
not a good idea to keep adding more features into the VSETVL PASS and make
 it heavy again, since then we would need to refactor it 
again in the future.
 Actually, the VSETVL PASS is very stable and optimal after the recent 
refactoring. Hopefully, we should not change the VSETVL PASS any more except 
for minor

2. vsetvl insertion (the VSETVL PASS does this) and AVL propagation are 2 
different things; I don't think we should fuse them into the same PASS.

3. The VSETVL PASS is a post-RA PASS, whereas AVL propagation should be done before 
RA, which can reduce register allocation work.

4. This patch's AVL propagation PASS only does AVL propagation for RVV partial 
auto-vectorization situations.
   This patch's code is only a few hundred lines, which is very manageable and can 
be very easily extended with features and enhancements.
 We can easily extend and enhance more AVL propagation in a clean and 
separate PASS in the future. (If we do it on VSETVL PASS, we will complicate 
 VSETVL PASS again which is already so complicated.) 

Here is an example to demonstrate more:

https://godbolt.org/z/bE86sv3q5

void foo2 (int *__restrict a,
  int *__restrict b,
  int *__restrict c,
  int *__restrict a2,
  int *__restrict b2,
  int *__restrict c2,
  int *__restrict a3,
  int *__restrict b3,
  int *__restrict c3,
  int *__restrict a4,
  int *__restrict b4,
  int *__restrict c4,
  int *__restrict a5,
  int *__restrict b5,
  int *__restrict c5,
  int n)
{
for (int i = 0; i < n; i++){
  a[i] = b[i] + c[i];
  b5[i] = b[i] + c[i];
  a2[i] = b2[i] + c2[i];
  a3[i] = b3[i] + c3[i];
  a4[i] = b4[i] + c4[i];
  a5[i] = a[i] + a4[i];
  a[i] = a5[i] + b5[i]+ a[i];

  a[i] = a[i] + c[i];
  b5[i] = a[i] + 

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Martin Uecker
Am Mittwoch, dem 25.10.2023 um 15:32 -0700 schrieb Kees Cook:
> On Wed, Oct 25, 2023 at 10:27:41PM +, Qing Zhao wrote:
> > 
> > 
> > > On Oct 25, 2023, at 6:06 PM, Kees Cook  wrote:
> > > 
> > > On Wed, Oct 25, 2023 at 01:27:29PM +, Qing Zhao wrote:
> > > > A.  Add an additional argument, the size parameter,  to __bdos, 
> > > > A.1, during FE;
> > > > A.2, during gimplification phase;
> > > 
> > > I just wanted to clarify that this is all just an "internal" detail,
> > > yes?
> > 
> > YES!
> 
> Okay, I thought so, but I just wanted to double-check. :)
> 
> > > For example, the Linux kernel can still use __bdos() without knowing
> > > the count member ahead of time (otherwise it kind of defeats the purpose).
> > Don’t quite understand this, could you clarify? 
> 
> I was just trying to explain why a chance would be a problem. But it
> doesn't matter, so nevermind. :)
> 
> > (Anyway, the bottom line is no change to the user interface, we just 
> > discuss the internal implementation inside GCC) -:)
> 
> Great! I'll go back to lurking. :)
> 
> Thanks!
> 

While it is about the internal implementation, it would
potentially affect the semantics of the attribute:

This would work:

x->count = 10;
char *p = &x->buf;

but not this:

char *p = &x->buf;
x->count = 1;
p[10] = 1; // !

(because the pointer is passed around the
store to the counter)

and also here the second store is then irrelevant
for the access:

x->count = 10;
char* p = &x->buf;
...
x->count = 1; // somewhere else

p[9] = 1; // ok, because count mattered when buf was accessed.


IMHO this makes sense also from the user side, and these
are the desirable semantics we discussed before.

But can you take a look at this?


This should simulate it fairly well:
https://godbolt.org/z/xq89aM7Gr

(the call to the noinline function would go away,
but not necessarily its impact on optimization)

Martin






Re: Re: [PATCH V2] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread juzhe.zh...@rivai.ai
Thanks Kito.

I have sent V3 with adapting testcases (2 additional dump FAILs detected by 
both Pan Li and Patrick).
No need to review.

I will wait until Patrick is OK with ignoring the popcount FAILs for now, then commit it.



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-10-26 15:51
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc; Patrick O'Neill
Subject: Re: [PATCH V2] RISC-V: Add AVL propagation PASS for RVV 
auto-vectorization
LGTM, Thanks, it's really awesome - the implementation is simpler than
I expected, it's another great improvement for RISC-V GCC!
 
Just make sure Patrick gives a green light on the testing before
committing the patch :)
 
 
 
 
On Wed, Oct 25, 2023 at 8:05 PM Juzhe-Zhong  wrote:
>
> This patch addresses the redundant AVL/VL toggling in RVV partial 
> auto-vectorization
> which is a known issue for a long time and I finally find the time to address 
> it.
>
> Consider a simple vector addition operation:
>
> https://godbolt.org/z/7hfGfEjW3
>
> void
> foo (int *__restrict a,
>  int *__restrict b,
>  int *__restrict n)
> {
>   for (int i = 0; i < n; i++)
>   a[i] = a[i] + b[i];
> }
>
> Optimized IR:
>
> Loop body:
>   _38 = .SELECT_VL (ivtmp_36, POLY_INT_CST [4, 4]);  
> -> vsetvli a5,a2,e8,mf4,ta,ma
>   ...
>   vect__4.8_27 = .MASK_LEN_LOAD (vectp_a.6_29, 32B, { -1, ... }, _38, 0);
> -> vle32.v v2,0(a0)
>   vect__6.11_20 = .MASK_LEN_LOAD (vectp_b.9_25, 32B, { -1, ... }, _38, 0);   
> -> vle32.v v1,0(a1)
>   vect__7.12_19 = vect__6.11_20 + vect__4.8_27;  
> -> vsetvli a6,zero,e32,m1,ta,ma + vadd.vv v1,v1,v2
>   .MASK_LEN_STORE (vectp_a.13_11, 32B, { -1, ... }, _38, 0, vect__7.12_19);  
> -> vsetvli zero,a5,e32,m1,ta,ma + vse32.v v1,0(a4)
>
> We can see 2 redundant vsetvls inside the loop body due to AVL/VL toggling.
> The AVL/VL toggling is because we are missing LEN information in simple 
> PLUS_EXPR GIMPLE assignment:
>
> vect__7.12_19 = vect__6.11_20 + vect__4.8_27;
>
> GCC apply partial predicate load/store and un-predicated full vector 
> operation on partial vectorization.
> Such flow are used by all other targets like ARM SVE (RVV also uses such 
> flow):
>
> ARM SVE:
>
> .L3:
> ld1wz30.s, p7/z, [x0, x3, lsl 2]   -> predicated load
> ld1wz31.s, p7/z, [x1, x3, lsl 2]   -> predicated load
> add z31.s, z31.s, z30.s-> un-predicated add
> st1wz31.s, p7, [x0, x3, lsl 2] -> predicated store
>
> Such vectorization flow causes AVL/VL toggling on RVV so we need AVL 
> propagation PASS for it.
>
> Also, It's very unlikely that we can apply predicated operations on all 
> vectorization for following reasons:
>
> 1. It's very heavy workload to support them on all vectorization and we don't 
> see any benefits if we can handle that on targets backend.
> 2. Changing Loop vectorizer for it will make code base ugly and hard to 
> maintain.
> 3. We will need so many patterns for all operations. Not only COND_LEN_ADD, 
> COND_LEN_SUB, 
>We also need COND_LEN_EXTEND, , COND_LEN_CEIL, ... .. over 100+ 
> patterns, unreasonable number of patterns.
>
> To conclude, we prefer un-predicated operations here, and design a nice and 
> clean AVL propagation PASS for it to elide the redundant vsetvls
> due to AVL/VL toggling.
>
> The second question is that why we separate a PASS called AVL propagation. 
> Why not optimize it in VSETVL PASS (We definitely can optimize AVL in VSETVL 
> PASS)
>
> Frankly, I was planning to address such issue in VSETVL PASS that's why we 
> recently refactored VSETVL PASS. However, I changed my mind recently after 
> several
> experiments and tries.
>
> The reasons as follows:
>
> 1. For code base management and maintenance. Current VSETVL PASS is 
> complicated enough and already has enough aggressive and fancy optimizations 
> which
>turns out it can always generate optimal codegen in most of the cases. 
> It's not a good idea keep adding more features into VSETVL PASS to make VSETVL
>  PASS become heavy and heavy again, then we will need to refactor it 
> again in the future.
>  Actually, the VSETVL PASS is very stable and optimal after the recent 
> refactoring. Hopefully, we should not change VSETVL PASS any more except the 
> minor
>  fixes.
>
> 2. vsetvl insertion (VSETVL PASS does this thing) and AVL propagation are 2 
> different things,  I don't think we should fuse them into same PASS.
>
> 3. VSETVL PASS is a post-RA PASS, whereas AVL propagation should be done 
> before RA which can reduce register allocation.
>
> 4. This patch's AVL propagation PASS only does AVL propagation for RVV 
> partial auto-vectorization situations.
>This patch's code is only hundreds of lines, which is very manageable and can 
> be very easily extended features and enhancements.
>  We can easily extend and enhance more AVL propagation in a clean and 
> separate PASS in the future. (If we do i

Re: [PATCH v2 2/4] libgrust: Add libproc_macro and build system

2023-10-26 Thread Thomas Schwinge
Hi!

First, I've pushed into GCC upstream Git branch devel/rust/libgrust-v2
the "v2" libgrust changes as posted by Arthur, so that people can easily
test this before it getting into Git master branch.

I'll myself later try this for GCN and nvptx targets -- in their current
form where they don't support C++ (standard library), and in my hacky WIP
trees where C++ (standard library) is supported to some extent.  (This
should, roughly, match C++ functionality (not) provided by a number of
other GCC "embedded" targets.)


Then:

On 2023-10-25T13:06:46+0200, Arthur Cohen  wrote:
> From: Pierre-Emmanuel Patry 
>
> Add some dummy files in libproc_macro along with its build system.

I've not reviewed the build system in detail, just had a very quick look.

Three instances of 'librust'; should be 'libgrust':

configure.ac:AC_INIT([libgrust], version-unused,,librust)

configure.ac:AC_MSG_NOTICE([librust has been configured.])

Makefile.am:"TARGET_LIB_PATH_librust=$(TARGET_LIB_PATH_librust)" \

Compared to libgomp (which I'm reasonably familiar with), I found missing
in 'libgrust' at 'configure'-level:

  --enable-multilib   build many library versions (default)

  --disable-werrordisable building with -Werror

  --enable-symvers=STYLE  enables symbol versioning of the shared library
  [default=yes]

  --enable-cetenable Intel CET in target libraries 
[default=auto]

  --with-gcc-major-version-only
  use only GCC major number in filesystem paths

I can't tell off-hand whether all these are important, however.

Additionally, the new one that's being discussed in

'Update libgrust for upstream GCC commit 
6a6d3817afa02bbcd2388c8e005da6faf88932f1 "Config,Darwin: Allow for configuring 
Darwin to use embedded runpath"'.


Grüße
 Thomas


> libgrust/Changelog:
>
>   * Makefile.am: New file.
>   * configure.ac: New file.
>   * libproc_macro/Makefile.am: New file.
>   * libproc_macro/proc_macro.cc: New file.
>   * libproc_macro/proc_macro.h: New file.
>
> Signed-off-by: Pierre-Emmanuel Patry 
> ---
>  libgrust/Makefile.am |  68 
>  libgrust/configure.ac| 113 +++
>  libgrust/libproc_macro/Makefile.am   |  58 ++
>  libgrust/libproc_macro/proc_macro.cc |   7 ++
>  libgrust/libproc_macro/proc_macro.h  |   7 ++
>  5 files changed, 253 insertions(+)
>  create mode 100644 libgrust/Makefile.am
>  create mode 100644 libgrust/configure.ac
>  create mode 100644 libgrust/libproc_macro/Makefile.am
>  create mode 100644 libgrust/libproc_macro/proc_macro.cc
>  create mode 100644 libgrust/libproc_macro/proc_macro.h
>
> diff --git a/libgrust/Makefile.am b/libgrust/Makefile.am
> new file mode 100644
> index 000..8e5274922c5
> --- /dev/null
> +++ b/libgrust/Makefile.am
> @@ -0,0 +1,68 @@
> +AUTOMAKE_OPTIONS = 1.8 foreign
> +
> +SUFFIXES = .c .rs .def .o .lo .a
> +
> +ACLOCAL_AMFLAGS = -I . -I .. -I ../config
> +
> +AM_CFLAGS = -I $(srcdir)/../libgcc -I $(MULTIBUILDTOP)../../gcc/include
> +
> +TOP_GCCDIR := $(shell cd $(top_srcdir) && cd .. && pwd)
> +
> +GCC_DIR = $(TOP_GCCDIR)/gcc
> +RUST_SRC = $(GCC_DIR)/rust
> +
> +toolexeclibdir=@toolexeclibdir@
> +toolexecdir=@toolexecdir@
> +
> +SUBDIRS = libproc_macro
> +
> +RUST_BUILDDIR := $(shell pwd)
> +
> +# Work around what appears to be a GNU make bug handling MAKEFLAGS
> +# values defined in terms of make variables, as is the case for CC and
> +# friends when we are called from the top level Makefile.
> +AM_MAKEFLAGS = \
> +"GCC_DIR=$(GCC_DIR)" \
> +"RUST_SRC=$(RUST_SRC)" \
> + "AR_FLAGS=$(AR_FLAGS)" \
> + "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
> + "CC_FOR_TARGET=$(CC_FOR_TARGET)" \
> + "RUST_FOR_TARGET=$(RUST_FOR_TARGET)" \
> + "CFLAGS=$(CFLAGS)" \
> + "CXXFLAGS=$(CXXFLAGS)" \
> + "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
> + "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
> + "INSTALL=$(INSTALL)" \
> + "INSTALL_DATA=$(INSTALL_DATA)" \
> + "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
> + "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
> + "LDFLAGS=$(LDFLAGS)" \
> + "LIBCFLAGS=$(LIBCFLAGS)" \
> + "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
> + "MAKE=$(MAKE)" \
> + "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
> + "PICFLAG=$(PICFLAG)" \
> + "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
> + "SHELL=$(SHELL)" \
> + "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
> + "exec_prefix=$(exec_prefix)" \
> + "infodir=$(infodir)" \
> + "libdir=$(libdir)" \
> + "includedir=$(includedir)" \
> + "prefix=$(prefix)" \
> + "tooldir=$(tooldir)" \
> + "gxx_include_dir=$(gxx_include_dir)" \
> + "AR=$(AR)" \
> + "AS=$(AS)" \
> + "LD=$(LD)" \
> + "RANLIB=$(RANLIB)" \
> + "NM=$(NM)" \
> + "NM_FOR_BUILD=$(NM_FOR_BUILD)" \
> + "NM_FOR_TAR

Re: [PATCH V2 1/2] Pass type of comparison operands instead of comparison result to truth_type_for in build_vec_cmp.

2023-10-26 Thread Richard Biener
On Thu, Oct 26, 2023 at 3:15 AM liuhongt  wrote:
>
> >I think it's indeed on purpose that the result of v1 < v2 is a signed
> >integer vector type.
> >But build_vec_cmp should not use the truth type for the result but instead 
> >the
> >truth type for the comparison, so
>
> Change build_vec_cmp in both c/c++, also notice for jit part, it already uses
> type of comparison instead of the result.

OK if bootstrap/testing succeeds.

Thanks,
Richard.

> gcc/c/ChangeLog:
>
> * c-typeck.cc (build_vec_cmp): Pass type of arg0 to
> truth_type_for.
>
> gcc/cp/ChangeLog:
>
> * typeck.cc (build_vec_cmp): Pass type of arg0 to
> truth_type_for.
> ---
>  gcc/c/c-typeck.cc | 2 +-
>  gcc/cp/typeck.cc  | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
> index e55e887da14..41ee38368f2 100644
> --- a/gcc/c/c-typeck.cc
> +++ b/gcc/c/c-typeck.cc
> @@ -11982,7 +11982,7 @@ build_vec_cmp (tree_code code, tree type,
>  {
>tree zero_vec = build_zero_cst (type);
>tree minus_one_vec = build_minus_one_cst (type);
> -  tree cmp_type = truth_type_for (type);
> +  tree cmp_type = truth_type_for (TREE_TYPE (arg0));
>tree cmp = build2 (code, cmp_type, arg0, arg1);
>return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
>  }
> diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
> index 8132bd7fccc..7b2ad51bde7 100644
> --- a/gcc/cp/typeck.cc
> +++ b/gcc/cp/typeck.cc
> @@ -4826,7 +4826,7 @@ build_vec_cmp (tree_code code, tree type,
>  {
>tree zero_vec = build_zero_cst (type);
>tree minus_one_vec = build_minus_one_cst (type);
> -  tree cmp_type = truth_type_for (type);
> +  tree cmp_type = truth_type_for (TREE_TYPE (arg0));
>tree cmp = build2 (code, cmp_type, arg0, arg1);
>return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
>  }
> --
> 2.31.1
>


Re: [PATCH V2] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread Robin Dapp
> I have sent V3 with adapting testcases (2 additional dump FAILs detected by 
> both Pan Li and Patrick).
> No need to review.
> 
> I will wait for patrick is ok to ignore popcount FAILs for now then commit it.

Just to confirm:  I can now also reproduce the popcount fail on my machine
without your patch.

Regards
 Robin


Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer

2023-10-26 Thread Richard Biener
On Thu, Oct 26, 2023 at 4:18 AM  wrote:
>
> From: Pan Li 
>
> Update in v2:
>
> * Fix one ICE of type assertion.
> * Adjust some test cases for aarch64 sve and riscv vector.
>
> Original log:
>
> The vectoriable_call has one restriction of the size of data type.
> Aka DF to DI is allowed but SF to DI isn't. You may see below message
> when try to vectorize function call like lrintf.
>
> void
> test_lrintf (long *out, float *in, unsigned count)
> {
>   for (unsigned i = 0; i < count; i++)
> out[i] = __builtin_lrintf (in[i]);
> }
>
> lrintf.c:5:26: missed: couldn't vectorize loop
> lrintf.c:5:26: missed: not vectorized: unsupported data-type
>
> Then the standard name pattern like lrintmn2 cannot work for different
> data type size like SF => DI. This patch would like to remove this data
> type size check and unblock the standard name like lrintmn2.
>
> The below test are passed for this patch.
>
> * The x86 bootstrap and regression test.
> * The aarch64 regression test.
> * The risc-v regression tests.
>
> gcc/ChangeLog:
>
> * internal-fn.cc (expand_fn_using_insn): Add vector int assertion.
> * tree-vect-stmts.cc (vectorizable_call): Remove size check.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/clrsb_1.c: Adjust checker.
> * gcc.target/aarch64/sve/clz_1.c: Ditto.
> * gcc.target/aarch64/sve/popcount_1.c: Ditto.
> * gcc.target/riscv/rvv/autovec/unop/popcount.c: Ditto.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/internal-fn.cc  |  3 ++-
>  gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c  |  3 +--
>  gcc/testsuite/gcc.target/aarch64/sve/clz_1.c|  3 +--
>  gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c   |  3 +--
>  .../gcc.target/riscv/rvv/autovec/unop/popcount.c|  2 +-
>  gcc/tree-vect-stmts.cc  | 13 -
>  6 files changed, 6 insertions(+), 21 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 61d5a9e4772..17c0f4c3805 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -281,7 +281,8 @@ expand_fn_using_insn (gcall *stmt, insn_code icode, 
> unsigned int noutputs,
> emit_move_insn (lhs_rtx, ops[0].value);
>else
> {
> - gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
> + gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
> +  || VECTOR_INTEGER_TYPE_P (TREE_TYPE (lhs)));

Can you explain why this is necessary?  In particular what is lhs_rtx
mode vs ops[0].value mode?

>   convert_move (lhs_rtx, ops[0].value, 0);

I'm not sure convert_move handles vector modes correctly.  Richard
probably added this code, CCed.

Richard.

> }
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
> index bdc9856faaf..940d08bbc7b 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
> @@ -18,5 +18,4 @@ clrsb_64 (unsigned int *restrict dst, uint64_t *restrict 
> src, int size)
>  }
>
>  /* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 1 } } */
> -/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 2 } } */
> -/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
> z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
> index 0c7a4e6d768..58b8ff406d2 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
> @@ -18,5 +18,4 @@ clz_64 (unsigned int *restrict dst, uint64_t *restrict src, 
> int size)
>  }
>
>  /* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 1 } } */
> -/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 2 } } */
> -/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
> z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c
> index dfb6f4ac7a5..0eba898307c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c
> @@ -18,5 +18,4 @@ popcount_64 (unsigned int *restrict dst, uint64_t *restrict 
> src, int size)
>  }
>
>  /* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 1 } } */
> -/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 2 } } */
> -/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
> z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.d, p[0-7]/m, 
>

[PATCH] tree-optimization/109334: Improve computation for access attribute

2023-10-26 Thread Martin Uecker



Hi Sid and Jakub,

here is the patch discussed in PR 109334.

Martin



tree-optimization/109334: Improve computation for access attribute

The fix for PR104970 restricted size computations to the case
where the access attribute was specified explicitly (no VLA).
It also restricted it to void pointers or elements with constant
sizes.  The second restriction is enough to fix the original bug.
Revert the first change to again allow size computations for VLA
parameters and for VLA parameters together with an explicit access
attribute.

gcc/ChangeLog:

PR tree-optimization/109334
* tree-object-size.cc (parm_object_size): Allow size
computation for explicit access attributes.

gcc/testsuite/ChangeLog:

PR tree-optimization/109334
* gcc.dg/builtin-dynamic-object-size-20.c
(test_parmsz_simple3): Supported again.
(test_parmsz_external4): New test.
* gcc.dg/builtin-dynamic-object-size-20.c: New test.
* gcc.dg/pr104970.c: New test.

diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
index 6da04202ffe..07e3da6f254 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
@@ -455,7 +455,6 @@ test_parmsz_simple2 (size_t sz, char obj[])
   return __builtin_dynamic_object_size (obj, 0);
 }
 
-/* Implicitly constructed access attributes not supported yet.  */
 size_t
 __attribute__ ((noinline))
 test_parmsz_simple3 (size_t sz, char obj[sz])
@@ -527,6 +526,13 @@ test_parmsz_internal3 (size_t sz1, size_t sz2, double 
obj[sz1][sz2])
   return __builtin_dynamic_object_size (obj, 0);
 }
 
+size_t
+__attribute__ ((noinline))
+test_parmsz_internal4 (size_t sz1, size_t sz2, double obj[sz1 + 1][4])
+{
+  return __builtin_dynamic_object_size (obj, 0);
+}
+
 /* Loops.  */
 
 size_t
@@ -721,8 +727,8 @@ main (int argc, char **argv)
   if (test_parmsz_simple2 (__builtin_strlen (argv[0]) + 1, argv[0])
   != __builtin_strlen (argv[0]) + 1)
 FAIL ();
-  /* Only explicitly added access attributes are supported for now.  */
-  if (test_parmsz_simple3 (__builtin_strlen (argv[0]) + 1, argv[0]) != -1)
+  if (test_parmsz_simple3 (__builtin_strlen (argv[0]) + 1, argv[0]) 
+  != __builtin_strlen (argv[0]) + 1)
 FAIL ();
   int arr[42];
   if (test_parmsz_scaled (arr, 42) != sizeof (arr))
@@ -759,6 +765,8 @@ main (int argc, char **argv)
 FAIL ();
   if (test_parmsz_internal3 (4, 4, obj) != -1)
 FAIL ();
+  if (test_parmsz_internal4 (3, 4, obj) != -1)
+FAIL ();
   if (test_loop (arr, 42, 0, 32, 1) != 10 * sizeof (int))
 FAIL ();
   if (test_loop (arr, 42, 32, -1, -1) != 0)
diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c
new file mode 100644
index 000..2c8e07dd98d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c
@@ -0,0 +1,49 @@
+/* PR 109334 
+ * { dg-do run }
+ * { dg-options "-O1" } */
+
+
+[[gnu::noinline,gnu::noipa]]
+int f(int n, int buf[n])
+[[gnu::access(read_only, 2, 1)]]
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+[[gnu::noinline,gnu::noipa]]
+int g(int n, int buf[])
+[[gnu::access(read_only, 2, 1)]]
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+[[gnu::noinline,gnu::noipa]]
+int h(int n, int buf[n])
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+int dummy(int x) { return x + 1; }
+
+[[gnu::noinline,gnu::noipa]]
+int i(int n, int buf[dummy(n)])
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+int main()
+{
+int n = 10;
+int buf[n];
+if (n * sizeof(int) != f(n, buf))
+__builtin_abort();
+if (n * sizeof(int) != g(n, buf))
+__builtin_abort();
+if (n * sizeof(int) != h(n, buf))
+__builtin_abort();
+
+(void)i(n, buf);
+ 
+return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/pr104970.c b/gcc/testsuite/gcc.dg/pr104970.c
new file mode 100644
index 000..e24a7f22dfb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr104970.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -D_FORTIFY_SOURCE=2" } */
+
+__inline void
+memset2(void *__dest, int __ch, long __len) {
+  long __trans_tmp_1 = __builtin_dynamic_object_size(__dest, 0);
+  __builtin___memset_chk(__dest, __ch, __len, __trans_tmp_1);
+}
+
+void
+mleye(int l, double E[][l]) { memset2(E, 0, sizeof(double)); }
+
+
diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc
index a62af050056..28f27adf9ca 100644
--- a/gcc/tree-object-size.cc
+++ b/gcc/tree-object-size.cc
@@ -1575,8 +1575,8 @@ parm_object_size (struct object_size_info *osi, tree var)
   tree typesize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (parm)));
   tree sz = NULL_TREE;
 
-  /* If we have an explicit access attribute with a usable size argument... */
-  if (access &&

Re: Re: [PATCH V2] RISC-V: Add AVL propagation PASS for RVV auto-vectorization

2023-10-26 Thread juzhe.zh...@rivai.ai
Oh. It's surprising.
I think current RVV GCC is not stable and buggy so that different FAILs in 
different machines.

Currently, we have 2 middle-end bugs:

1. COND_LEN_XXX: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111760 
2. Gather load bug:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111970 

I guess they are related to make RVV GCC unstable, so testing various in 
different machines.



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-10-26 16:34
To: juzhe.zh...@rivai.ai; Kito.cheng
CC: rdapp.gcc; gcc-patches; kito.cheng; jeffreyalaw; Patrick O'Neill
Subject: Re: [PATCH V2] RISC-V: Add AVL propagation PASS for RVV 
auto-vectorization
> I have sent V3 with adapting testcases (2 additional dump FAILs detected by 
> both Pan Li and Patrick).
> No need to review.
> 
> I will wait for patrick is ok to ignore popcount FAILs for now then commit it.
 
Just to confirm:  I can now also reproduce the popcount fail on my machine
without your patch.
 
Regards
Robin
 


Re: [PATCH] internal-fn: Add VCOND_MASK_LEN.

2023-10-26 Thread Robin Dapp
> Yeah. I think Robin may need this :
> 
> TREE_CODE (else_val) == SSA_NAME
>   && SSA_NAME_IS_DEFAULT_DEF (else_val)
>   && VAR_P (SSA_NAME_VAR (else_val))
> 
> to differentiate whether the ELSE VALUE is uninitialized SSA or not.

I think we are talking about a different simplification now.
This one we could still add as a match.pd pattern simplifying every
conditional operation with an undefined else value.

I just re-checked - without my pattern that turns
VCOND_MASK_LEN into VEC_COND there is only one additional fail.
(cond_widen_reduc-2.c where we scan for vfwreduc).
I guess I can just change the combine pattern to combine cond
as well as length masking (merge + if_then_else) when the else
value is similar in both.  Then we would avoid my dubious
simplification and still get rid of the execution failures.

Surely Richard is right in that we cannot "unconditionally" fold
away the length but my naive hunch is that we currently never
create situations where this really leads to errors.

Regards
 Robin



Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Richard Biener
On Wed, Oct 25, 2023 at 8:16 PM Martin Uecker  wrote:
>
> Am Mittwoch, dem 25.10.2023 um 13:13 +0200 schrieb Richard Biener:
> >
> > > Am 25.10.2023 um 12:47 schrieb Martin Uecker :
> > >
> > > Am Mittwoch, dem 25.10.2023 um 06:25 -0400 schrieb Siddhesh Poyarekar:
> > > > > On 2023-10-25 04:16, Martin Uecker wrote:
> > > > > Am Mittwoch, dem 25.10.2023 um 08:43 +0200 schrieb Richard Biener:
> > > > > >
> > > > > > > Am 24.10.2023 um 22:38 schrieb Martin Uecker :
> > > > > > >
> > > > > > > Am Dienstag, dem 24.10.2023 um 20:30 + schrieb Qing Zhao:
> > > > > > > > Hi, Sid,
> > > > > > > >
> > > > > > > > Really appreciate for your example and detailed explanation. 
> > > > > > > > Very helpful.
> > > > > > > > I think that this example is an excellent example to show 
> > > > > > > > (almost) all the issues we need to consider.
> > > > > > > >
> > > > > > > > I slightly modified this example to make it to be compilable 
> > > > > > > > and run-able, as following:
> > > > > > > > (but I still cannot make the incorrect reordering or DSE 
> > > > > > > > happening, anyway, the potential reordering possibility is 
> > > > > > > > there…)
> > > > > > > >
> > > > > > > >  1 #include 
> > > > > > > >  2 struct A
> > > > > > > >  3 {
> > > > > > > >  4  size_t size;
> > > > > > > >  5  char buf[] __attribute__((counted_by(size)));
> > > > > > > >  6 };
> > > > > > > >  7
> > > > > > > >  8 static size_t
> > > > > > > >  9 get_size_from (void *ptr)
> > > > > > > > 10 {
> > > > > > > > 11  return __builtin_dynamic_object_size (ptr, 1);
> > > > > > > > 12 }
> > > > > > > > 13
> > > > > > > > 14 void
> > > > > > > > 15 foo (size_t sz)
> > > > > > > > 16 {
> > > > > > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
> > > > > > > > sizeof(char));
> > > > > > > > 18  obj->size = sz;
> > > > > > > > 19  obj->buf[0] = 2;
> > > > > > > > 20  __builtin_printf (“%d\n", get_size_from (obj->buf));
> > > > > > > > 21  return;
> > > > > > > > 22 }
> > > > > > > > 23
> > > > > > > > 24 int main ()
> > > > > > > > 25 {
> > > > > > > > 26  foo (20);
> > > > > > > > 27  return 0;
> > > > > > > > 28 }
> > > > > > > >
> > > >
> > > > 
> > > >
> > > > > > When it’s set I suppose.  Turn
> > > > > >
> > > > > > X.l = n;
> > > > > >
> > > > > > Into
> > > > > >
> > > > > > X.l = __builtin_with_size (x.buf, n);
> > > > >
> > > > > It would turn
> > > > >
> > > > > some_variable = (&) x.buf
> > > > >
> > > > > into
> > > > >
> > > > > some_variable = __builtin_with_size ( (&) x.buf. x.len)
> > > > >
> > > > >
> > > > > So the later access to x.buf and not the initialization
> > > > > of a member of the struct (which is too early).
> > > > >
> > > >
> > > > Hmm, so with Qing's example above, are you suggesting the transformation
> > > > be to foo like so:
> > > >
> > > > 14 void
> > > > 15 foo (size_t sz)
> > > > 16 {
> > > > 16.5  void * _1;
> > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
> > > > sizeof(char));
> > > > 18  obj->size = sz;
> > > > 19  obj->buf[0] = 2;
> > > > 19.5  _1 = __builtin_with_size (obj->buf, obj->size);
> > > > 20  __builtin_printf (“%d\n", get_size_from (_1));
> > > > 21  return;
> > > > 22 }
> > > >
> > > > If yes then this could indeed work.  I think I got thrown off by the
> > > > reference to __bdos.
> > >
> > > Yes. I think it is important not to evaluate the size at the
> > > access to buf and not the allocation, because the point is to
> > > recover it from the size member even when the compiler can't
> > > see the original allocation.
> >
> > But if the access is through a pointer without the attribute visible
> > even the Frontend cannot recover?
>
> Yes, if the access is using a struct-with-FAM without the attribute
> the FE would not insert the builtin.  BDOS could potentially
> still see the original allocation but if it doesn't, then there is
> no information.
>
> > We’d need to force type correctness and give up on indirecting
> > through an int * when it can refer to two diffenent container types.
> > The best we can do I think is mark allocation sites and hope for
> > some basic code hygiene (not clobbering size or array pointer
> > through pointers without the appropriately attributed type)
>
> I do not fully understand what you are referring to.

struct A { int n; int data[n]; };
struct B { long n; int data[n]; };

int *p = flag ? a->data : b->data;

access *p;

Since we need to allow interoperability of pointers (a->data is
convertible to a non-fat pointer of type int *) this leaves us with
ambiguity we need to conservatively handle to avoid false positives.

We _might_ want to diagnose decay of a->data to int *, but IIRC
there's no way (or proposal) to allow declaring a corresponding
fat pointer, so it's not a good designed feature.

Having __builtin_with_size at allocation would possibly make
the BOS use-def walk discover both objects.  I think you can't
insert __builtin_with_size at the access to *p, but in practice
that would be very much needed.

Re: [PATCH] libcpp: Improve the diagnostic for poisoned identifiers [PR36887]

2023-10-26 Thread Christophe Lyon
Hi!

On Wed, 20 Sept 2023 at 06:12, Lewis Hyatt  wrote:
>
> Hello-
>
> This patch implements the PR's request to add more information to the
> diagnostic issued for using a poisoned identifier. Bootstrapped + regtested
> all languages on x86-64 Linux. Does it look OK please? Thanks!
>
> -Lewis
>
> -- >8 --
>
> The PR requests an enhancement to the diagnostic issued for the use of a
> poisoned identifier. Currently, we show the location of the usage, but not
> the location which requested the poisoning, which would be helpful for the
> user if the decision to poison an identifier was made externally, such as
> in a library header.
>
> In order to output this information, we need to remember a location_t for
> each identifier that has been poisoned, and that data needs to be preserved
> as well in a PCH. One option would be to add a field to struct cpp_hashnode,
> but there is no convenient place to add it without increasing the size of
> the struct for all identifiers. Given this facility will be needed rarely,
> it seemed better to add a second hash map, which is handled PCH-wise the
> same as the current one in gcc/stringpool.cc. This hash map associates a new
> struct cpp_hashnode_extra with each identifier that needs one. Currently
> that struct only contains the new location_t, but it could be extended in
> the future if there is other ancillary data that may be convenient to put
> there for other purposes.
>
> libcpp/ChangeLog:
>
> PR preprocessor/36887
> * directives.cc (do_pragma_poison): Store in the extra hash map the
> location from which an identifier has been poisoned.
> * lex.cc (identifier_diagnostics_on_lex): When issuing a diagnostic
> for the use of a poisoned identifier, also add a note indicating the
> location from which it was poisoned.
> * identifiers.cc (alloc_node): Convert to template function.
> (_cpp_init_hashtable): Handle the new extra hash map.
> (_cpp_destroy_hashtable): Likewise.
> * include/cpplib.h (struct cpp_hashnode_extra): New struct.
> (cpp_create_reader): Update prototype to...
> * init.cc (cpp_create_reader): ...accept an argument for the extra
> hash table and pass it to _cpp_init_hashtable.
> * include/symtab.h (ht_lookup): New overload for convenience.
> * internal.h (struct cpp_reader): Add EXTRA_HASH_TABLE member.
> (_cpp_init_hashtable): Adjust prototype.
>
> gcc/c-family/ChangeLog:
>
> PR preprocessor/36887
> * c-opts.cc (c_common_init_options): Pass new extra hash map
> argument to cpp_create_reader().
>
> gcc/ChangeLog:
>
> PR preprocessor/36887
> * toplev.h (ident_hash_extra): Declare...
> * stringpool.cc (ident_hash_extra): ...this new global variable.
> (init_stringpool): Handle ident_hash_extra as well as ident_hash.
> (ggc_mark_stringpool): Likewise.
> (ggc_purge_stringpool): Likewise.
> (struct string_pool_data_extra): New struct.
> (spd2): New GC root variable.
> (gt_pch_save_stringpool): Use spd2 to handle ident_hash_extra,
> analogous to how spd is used to handle ident_hash.
> (gt_pch_restore_stringpool): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> PR preprocessor/36887
> * c-c++-common/cpp/diagnostic-poison.c: New test.
> * g++.dg/pch/pr36887.C: New test.
> * g++.dg/pch/pr36887.Hs: New test.

We have noticed that the new tests fail on aarch64 with:
.../aarch64-unknown-linux-gnu/libc/usr/lib/crt1.o: in function `_start':
.../sysdeps/aarch64/start.S:110:(.text+0x38): undefined reference to `main'

Looking at the test, I'd say it lacks a dg-do compile (to avoid
linking), but how does it work on other targets?

Thanks,

Christophe

> ---
>  libcpp/directives.cc  |  3 ++
>  libcpp/identifiers.cc | 42 +++--
>  libcpp/include/cpplib.h   | 21 ++---
>  libcpp/include/symtab.h   |  6 +++
>  libcpp/init.cc|  4 +-
>  libcpp/internal.h |  8 +++-
>  libcpp/lex.cc | 10 -
>  gcc/c-family/c-opts.cc|  2 +-
>  gcc/stringpool.cc | 45 +++
>  gcc/toplev.h  |  3 +-
>  .../c-c++-common/cpp/diagnostic-poison.c  | 13 ++
>  gcc/testsuite/g++.dg/pch/pr36887.C|  3 ++
>  gcc/testsuite/g++.dg/pch/pr36887.Hs   |  1 +
>  13 files changed, 134 insertions(+), 27 deletions(-)
>  create mode 100644 gcc/testsuite/c-c++-common/cpp/diagnostic-poison.c
>  create mode 100644 gcc/testsuite/g++.dg/pch/pr36887.C
>  create mode 100644 gcc/testsuite/g++.dg/pch/pr36887.Hs
>
> diff --git a/libcpp/directives.cc b/libcpp/directives.cc
> index ee5419d1f40..c5c938fda1d 100644
> --- a/libcpp/directives.cc
> +++ b

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Richard Biener
On Thu, Oct 26, 2023 at 7:22 AM Jakub Jelinek  wrote:
>
> On Wed, Oct 25, 2023 at 07:03:43PM +, Qing Zhao wrote:
> > For the code generation impact:
> >
> > turning the original  x.buf
> > to a builtin function call
> > __builtin_with_access_and_size(x.buf, x.L, -1)
> >
> > might inhibit some optimizations from happening before the builtin is
> > evaluated into object size info (phase  .objsz1).  I guess there might be
> > some performance impact.
> >
> > However, if we mark this builtin as PURE, NOTHROW, etc, then the negative
> > performance impact will be reduced to minimum?
>
> You can't drop it during objsz1 pass though, otherwise __bdos wouldn't
> be able to figure out the dynamic sizes in case of normal (non-early)
> inlining - caller takes address of a counted_by array, passes it down
> to callee which is only inlined late and uses __bdos, or callee takes address
> and returns it and caller uses __bdos, etc. - so it would need to be objsz2.
>
> And while the builtin (or if it is an internal detail rather than user
> accessible builtin an internal function) could be even const/nothrow/leaf if
> the arguments contain the loads from the structure 2 fields, I'm afraid it
> will still have huge code generation impact, prevent tons of pre-IPA
> optimizations.  And it will need some work to handle it properly during
> inlining heuristics, because in GIMPLE the COMPONENT_REF loads aren't gimple
> values, so it wouldn't be just the builtin/internal-fn call to be ignored,
> but also the count load from memory.

I think we want to track the value, not the "memory" in the builtin call,
so GIMPLE would be

 _1 = x.L;
 .. = __builtin_with_access_and_size (&x.buf, _1, -1);

also please make sure to use an internal function for
__builtin_with_access_and_size,
I don't think we want to expose this to users - it's an implementation detail.

Richard.

>
> Jakub
>


RE: [PATCH-1v4, expand] Enable vector mode for compare_by_pieces [PR111449]

2023-10-26 Thread Jiang, Haochen
> -Original Message-
> From: Jiang, Haochen
> Sent: Wednesday, October 25, 2023 4:47 PM
> To: Richard Sandiford ; HAO CHEN GUI
> 
> Cc: gcc-patches 
> Subject: RE: [PATCH-1v4, expand] Enable vector mode for compare_by_pieces
> [PR111449]
> 
> > -Original Message-
> > From: Richard Sandiford 
> > Sent: Wednesday, October 25, 2023 4:40 PM
> > To: HAO CHEN GUI 
> > Cc: Jiang, Haochen ; gcc-patches  > patc...@gcc.gnu.org>
> > Subject: Re: [PATCH-1v4, expand] Enable vector mode for
> > compare_by_pieces [PR111449]
> >
> > HAO CHEN GUI  writes:
> > > Hi Haochen,
> > >   The regression cases are caused by "targetm.scalar_mode_supported_p"
> > > I added for scalar mode checking. XImode, OImode and TImode (with
> > > -m32) are not enabled in ix86_scalar_mode_supported_p. So they're
> > > excluded from by pieces operations on i386.
> > >
> > >   The original code doesn't do a check for scalar modes. I think it
> > > might be incorrect as not all scalar modes support move and compare
> optabs. (e.g.
> > > TImode with -m32 on rs6000).
> > >
> > >   I drafted a new patch to manually check optabs for scalar mode.
> > > Now both vector and scalar modes are checked for optabs.
> > >
> > >   I did a simple test. All former regression cases are back. Could
> > > you help do a full regression test? I am worry about the coverage of my CI
> system.
> 
> Thanks for that. I am running the regression test now.

The patch works. Thanks a lot!

Sorry for the delay since my CI accidentally crashed.

Thx,
Haochen

> 
> Thx,
> Haochen
> 
> >
> > Thanks for the quick fix.  The patch LGTM FWIW.  Just a small
> > suggestion for the function name:
> >
> > >
> > > Thanks
> > > Gui Haochen
> > >
> > > patch.diff
> > > diff --git a/gcc/expr.cc b/gcc/expr.cc index
> > > 7aac575eff8..2af9fcbed18
> > > 100644
> > > --- a/gcc/expr.cc
> > > +++ b/gcc/expr.cc
> > > @@ -1000,18 +1000,21 @@ can_use_qi_vectors (by_pieces_operation
> op)
> > >  /* Return true if optabs exists for the mode and certain by pieces
> > > operations.  */
> > >  static bool
> > > -qi_vector_mode_supported_p (fixed_size_mode mode,
> > by_pieces_operation
> > > op)
> > > +mode_supported_p (fixed_size_mode mode, by_pieces_operation op)
> >
> > Might be worth calling this something more specific, such as
> > by_pieces_mode_supported_p.
> >
> > Otherwise the patch is OK for trunk if it passes the x86 testing.
> >
> > Thanks,
> > Richard
> >
> > >  {
> > > +  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
> > > +return false;
> > > +
> > >if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES)
> > > -  && optab_handler (vec_duplicate_optab, mode) !=
> CODE_FOR_nothing)
> > > -return true;
> > > +  && VECTOR_MODE_P (mode)
> > > +  && optab_handler (vec_duplicate_optab, mode) ==
> > CODE_FOR_nothing)
> > > +return false;
> > >
> > >if (op == COMPARE_BY_PIECES
> > > -  && optab_handler (mov_optab, mode) != CODE_FOR_nothing
> > > -  && can_compare_p (EQ, mode, ccp_jump))
> > > -return true;
> > > +  && !can_compare_p (EQ, mode, ccp_jump))
> > > +return false;
> > >
> > > -  return false;
> > > +  return true;
> > >  }
> > >
> > >  /* Return the widest mode that can be used to perform part of an @@
> > > -1035,7 +1038,7 @@ widest_fixed_size_mode_for_size (unsigned int
> > > size,
> > by_pieces_operation op)
> > > {
> > >   if (GET_MODE_SIZE (candidate) >= size)
> > > break;
> > > - if (qi_vector_mode_supported_p (candidate, op))
> > > + if (mode_supported_p (candidate, op))
> > > result = candidate;
> > > }
> > >
> > > @@ -1049,7 +1052,7 @@ widest_fixed_size_mode_for_size (unsigned int
> > size, by_pieces_operation op)
> > >  {
> > >mode = tmode.require ();
> > >if (GET_MODE_SIZE (mode) < size
> > > -   && targetm.scalar_mode_supported_p (mode))
> > > +   && mode_supported_p (mode, op))
> > >result = mode;
> > >  }
> > >
> > > @@ -1454,7 +1457,7 @@
> > op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
> > > break;
> > >
> > >   if (GET_MODE_SIZE (candidate) >= size
> > > - && qi_vector_mode_supported_p (candidate, m_op))
> > > + && mode_supported_p (candidate, m_op))
> > > return candidate;
> > > }
> > >  }


Re: [PATCH v2] bpf: Improvements in CO-RE builtins implementation.

2023-10-26 Thread Cupertino Miranda

Hi David,

Please find the new version inline right after the inline reply.

>> gcc/ChangeLog:
>> * config/bpf/bpf-passes.def (pass_lower_bpf_core): Added pass.
>
> It may only be due to how the patch is formatted in the attachment
> (everything above the first diff seems to be indented?), but each entry
> here should start with a tab rather than spaces.
>
> Please double-check with contrib/gcc-changelog/git_check_commit.py, it
> will complain if the indentation is wrong.
It did pass the check. I think it might be because of inline attachment.

>   * config/bpf/core-builtins.cc (cr_builtins, is_attr_preserve_access)
>   (core_field_info, bpf_core_get_index): Changed to do blah.
>
> Same thing about the multi-line entry within a single () pair.
>
Corrected!

>> +/* Declaration of target-specific passes for eBPF.
>> +   Copyright (C) 2021-2023 Free Software Foundation, Inc.
>
> This file is new, so just 2023 no?
Yes !

>> -  const char * section_name;
>> +  const char   section_name;
>
> Why this change from char* to char?
>
> This change doesn't make sense given the expressions assigned to
> the variable.  In fact, this does not compile.
>
It was an editor mistake ... caused by the editor sitting in a chair. ;-)
Apologies for this mistake and thanks for catching it.

Thanks,
Cupertino

commit 687b67d82c7d8c6cf5b0e3a9dc61fd4f1e1a1fbb
Author: Cupertino Miranda 
Date:   Tue Aug 8 09:22:41 2023 +0100

bpf: Improvements in CO-RE builtins implementation.

This patch moved the processing of attribute preserve_access_index to
its own independent pass in a gimple lowering pass.
This approach is more consistent with the implementation of the CO-RE
builtins when used explicitly in the code.  The attributed type accesses
are now early converted to __builtin_core_reloc builtin instead of being
kept as an expression in code through out all of the middle-end.
This prevents the compiler from optimizing out or manipulating the
expression using the locally defined type; instead, nothing is assumed
to be known about this expression, as should be the case for all of the
CO-RE relocations.

In the process, also the __builtin_preserve_access_index has been
improved to generate code for more complex expressions that would
require more then one CO-RE relocation.
This turned out to be a requirement, since bpf-next selftests would rely on
loop unrolling in order to convert an undefined index array access into a
defined one. This seemed extreme to expect for the unroll to happen, and for
that reason GCC still generates correct code in such scenarios, even when index
access is never predictable or unrolling does not occur.

gcc/ChangeLog:
* config/bpf/bpf-passes.def (pass_lower_bpf_core): Added pass.
* config/bpf/bpf-protos.h: Added prototype for new pass.
* config/bpf/bpf.cc (bpf_const_not_ok_for_debug_p): New function.
* config/bpf/bpf.md (mov_reloc_core): Prefixed
name with '*'.
* config/bpf/core-builtins.cc (cr_builtins) Added access_node to
struct.
(is_attr_preserve_access): Improved check.
(core_field_info): Make use of root_for_core_field_info
function.
(process_field_expr): Adapted to new functions.
(pack_type): Small improvement.
(bpf_handle_plugin_finish_type): Adapted to GTY(()).
(bpf_init_core_builtins): Changed to new function names.
(construct_builtin_core_reloc): Improved implementation.
(bpf_resolve_overloaded_core_builtin): Changed how
__builtin_preserve_access_index is converted.
(compute_field_expr): Corrected implementation. Added
access_node argument.
(bpf_core_get_index): Added valid argument.
(root_for_core_field_info, pack_field_expr)
(core_expr_with_field_expr_plus_base, make_core_safe_access_index)
(replace_core_access_index_comp_expr, maybe_get_base_for_field_expr)
(core_access_clean, core_is_access_index, core_mark_as_access_index)
(make_gimple_core_safe_access_index, execute_lower_bpf_core)
(make_pass_lower_bpf_core): Added functions.
(pass_data_lower_bpf_core): New pass struct.
(pass_lower_bpf_core): New gimple_opt_pass class.
(pack_field_expr_for_preserve_field)
(bpf_replace_core_move_operands): Removed function.
(bpf_enum_value_kind): Added GTY(()).
* config/bpf/core-builtins.h (bpf_field_info_kind, bpf_type_id_kind)
(bpf_type_info_kind, bpf_enum_value_kind): New enum.
* config/bpf/t-bpf: Added pass bpf-passes.def to PASSES_EXTRA.

gcc/testsuite/ChangeLog:
* gcc.target/bpf/core-attr-5.c: New test.
* gcc.target/bpf/core-attr-6.c: New test.
* gcc.target/bp

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Martin Uecker
Am Donnerstag, dem 26.10.2023 um 10:45 +0200 schrieb Richard Biener:
> On Wed, Oct 25, 2023 at 8:16 PM Martin Uecker  wrote:
> > 
> > Am Mittwoch, dem 25.10.2023 um 13:13 +0200 schrieb Richard Biener:
> > > 
> > > > Am 25.10.2023 um 12:47 schrieb Martin Uecker :
> > > > 
> > > > Am Mittwoch, dem 25.10.2023 um 06:25 -0400 schrieb Siddhesh Poyarekar:
> > > > > > On 2023-10-25 04:16, Martin Uecker wrote:
> > > > > > Am Mittwoch, dem 25.10.2023 um 08:43 +0200 schrieb Richard Biener:
> > > > > > > 
> > > > > > > > Am 24.10.2023 um 22:38 schrieb Martin Uecker :
> > > > > > > > 
> > > > > > > > Am Dienstag, dem 24.10.2023 um 20:30 + schrieb Qing Zhao:
> > > > > > > > > Hi, Sid,
> > > > > > > > > 
> > > > > > > > > Really appreciate for your example and detailed explanation. 
> > > > > > > > > Very helpful.
> > > > > > > > > I think that this example is an excellent example to show 
> > > > > > > > > (almost) all the issues we need to consider.
> > > > > > > > > 
> > > > > > > > > I slightly modified this example to make it to be compilable 
> > > > > > > > > and run-able, as following:
> > > > > > > > > (but I still cannot make the incorrect reordering or DSE 
> > > > > > > > > happening, anyway, the potential reordering possibility is 
> > > > > > > > > there…)
> > > > > > > > > 
> > > > > > > > >  1 #include 
> > > > > > > > >  2 struct A
> > > > > > > > >  3 {
> > > > > > > > >  4  size_t size;
> > > > > > > > >  5  char buf[] __attribute__((counted_by(size)));
> > > > > > > > >  6 };
> > > > > > > > >  7
> > > > > > > > >  8 static size_t
> > > > > > > > >  9 get_size_from (void *ptr)
> > > > > > > > > 10 {
> > > > > > > > > 11  return __builtin_dynamic_object_size (ptr, 1);
> > > > > > > > > 12 }
> > > > > > > > > 13
> > > > > > > > > 14 void
> > > > > > > > > 15 foo (size_t sz)
> > > > > > > > > 16 {
> > > > > > > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
> > > > > > > > > sizeof(char));
> > > > > > > > > 18  obj->size = sz;
> > > > > > > > > 19  obj->buf[0] = 2;
> > > > > > > > > 20  __builtin_printf (“%d\n", get_size_from (obj->buf));
> > > > > > > > > 21  return;
> > > > > > > > > 22 }
> > > > > > > > > 23
> > > > > > > > > 24 int main ()
> > > > > > > > > 25 {
> > > > > > > > > 26  foo (20);
> > > > > > > > > 27  return 0;
> > > > > > > > > 28 }
> > > > > > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > > > > When it’s set I suppose.  Turn
> > > > > > > 
> > > > > > > X.l = n;
> > > > > > > 
> > > > > > > Into
> > > > > > > 
> > > > > > > X.l = __builtin_with_size (x.buf, n);
> > > > > > 
> > > > > > It would turn
> > > > > > 
> > > > > > some_variable = (&) x.buf
> > > > > > 
> > > > > > into
> > > > > > 
> > > > > > some_variable = __builtin_with_size ( (&) x.buf, x.len)
> > > > > > 
> > > > > > 
> > > > > > So the later access to x.buf and not the initialization
> > > > > > of a member of the struct (which is too early).
> > > > > > 
> > > > > 
> > > > > Hmm, so with Qing's example above, are you suggesting the 
> > > > > transformation
> > > > > be to foo like so:
> > > > > 
> > > > > 14 void
> > > > > 15 foo (size_t sz)
> > > > > 16 {
> > > > > 16.5  void * _1;
> > > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
> > > > > sizeof(char));
> > > > > 18  obj->size = sz;
> > > > > 19  obj->buf[0] = 2;
> > > > > 19.5  _1 = __builtin_with_size (obj->buf, obj->size);
> > > > > 20  __builtin_printf (“%d\n", get_size_from (_1));
> > > > > 21  return;
> > > > > 22 }
> > > > > 
> > > > > If yes then this could indeed work.  I think I got thrown off by the
> > > > > reference to __bdos.
> > > > 
> > > > Yes. I think it is important to evaluate the size at the
> > > > access to buf and not at the allocation, because the point is to
> > > > recover it from the size member even when the compiler can't
> > > > see the original allocation.
> > > 
> > > But if the access is through a pointer without the attribute visible
> > > even the Frontend cannot recover?
> > 
> > Yes, if the access is using a struct-with-FAM without the attribute
> > the FE would not insert the builtin.  BDOS could potentially
> > still see the original allocation but if it doesn't, then there is
> > no information.
> > 
> > > We’d need to force type correctness and give up on indirecting
> > > through an int * when it can refer to two different container types.
> > > The best we can do I think is mark allocation sites and hope for
> > > some basic code hygiene (not clobbering size or array pointer
> > > through pointers without the appropriately attributed type)
> > 
> > I do not fully understand what you are referring to.
> 
> struct A { int n; int data[n]; };
> struct B { long n; int data[n]; };
> 
> int *p = flag ? a->data : b->data;
> 
> access *p;
> 
> Since we need to allow interoperability of pointers (a->data is
> convertible to a non-fat pointer of type int *) this leaves us with
> ambiguity we need to conservatively handle to avoid false positives.

For BDOS, I would expect 

Re: [x86 PATCH] PR target/110511: Fix reg allocation for widening multiplications.

2023-10-26 Thread Uros Bizjak
On Wed, Oct 25, 2023 at 4:41 PM Roger Sayle  wrote:
>
> Hi Uros,
>
> I've tried your suggestions to see what would happen.
> Alas, allowing both operands to (i386's) widening multiplications
> to be  nonimmediate_operand results in 90 additional testsuite
> unexpected failures", and 41 unresolved testcase, around things
> like:
>
> gcc.c-torture/compile/di.c:6:1: error: unrecognizable insn:
> (insn 14 13 15 2 (parallel [
> (set (reg:DI 98 [ _3 ])
> (mult:DI (zero_extend:DI (mem/c:SI (plus:SI (reg/f:SI 93 
> virtual-stack-vars)
> (const_int -8 [0xfff8])) [1 a+0 
> S4 A64]))
> (zero_extend:DI (mem/c:SI (plus:SI (reg/f:SI 93 
> virtual-stack-vars)
> (const_int -16 [0xfff0])) [1 b+0 
> S4 A64]
> (clobber (reg:CC 17 flags))
> ]) "gcc.c-torture/compile/di.c":5:12 -1
>  (nil))
> during RTL pass: vregs
> gcc.c-torture/compile/di.c:6:1: internal compiler error: in extract_insn, at 
> recog.cc:2791
>
> In my experiments, I've used nonimmediate_operand instead of general_operand,
> as a zero_extend of an immediate_operand, like const_int, would be 
> non-canonical.
>
> In short, it's ok (common) for '%' to apply to operands with different 
> predicates;
> reload will only swap things if the operand's predicates/constraints remain 
> consistent.
> For example, see i386.c's *add_1 pattern.  And as shown above it can't
> be left to (until) reload to decide which "mem" gets loaded into a register 
> (which
> would be nice), as some passes before reload check both predicates and 
> constraints.
>
> My original patch fixes PR 110511, using the same peephole2 idiom as already
> used elsewhere in i386.md.  Ok for mainline?

Thanks for the explanation. The patch is OK.

> > -Original Message-
> > From: Uros Bizjak 
> > Sent: 19 October 2023 18:02
> > To: Roger Sayle 
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [x86 PATCH] PR target/110511: Fix reg allocation for widening
> > multiplications.
> >
> > On Tue, Oct 17, 2023 at 9:05 PM Roger Sayle 
> > wrote:
> > >
> > >
> > > This patch contains clean-ups of the widening multiplication patterns
> > > in i386.md, and provides variants of the existing highpart
> > > multiplication
> > > peephole2 transformations (that tidy up register allocation after
> > > reload), and thereby fixes PR target/110511, which is a superfluous
> > > move instruction.
> > >
> > > For the new test case, compiled on x86_64 with -O2.
> > >
> > > Before:
> > > mulx64: movabsq $-7046029254386353131, %rcx
> > > movq%rcx, %rax
> > > mulq%rdi
> > > xorq%rdx, %rax
> > > ret
> > >
> > > After:
> > > mulx64: movabsq $-7046029254386353131, %rax
> > > mulq%rdi
> > > xorq%rdx, %rax
> > > ret
> > >
> > > The clean-ups are (i) that operand 1 is consistently made
> > > register_operand and operand 2 becomes nonimmediate_operand, so that
> > > predicates match the constraints, (ii) the representation of the BMI2
> > > mulx instruction is updated to use the new umul_highpart RTX, and
> > > (iii) because operands
> > > 0 and 1 have different modes in widening multiplications, "a" is a
> > > more appropriate constraint than "0" (which avoids spills/reloads
> > > containing SUBREGs).  The new peephole2 transformations are based upon
> > > those at around line 9951 of i386.md, that begins with the comment ;;
> > > Highpart multiplication peephole2s to tweak register allocation.
> > > ;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx  ->  mov imm,%rax; imulq
> > > %rdi
> > >
> > >
> > > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> > > and make -k check, both with and without --target_board=unix{-m32}
> > > with no new failures.  Ok for mainline?
> > >
> > >
> > > 2023-10-17  Roger Sayle  
> > >
> > > gcc/ChangeLog
> > > PR target/110511
> > > * config/i386/i386.md (mul3): Make operands 1 and
> > > 2 take "register_operand" and "nonimmediate_operand" respectively.
> > > (mulqihi3): Likewise.
> > > (*bmi2_umul3_1): Operand 2 needs to be register_operand
> > > matching the %d constraint.  Use umul_highpart RTX to represent
> > > the highpart multiplication.
> > > (*umul3_1):  Operand 2 should use register_operand
> > > predicate, and "a" rather than "0" as operands 0 and 2 have
> > > different modes.
> > > (define_split): For mul to mulx conversion, use the new
> > > umul_highpart RTX representation.
> > > (*mul3_1):  Operand 1 should be register_operand
> > > and the constraint %a as operands 0 and 1 have different modes.
> > > (*mulqihi3_1): Operand 1 should be register_operand matching
> > > the constraint %0.
> > > (define_peephole2): Providing widening multiplication variants
> > > of the peephole2s that tweak highpa

Re: [PATCH] match: Simplify `a != C1 ? abs(a) : C2` when C2 == abs(C1) [PR111957]

2023-10-26 Thread Richard Biener
On Wed, Oct 25, 2023 at 5:37 AM Andrew Pinski  wrote:
>
> This adds a match pattern for `a != C1 ? abs(a) : C2` which gets simplified
> to `abs(a)`. if C1 was originally *_MIN then change it over to use absu 
> instead
> of abs.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> PR tree-optimization/111957
>
> gcc/ChangeLog:
>
> * match.pd (`a != C1 ? abs(a) : C2`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/phi-opt-40.c: New test.
> ---
>  gcc/match.pd   | 10 +
>  gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c | 25 ++
>  2 files changed, 35 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5df04ebba77..370ee35de52 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5622,6 +5622,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (wi::eq_p (wi::bit_not (wi::to_wide (@1)), wi::to_wide (@2)))
>@3))
>
> +/* X != C1 ? abs(X) : C2 simplifies to abs(x) when abs(C1) == C2. */
> +(for op (abs absu)
> + (simplify
> +  (cond (ne @0 INTEGER_CST@1) (op@3 @0) INTEGER_CST@2)
> +  (if (wi::abs (wi::to_wide (@1)) == wi::to_wide (@2))

Why not use

  (cond (ne @0 INTEGER_CST@1) (op@3 @0) @1)

?  OK with that change.

Richard.

> +   (if (op != ABSU_EXPR && wi::only_sign_bit_p (wi::to_wide (@1)))
> +(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
> + (convert (absu:utype @0)))
> +@3
> +
>  /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
> X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
>  (simplify
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> new file mode 100644
> index 000..a9011ce97fb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-phiopt" } */
> +/* PR tree-optimization/111957 */
> +
> +int f(int a)
> +{
> +  if (a)
> +return a > 0 ? a : -a;
> +  return 0;
> +}
> +
> +int f1(int x)
> +{
> +  int intmin = (-1u >> 1);
> +  intmin = -intmin - 1;
> +  if (x != intmin)
> +return x > 0 ? x : -x;
> +  return intmin;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "if " 1 "phiopt1" } } */
> +/* { dg-final { scan-tree-dump-not "if " "phiopt2" } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "phiopt1" } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "phiopt2" } } */
> +/* { dg-final { scan-tree-dump-times "ABSU_EXPR <" 1 "phiopt2" } } */
> --
> 2.34.1
>


Re: [PATCH] DOC: Update COND_LEN document

2023-10-26 Thread Richard Sandiford
Juzhe-Zhong  writes:
> As Richard's suggested. We need to adapt doc for cond_len operations.
>
> gcc/ChangeLog:
>
>   * doc/md.texi: Update document.

Thanks for addressing my comment.  I was thinking about the
pseudo code though.  Currently it is:

for (i = 0; i < ops[4] + ops[5]; i++)
  op0[i] = op1[i] ? @var{op} op2[i] : op3[i];

but I think the outcome of the discussion was that it should instead be:

for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
  op0[i] = (i < ops[4] + ops[5] && op1[i]
? @var{op} op2[i]
: op3[i]);

to match internal-fn.cc.  The binary and ternary optabs would need a
similar update.

Thanks,
Richard

> ---
>  gcc/doc/md.texi | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index daa318ee3da..dd2c26edf7b 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -7400,6 +7400,8 @@ form of @samp{@var{op}@var{mode}2}.
>  @itemx @samp{cond_len_lshr@var{mode}}
>  When operand 1 is true and element index < operand 5 + operand 6, perform an 
> operation on operands 2 and 3 and
>  store the result in operand 0, otherwise store operand 4 in operand 0.
> +operand 4 should be well-defined value for reduction situation, and 
> undefined value for some arithmetic operations
> +e.g. integer division.
>  The operation only works for the operands are vectors.
>  
>  @smallexample


Re: [PATCH] Improve tree_expr_nonnegative_p by using the ranger [PR111959]

2023-10-26 Thread Richard Biener
On Wed, Oct 25, 2023 at 5:51 AM Andrew Pinski  wrote:
>
> I noticed we were missing optimizing `a / (1 << b)` when
> we know that a is nonnegative but only due to ranger information.
> This adds the use of the global ranger to tree_single_nonnegative_warnv_p
> for SSA_NAME.
> I didn't extend tree_single_nonnegative_warnv_p to use the ranger for floating
> point nor to use the local ranger since I am not 100% sure it is safe where
> all of the uses tree_expr_nonnegative_p would be safe.
>
> Note pr80776-1.c testcase fails again due to vrp's bad handling of setting
> global ranges from __builtin_unreachable. It just happened to be optimized
> before due to global ranges not being used as much.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> PR tree-optimization/111959
>
> gcc/ChangeLog:
>
> * fold-const.cc (tree_single_nonnegative_warnv_p): Use
> the global range to see if the SSA_NAME was nonnegative.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/forwprop-42.c: New test.
> * gcc.dg/pr80776-1.c: xfail and update comment.
> ---
>  gcc/fold-const.cc   | 36 +++--
>  gcc/testsuite/gcc.dg/pr80776-1.c|  8 ++---
>  gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c | 15 +
>  3 files changed, 46 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 40767736389..2a2a90230f5 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -15047,15 +15047,33 @@ tree_single_nonnegative_warnv_p (tree t, bool 
> *strict_overflow_p, int depth)
>return RECURSE (TREE_OPERAND (t, 1)) && RECURSE (TREE_OPERAND (t, 2));
>
>  case SSA_NAME:
> -  /* Limit the depth of recursion to avoid quadratic behavior.
> -This is expected to catch almost all occurrences in practice.
> -If this code misses important cases that unbounded recursion
> -would not, passes that need this information could be revised
> -to provide it through dataflow propagation.  */
> -  return (!name_registered_for_update_p (t)
> - && depth < param_max_ssa_name_query_depth
> - && gimple_stmt_nonnegative_warnv_p (SSA_NAME_DEF_STMT (t),
> - strict_overflow_p, depth));
> +  {
> +   /* For integral types, querry the global range if possible. */

query

> +   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
> + {
> +   value_range vr;
> +   if (get_global_range_query ()->range_of_expr (vr, t)
> +   && !vr.varying_p () && !vr.undefined_p ())
> + {
> +   /* If the range is nonnegative, return true. */
> +   if (vr.nonnegative_p ())
> + return true;
> +
> +   /* If the range is non-positive, then return false. */
> +   if (vr.nonpositive_p ())
> + return false;

That's testing for <= 0, nonnegative for >= 0.  This means when
vr.nonpositive_p () the value could still be zero (and nonnegative),
possibly be figured out by the recursion below.

Since we don't have negative_p () do we want to test
nonpositive_p () && nonzero_p () instead?

OK with that change.

Richard.

> + }
> + }
> +   /* Limit the depth of recursion to avoid quadratic behavior.
> +  This is expected to catch almost all occurrences in practice.
> +  If this code misses important cases that unbounded recursion
> +  would not, passes that need this information could be revised
> +  to provide it through dataflow propagation.  */
> +   return (!name_registered_for_update_p (t)
> +   && depth < param_max_ssa_name_query_depth
> +   && gimple_stmt_nonnegative_warnv_p (SSA_NAME_DEF_STMT (t),
> +   strict_overflow_p, 
> depth));
> +  }
>
>  default:
>return tree_simple_nonnegative_warnv_p (TREE_CODE (t), TREE_TYPE (t));
> diff --git a/gcc/testsuite/gcc.dg/pr80776-1.c 
> b/gcc/testsuite/gcc.dg/pr80776-1.c
> index b9bce62d982..f3d47aeda36 100644
> --- a/gcc/testsuite/gcc.dg/pr80776-1.c
> +++ b/gcc/testsuite/gcc.dg/pr80776-1.c
> @@ -18,14 +18,14 @@ Foo (void)
>if (! (0 <= i && i <= 99))
>  __builtin_unreachable ();
>
> -  /* Legacy evrp sets the range of i to [0, MAX] *before* the first 
> conditional,
> +  /* vrp1 sets the range of i to [0, MAX] *before* the first conditional,
>   and to [0,99] *before* the second conditional.  This is because both
> - evrp and VRP use trickery to set global ranges when this particular use 
> of
> + vrp use trickery to set global ranges when this particular use of
>   a __builtin_unreachable is in play (see uses of
>   assert_unreachable_fallthru_edge_p).
>
> - Setting these ranges at the definition site, causes VRP to remove the

Re: [PATCH] [x86_64]: Zhaoxin yongfeng enablement

2023-10-26 Thread Uros Bizjak
On Wed, Oct 25, 2023 at 8:43 AM mayshao  wrote:
>
> Hi all:
> This patch enables -march/-mtune=yongfeng, costs and tunings are set 
> according to the characteristics of the processor. We add a new md file to 
> describe yongfeng processor.
>
> Bootstrapped /regtested X86_64.
>
> Ok for trunk?
> BR
> Mayshao
> gcc/ChangeLog:
>
> * common/config/i386/cpuinfo.h (get_zhaoxin_cpu): Recognize yongfeng.
> * common/config/i386/i386-common.cc: Add yongfeng.
> * common/config/i386/i386-cpuinfo.h (enum processor_subtypes): Add 
> ZHAOXIN_FAM7H_YONGFENG.
> * config.gcc: Add yongfeng.
> * config/i386/driver-i386.cc (host_detect_local_cpu): Let 
> -march=native
> recognize yongfeng processors.
> * config/i386/i386-c.cc (ix86_target_macros_internal): Add yongfeng.
> * config/i386/i386-options.cc (m_YONGFENG): New definition.
> (m_ZHAOXIN): Ditto.
> * config/i386/i386.h (enum processor_type): Add PROCESSOR_YONGFENG.
> * config/i386/i386.md: Add yongfeng.
> * config/i386/lujiazui.md: Fix typo.
> * config/i386/x86-tune-costs.h (struct processor_costs): Add yongfeng 
> costs.
> * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add yongfeng.
> (ix86_adjust_cost): Ditto.
> * config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Replace m_LUJIAZUI by 
> m_ZHAOXIN.
> (X86_TUNE_PARTIAL_REG_DEPENDENCY): Ditto.
> (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Ditto.
> (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Ditto.
> (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Ditto.
> (X86_TUNE_MOVX): Ditto.
> (X86_TUNE_MEMORY_MISMATCH_STALL): Ditto.
> (X86_TUNE_FUSE_CMP_AND_BRANCH_32): Ditto.
> (X86_TUNE_FUSE_CMP_AND_BRANCH_64): Ditto.
> (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS): Ditto.
> (X86_TUNE_FUSE_ALU_AND_BRANCH): Ditto.
> (X86_TUNE_ACCUMULATE_OUTGOING_ARGS): Ditto.
> (X86_TUNE_USE_LEAVE): Ditto.
> (X86_TUNE_PUSH_MEMORY): Ditto.
> (X86_TUNE_LCP_STALL): Ditto.
> (X86_TUNE_INTEGER_DFMODE_MOVES): Ditto.
> (X86_TUNE_OPT_AGU): Ditto.
> (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB): Ditto.
> (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Ditto.
> (X86_TUNE_USE_SAHF): Ditto.
> (X86_TUNE_USE_BT): Ditto.
> (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Ditto.
> (X86_TUNE_ONE_IF_CONV_INSN): Ditto.
> (X86_TUNE_AVOID_MFENCE): Ditto.
> (X86_TUNE_EXPAND_ABS): Ditto.
> (X86_TUNE_USE_SIMODE_FIOP): Ditto.
> (X86_TUNE_USE_FFREEP): Ditto.
> (X86_TUNE_EXT_80387_CONSTANTS): Ditto.
> (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Ditto.
> (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Ditto.
> (X86_TUNE_SSE_TYPELESS_STORES): Ditto.
> (X86_TUNE_SSE_LOAD0_BY_PXOR): Ditto.
> (X86_TUNE_USE_GATHER_2PARTS): Add m_YONGFENG.
> (X86_TUNE_USE_GATHER_4PARTS): Ditto.
> (X86_TUNE_USE_GATHER_8PARTS): Ditto.
> (X86_TUNE_AVOID_128FMA_CHAINS): Ditto.
> * doc/extend.texi: Add details about yongfeng.
> * doc/invoke.texi: Ditto.
> * config/i386/yongfeng.md: New file for decribing yongfeng processor.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/i386/mv32.C: Handle new march.
> * gcc.target/i386/funcspec-56.inc: Ditto.

LGTM.

There are a couple of comments that need to be fixed; please see inline.

BTW: A couple of days ago, I added a new tuning flag [1]. I
considered Zhaoxin cores a modern core, but please review the new flag
anyway.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634280.html

Thanks,
Uros.

> ---
>  gcc/common/config/i386/cpuinfo.h  |   6 +
>  gcc/common/config/i386/i386-common.cc |  10 +-
>  gcc/common/config/i386/i386-cpuinfo.h |   1 +
>  gcc/config.gcc|  12 +-
>  gcc/config/i386/driver-i386.cc|   5 +
>  gcc/config/i386/i386-c.cc |   7 +
>  gcc/config/i386/i386-options.cc   |   3 +
>  gcc/config/i386/i386.h|   9 +
>  gcc/config/i386/i386.md   |   3 +-
>  gcc/config/i386/lujiazui.md   |   2 +-
>  gcc/config/i386/x86-tune-costs.h  | 116 +++
>  gcc/config/i386/x86-tune-sched.cc |  27 +-
>  gcc/config/i386/x86-tune.def  |  75 +-
>  gcc/config/i386/yongfeng.md   | 848 ++
>  gcc/doc/extend.texi   |   3 +
>  gcc/doc/invoke.texi   |   6 +
>  gcc/testsuite/g++.target/i386/mv32.C  |   5 +
>  gcc/testsuite/gcc.target/i386/funcspec-56.inc |   6 +-
>  18 files changed, 1095 insertions(+), 49 deletions(-)
>  create mode 100644 gcc/config/i386/yongfeng.md
>
> diff --git a/gcc/common/config/i386/cpuinfo.h 
> b/gcc/common/config/i386/cpuinfo.h
> index

Re: [PATCH 2/5] Support for CodeView debugging format

2023-10-26 Thread Richard Biener
On Mon, Oct 23, 2023 at 2:57 AM Mark Harmstone  wrote:
>
> This patch and the following add initial support for Microsoft's
> CodeView debugging format, as used by MSVC, to mingw targets.

A high-level question - it seems there's almost no information in the
codeview sections,
so is that debug format even inferior to STABS?  Is it even used with
contemporary
toolchains or is DWARF a thing with MSVC?

If CodeView is as full-featured as DWARF you are going to run into issues with
how we handle LTO given at dwarf2out_finish time all the DWARF for types and
declarations is "gone" (to disk).  For that post-processing the binary would be
much easier.

Richard.

> Note that you will need a recent version of binutils for this to be
> useful. The best way to view the output is to run Microsoft's
> cvdump.exe, found in their microsoft-pdb repo on GitHub, against the
> object files.
> ---
>  gcc/Makefile.in   |  2 +
>  gcc/config/i386/cygming.h |  2 +
>  gcc/dwarf2codeview.cc | 50 +++
>  gcc/dwarf2codeview.h  | 30 +++
>  gcc/dwarf2out.cc  |  4 ++
>  gcc/flag-types.h  |  3 ++
>  gcc/flags.h   |  4 ++
>  gcc/opts.cc   | 23 +++--
>  .../gcc.dg/debug/codeview/codeview-1.c|  6 +++
>  .../gcc.dg/debug/codeview/codeview.exp| 48 ++
>  gcc/toplev.cc |  4 ++
>  11 files changed, 171 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/dwarf2codeview.cc
>  create mode 100644 gcc/dwarf2codeview.h
>  create mode 100644 gcc/testsuite/gcc.dg/debug/codeview/codeview-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/debug/codeview/codeview.exp
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index a25a1e32fbc..d011946379d 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1428,6 +1428,7 @@ OBJS = \
> dumpfile.o \
> dwarf2asm.o \
> dwarf2cfi.o \
> +   dwarf2codeview.o \
> dwarf2ctf.o \
> dwarf2out.o \
> early-remat.o \
> @@ -2794,6 +2795,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h 
> $(srcdir)/coretypes.h \
>$(srcdir)/dwarf2out.h \
>$(srcdir)/dwarf2asm.cc \
>$(srcdir)/dwarf2cfi.cc \
> +  $(srcdir)/dwarf2codeview.cc \
>$(srcdir)/dwarf2ctf.cc \
>$(srcdir)/dwarf2out.cc \
>$(srcdir)/ctfc.h \
> diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
> index d539f8d0699..a141462133b 100644
> --- a/gcc/config/i386/cygming.h
> +++ b/gcc/config/i386/cygming.h
> @@ -20,6 +20,8 @@ along with GCC; see the file COPYING3.  If not see
>
>  #define DWARF2_DEBUGGING_INFO 1
>
> +#define CODEVIEW_DEBUGGING_INFO 1
> +
>  #undef PREFERRED_DEBUGGING_TYPE
>  #define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
>
> diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
> new file mode 100644
> index 000..e2bfdf8efeb
> --- /dev/null
> +++ b/gcc/dwarf2codeview.cc
> @@ -0,0 +1,50 @@
> +/* Generate CodeView debugging info from the GCC DWARF.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +.  */
> +
> +/* See gas/codeview.h in binutils for more about the constants and structs
> +   listed below.  References to Microsoft files refer to Microsoft's PDB
> +   repository: https://github.com/microsoft/microsoft-pdb.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "target.h"
> +#include "output.h"
> +#include "errors.h"
> +#include "md5.h"
> +#include "function.h"
> +#include "version.h"
> +#include "tree.h"
> +#include "langhooks.h"
> +#include "dwarf2out.h"
> +#include "dwarf2codeview.h"
> +
> +#define CV_SIGNATURE_C13   4
> +
> +/* Finish CodeView debug info emission.  */
> +
> +void
> +codeview_debug_finish (void)
> +{
> +  targetm.asm_out.named_section (".debug$S", SECTION_DEBUG, NULL);
> +
> +  fputs (integer_asm_op (4, false), asm_out_file);
> +  fprint_whex (asm_out_file, CV_SIGNATURE_C13);
> +  putc ('\n', asm_out_file);
> +}
> diff --git a/gcc/dwarf2codeview.h b/gcc/dwarf2codeview.h
> new file mode 100644
> index 000..efda148eb49
> --- /dev/null
> +++ b/gcc/dwarf2codeview.h
> @@ -0,0 +1,30 @@
> +/* dwarf2codeview.h - DWARF interface for

Re: [PATCH, OpenACC 2.7, v2] readonly modifier support in front-ends

2023-10-26 Thread Thomas Schwinge
Hi!

On 2023-08-07T21:58:27+0800, Chung-Lin Tang  wrote:
> here's the updated v2 of the readonly modifier front-end patch.

Thanks.


 +++ b/gcc/c/c-parser.cc
 @@ -14059,7 +14059,8 @@ c_parser_omp_variable_list (c_parser *parser,

   static tree
   c_parser_omp_var_list_parens (c_parser *parser, enum omp_clause_code 
 kind,
 -   tree list, bool allow_deref = false)
 +   tree list, bool allow_deref = false,
 +   bool *readonly = NULL)
 ...
>>> Instead of doing this in 'c_parser_omp_var_list_parens', I think it's
>>> clearer to have this special 'readonly :' parsing logic in the two places
>>> where it's used.

> On 2023/7/20 11:08 PM, Tobias Burnus wrote:
>> I concur. [...]
>
> Okay, I've changed the C/C++ parser parts to have the parsing logic directly
> added.

These parts now looks good to me, with one remark for the C front end
changes, see below.


 +++ b/gcc/fortran/gfortran.h
 @@ -1360,7 +1360,11 @@ typedef struct gfc_omp_namelist
   {
 gfc_omp_reduction_op reduction_op;
 gfc_omp_depend_doacross_op depend_doacross_op;
 -  gfc_omp_map_op map_op;
 +  struct
 +{
 +   ENUM_BITFIELD (gfc_omp_map_op) map_op:8;
 +   bool readonly;
 +};
 gfc_expr *align;
 struct
{
>>> [...] Thus, the above looks good to me.
>> I concur but I wonder whether it would be cleaner to name the struct;
>> this makes it also more obvious what belongs together in the union.
>>
>> Namely, naming the struct 'map' and then changing the 45 users from
>> 'u.map_op' to 'u.map.op' and the new 'u.readonly' to 'u.map.readonly'. –
>> this seems to be cleaner.
>
> I've adjusted 'u.map' to be a named struct now, and updated the references.

I like that, thanks.  (Tobias, to reduce the volume of this patch here,
please let us know if the 'map_op' -> 'map.op' mass-change should be done
separately and go into master branch already, instead of as part of this
patch.)


>>> + if (gfc_match ("readonly :") == MATCH_YES)
>>> I note this one does not have a space after ':' in 'gfc_match', but the
>>> one above in 'gfc_match_omp_clauses' does.  I don't know off-hand if that
>>> makes a difference in parsing -- probably not, as all of
>>> 'gcc/fortran/openmp.cc' generally doesn't seem to be very consistent
>>> about these two variants?
>> It *does* make a difference. And for obvious reasons. You don't want to 
>> permit:
>>
>>!$acc kernels asnyccopy(a)
>>
>> but require at least one space (or comma) between "async" and "copy"..
>> (In fixed form Fortran, it would be fine - as would be "!$acc k e nelsasy nc 
>> co p y(a)".)
>>
>> A " " matches zero or more whitespaces, but with gfc_match_space you can 
>> find out
>> whether there was whitespace or not.

OK, I generally follow -- but does this rationale also apply in this case
here, concerning space after ':'?

> Okay, made sure both are 'gfc_match ("readonly : ")'. Thanks for catching 
> that, didn't
> realize that space was significant.


 +++ b/gcc/tree.h
 @@ -1813,6 +1813,14 @@ class auto_suppress_location_wrappers
   #define OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE(NODE) \
 (OMP_CLAUSE_SUBCODE_CHECK (NODE, 
 OMP_CLAUSE_MAP)->base.addressable_flag)

 +/* Nonzero if OpenACC 'readonly' modifier set, used for 'copyin'.  */
 +#define OMP_CLAUSE_MAP_READONLY(NODE) \
 +  TREE_READONLY (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
 +
 +/* Same as above, for use in OpenACC cache directives.  */
 +#define OMP_CLAUSE__CACHE__READONLY(NODE) \
 +  TREE_READONLY (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CACHE_))
>>> I'm not sure if these special accessor functions are actually useful, or
>>> we should just directly use 'TREE_READONLY' instead?  We're only using
>>> them in contexts where it's clear that the 'OMP_CLAUSE_SUBCODE_CHECK' is
>>> satisfied, for example.
>> I find directly using TREE_READONLY confusing.
>
> FWIW, I've changed to use TREE_NOTHROW instead, if it can give a better sense 
> of safety :P

I don't understand that, why not use 'TREE_READONLY'?

> I think there's a misunderstanding here anyways: we are not relying on a DECL 
> marked
> TREE_READONLY here. We merely need the OMP_CLAUSE_MAP to be marked as 
> OMP_CLAUSE_MAP_READONLY == 1.

Yes, I understand that.  My question was why we don't just use
'TREE_READONLY (c)', where 'c' is the
'OMP_CLAUSE_MAP'/'OMP_CLAUSE__CACHE_' clause (not its decl), and avoid
the indirection through
'#define OMP_CLAUSE_MAP_READONLY'/'#define OMP_CLAUSE__CACHE__READONLY',
given that we're only using them in contexts where it's clear that the
'OMP_CLAUSE_SUBCODE_CHECK' is satisfied.  I don't have a strong
preference, though.

Either way, you still need to document this:

| Also, for the new use for OMP clauses, update 'gcc/tree.h:TREE_READONLY',
| and in 'gcc/tree-cor

Re: Re: [PATCH] DOC: Update COND_LEN document

2023-10-26 Thread juzhe.zh...@rivai.ai
Hi, Richard.

I tried your pseudo code, but report a warning:

../../../../gcc/gcc/doc/invoke.texi:20243: warning: `.' or `,' must follow 
@xref, not )

I have tried several changes, still failed to eliminate this warning.


juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-10-26 17:25
To: Juzhe-Zhong
CC: gcc-patches; rguenther
Subject: Re: [PATCH] DOC: Update COND_LEN document
Juzhe-Zhong  writes:
> As Richard's suggested. We need to adapt doc for cond_len operations.
>
> gcc/ChangeLog:
>
> * doc/md.texi: Update document.
 
Thanks for addressing my comment.  I was thinking about the
pseudo code though.  Currently it is:
 
for (i = 0; i < ops[4] + ops[5]; i++)
  op0[i] = op1[i] ? @var{op} op2[i] : op3[i];
 
but I think the outcome of the discussion was that it should instead be:
 
for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
  op0[i] = (i < ops[4] + ops[5] && op1[i]
? @var{op} op2[i]
: op3[i]);
 
to match internal-fn.cc.  The binary and ternary optabs would need a
similar update.
 
Thanks,
Richard
 
> ---
>  gcc/doc/md.texi | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index daa318ee3da..dd2c26edf7b 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -7400,6 +7400,8 @@ form of @samp{@var{op}@var{mode}2}.
>  @itemx @samp{cond_len_lshr@var{mode}}
>  When operand 1 is true and element index < operand 5 + operand 6, perform an 
> operation on operands 2 and 3 and
>  store the result in operand 0, otherwise store operand 4 in operand 0.
> +operand 4 should be well-defined value for reduction situation, and 
> undefined value for some arithmetic operations
> +e.g. integer division.
>  The operation only works for the operands are vectors.
>  
>  @smallexample
 


[PATCH V2] DOC: Update COND_LEN document

2023-10-26 Thread Juzhe-Zhong
gcc/ChangeLog:

* doc/md.texi: Adapt COND_LEN pseudo code.

---
 gcc/doc/md.texi | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index daa318ee3da..fab2513105a 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7341,8 +7341,10 @@ store the result in operand 0, otherwise store operand 2 
in operand 0.
 The operation only works for the operands are vectors.
 
 @smallexample
-for (i = 0; i < ops[4] + ops[5]; i++)
-  op0[i] = op1[i] ? @var{op} op2[i] : op3[i];
+for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
+  op0[i] = (i < ops[4] + ops[5] && op1[i]
+? @var{op} op2[i]
+: op3[i]);
 @end smallexample
 
 where, for example, @var{op} is @code{~} for 
@samp{cond_len_one_cmpl@var{mode}}.
@@ -7403,8 +7405,10 @@ store the result in operand 0, otherwise store operand 4 
in operand 0.
 The operation only works for the operands are vectors.
 
 @smallexample
-for (i = 0; i < ops[5] + ops[6]; i++)
-  op0[i] = op1[i] ? op2[i] @var{op} op3[i] : op4[i];
+for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
+  op0[i] = (i < ops[5] + ops[6] && op1[i]
+? op2[i] @var{op} op3[i]
+: op4[i]);
 @end smallexample
 
 where, for example, @var{op} is @code{+} for @samp{cond_len_add@var{mode}}.
@@ -7436,8 +7440,10 @@ takes 3 operands rather than two.  For example, the 
vector form of
 @samp{cond_len_fma@var{mode}} is equivalent to:
 
 @smallexample
-for (i = 0; i < ops[6] + ops[7]; i++)
-  op0[i] = op1[i] ? fma (op2[i], op3[i], op4[i]) : op5[i];
+for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
+  op0[i] = (i < ops[6] + ops[7] && op1[i]
+? fma (op2[i], op3[i], op4[i])
+: op5[i]);
 @end smallexample
 
 @cindex @code{neg@var{mode}cc} instruction pattern
-- 
2.36.3



Re: Re: [PATCH] DOC: Update COND_LEN document

2023-10-26 Thread juzhe.zh...@rivai.ai
Oh. I made a mistake here.

Forget about my last email.

https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634376.html 
Here is the V2 address comments as you suggested.

Could you take a look ?

Thanks.



juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-10-26 17:25
To: Juzhe-Zhong
CC: gcc-patches; rguenther
Subject: Re: [PATCH] DOC: Update COND_LEN document
Juzhe-Zhong  writes:
> As Richard's suggested. We need to adapt doc for cond_len operations.
>
> gcc/ChangeLog:
>
> * doc/md.texi: Update document.
 
Thanks for addressing my comment.  I was thinking about the
pseudo code though.  Currently it is:
 
for (i = 0; i < ops[4] + ops[5]; i++)
  op0[i] = op1[i] ? @var{op} op2[i] : op3[i];
 
but I think the outcome of the discussion was that it should instead be:
 
for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
  op0[i] = (i < ops[4] + ops[5] && op1[i]
? @var{op} op2[i]
: op3[i]);
 
to match internal-fn.cc.  The binary and ternary optabs would need a
similar update.
 
Thanks,
Richard
 
> ---
>  gcc/doc/md.texi | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index daa318ee3da..dd2c26edf7b 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -7400,6 +7400,8 @@ form of @samp{@var{op}@var{mode}2}.
>  @itemx @samp{cond_len_lshr@var{mode}}
>  When operand 1 is true and element index < operand 5 + operand 6, perform an 
> operation on operands 2 and 3 and
>  store the result in operand 0, otherwise store operand 4 in operand 0.
> +operand 4 should be well-defined value for reduction situation, and 
> undefined value for some arithmetic operations
> +e.g. integer division.
>  The operation only works for the operands are vectors.
>  
>  @smallexample
 


Re: [PATCH V2] DOC: Update COND_LEN document

2023-10-26 Thread Richard Sandiford
Juzhe-Zhong  writes:
> gcc/ChangeLog:
>
>   * doc/md.texi: Adapt COND_LEN pseudo code.

OK.  Given your earlier message, I'd just finished writing & testing
the same patch.

Richard

> ---
>  gcc/doc/md.texi | 18 --
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index daa318ee3da..fab2513105a 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -7341,8 +7341,10 @@ store the result in operand 0, otherwise store operand 
> 2 in operand 0.
>  The operation only works for the operands are vectors.
>  
>  @smallexample
> -for (i = 0; i < ops[4] + ops[5]; i++)
> -  op0[i] = op1[i] ? @var{op} op2[i] : op3[i];
> +for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
> +  op0[i] = (i < ops[4] + ops[5] && op1[i]
> +? @var{op} op2[i]
> +: op3[i]);
>  @end smallexample
>  
>  where, for example, @var{op} is @code{~} for 
> @samp{cond_len_one_cmpl@var{mode}}.
> @@ -7403,8 +7405,10 @@ store the result in operand 0, otherwise store operand 
> 4 in operand 0.
>  The operation only works for the operands are vectors.
>  
>  @smallexample
> -for (i = 0; i < ops[5] + ops[6]; i++)
> -  op0[i] = op1[i] ? op2[i] @var{op} op3[i] : op4[i];
> +for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
> +  op0[i] = (i < ops[5] + ops[6] && op1[i]
> +? op2[i] @var{op} op3[i]
> +: op4[i]);
>  @end smallexample
>  
>  where, for example, @var{op} is @code{+} for @samp{cond_len_add@var{mode}}.
> @@ -7436,8 +7440,10 @@ takes 3 operands rather than two.  For example, the 
> vector form of
>  @samp{cond_len_fma@var{mode}} is equivalent to:
>  
>  @smallexample
> -for (i = 0; i < ops[6] + ops[7]; i++)
> -  op0[i] = op1[i] ? fma (op2[i], op3[i], op4[i]) : op5[i];
> +for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
> +  op0[i] = (i < ops[6] + ops[7] && op1[i]
> +? fma (op2[i], op3[i], op4[i])
> +: op5[i]);
>  @end smallexample
>  
>  @cindex @code{neg@var{mode}cc} instruction pattern


Re: Re: [PATCH V2] DOC: Update COND_LEN document

2023-10-26 Thread juzhe.zh...@rivai.ai
Thanks Richard. Committed.



juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-10-26 17:56
To: Juzhe-Zhong
CC: gcc-patches; rguenther
Subject: Re: [PATCH V2] DOC: Update COND_LEN document
Juzhe-Zhong  writes:
> gcc/ChangeLog:
>
> * doc/md.texi: Adapt COND_LEN pseudo code.
 
OK.  Given your earlier message, I'd just finished writing & testing
the same patch.
 
Richard
 
> ---
>  gcc/doc/md.texi | 18 --
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index daa318ee3da..fab2513105a 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -7341,8 +7341,10 @@ store the result in operand 0, otherwise store operand 
> 2 in operand 0.
>  The operation only works for the operands are vectors.
>  
>  @smallexample
> -for (i = 0; i < ops[4] + ops[5]; i++)
> -  op0[i] = op1[i] ? @var{op} op2[i] : op3[i];
> +for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
> +  op0[i] = (i < ops[4] + ops[5] && op1[i]
> +? @var{op} op2[i]
> +: op3[i]);
>  @end smallexample
>  
>  where, for example, @var{op} is @code{~} for 
> @samp{cond_len_one_cmpl@var{mode}}.
> @@ -7403,8 +7405,10 @@ store the result in operand 0, otherwise store operand 
> 4 in operand 0.
>  The operation only works for the operands are vectors.
>  
>  @smallexample
> -for (i = 0; i < ops[5] + ops[6]; i++)
> -  op0[i] = op1[i] ? op2[i] @var{op} op3[i] : op4[i];
> +for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
> +  op0[i] = (i < ops[5] + ops[6] && op1[i]
> +? op2[i] @var{op} op3[i]
> +: op4[i]);
>  @end smallexample
>  
>  where, for example, @var{op} is @code{+} for @samp{cond_len_add@var{mode}}.
> @@ -7436,8 +7440,10 @@ takes 3 operands rather than two.  For example, the 
> vector form of
>  @samp{cond_len_fma@var{mode}} is equivalent to:
>  
>  @smallexample
> -for (i = 0; i < ops[6] + ops[7]; i++)
> -  op0[i] = op1[i] ? fma (op2[i], op3[i], op4[i]) : op5[i];
> +for (i = 0; i < GET_MODE_NUNITS (@var{m}); i++)
> +  op0[i] = (i < ops[6] + ops[7] && op1[i]
> +? fma (op2[i], op3[i], op4[i])
> +: op5[i]);
>  @end smallexample
>  
>  @cindex @code{neg@var{mode}cc} instruction pattern
 


Re: [PATCH v2 3/4] build: Add libgrust as compilation modules

2023-10-26 Thread Arthur Cohen

Hi Thomas,

On 10/25/23 23:40, Thomas Schwinge wrote:

Hi!

On 2023-10-25T13:06:48+0200, Arthur Cohen  wrote:

From: Pierre-Emmanuel Patry 

Define the libgrust directory as a host compilation module as well as
for targets.


I don't see a response to Richard's comments:
.
Re "doesn't build libgrust if [Rust is not enabled]", I suppose (but have
not checked) this works for the *target* libgrust module via
'gcc/rust/config-lang.in:target_libs' requesting 'target-libgrust' only
if the Rust language is enabled?  I don't know what enables/disables the
*host* libgrust build?


My comments:

don't seem to have been addressed?


Sorry about that! The fixup commits you mentioned should have been 
already integrated into the commits I pushed. I seem to have messed 
something up in my branch, as your last comment should have been 
addressed too - I'll resend clean commits where all of this is checked.





Also, don't you first have to get in "build: Regenerate build files"
before the 'gcc/rust/config-lang.in:target_libs' change, to avoid
breaking bisection?

Maybe, after all, don't have a separate "build: Regenerate build files"
commit, but instead do the regeneration as part of the commits adding the
source files, in usual GCC style?


Alright, that sounds good.



And then, the 'contrib/gcc_update' change that's currently in
"libgrust: Add entry for maintainers and stub changelog file" should be
part of the commit that actually adds the files referenced therein.


Okay!

Thanks for the review. I'll address these shortly.




Grüße
  Thomas



ChangeLog:

   * Makefile.def: Add libgrust as host & target module.
   * configure.ac: Add libgrust to host tools list.

gcc/rust/ChangeLog:

   * config-lang.in: Add libgrust as a target module for the rust
   language.

Signed-off-by: Pierre-Emmanuel Patry 
---
  Makefile.def| 2 ++
  configure.ac| 3 ++-
  gcc/rust/config-lang.in | 2 ++
  3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Makefile.def b/Makefile.def
index 15c068e4ac4..929a6f0a08e 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -149,6 +149,7 @@ host_modules= { module= libcc1; 
extra_configure_flags=--enable-shared; };
  host_modules= { module= gotools; };
  host_modules= { module= libctf; bootstrap=true; };
  host_modules= { module= libsframe; bootstrap=true; };
+host_modules= { module= libgrust; };

  target_modules = { module= libstdc++-v3;
  bootstrap=true;
@@ -192,6 +193,7 @@ target_modules = { module= libgm2; lib_path=.libs; };
  target_modules = { module= libgomp; bootstrap= true; lib_path=.libs; };
  target_modules = { module= libitm; lib_path=.libs; };
  target_modules = { module= libatomic; bootstrap=true; lib_path=.libs; };
+target_modules = { module= libgrust; };

  // These are (some of) the make targets to be done in each subdirectory.
  // Not all; these are the ones which don't have special options.
diff --git a/configure.ac b/configure.ac
index 692dc716343..b2a5511bab1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -133,7 +133,7 @@ build_tools="build-texinfo build-flex build-bison build-m4 
build-fixincludes"

  # these libraries are used by various programs built for the host environment
  #f
-host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace 
libcpp libcody libdecnumber gmp mpfr mpc isl libiconv libctf libsframe"
+host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace 
libcpp libcody libdecnumber gmp mpfr mpc isl libiconv libctf libsframe libgrust "

  # these tools are built for the host environment
  # Note, the powerpc-eabi build depends on sim occurring before gdb in order to
@@ -164,6 +164,7 @@ target_libraries="target-libgcc \
   target-libada \
   target-libgm2 \
   target-libgo \
+ target-libgrust \
   target-libphobos \
   target-zlib"

diff --git a/gcc/rust/config-lang.in b/gcc/rust/config-lang.in
index aac66c9b962..8f071dcb0bf 100644
--- a/gcc/rust/config-lang.in
+++ b/gcc/rust/config-lang.in
@@ -29,4 +29,6 @@ compilers="rust1\$(exeext)"

  build_by_default="no"

+target_libs="target-libffi target-libbacktrace target-libgrust"
+
  gtfiles="\$(srcdir)/rust/rust-lang.cc"
--
2.42.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Martin Uecker
Am Donnerstag, dem 26.10.2023 um 11:20 +0200 schrieb Martin Uecker:
> Am Donnerstag, dem 26.10.2023 um 10:45 +0200 schrieb Richard Biener:
> > On Wed, Oct 25, 2023 at 8:16 PM Martin Uecker  wrote:
> > > 
> > > Am Mittwoch, dem 25.10.2023 um 13:13 +0200 schrieb Richard Biener:
> > > > 
> > > > > Am 25.10.2023 um 12:47 schrieb Martin Uecker :
> > > > > 
> > > > > Am Mittwoch, dem 25.10.2023 um 06:25 -0400 schrieb Siddhesh 
> > > > > Poyarekar:
> > > > > > > On 2023-10-25 04:16, Martin Uecker wrote:
> > > > > > > Am Mittwoch, dem 25.10.2023 um 08:43 +0200 schrieb Richard Biener:
> > > > > > > > 
> > > > > > > > > Am 24.10.2023 um 22:38 schrieb Martin Uecker 
> > > > > > > > > :
> > > > > > > > > 
> > > > > > > > > Am Dienstag, dem 24.10.2023 um 20:30 + schrieb Qing Zhao:
> > > > > > > > > > Hi, Sid,
> > > > > > > > > > 
> > > > > > > > > > Really appreciate for your example and detailed 
> > > > > > > > > > explanation. Very helpful.
> > > > > > > > > > I think that this example is an excellent example to show 
> > > > > > > > > > (almost) all the issues we need to consider.
> > > > > > > > > > 
> > > > > > > > > > I slightly modified this example to make it to be 
> > > > > > > > > > compilable and run-able, as following:
> > > > > > > > > > (but I still cannot make the incorrect reordering or DSE 
> > > > > > > > > > happening, anyway, the potential reordering possibility is 
> > > > > > > > > > there…)
> > > > > > > > > > 
> > > > > > > > > >  1 #include 
> > > > > > > > > >  2 struct A
> > > > > > > > > >  3 {
> > > > > > > > > >  4  size_t size;
> > > > > > > > > >  5  char buf[] __attribute__((counted_by(size)));
> > > > > > > > > >  6 };
> > > > > > > > > >  7
> > > > > > > > > >  8 static size_t
> > > > > > > > > >  9 get_size_from (void *ptr)
> > > > > > > > > > 10 {
> > > > > > > > > > 11  return __builtin_dynamic_object_size (ptr, 1);
> > > > > > > > > > 12 }
> > > > > > > > > > 13
> > > > > > > > > > 14 void
> > > > > > > > > > 15 foo (size_t sz)
> > > > > > > > > > 16 {
> > > > > > > > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz 
> > > > > > > > > > * sizeof(char));
> > > > > > > > > > 18  obj->size = sz;
> > > > > > > > > > 19  obj->buf[0] = 2;
> > > > > > > > > > 20  __builtin_printf (“%d\n", get_size_from (obj->buf));
> > > > > > > > > > 21  return;
> > > > > > > > > > 22 }
> > > > > > > > > > 23
> > > > > > > > > > 24 int main ()
> > > > > > > > > > 25 {
> > > > > > > > > > 26  foo (20);
> > > > > > > > > > 27  return 0;
> > > > > > > > > > 28 }
> > > > > > > > > > 
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > > > When it’s set I suppose.  Turn
> > > > > > > > 
> > > > > > > > X.l = n;
> > > > > > > > 
> > > > > > > > Into
> > > > > > > > 
> > > > > > > > X.l = __builtin_with_size (x.buf, n);
> > > > > > > 
> > > > > > > It would turn
> > > > > > > 
> > > > > > > some_variable = (&) x.buf
> > > > > > > 
> > > > > > > into
> > > > > > > 
> > > > > > > some_variable = __builtin_with_size ( (&) x.buf. x.len)
> > > > > > > 
> > > > > > > 
> > > > > > > So the later access to x.buf and not the initialization
> > > > > > > of a member of the struct (which is too early).
> > > > > > > 
> > > > > > 
> > > > > > Hmm, so with Qing's example above, are you suggesting the 
> > > > > > transformation
> > > > > > be to foo like so:
> > > > > > 
> > > > > > 14 void
> > > > > > 15 foo (size_t sz)
> > > > > > 16 {
> > > > > > 16.5  void * _1;
> > > > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
> > > > > > sizeof(char));
> > > > > > 18  obj->size = sz;
> > > > > > 19  obj->buf[0] = 2;
> > > > > > 19.5  _1 = __builtin_with_size (obj->buf, obj->size);
> > > > > > 20  __builtin_printf (“%d\n", get_size_from (_1));
> > > > > > 21  return;
> > > > > > 22 }
> > > > > > 
> > > > > > If yes then this could indeed work.  I think I got thrown off by the
> > > > > > reference to __bdos.
> > > > > 
> > > > > Yes. I think it is important not to evaluate the size at the
> > > > > access to buf and not the allocation, because the point is to
> > > > > recover it from the size member even when the compiler can't
> > > > > see the original allocation.
> > > > 
> > > > But if the access is through a pointer without the attribute visible
> > > > even the Frontend cannot recover?
> > > 
> > > Yes, if the access is using a struct-with-FAM without the attribute
> > > the FE would not be insert the builtin.  BDOS could potentially
> > > still see the original allocation but if it doesn't, then there is
> > > no information.
> > > 
> > > > We’d need to force type correctness and give up on indirecting
> > > > through an int * when it can refer to two diffenent container types.
> > > > The best we can do I think is mark allocation sites and hope for
> > > > some basic code hygiene (not clobbering size or array pointer
> > > > through pointers without the appropriately attributed type)
> > > 
> > > I am do not fully understand what you are referring to.
> > 
> > struct A { int n; i

Re: [PATCH][_Hashtable] Use RAII to restore Rehash state

2023-10-26 Thread Jonathan Wakely
On Thu, 26 Oct 2023 at 06:18, François Dumont  wrote:

>  libstdc++: [_Hashtable] Use RAII type to manage rehash functor state
>
>  Replace usage of __try/__catch with a RAII type to restore rehash
> functor
>  state when needed.
>

I'm reviewing this now, but could I request that you attach patches as .txt
files in gmail please?

When you attach a .patch file gmail decides to give it content-type
text/x-patch and base64 encode it, and set content-disposition: attachment,
which mean it looks like this when received:

https://inbox.sourceware.org/libstdc++/7f61df18-dd99-4ff5-9fcd-8ca782040...@gmail.com/raw

It's hard to reply inline when the patch needs to be downloaded separately.

If you name the file .txt then gmail just shows it in the mail body
(content-disposition: inline) and it's much easier to reply.

Anyway, I'll finish reviewing this one now that I've downloaded it and
manually pasted the patch into my reply.



>
>  libstdc++-v3/ChangeLog:
>
>  * include/bits/hashtable_policy.h (_RehashStateGuard): New.
>  (_Insert_base<>::_M_insert_range(_IIt, _IIt, const
> _NodeGet&, false_type)):
>  Adapt.
>  * include/bits/hashtable.h (__rehash_guard_t): New.
>  (__rehash_state): Remove.
>  (_M_rehash): Remove.
>  (_M_rehash_aux): Rename into _M_rehash.
>  (_M_assign_elements, _M_insert_unique_node,
> _M_insert_multi_node): Adapt.
>  (rehash): Adapt.
>
>
> Tested under Linux x64.
>
> Ok to commit ?
>
> François
>


Re: [PATCH][_Hashtable] Use RAII to restore Rehash state

2023-10-26 Thread Jonathan Wakely

On 26/10/23 07:18 +0200, François Dumont wrote:

    libstdc++: [_Hashtable] Use RAII type to manage rehash functor state

    Replace usage of __try/__catch with a RAII type to restore rehash 
functor

    state when needed.


Generally I really like replacing try-catch with RAII but I have some
questions below.


    libstdc++-v3/ChangeLog:

    * include/bits/hashtable_policy.h (_RehashStateGuard): New.
    (_Insert_base<>::_M_insert_range(_IIt, _IIt, const 
_NodeGet&, false_type)):

    Adapt.
    * include/bits/hashtable.h (__rehash_guard_t): New.
    (__rehash_state): Remove.
    (_M_rehash): Remove.
    (_M_rehash_aux): Rename into _M_rehash.
    (_M_assign_elements, _M_insert_unique_node, 
_M_insert_multi_node): Adapt.

    (rehash): Adapt.


Tested under Linux x64.

Ok to commit ?

François



diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 0857448f7ed..64071ac1fb2 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -234,6 +234,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _RehashPolicy, _Traits>;
  using __enable_default_ctor
= _Hashtable_enable_default_ctor<_Equal, _Hash, _Alloc>;
+  using __rehash_guard_t
+   = __detail::_RehashStateGuard<_RehashPolicy>;

public:
  typedef _Key  key_type;
@@ -264,7 +266,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

private:
  using __rehash_type = _RehashPolicy;
-  using __rehash_state = typename __rehash_type::_State;

  using __unique_keys = typename __traits_type::__unique_keys;

@@ -1200,14 +1201,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

private:
  // Helper rehash method used when keys are unique.
-  void _M_rehash_aux(size_type __bkt_count, true_type __uks);
+  void _M_rehash(size_type __bkt_count, true_type __uks);

  // Helper rehash method used when keys can be non-unique.
-  void _M_rehash_aux(size_type __bkt_count, false_type __uks);
-
-  // Unconditionally change size of bucket array to n, restore
-  // hash policy state to __state on exception.
-  void _M_rehash(size_type __bkt_count, const __rehash_state& __state);
+  void _M_rehash(size_type __bkt_count, false_type __uks);
};

  // Definitions of class template _Hashtable's out-of-line member functions.
@@ -1337,7 +1334,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  {
__buckets_ptr __former_buckets = nullptr;
std::size_t __former_bucket_count = _M_bucket_count;
-   const __rehash_state& __former_state = _M_rehash_policy._M_state();
+   __rehash_guard_t __rehash_guard(_M_rehash_policy);

if (_M_bucket_count != __ht._M_bucket_count)
  {
@@ -1359,6 +1356,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_assign(std::forward<_Ht>(__ht), __roan);
if (__former_buckets)
  _M_deallocate_buckets(__former_buckets, __former_bucket_count);
+   __rehash_guard._M_reset = false;


I find this confusing. Usually "reset" means that something is
cleared, so won't take an action in the destructor. e.g. if you use
std::unique_ptr::reset() then the object is destroyed immediately, and
then nothing happens in the destructor. Here it's the opposite,
_M_reset=true means that it _should_ do something in the destructor.

The problem is the ambiguity between "reset the state in the
destructor later" and "reset the object to an empty state now".

If the member was called _M_guarded then it might be clearer.
_M_guarded=true means the guard is active, and will restore the state
later. Or _M_active, or _M_armed, or even _M_reset_in_dtor. Any of
those names avoids the confusion with the semantics of
std::unique_ptr::reset() and similar well-known APIs.

Or what I usually do is store a pointer to the guarded object in the
RAII guard type, and then just null the pointer to disarm the guard.
That means you don't need a separate bool member variable. If
_RehashStateGuard::_M_rehash_policy was called _M_guarded_obj and was
a _RehashPolicy* instead of _RehashPolicy& then disarming it would be:

   __rehash_guard._M_guarded_obj = nullptr;

This seems clear to me, as it says that the guard no longer has
anything to guard, so won't do anything in the destructor.



  }
__catch(...)
  {
@@ -1366,7 +1364,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  {
// Restore previous buckets.
_M_deallocate_buckets();
-   _M_rehash_policy._M_reset(__former_state);
_M_buckets = __former_buckets;
_M_bucket_count = __former_bucket_count;
  }
@@ -2142,17 +2139,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  __node_ptr __node, size_type __n_elt)
-> iterator
{
-  const __rehash_state& __saved_state = _M_rehash_policy.

Re: [PATCH] Fortran: Fix incompatible types between INTEGER(8) and TYPE(c_ptr)

2023-10-26 Thread Paul-Antoine Arras

Hi Tobias,

Please see the updated patch attached incorporating your input and 
details below.


On 24/10/2023 18:12, you wrote:

On 20.10.23 16:02, Paul-Antoine Arras wrote:

gcc/fortran/ChangeLog:

  * interface.cc (gfc_compare_types): Return true in this situation.


That's a bad description. It makes sense when reading the commit log but 
if you

only read gcc/fortran/ChangeLog, 'this situation' is a dangling reference.


Updated Changelog with a more helpful description.


  gcc/fortran/ChangeLog.omp    |  5 ++
  gcc/testsuite/ChangeLog.omp  |  4 ++


On mainline, the ChangeLog not ChangeLog.omp is used. This changelog is 
automatically
filled by the data in the commit log. Thus, no need to include it in the 
patch.


Removed ChangeLog.omp from the patch.


See attached patch for a combined version, which checks now
whether from_intmod == INTMOD_ISO_C_BINDING and then compares
the names (to distinguish c_ptr and c_funptr). Those are unaffected
by 'use' renames, hence, we should be fine.


Added the proposed diff for interface.cc and misc.cc to the patch.


Additionally, I think it would be good to have a testcase which checks for
   c_funptr vs. c_ptr
mismatch.


Added new testcase c_ptr_tests_21.f90 to check that incompatibilities 
between c_funptr vs. c_ptr are properly reported.


Is this latest revision ready to commit?

Thanks,
--
PA
From 691d1050ce39c27231dc610b799bf180871820b8 Mon Sep 17 00:00:00 2001
From: Paul-Antoine Arras 
Date: Fri, 20 Oct 2023 12:42:49 +0200
Subject: [PATCH] Fortran: Fix incompatible types between INTEGER(8) and
 TYPE(c_ptr)

In the context of an OpenMP declare variant directive, arguments of type C_PTR
are sometimes recognised as C_PTR in the base function and as INTEGER(8) in the
variant - or the other way around, depending on the parsing order.
This patch prevents such a situation from turning into a compile error.

2023-10-20  Paul-Antoine Arras  
	Tobias Burnus  

gcc/fortran/ChangeLog:

	* interface.cc (gfc_compare_types): Return true if one type is C_PTR
	and the other is a compatible INTEGER(8).
	* misc.cc (gfc_typename): Handle the case where an INTEGER(8) actually
	holds a TYPE(C_PTR).

gcc/testsuite/ChangeLog:

	* gfortran.dg/c_ptr_tests_20.f90: New test, checking that INTEGER(8)
	and TYPE(C_PTR) are recognised as compatible.
	* gfortran.dg/c_ptr_tests_21.f90: New test, exercising the error
	detection for C_FUNPTR.
---
 gcc/fortran/interface.cc | 16 --
 gcc/fortran/misc.cc  |  7 ++-
 gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90 | 57 
 gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90 | 57 
 4 files changed, 132 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
 create mode 100644 gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90

diff --git a/gcc/fortran/interface.cc b/gcc/fortran/interface.cc
index e9843e9549c..ed1613b16fb 100644
--- a/gcc/fortran/interface.cc
+++ b/gcc/fortran/interface.cc
@@ -707,10 +707,18 @@ gfc_compare_types (gfc_typespec *ts1, gfc_typespec *ts2)
  better way of doing this.  When ISO C binding is cleared up,
  this can probably be removed.  See PR 57048.  */
 
-  if (((ts1->type == BT_INTEGER && ts2->type == BT_DERIVED)
-   || (ts1->type == BT_DERIVED && ts2->type == BT_INTEGER))
-  && ts1->u.derived && ts2->u.derived
-  && ts1->u.derived == ts2->u.derived)
+  if ((ts1->type == BT_INTEGER
+   && ts2->type == BT_DERIVED
+   && ts1->f90_type == BT_VOID
+   && ts2->u.derived->from_intmod == INTMOD_ISO_C_BINDING
+   && ts1->u.derived
+   && strcmp (ts1->u.derived->name, ts2->u.derived->name) == 0)
+  || (ts2->type == BT_INTEGER
+	  && ts1->type == BT_DERIVED
+	  && ts2->f90_type == BT_VOID
+	  && ts1->u.derived->from_intmod == INTMOD_ISO_C_BINDING
+	  && ts2->u.derived
+	  && strcmp (ts1->u.derived->name, ts2->u.derived->name) == 0))
 return true;
 
   /* The _data component is not always present, therefore check for its
diff --git a/gcc/fortran/misc.cc b/gcc/fortran/misc.cc
index bae6d292dc5..edffba07013 100644
--- a/gcc/fortran/misc.cc
+++ b/gcc/fortran/misc.cc
@@ -138,7 +138,12 @@ gfc_typename (gfc_typespec *ts, bool for_hash)
   switch (ts->type)
 {
 case BT_INTEGER:
-  sprintf (buffer, "INTEGER(%d)", ts->kind);
+  if (ts->f90_type == BT_VOID
+	  && ts->u.derived
+	  && ts->u.derived->from_intmod == INTMOD_ISO_C_BINDING)
+	sprintf (buffer, "TYPE(%s)", ts->u.derived->name);
+  else
+	sprintf (buffer, "INTEGER(%d)", ts->kind);
   break;
 case BT_REAL:
   sprintf (buffer, "REAL(%d)", ts->kind);
diff --git a/gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90 b/gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
new file mode 100644
index 000..7dd510400f3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
@@ -0,0 +1,57 @@
+! { dg-do compile }
+! { dg-additional-options "-fopenmp" }
+!
+! This fail

[PATCH] VECT: Support SLP MASK_LEN_GATHER_LOAD with conditional mask

2023-10-26 Thread Juzhe-Zhong
This patch leverages the current MASK_GATHER_LOAD to support SLP 
MASK_LEN_GATHER_LOAD with a conditional mask.

Unconditional MASK_LEN_GATHER_LOAD (base, offset, scale, zero, -1) SLP is not 
included in this patch
since it seems that we can't support it in the middle-end (due to PR44306).

Maybe we should support GATHER_LOAD explicitly in the RISC-V backend to work 
around this issue.

I am going to support GATHER_LOAD explicitly in the RISC-V backend as a workaround.

This patch also adds a conditional gather load test, since there is currently no 
such test.

Ok for trunk ? 

gcc/ChangeLog:

* tree-vect-slp.cc (vect_get_operand_map): Add MASK_LEN_GATHER_LOAD.
(vect_build_slp_tree_1): Ditto.
(vect_build_slp_tree_2): Ditto.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-gather-6.c: New test.

---
 gcc/testsuite/gcc.dg/vect/vect-gather-6.c | 15 +++
 gcc/tree-vect-slp.cc  |  8 ++--
 2 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-gather-6.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-6.c 
b/gcc/testsuite/gcc.dg/vect/vect-gather-6.c
new file mode 100644
index 000..ff55f321854
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-6.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+void
+f (int *restrict y, int *restrict x, int *restrict indices, int *restrict 
cond, int n)
+{
+  for (int i = 0; i < n; ++i)
+{
+  if (cond[i * 2])
+   y[i * 2] = x[indices[i * 2]] + 1;
+  if (cond[i * 2 + 1])
+   y[i * 2 + 1] = x[indices[i * 2 + 1]] + 2;
+}
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target 
vect_gather_load_ifn } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 5eb310eceaf..0c197b50054 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -564,6 +564,7 @@ vect_get_operand_map (const gimple *stmt, bool 
gather_scatter_p = false,
return arg1_map;
 
  case IFN_MASK_GATHER_LOAD:
+ case IFN_MASK_LEN_GATHER_LOAD:
return arg1_arg4_map;
 
  case IFN_MASK_STORE:
@@ -1158,7 +1159,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
 
  if (cfn == CFN_MASK_LOAD
  || cfn == CFN_GATHER_LOAD
- || cfn == CFN_MASK_GATHER_LOAD)
+ || cfn == CFN_MASK_GATHER_LOAD
+   || cfn == CFN_MASK_LEN_GATHER_LOAD)
ldst_p = true;
  else if (cfn == CFN_MASK_STORE)
{
@@ -1425,6 +1427,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
  && rhs_code != CFN_GATHER_LOAD
  && rhs_code != CFN_MASK_GATHER_LOAD
+   && rhs_code != CFN_MASK_LEN_GATHER_LOAD
  && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
  /* Not grouped loads are handled as externals for BB
 vectorization.  For loop vectorization we can handle
@@ -1927,7 +1930,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   if (gcall *stmt = dyn_cast  (stmt_info->stmt))
gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
|| gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
+   || gimple_call_internal_p (stmt, 
IFN_MASK_LEN_GATHER_LOAD));
   else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
   else
-- 
2.36.3



Re: [PATCH] tree-optimization/109334: Improve computation for access attribute

2023-10-26 Thread Siddhesh Poyarekar

On 2023-10-26 04:37, Martin Uecker wrote:



Hi Sid and Jakub,

here is the patch discussed in PR 109334.



I can't approve, but here's a review:


Martin



 tree-optimization/109334: Improve computation for access attribute
 
 The fix for PR104970 restricted size computations to the case

 where the access attribute was specified explicitly (no VLA).
 It also restricted it to void pointers or elements with constant
 sizes.  The second restriction is enough to fix the original bug.
 Revert the first change to again allow size computations for VLA
 parameters and for VLA parameters together with an explicit access
 attribute.
 
 gcc/ChangeLog:
 
 PR tree-optimization/109334

 * tree-object-size.cc (parm_object_size): Allow size
 computation for explicit access attributes.
 
 gcc/testsuite/ChangeLog:
 
 PR tree-optimization/109334

 * gcc.dg/builtin-dynamic-object-size-20.c
 (test_parmsz_simple3): Supported again.
 (test_parmsz_external4): New test.
 * gcc.dg/builtin-dynamic-object-size-20.c: New test.
 * gcc.dg/pr104970.c: New test.

diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
index 6da04202ffe..07e3da6f254 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
@@ -455,7 +455,6 @@ test_parmsz_simple2 (size_t sz, char obj[])
return __builtin_dynamic_object_size (obj, 0);
  }
  
-/* Implicitly constructed access attributes not supported yet.  */

  size_t
  __attribute__ ((noinline))
  test_parmsz_simple3 (size_t sz, char obj[sz])
@@ -527,6 +526,13 @@ test_parmsz_internal3 (size_t sz1, size_t sz2, double 
obj[sz1][sz2])
return __builtin_dynamic_object_size (obj, 0);
  }


This test case now works.  OK.

  
+size_t

+__attribute__ ((noinline))
+test_parmsz_internal4 (size_t sz1, size_t sz2, double obj[sz1 + 1][4])
+{
+  return __builtin_dynamic_object_size (obj, 0);
+}
+


New test case that isn't supported yet.  OK.


  /* Loops.  */
  
  size_t

@@ -721,8 +727,8 @@ main (int argc, char **argv)
if (test_parmsz_simple2 (__builtin_strlen (argv[0]) + 1, argv[0])
!= __builtin_strlen (argv[0]) + 1)
  FAIL ();
-  /* Only explicitly added access attributes are supported for now.  */
-  if (test_parmsz_simple3 (__builtin_strlen (argv[0]) + 1, argv[0]) != -1)
+  if (test_parmsz_simple3 (__builtin_strlen (argv[0]) + 1, argv[0])
+  != __builtin_strlen (argv[0]) + 1)
  FAIL ();
int arr[42];
if (test_parmsz_scaled (arr, 42) != sizeof (arr))
@@ -759,6 +765,8 @@ main (int argc, char **argv)
  FAIL ();
if (test_parmsz_internal3 (4, 4, obj) != -1)
  FAIL ();
+  if (test_parmsz_internal4 (3, 4, obj) != -1)
+FAIL ();
if (test_loop (arr, 42, 0, 32, 1) != 10 * sizeof (int))
  FAIL ();
if (test_loop (arr, 42, 32, -1, -1) != 0)
diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c
new file mode 100644
index 000..2c8e07dd98d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c
@@ -0,0 +1,49 @@
+/* PR 109334
+ * { dg-do run }
+ * { dg-options "-O1" } */
+
+
+[[gnu::noinline,gnu::noipa]]
+int f(int n, int buf[n])
+[[gnu::access(read_only, 2, 1)]]
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+[[gnu::noinline,gnu::noipa]]
+int g(int n, int buf[])
+[[gnu::access(read_only, 2, 1)]]
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+[[gnu::noinline,gnu::noipa]]
+int h(int n, int buf[n])
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+int dummy(int x) { return x + 1; }
+
+[[gnu::noinline,gnu::noipa]]
+int i(int n, int buf[dummy(n)])
+{
+return __builtin_dynamic_object_size(buf, 0);
+}
+
+int main()
+{
+int n = 10;
+int buf[n];
+if (n * sizeof(int) != f(n, buf))
+__builtin_abort();
+if (n * sizeof(int) != g(n, buf))
+__builtin_abort();
+if (n * sizeof(int) != h(n, buf))
+__builtin_abort();
+
+(void)i(n, buf);


f(), g(), h() supported, but i() isn't.  OK.


+
+return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/pr104970.c b/gcc/testsuite/gcc.dg/pr104970.c
new file mode 100644
index 000..e24a7f22dfb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr104970.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -D_FORTIFY_SOURCE=2" } */


The -D_FORTIFY_SOURCE=2 shouldn't be necessary since it doesn't really 
do anything in the context of this test.



+
+__inline void
+memset2(void *__dest, int __ch, long __len) {
+  long __trans_tmp_1 = __builtin_dynamic_object_size(__dest, 0);
+  __builtin___memset_chk(__dest, __ch, __len, __trans_tmp_1);
+}
+
+void
+mleye(int l, double E[][l]) { memset2(E, 0, sizeof(double)); }


New regression test for the ICE reported in p

Re: [PATCH v3] bpf: Improvements in CO-RE builtins implementation.

2023-10-26 Thread Cupertino Miranda

Changes from v2:
 - Reversed return value for bpf_const_not_ok_for_debug_p function.


commit 3a0b09273727a49fab7461d059d504899bb6556d
Author: Cupertino Miranda 
Date:   Tue Aug 8 09:22:41 2023 +0100

bpf: Improvements in CO-RE builtins implementation.

This patch moved the processing of attribute preserve_access_index to
its own independent pass in a gimple lowering pass.
This approach is more consistent with the implementation of the CO-RE
builtins when used explicitly in the code.  The attributed type accesses
are now early converted to __builtin_core_reloc builtin instead of being
kept as an expression in code through out all of the middle-end.
This disables the compiler to optimize out or manipulate the expression
using the local defined type, instead of assuming nothing is known about
this expression, as it should be the case in all of the CO-RE
relocations.

In the process, also the __builtin_preserve_access_index has been
improved to generate code for more complex expressions that would
require more then one CO-RE relocation.
This turned out to be a requirement, since bpf-next selftests would rely on
loop unrolling in order to convert an undefined index array access into a
defined one. This seemed extreme to expect for the unroll to happen, and for
that reason GCC still generates correct code in such scenarios, even when index
access is never predictable or unrolling does not occur.

gcc/ChangeLog:
* config/bpf/bpf-passes.def (pass_lower_bpf_core): Added pass.
* config/bpf/bpf-protos.h: Added prototype for new pass.
* config/bpf/bpf.cc (bpf_const_not_ok_for_debug_p): New function.
* config/bpf/bpf.md (mov_reloc_core): Prefixed
name with '*'.
* config/bpf/core-builtins.cc (cr_builtins) Added access_node to
struct.
(is_attr_preserve_access): Improved check.
(core_field_info): Make use of root_for_core_field_info
function.
(process_field_expr): Adapted to new functions.
(pack_type): Small improvement.
(bpf_handle_plugin_finish_type): Adapted to GTY(()).
(bpf_init_core_builtins): Changed to new function names.
(construct_builtin_core_reloc): Improved implementation.
(bpf_resolve_overloaded_core_builtin): Changed how
__builtin_preserve_access_index is converted.
(compute_field_expr): Corrected implementation. Added
access_node argument.
(bpf_core_get_index): Added valid argument.
(root_for_core_field_info, pack_field_expr)
(core_expr_with_field_expr_plus_base, make_core_safe_access_index)
(replace_core_access_index_comp_expr, maybe_get_base_for_field_expr)
(core_access_clean, core_is_access_index, core_mark_as_access_index)
(make_gimple_core_safe_access_index, execute_lower_bpf_core)
(make_pass_lower_bpf_core): Added functions.
(pass_data_lower_bpf_core): New pass struct.
(pass_lower_bpf_core): New gimple_opt_pass class.
(pack_field_expr_for_preserve_field)
(bpf_replace_core_move_operands): Removed function.
(bpf_enum_value_kind): Added GTY(()).
* config/bpf/core-builtins.h (bpf_field_info_kind, bpf_type_id_kind)
(bpf_type_info_kind, bpf_enum_value_kind): New enum.
* config/bpf/t-bpf: Added pass bpf-passes.def to PASSES_EXTRA.

gcc/testsuite/ChangeLog:
* gcc.target/bpf/core-attr-5.c: New test.
* gcc.target/bpf/core-attr-6.c: New test.
* gcc.target/bpf/core-builtin-1.c: Corrected
* gcc.target/bpf/core-builtin-enumvalue-opt.c: Corrected regular
expression.
* gcc.target/bpf/core-builtin-enumvalue.c: Corrected regular
expression.
* gcc.target/bpf/core-builtin-exprlist-1.c: New test.
* gcc.target/bpf/core-builtin-exprlist-2.c: New test.
* gcc.target/bpf/core-builtin-exprlist-3.c: New test.
* gcc.target/bpf/core-builtin-exprlist-4.c: New test.
* gcc.target/bpf/core-builtin-fieldinfo-offset-1.c: Extra tests

diff --git a/gcc/config/bpf/bpf-passes.def b/gcc/config/bpf/bpf-passes.def
new file mode 100644
index ..0ec20eac965d
--- /dev/null
+++ b/gcc/config/bpf/bpf-passes.def
@@ -0,0 +1,20 @@
+/* Declaration of target-specific passes for eBPF.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even th

RE: [PATCH v2] VECT: Remove the type size restriction of vectorizer

2023-10-26 Thread Li, Pan2
Thanks Richard for comments.

> Can you explain why this is necessary?  In particular what is lhs_rtx
> mode vs ops[0].value mode?

For testcase gcc.target/aarch64/sve/popcount_1.c, the rtl are list as below.

The lhs_rtx is (reg:VNx2SI 98 [ vect__5.36 ]).
The ops[0].value is (reg:VNx2DI 104).

The restriction removing make the vector rtl enter expand_fn_using_insn and of 
course hit the INTEGER_P assertion.

Pan

-Original Message-
From: Richard Biener  
Sent: Thursday, October 26, 2023 4:38 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang 
; kito.ch...@gmail.com; Liu, Hongtao 
; Richard Sandiford 
Subject: Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer

On Thu, Oct 26, 2023 at 4:18 AM  wrote:
>
> From: Pan Li 
>
> Update in v2:
>
> * Fix one ICE of type assertion.
> * Adjust some test cases for aarch64 sve and riscv vector.
>
> Original log:
>
> The vectorizable_call has one restriction of the size of data type.
> Aka DF to DI is allowed but SF to DI isn't. You may see below message
> when try to vectorize function call like lrintf.
>
> void
> test_lrintf (long *out, float *in, unsigned count)
> {
>   for (unsigned i = 0; i < count; i++)
> out[i] = __builtin_lrintf (in[i]);
> }
>
> lrintf.c:5:26: missed: couldn't vectorize loop
> lrintf.c:5:26: missed: not vectorized: unsupported data-type
>
> Then the standard name pattern like lrintmn2 cannot work for different
> data type size like SF => DI. This patch would like to remove this data
> type size check and unblock the standard name like lrintmn2.
>
> The below test are passed for this patch.
>
> * The x86 bootstrap and regression test.
> * The aarch64 regression test.
> * The risc-v regression tests.
>
> gcc/ChangeLog:
>
> * internal-fn.cc (expand_fn_using_insn): Add vector int assertion.
> * tree-vect-stmts.cc (vectorizable_call): Remove size check.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/clrsb_1.c: Adjust checker.
> * gcc.target/aarch64/sve/clz_1.c: Ditto.
> * gcc.target/aarch64/sve/popcount_1.c: Ditto.
> * gcc.target/riscv/rvv/autovec/unop/popcount.c: Ditto.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/internal-fn.cc  |  3 ++-
>  gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c  |  3 +--
>  gcc/testsuite/gcc.target/aarch64/sve/clz_1.c|  3 +--
>  gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c   |  3 +--
>  .../gcc.target/riscv/rvv/autovec/unop/popcount.c|  2 +-
>  gcc/tree-vect-stmts.cc  | 13 -
>  6 files changed, 6 insertions(+), 21 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 61d5a9e4772..17c0f4c3805 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -281,7 +281,8 @@ expand_fn_using_insn (gcall *stmt, insn_code icode, 
> unsigned int noutputs,
> emit_move_insn (lhs_rtx, ops[0].value);
>else
> {
> - gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
> + gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
> +  || VECTOR_INTEGER_TYPE_P (TREE_TYPE (lhs)));

Can you explain why this is necessary?  In particular what is lhs_rtx
mode vs ops[0].value mode?

>   convert_move (lhs_rtx, ops[0].value, 0);

I'm not sure convert_move handles vector modes correctly.  Richard
probably added this code, CCed.

Richard.

> }
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
> index bdc9856faaf..940d08bbc7b 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
> @@ -18,5 +18,4 @@ clrsb_64 (unsigned int *restrict dst, uint64_t *restrict 
> src, int size)
>  }
>
>  /* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 1 } } */
> -/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 2 } } */
> -/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
> z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
> index 0c7a4e6d768..58b8ff406d2 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
> @@ -18,5 +18,4 @@ clz_64 (unsigned int *restrict dst, uint64_t *restrict src, 
> int size)
>  }
>
>  /* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 1 } } */
> -/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 2 } } */
> -/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
> z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, 
> z[0-9]+\.d\n} 1 } } */
> diff --git a/gc

Re: [PATCH] match: Simplify `a != C1 ? abs(a) : C2` when C2 == abs(C1) [PR111957]

2023-10-26 Thread Andrew Pinski
On Thu, Oct 26, 2023 at 2:24 AM Richard Biener
 wrote:
>
> On Wed, Oct 25, 2023 at 5:37 AM Andrew Pinski  wrote:
> >
> > This adds a match pattern for `a != C1 ? abs(a) : C2` which gets simplified
> > to `abs(a)`. if C1 was originally *_MIN then change it over to use absu 
> > instead
> > of abs.
> >
> > Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > PR tree-optimization/111957
> >
> > gcc/ChangeLog:
> >
> > * match.pd (`a != C1 ? abs(a) : C2`): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/phi-opt-40.c: New test.
> > ---
> >  gcc/match.pd   | 10 +
> >  gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c | 25 ++
> >  2 files changed, 35 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5df04ebba77..370ee35de52 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -5622,6 +5622,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (wi::eq_p (wi::bit_not (wi::to_wide (@1)), wi::to_wide (@2)))
> >@3))
> >
> > +/* X != C1 ? abs(X) : C2 simplifies to abs(x) when abs(C1) == C2. */
> > +(for op (abs absu)
> > + (simplify
> > +  (cond (ne @0 INTEGER_CST@1) (op@3 @0) INTEGER_CST@2)
> > +  (if (wi::abs (wi::to_wide (@1)) == wi::to_wide (@2))
>
> Why not use
>
>   (cond (ne @0 INTEGER_CST@1) (op@3 @0) @1)

Because that does not work for:
`a != -1 ? abs(a) : -1`
We want to match -1 and 1 here (in that order).
It just happens 0 and INT_MIN have the same value as their abs and
matches but we could expand it to match all others too.

Thanks,
Andrew

>
> ?  OK with that change.
>
> Richard.
>
> > +   (if (op != ABSU_EXPR && wi::only_sign_bit_p (wi::to_wide (@1)))
> > +(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
> > + (convert (absu:utype @0)))
> > +@3
> > +
> >  /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
> > X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
> >  (simplify
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> > new file mode 100644
> > index 000..a9011ce97fb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O1 -fdump-tree-phiopt" } */
> > +/* PR tree-optimization/111957 */
> > +
> > +int f(int a)
> > +{
> > +  if (a)
> > +return a > 0 ? a : -a;
> > +  return 0;
> > +}
> > +
> > +int f1(int x)
> > +{
> > +  int intmin = (-1u >> 1);
> > +  intmin = -intmin - 1;
> > +  if (x != intmin)
> > +return x > 0 ? x : -x;
> > +  return intmin;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "if " 1 "phiopt1" } } */
> > +/* { dg-final { scan-tree-dump-not "if " "phiopt2" } } */
> > +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "phiopt1" } } */
> > +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "phiopt2" } } */
> > +/* { dg-final { scan-tree-dump-times "ABSU_EXPR <" 1 "phiopt2" } } */
> > --
> > 2.34.1
> >


Re: [PATCH] Fortran: Fix incompatible types between INTEGER(8) and TYPE(c_ptr)

2023-10-26 Thread Tobias Burnus

Hi Paul-Antoine,

On 26.10.23 13:24, Paul-Antoine Arras wrote:

Please see the updated patch attached incorporating your input and
details below.

...

Is this latest revision ready to commit?


LGTM.

Thanks,

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[PATCH] testsuite: Allow general skips/requires in PCH tests

2023-10-26 Thread Richard Sandiford
dg-pch.exp handled dg-require-effective-target pch_supported_debug
as a special case, by grepping the source code.  This patch tries
to generalise it to other dg-require-effective-targets, and to
dg-skip-if.

There also seemed to be some errors in check-flags.  It used:

lappend $args [list ]

which treats the contents of args as a variable name.  I think
it was supposed to be "lappend args" instead.  From the later
code, the element was supposed to be  itself, rather than
a singleton list containing .

We can also save some time by doing the common early-exit first.

Doing this removes the need to specify the dg-require-effective-target
in both files.  Tested by faking unsupported debug and checking that
the tests were still correctly skipped.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/testsuite/
* lib/target-supports-dg.exp (check-flags): Move default argument
handling further up.  Fix a couple of issues in the lappends.
Avoid frobbing the compiler flags if the return value is already
known to be 1.
* lib/dg-pch.exp (dg-flags-pch): Process the dg-skip-if and
dg-require-effective-target directives to see whether the
assembly test should be skipped.
* gcc.dg/pch/valid-1.c: Remove dg-require-effective-target.
* gcc.dg/pch/valid-1b.c: Likewise.
---
 gcc/testsuite/gcc.dg/pch/valid-1.c   |  1 -
 gcc/testsuite/gcc.dg/pch/valid-1b.c  |  1 -
 gcc/testsuite/lib/dg-pch.exp | 24 --
 gcc/testsuite/lib/target-supports-dg.exp | 40 
 4 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pch/valid-1.c 
b/gcc/testsuite/gcc.dg/pch/valid-1.c
index 6e9abdaef3a..b950d0d4c64 100644
--- a/gcc/testsuite/gcc.dg/pch/valid-1.c
+++ b/gcc/testsuite/gcc.dg/pch/valid-1.c
@@ -1,4 +1,3 @@
-/* { dg-require-effective-target pch_supported_debug } */
 /* { dg-options "-I. -Winvalid-pch -g" } */
 
 #include "valid-1.h"/* { dg-warning "created with .none. debug info, but used 
with" } */
diff --git a/gcc/testsuite/gcc.dg/pch/valid-1b.c 
b/gcc/testsuite/gcc.dg/pch/valid-1b.c
index 3113d0f744d..a2709967c07 100644
--- a/gcc/testsuite/gcc.dg/pch/valid-1b.c
+++ b/gcc/testsuite/gcc.dg/pch/valid-1b.c
@@ -1,4 +1,3 @@
-/* { dg-require-effective-target pch_supported_debug } */
 /* { dg-options "-I. -Winvalid-pch -g0" } */
 
 #include "valid-1b.h"
diff --git a/gcc/testsuite/lib/dg-pch.exp b/gcc/testsuite/lib/dg-pch.exp
index 6b09e8c0478..b6fefaa0286 100644
--- a/gcc/testsuite/lib/dg-pch.exp
+++ b/gcc/testsuite/lib/dg-pch.exp
@@ -100,6 +100,27 @@ proc dg-flags-pch { subdir test otherflags options suffix 
} {
# For the rest, the default is to compile to .s.
set dg-do-what-default compile
 
+   # Process the target selectors to see whether the remaining
+   # part of the test should be skipped.
+   #
+   # ??? This doesn't currently handle flag-specific skips,
+   # based on dg-options.
+   set dg-do-what [list compile "" P]
+   set dg-extra-tool-flags ""
+   foreach op [dg-get-options "./$bname$suffix"] {
+   switch [lindex $op 0] {
+   dg-require-effective-target -
+   dg-skip-if {
+   if { [catch "$op" msg] } {
+   perror "$bname$suffix: $msg for \"$op\""
+   }
+   }
+   }
+   }
+   if { [lindex ${dg-do-what} 1] == "N" } {
+   continue
+   }
+
set have_errs [llength [grep $test "{\[ \t\]\+dg-error\[ \t\]\+.*\[ 
\t\]\+}"]]
 
if { [ file_on_host exists "$bname$suffix.gch" ] } {
@@ -134,8 +155,7 @@ proc dg-flags-pch { subdir test otherflags options suffix } 
{
fail "$nshort $flags assembly comparison"
}
}
-   } elseif { $pch_unsupported_debug == 0 \
-  || [llength [grep $test "{\[ 
\t\]\+dg-require-effective-target\[ \t\]\+pch_supported_debug\[ \t\]\+.*\[ 
\t\]\+}"]] > 0 } {
+   } else {
verbose -log "pch file '$bname$suffix.gch' missing"
fail "$nshort $flags"
if { !$have_errs } {
diff --git a/gcc/testsuite/lib/target-supports-dg.exp 
b/gcc/testsuite/lib/target-supports-dg.exp
index a80970f1ac2..b5658c1c33e 100644
--- a/gcc/testsuite/lib/target-supports-dg.exp
+++ b/gcc/testsuite/lib/target-supports-dg.exp
@@ -334,6 +334,23 @@ proc check-flags { args } {
 # The args are within another list; pull them out.
 set args [lindex $args 0]
 
+# The next two arguments are optional.  If they were not specified,
+# use the defaults.
+if { [llength $args] == 2 } {
+   lappend args "*"
+}
+if { [llength $args] == 3 } {
+   lappend args ""
+}
+
+# If the option strings are the defaults, or the same as the
+# defaults, there is no need to call check_conditional_xfail to
+# compare them to the actual options.
+if { [string compare [lindex $args 2] "

Re: [PATCH] RISC-V: Pass abi to g++ rvv testsuite

2023-10-26 Thread Jeff Law




On 10/25/23 18:13, Patrick O'Neill wrote:

On rv32gcv testcases like g++.target/riscv/rvv/base/bug-22.C fail with:
FAIL: g++.target/riscv/rvv/base/bug-22.C (test for excess errors)
Excess errors:
cc1plus: error: ABI requires '-march=rv32'

This patch adds the -mabi argument to g++ rvv tests.

gcc/testsuite/ChangeLog:

 * g++.target/riscv/rvv/rvv.exp: Add -mabi argument to CFLAGS.

OK.
jeff


Re: [PING] libffi: Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951]

2023-10-26 Thread Jeff Law




On 10/25/23 02:30, Thomas Schwinge wrote:

Hi!

Ping.


Grüße
  Thomas


On 2023-09-12T12:58:27+0200, I wrote:

Hi!

On 2020-04-20T14:18:40+0100, "Maciej W. Rozycki via Gcc-patches" 
 wrote:

Fix a problem with the libffi testsuite using a method to determine the
compiler to use resulting in the tool being different from one the
library has been built with, and causing a catastrophic failure from the
inability to actually choose any compiler at all in a cross-compilation
configuration.


This has since, as far as I can tell, been resolved properly by H.J. Lu's
GCC commit 5be7b66998127286fada45e4f23bd8a2056d553e,
"libffi: Integrate build with GCC", and
GCC commit 4824ed41ba7cd63e60fd9f8769a58b79935a90d1
"libffi: Integrate testsuite with GCC testsuite".


Address this problem by providing a DejaGNU configuration file defining
the compiler to use, via the CC_FOR_TARGET TCL variable, set from $CC by
autoconf, which will have all the required options set for the target
compiler to build executables in the environment configured


As we've found, this is conceptually problematic, as discussed in

"Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing 
(instead of build-time 'CC' etc.) [PR109951]".
I therefore suggest to apply to GCC libffi the conceptually same changes
as I've just pushed for libgomp:

"libgomp: Consider '--with-build-sysroot=[...]' for target libraries' build-tree 
testing (instead of build-time 'CC' etc.) [PR91884, PR109951]".
OK to push the attached
"libffi: Consider '--with-build-sysroot=[...]' for target libraries' build-tree 
testing (instead of build-time 'CC' etc.) [PR109951]"?
OK (patch didn't appear inline, but hopefully you've got enough context 
to know it's the libffi bits getting ACK'd).


jeff


Re: [PING] libatomic: Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951]

2023-10-26 Thread Jeff Law




On 10/25/23 02:32, Thomas Schwinge wrote:

Hi!

Ping.


Grüße
  Thomas


On 2023-09-12T13:03:28+0200, I wrote:

Hi!

On 2020-04-04T00:00:44+0100, "Maciej W. Rozycki via Gcc-patches" 
 wrote:

Fix a problem with the libatomic testsuite using a method to determine
the compiler to use resulting in the tool being different from one the
library has been built with, and causing a catastrophic failure from the
lack of a suitable `--sysroot=' option where the `--with-build-sysroot='
configuration option has been used to build the compiler resulting in
the inability to link executables.

Address this problem by providing a DejaGNU configuration file defining
the compiler to use, via the GCC_UNDER_TEST TCL variable, set from $CC
by autoconf, which will have all the required options set for the target
compiler to build executables in the environment configured


As we've found, this is conceptually problematic, as discussed in

"Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing 
(instead of build-time 'CC' etc.)
[PR109951]".
I therefore suggest to apply to libatomic the conceptually same changes
as I've just pushed for libgomp:

"libgomp: Consider '--with-build-sysroot=[...]' for target libraries' 
build-tree testing (instead of build-time 'CC'
etc.) [PR91884, PR109951]".
OK to push the attached
"libatomic: Consider '--with-build-sysroot=[...]' for target libraries' build-tree 
testing (instead of build-time 'CC' etc.) [PR109951]"?

Also OK.

Jeff


Re: [PATCH] match: Simplify `a != C1 ? abs(a) : C2` when C2 == abs(C1) [PR111957]

2023-10-26 Thread Richard Biener



> Am 26.10.2023 um 14:21 schrieb Andrew Pinski :
> 
> On Thu, Oct 26, 2023 at 2:24 AM Richard Biener
>  wrote:
>> 
>>> On Wed, Oct 25, 2023 at 5:37 AM Andrew Pinski  wrote:
>>> 
>>> This adds a match pattern for `a != C1 ? abs(a) : C2` which gets simplified
>>> to `abs(a)`. if C1 was originally *_MIN then change it over to use absu 
>>> instead
>>> of abs.
>>> 
>>> Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>>> 
>>>PR tree-optimization/111957
>>> 
>>> gcc/ChangeLog:
>>> 
>>>* match.pd (`a != C1 ? abs(a) : C2`): New pattern.
>>> 
>>> gcc/testsuite/ChangeLog:
>>> 
>>>* gcc.dg/tree-ssa/phi-opt-40.c: New test.
>>> ---
>>> gcc/match.pd   | 10 +
>>> gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c | 25 ++
>>> 2 files changed, 35 insertions(+)
>>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
>>> 
>>> diff --git a/gcc/match.pd b/gcc/match.pd
>>> index 5df04ebba77..370ee35de52 100644
>>> --- a/gcc/match.pd
>>> +++ b/gcc/match.pd
>>> @@ -5622,6 +5622,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>>  (if (wi::eq_p (wi::bit_not (wi::to_wide (@1)), wi::to_wide (@2)))
>>>   @3))
>>> 
>>> +/* X != C1 ? abs(X) : C2 simplifies to abs(x) when abs(C1) == C2. */
>>> +(for op (abs absu)
>>> + (simplify
>>> +  (cond (ne @0 INTEGER_CST@1) (op@3 @0) INTEGER_CST@2)
>>> +  (if (wi::abs (wi::to_wide (@1)) == wi::to_wide (@2))
>> 
>> Why not use
>> 
>>  (cond (ne @0 INTEGER_CST@1) (op@3 @0) @1)
> 
> Because that does not work for:
> `a != -1 ? abs(a) : -1`
> We want to match -1 and 1 here (in that order).
> It just happens 0 and INT_MIN have the same value as their abs and
> matches but we could expand it to match all others too.

Ah, I missed the wi::not.  The original patch is OK.

Richard 

> Thanks,
> Andrew
> 
>> 
>> ?  OK with that change.
>> 
>> Richard.
>> 
>>> +   (if (op != ABSU_EXPR && wi::only_sign_bit_p (wi::to_wide (@1)))
>>> +(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
>>> + (convert (absu:utype @0)))
>>> +@3
>>> +
>>> /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
>>>X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
>>> (simplify
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c 
>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
>>> new file mode 100644
>>> index 000..a9011ce97fb
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
>>> @@ -0,0 +1,25 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O1 -fdump-tree-phiopt" } */
>>> +/* PR tree-optimization/111957 */
>>> +
>>> +int f(int a)
>>> +{
>>> +  if (a)
>>> +return a > 0 ? a : -a;
>>> +  return 0;
>>> +}
>>> +
>>> +int f1(int x)
>>> +{
>>> +  int intmin = (-1u >> 1);
>>> +  intmin = -intmin - 1;
>>> +  if (x != intmin)
>>> +return x > 0 ? x : -x;
>>> +  return intmin;
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "if " 1 "phiopt1" } } */
>>> +/* { dg-final { scan-tree-dump-not "if " "phiopt2" } } */
>>> +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "phiopt1" } } */
>>> +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "phiopt2" } } */
>>> +/* { dg-final { scan-tree-dump-times "ABSU_EXPR <" 1 "phiopt2" } } */
>>> --
>>> 2.34.1
>>> 


Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer

2023-10-26 Thread Richard Biener



> Am 26.10.2023 um 13:59 schrieb Li, Pan2 :
> 
> Thanks Richard for comments.
> 
>> Can you explain why this is necessary?  In particular what is lhs_rtx
>> mode vs ops[0].value mode?
> 
> For testcase gcc.target/aarch64/sve/popcount_1.c, the rtl are list as below.
> 
> The lhs_rtx is (reg:VNx2SI 98 [ vect__5.36 ]).
> The ops[0].value is (reg:VNx2DI 104).
> 
> The restriction removing make the vector rtl enter expand_fn_using_insn and 
> of course hit the INTEGER_P assertion.

But I think this shows we mid-selected the optab, a convert_move is certainly 
not correct unconditionally here (the target might not support that)

> Pan
> 
> -Original Message-
> From: Richard Biener  
> Sent: Thursday, October 26, 2023 4:38 PM
> To: Li, Pan2 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang 
> ; kito.ch...@gmail.com; Liu, Hongtao 
> ; Richard Sandiford 
> Subject: Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer
> 
>> On Thu, Oct 26, 2023 at 4:18 AM  wrote:
>> 
>> From: Pan Li 
>> 
>> Update in v2:
>> 
>> * Fix one ICE of type assertion.
>> * Adjust some test cases for aarch64 sve and riscv vector.
>> 
>> Original log:
>> 
>> The vectorizable_call has one restriction of the size of data type.
>> Aka DF to DI is allowed but SF to DI isn't. You may see below message
>> when try to vectorize function call like lrintf.
>> 
>> void
>> test_lrintf (long *out, float *in, unsigned count)
>> {
>>  for (unsigned i = 0; i < count; i++)
>>out[i] = __builtin_lrintf (in[i]);
>> }
>> 
>> lrintf.c:5:26: missed: couldn't vectorize loop
>> lrintf.c:5:26: missed: not vectorized: unsupported data-type
>> 
>> Then the standard name pattern like lrintmn2 cannot work for different
>> data type size like SF => DI. This patch would like to remove this data
>> type size check and unblock the standard name like lrintmn2.
>> 
>> The below test are passed for this patch.
>> 
>> * The x86 bootstrap and regression test.
>> * The aarch64 regression test.
>> * The risc-v regression tests.
>> 
>> gcc/ChangeLog:
>> 
>>* internal-fn.cc (expand_fn_using_insn): Add vector int assertion.
>>* tree-vect-stmts.cc (vectorizable_call): Remove size check.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>* gcc.target/aarch64/sve/clrsb_1.c: Adjust checker.
>>* gcc.target/aarch64/sve/clz_1.c: Ditto.
>>* gcc.target/aarch64/sve/popcount_1.c: Ditto.
>>* gcc.target/riscv/rvv/autovec/unop/popcount.c: Ditto.
>> 
>> Signed-off-by: Pan Li 
>> ---
>> gcc/internal-fn.cc  |  3 ++-
>> gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c  |  3 +--
>> gcc/testsuite/gcc.target/aarch64/sve/clz_1.c|  3 +--
>> gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c   |  3 +--
>> .../gcc.target/riscv/rvv/autovec/unop/popcount.c|  2 +-
>> gcc/tree-vect-stmts.cc  | 13 -
>> 6 files changed, 6 insertions(+), 21 deletions(-)
>> 
>> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
>> index 61d5a9e4772..17c0f4c3805 100644
>> --- a/gcc/internal-fn.cc
>> +++ b/gcc/internal-fn.cc
>> @@ -281,7 +281,8 @@ expand_fn_using_insn (gcall *stmt, insn_code icode, 
>> unsigned int noutputs,
>>emit_move_insn (lhs_rtx, ops[0].value);
>>   else
>>{
>> - gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
>> + gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>> +  || VECTOR_INTEGER_TYPE_P (TREE_TYPE (lhs)));
> 
> Can you explain why this is necessary?  In particular what is lhs_rtx
> mode vs ops[0].value mode?
> 
>>  convert_move (lhs_rtx, ops[0].value, 0);
> 
> I'm not sure convert_move handles vector modes correctly.  Richard
> probably added this code, CCed.
> 
> Richard.
> 
>>}
>> }
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c 
>> b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
>> index bdc9856faaf..940d08bbc7b 100644
>> --- a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
>> @@ -18,5 +18,4 @@ clrsb_64 (unsigned int *restrict dst, uint64_t *restrict 
>> src, int size)
>> }
>> 
>> /* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s\n} 1 } } */
>> -/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
>> z[0-9]+\.d\n} 2 } } */
>> -/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
>> z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
>> z[0-9]+\.d\n} 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c 
>> b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
>> index 0c7a4e6d768..58b8ff406d2 100644
>> --- a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
>> @@ -18,5 +18,4 @@ clz_64 (unsigned int *restrict dst, uint64_t *restrict 
>> src, int size)
>> }
>> 
>> /* { dg-final { scan-assembler-times {\tclz

[pushed] [RA]: Modify cost calculation for dealing with pseudo equivalences

2023-10-26 Thread Vladimir Makarov
This is the second attempt to improve RA cost calculation for pseudos 
with equivalences.  The patch explanation is in the log message.


The patch was successfully bootstrapped and tested on x86-64, aarch64, 
and ppc64le.  The patch was also benchmarked on x86-64 spec2017.  
specfp2017 performance did not changed, specint2017 improved by 0.3%.


commit f55cdce3f8dd8503e080e35be59c5f5390f6d95e
Author: Vladimir N. Makarov 
Date:   Thu Oct 26 09:50:40 2023 -0400

[RA]: Modify cost calculation for dealing with equivalences

RISCV target developers reported that pseudos with equivalence used in
a loop can be spilled.  Simple changes of heuristics of cost
calculation of pseudos with equivalence or even ignoring equivalences
resulted in numerous testsuite failures on different targets or worse
spec2017 performance.  This patch implements more sophisticated cost
calculations of pseudos with equivalences.  The patch does not change
RA behaviour for targets still using the old reload pass instead of
LRA.  The patch solves the reported problem and improves x86-64
specint2017 a bit (specfp2017 performance stays the same).  The patch
takes into account how the equivalence will be used: will it be
integrated into the user insns or require an input reload insn.  It
requires additional pass over insns.  To compensate RA slow down, the
patch removes a pass over insns in the reload pass used by IRA before.
This also decouples IRA from reload more and will help to remove the
reload pass in the future if it ever happens.

gcc/ChangeLog:

* dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when
LRA is used.
* ira-costs.cc: Include regset.h.
(equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains):
New functions.
(find_costs_and_classes): Call calculate_equiv_gains and redefine
mem_cost of pseudos with equivs when LRA is used.
* var-tracking.cc: Include ira.h and lra.h.
(vt_initialize): Use lra_eliminate_regs when LRA is used.

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 0ea73bf782e..1e0cec66c5e 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -14311,7 +14311,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized)
  argument pointer and soft frame pointer rtx's.
  Use DW_OP_fbreg offset DW_OP_stack_value in this case.  */
   if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx)
-  && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl)
+  && (ira_use_lra_p
+	  ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX)
+	  : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl)
 {
   dw_loc_descr_ref result = NULL;
 
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index d9e700e8947..a59d45a6e24 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm_p.h"
 #include "insn-config.h"
 #include "regs.h"
+#include "regset.h"
 #include "ira.h"
 #include "ira-int.h"
 #include "addresses.h"
@@ -1757,6 +1758,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node)
 process_bb_for_costs (bb);
 }
 
+/* Check that reg REGNO can be changed by TO in INSN.  Return true in case the
+   result insn would be valid one.  */
+static bool
+equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
+{
+  validate_replace_src_group (regno_reg_rtx[regno], to, insn);
+  bool res = verify_changes (0);
+  cancel_changes (0);
+  return res;
+}
+
+/* Return true if X contains a pseudo with equivalence.  In this case also
+   return the pseudo through parameter REG.  If the pseudo is a part of subreg,
+   return the subreg through parameter SUBREG.  */
+
+static bool
get_equiv_regno (rtx x, int &regno, rtx &subreg)
+{
+  subreg = NULL_RTX;
+  if (GET_CODE (x) == SUBREG)
+{
+  subreg = x;
+  x = SUBREG_REG (x);
+}
+  if (REG_P (x)
+  && (ira_reg_equiv[REGNO (x)].memory != NULL
+	  || ira_reg_equiv[REGNO (x)].constant != NULL))
+{
+  regno = REGNO (x);
+  return true;
+}
+  RTX_CODE code = GET_CODE (x);
+  const char *fmt = GET_RTX_FORMAT (code);
+
+  for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+if (fmt[i] == 'e')
+  {
+	if (get_equiv_regno (XEXP (x, i), regno, subreg))
+	  return true;
+  }
+else if (fmt[i] == 'E')
+  {
+	for (int j = 0; j < XVECLEN (x, i); j++)
+	  if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg))
+	return true;
+  }
+  return false;
+}
+
+/* A pass through the current function insns.  Calculate costs of using
+   equivalences for pseudos and store them in regno_equiv_gains.  */
+
+static void
+calculate_equiv_gains (void)
+{
+  basic_block bb;
+  int regno, freq, cost;
+  rtx subreg;
+  rtx_insn *insn;
+  machine_mode mode;
+  enum reg_class rclass;
+  bitmap_head equiv_pseudos;
+
+  ira_assert (allocno_p);
+  bitmap_initialize 

Re: [PATCH] internal-fn: Add VCOND_MASK_LEN.

2023-10-26 Thread Robin Dapp
Ok, next try.  Now without dubious pattern and with direct optab
but still dedicated expander function.

This will cause one riscv regression in cond_widen_reduc-2.c that
we can deal with later.  It is just a missed optimization where
we do not combine something that we used to because of the
now-present length masking.

I'd also like to postpone handling vcond_mask_len simplifications
via stripping the length and falling back to vec_cond and its fold
patterns to a later time.  As is, this helps us avoid execution
failures in at least five test cases.

Bootstrap et al. running on x86, aarch64 and power10.

Regards
 Robin

>From 7acdebb5b13b71331621af08da6649fe08476fe8 Mon Sep 17 00:00:00 2001
From: Robin Dapp 
Date: Wed, 25 Oct 2023 22:19:43 +0200
Subject: [PATCH v3] internal-fn: Add VCOND_MASK_LEN.

In order to prevent simplification of a COND_OP with degenerate mask
(all true or all zero) into just an OP in the presence of length
masking this patch introduces a length-masked analog to VEC_COND_EXPR:
IFN_VCOND_MASK_LEN.

It also adds new match patterns that allow the combination of
unconditional unary, binary and ternay operations with the
VCOND_MASK_LEN into a conditional operation if the target supports it.

gcc/ChangeLog:

PR tree-optimization/111760

* config/riscv/autovec.md (vcond_mask_len_): Add
expander.
* config/riscv/riscv-protos.h (enum insn_type): Add.
* doc/md.texi: Add vcond_mask_len.
* gimple-match-exports.cc (maybe_resimplify_conditional_op):
Create VCOND_MASK_LEN when
length masking.
* gimple-match.h (gimple_match_op::gimple_match_op): Allow
matching of 6 and 7 parameters.
(gimple_match_op::set_op): Ditto.
(gimple_match_op::gimple_match_op): Always initialize len and
bias.
* internal-fn.cc (vec_cond_mask_len_direct): Add.
(expand_vec_cond_mask_len_optab_fn): Add.
(direct_vec_cond_mask_len_optab_supported_p): Add.
(internal_fn_len_index): Add VCOND_MASK_LEN.
(internal_fn_mask_index): Ditto.
* internal-fn.def (VCOND_MASK_LEN): New internal function.
* match.pd: Combine unconditional unary, binary and ternary
operations into the respective COND_LEN operations.
* optabs.def (OPTAB_D): Add vcond_mask_len optab.
---
 gcc/config/riscv/autovec.md | 37 
 gcc/config/riscv/riscv-protos.h |  5 +++
 gcc/doc/md.texi |  9 
 gcc/gimple-match-exports.cc | 13 --
 gcc/gimple-match.h  | 78 -
 gcc/internal-fn.cc  | 42 ++
 gcc/internal-fn.def |  2 +
 gcc/match.pd| 61 ++
 gcc/optabs.def  |  1 +
 9 files changed, 243 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 80910ba3cc2..dadb71c1165 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -565,6 +565,43 @@ (define_insn_and_split "vcond_mask_"
   [(set_attr "type" "vector")]
 )
 
+(define_expand "vcond_mask_len_"
+  [(match_operand:V_VLS 0 "register_operand")
+(match_operand: 3 "nonmemory_operand")
+(match_operand:V_VLS 1 "nonmemory_operand")
+(match_operand:V_VLS 2 "autovec_else_operand")
+(match_operand 4 "autovec_length_operand")
+(match_operand 5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+if (satisfies_constraint_Wc1 (operands[3]))
+  {
+   rtx ops[] = {operands[0], operands[2], operands[1]};
+   riscv_vector::emit_nonvlmax_insn (code_for_pred_mov (mode),
+ riscv_vector::UNARY_OP_TUMA,
+ ops, operands[4]);
+  }
+else if (satisfies_constraint_Wc0 (operands[3]))
+  {
+   rtx ops[] = {operands[0], operands[2], operands[2]};
+   riscv_vector::emit_nonvlmax_insn (code_for_pred_mov (mode),
+ riscv_vector::UNARY_OP_TUMA,
+ ops, operands[4]);
+  }
+else
+  {
+   /* The order of vcond_mask is opposite to pred_merge.  */
+   rtx ops[] = {operands[0], operands[2], operands[2], operands[1],
+operands[3]};
+   riscv_vector::emit_nonvlmax_insn (code_for_pred_merge (mode),
+ riscv_vector::MERGE_OP_TUMA,
+ ops, operands[4]);
+  }
+DONE;
+  }
+  [(set_attr "type" "vector")]
+)
+
 ;; -
 ;;  [BOOL] Select based on masks
 ;; -
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 668d75043ca..0a54e4ff022 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -302,6 +302,7 @@ enum ins

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Richard Biener



> Am 26.10.2023 um 12:14 schrieb Martin Uecker :
> 
> Am Donnerstag, dem 26.10.2023 um 11:20 +0200 schrieb Martin Uecker:
>>> Am Donnerstag, dem 26.10.2023 um 10:45 +0200 schrieb Richard Biener:
>>> On Wed, Oct 25, 2023 at 8:16 PM Martin Uecker  wrote:
 
 Am Mittwoch, dem 25.10.2023 um 13:13 +0200 schrieb Richard Biener:
> 
>> Am 25.10.2023 um 12:47 schrieb Martin Uecker :
>> 
>> Am Mittwoch, dem 25.10.2023 um 06:25 -0400 schrieb Siddhesh Poyarekar:
 On 2023-10-25 04:16, Martin Uecker wrote:
 Am Mittwoch, dem 25.10.2023 um 08:43 +0200 schrieb Richard Biener:
> 
>> Am 24.10.2023 um 22:38 schrieb Martin Uecker :
>> 
>> Am Dienstag, dem 24.10.2023 um 20:30 + schrieb Qing Zhao:
>>> Hi, Sid,
>>> 
>>> Really appreciate for your example and detailed explanation. Very 
>>> helpful.
>>> I think that this example is an excellent example to show (almost) 
>>> all the issues we need to consider.
>>> 
>>> I slightly modified this example to make it to be compilable and 
>>> run-able, as following:
>>> (but I still cannot make the incorrect reordering or DSE happening, 
>>> anyway, the potential reordering possibility is there…)
>>> 
>>> 1 #include 
>>> 2 struct A
>>> 3 {
>>> 4  size_t size;
>>> 5  char buf[] __attribute__((counted_by(size)));
>>> 6 };
>>> 7
>>> 8 static size_t
>>> 9 get_size_from (void *ptr)
>>> 10 {
>>> 11  return __builtin_dynamic_object_size (ptr, 1);
>>> 12 }
>>> 13
>>> 14 void
>>> 15 foo (size_t sz)
>>> 16 {
>>> 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
>>> sizeof(char));
>>> 18  obj->size = sz;
>>> 19  obj->buf[0] = 2;
>>> 20  __builtin_printf (“%d\n", get_size_from (obj->buf));
>>> 21  return;
>>> 22 }
>>> 23
>>> 24 int main ()
>>> 25 {
>>> 26  foo (20);
>>> 27  return 0;
>>> 28 }
>>> 
>>> 
>>> 
>>> 
> When it’s set I suppose.  Turn
> 
> X.l = n;
> 
> Into
> 
> X.l = __builtin_with_size (x.buf, n);
 
 It would turn
 
 some_variable = (&) x.buf
 
 into
 
 some_variable = __builtin_with_size ( (&) x.buf. x.len)
 
 
 So the later access to x.buf and not the initialization
 of a member of the struct (which is too early).
 
>>> 
>>> Hmm, so with Qing's example above, are you suggesting the transformation
>>> be to foo like so:
>>> 
>>> 14 void
>>> 15 foo (size_t sz)
>>> 16 {
>>> 16.5  void * _1;
>>> 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
>>> sizeof(char));
>>> 18  obj->size = sz;
>>> 19  obj->buf[0] = 2;
>>> 19.5  _1 = __builtin_with_size (obj->buf, obj->size);
>>> 20  __builtin_printf (“%d\n", get_size_from (_1));
>>> 21  return;
>>> 22 }
>>> 
>>> If yes then this could indeed work.  I think I got thrown off by the
>>> reference to __bdos.
>> 
>> Yes. I think it is important not to evaluate the size at the
>> access to buf and not the allocation, because the point is to
>> recover it from the size member even when the compiler can't
>> see the original allocation.
> 
> But if the access is through a pointer without the attribute visible
> even the Frontend cannot recover?
 
 Yes, if the access is using a struct-with-FAM without the attribute
 the FE would not be insert the builtin.  BDOS could potentially
 still see the original allocation but if it doesn't, then there is
 no information.
 
> We’d need to force type correctness and give up on indirecting
> through an int * when it can refer to two diffenent container types.
> The best we can do I think is mark allocation sites and hope for
> some basic code hygiene (not clobbering size or array pointer
> through pointers without the appropriately attributed type)
 
 I do not fully understand what you are referring to.
>>> 
>>> struct A { int n; int data[n]; };
>>> struct B { long n; int data[n]; };
>>> 
>>> int *p = flag ? a->data : b->data;
>>> 
>>> access *p;
>>> 
>>> Since we need to allow interoperability of pointers (a->data is
>>> convertible to a non-fat pointer of type int *) this leaves us with
>>> ambiguity we need to conservatively handle to avoid false positives.
>> 
>> For BDOS, I would expect this to work exactly like:
>> 
>> char aa[n1];
>> char bb[n2];
>> char *p = flag ? aa : bb;
>> 
>> (or similar code with malloc). In fact it does:
>> 
>> https://godbolt.org/z/bK68YKqhe
>> (cheating a bit and also the sub-object version of
>> BDOS does not seem to 

Re: [PATCH GCC13 backport] Avoid compile time hog on vect_peel_nonlinear_iv_init for nonlinear induction vec_step_op_mul when iteration count is too big.

2023-10-26 Thread Richard Biener



> Am 24.10.2023 um 13:22 schrieb liuhongt :
> 
> This is the backport patch for releases/gcc-13 branch, the original patch 
> for main trunk
> is at [1].
> The only difference between this backport patch and [1] is GCC13 doesn't 
> support auto_mpz,
> So this patch manually use mpz_init/mpz_clear.
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/633661.html
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for backport to releases/gcc-13?

Ok.

Richard 

> There's loop in vect_peel_nonlinear_iv_init to get init_expr *
> pow (step_expr, skip_niters). When skipn_iters is too big, compile time
> hogs. To avoid that, optimize init_expr * pow (step_expr, skip_niters) to
> init_expr << (exact_log2 (step_expr) * skip_niters) when step_expr is
> pow of 2, otherwise give up vectorization when skip_niters >=
> TYPE_PRECISION (TREE_TYPE (init_expr)).
> 
> Also give up vectorization when niters_skip is negative which will be
> used for fully masked loop.
> 
> gcc/ChangeLog:
> 
>PR tree-optimization/111820
>PR tree-optimization/111833
>* tree-vect-loop-manip.cc (vect_can_peel_nonlinear_iv_p): Give
>up vectorization for nonlinear iv vect_step_op_mul when
>step_expr is not exact_log2 and niters is greater than
>TYPE_PRECISION (TREE_TYPE (step_expr)). Also don't vectorize
>for negative niters_skip which will be used by fully masked
>loop.
>(vect_can_advance_ivs_p): Pass whole phi_info to
>vect_can_peel_nonlinear_iv_p.
>* tree-vect-loop.cc (vect_peel_nonlinear_iv_init): Optimize
>init_expr * pow (step_expr, skipn) to init_expr
><< (log2 (step_expr) * skipn) when step_expr is exact_log2.
> 
> gcc/testsuite/ChangeLog:
> 
>* gcc.target/i386/pr111820-1.c: New test.
>* gcc.target/i386/pr111820-2.c: New test.
>* gcc.target/i386/pr111820-3.c: New test.
>* gcc.target/i386/pr103144-mul-1.c: Adjust testcase.
>* gcc.target/i386/pr103144-mul-2.c: Adjust testcase.
> ---
> .../gcc.target/i386/pr103144-mul-1.c  |  8 +++---
> .../gcc.target/i386/pr103144-mul-2.c  |  8 +++---
> gcc/testsuite/gcc.target/i386/pr111820-1.c| 16 +++
> gcc/testsuite/gcc.target/i386/pr111820-2.c| 16 +++
> gcc/testsuite/gcc.target/i386/pr111820-3.c| 16 +++
> gcc/tree-vect-loop-manip.cc   | 28 +--
> gcc/tree-vect-loop.cc | 21 +++---
> 7 files changed, 98 insertions(+), 15 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr111820-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr111820-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr111820-3.c
> 
> diff --git a/gcc/testsuite/gcc.target/i386/pr103144-mul-1.c 
> b/gcc/testsuite/gcc.target/i386/pr103144-mul-1.c
> index 640c34fd959..913d7737dcd 100644
> --- a/gcc/testsuite/gcc.target/i386/pr103144-mul-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr103144-mul-1.c
> @@ -11,7 +11,7 @@ foo_mul (int* a, int b)
>   for (int i = 0; i != N; i++)
> {
>   a[i] = b;
> -  b *= 3;
> +  b *= 4;
> }
> }
> 
> @@ -23,7 +23,7 @@ foo_mul_const (int* a)
>   for (int i = 0; i != N; i++)
> {
>   a[i] = b;
> -  b *= 3;
> +  b *= 4;
> }
> }
> 
> @@ -34,7 +34,7 @@ foo_mul_peel (int* a, int b)
>   for (int i = 0; i != 39; i++)
> {
>   a[i] = b;
> -  b *= 3;
> +  b *= 4;
> }
> }
> 
> @@ -46,6 +46,6 @@ foo_mul_peel_const (int* a)
>   for (int i = 0; i != 39; i++)
> {
>   a[i] = b;
> -  b *= 3;
> +  b *= 4;
> }
> }
> diff --git a/gcc/testsuite/gcc.target/i386/pr103144-mul-2.c 
> b/gcc/testsuite/gcc.target/i386/pr103144-mul-2.c
> index 39fdea3a69d..b2ff186e335 100644
> --- a/gcc/testsuite/gcc.target/i386/pr103144-mul-2.c
> +++ b/gcc/testsuite/gcc.target/i386/pr103144-mul-2.c
> @@ -16,12 +16,12 @@ avx2_test (void)
> 
>   __builtin_memset (epi32_exp, 0, N * sizeof (int));
>   int b = 8;
> -  v8si init = __extension__(v8si) { b, b * 3, b * 9, b * 27, b * 81, b * 
> 243, b * 729, b * 2187 };
> +  v8si init = __extension__(v8si) { b, b * 4, b * 16, b * 64, b * 256, b * 
> 1024, b * 4096, b * 16384 };
> 
>   for (int i = 0; i != N / 8; i++)
> {
>   memcpy (epi32_exp + i * 8, &init, 32);
> -  init *= 6561;
> +  init *= 65536;
> }
> 
>   foo_mul (epi32_dst, b);
> @@ -32,11 +32,11 @@ avx2_test (void)
>   if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * 4) != 0)
> __builtin_abort ();
> 
> -  init = __extension__(v8si) { 1, 3, 9, 27, 81, 243, 729, 2187 };
> +  init = __extension__(v8si) { 1, 4, 16, 64, 256, 1024, 4096, 16384 };
>   for (int i = 0; i != N / 8; i++)
> {
>   memcpy (epi32_exp + i * 8, &init, 32);
> -  init *= 6561;
> +  init *= 65536;
> }
> 
>   foo_mul_const (epi32_dst);
> diff --git a/gcc/testsuite/gcc.target/i386/pr111820-1.c 
> b/gcc/testsuite/gcc.target/i386/pr111820-1.c
> new file mode 100644
> index 000..50e960c39d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.targe

[PATCH] aarch64: Add basic target_print_operand support for CONST_STRING

2023-10-26 Thread Victor Do Nascimento
Motivated by the need to print system register names in output
assembly, this patch adds the required logic to
`aarch64_print_operand' to accept rtxs of type CONST_STRING and
process these accordingly.

Consequently, an rtx such as:

  (set (reg/i:DI 0 x0)
 (unspec:DI [(const_string ("s3_3_c13_c2_2"))])

can now be output correctly using the following output pattern when
composing `define_insn's:

  "mrs\t%x0, %1"

Testing:
 - Bootstrap/regtest on aarch64-linux-gnu done.

gcc/ChangeLog

* config/aarch64/aarch64.cc (aarch64_print_operand): Add
support for CONST_STRING.
---
 gcc/config/aarch64/aarch64.cc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 62b1ae0652f..c715f6369bc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -12346,6 +12346,11 @@ aarch64_print_operand (FILE *f, rtx x, int code)
 
   switch (GET_CODE (x))
{
+   case CONST_STRING:
+ {
+   asm_fprintf (f, "%s", XSTR (x, 0));
+   break;
+ }
case REG:
  if (aarch64_sve_data_mode_p (GET_MODE (x)))
{
-- 
2.41.0



Re: Re: [PATCH] internal-fn: Add VCOND_MASK_LEN.

2023-10-26 Thread 钟居哲
+(define_expand "vcond_mask_len_"
+  [(match_operand:V_VLS 0 "register_operand")
+(match_operand: 3 "nonmemory_operand")
+(match_operand:V_VLS 1 "nonmemory_operand")
+(match_operand:V_VLS 2 "autovec_else_operand")
+(match_operand 4 "autovec_length_operand")
+(match_operand 5 "const_0_operand")]

I think you should change V_VLS into V since we never apply partial 
vectorization (predicated by length)
on VLSmodes.  VLSmodes are the modes used on GNU vector/SLP/SIMD vectorizations.



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-10-26 22:02
To: richard.sandiford
CC: rdapp.gcc; gcc-patches; rguenther; juzhe.zh...@rivai.ai
Subject: Re: [PATCH] internal-fn: Add VCOND_MASK_LEN.
Ok, next try.  Now without dubious pattern and with direct optab
but still dedicated expander function.
 
This will cause one riscv regression in cond_widen_reduc-2.c that
we can deal with later.  It is just a missed optimization where
we do not combine something that we used to because of the
now-present length masking.
 
I'd also like to postpone handling vcond_mask_len simplifications
via stripping the length and falling back to vec_cond and its fold
patterns to a later time.  As is, this helps us avoid execution
failures in at least five test cases.
 
Bootstrap et al. running on x86, aarch64 and power10.
 
Regards
Robin
 
From 7acdebb5b13b71331621af08da6649fe08476fe8 Mon Sep 17 00:00:00 2001
From: Robin Dapp 
Date: Wed, 25 Oct 2023 22:19:43 +0200
Subject: [PATCH v3] internal-fn: Add VCOND_MASK_LEN.
 
In order to prevent simplification of a COND_OP with degenerate mask
(all true or all zero) into just an OP in the presence of length
masking this patch introduces a length-masked analog to VEC_COND_EXPR:
IFN_VCOND_MASK_LEN.
 
It also adds new match patterns that allow the combination of
unconditional unary, binary and ternay operations with the
VCOND_MASK_LEN into a conditional operation if the target supports it.
 
gcc/ChangeLog:
 
PR tree-optimization/111760
 
* config/riscv/autovec.md (vcond_mask_len_): Add
expander.
* config/riscv/riscv-protos.h (enum insn_type): Add.
* doc/md.texi: Add vcond_mask_len.
* gimple-match-exports.cc (maybe_resimplify_conditional_op):
Create VCOND_MASK_LEN when
length masking.
* gimple-match.h (gimple_match_op::gimple_match_op): Allow
matching of 6 and 7 parameters.
(gimple_match_op::set_op): Ditto.
(gimple_match_op::gimple_match_op): Always initialize len and
bias.
* internal-fn.cc (vec_cond_mask_len_direct): Add.
(expand_vec_cond_mask_len_optab_fn): Add.
(direct_vec_cond_mask_len_optab_supported_p): Add.
(internal_fn_len_index): Add VCOND_MASK_LEN.
(internal_fn_mask_index): Ditto.
* internal-fn.def (VCOND_MASK_LEN): New internal function.
* match.pd: Combine unconditional unary, binary and ternary
operations into the respective COND_LEN operations.
* optabs.def (OPTAB_D): Add vcond_mask_len optab.
---
gcc/config/riscv/autovec.md | 37 
gcc/config/riscv/riscv-protos.h |  5 +++
gcc/doc/md.texi |  9 
gcc/gimple-match-exports.cc | 13 --
gcc/gimple-match.h  | 78 -
gcc/internal-fn.cc  | 42 ++
gcc/internal-fn.def |  2 +
gcc/match.pd| 61 ++
gcc/optabs.def  |  1 +
9 files changed, 243 insertions(+), 5 deletions(-)
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 80910ba3cc2..dadb71c1165 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -565,6 +565,43 @@ (define_insn_and_split "vcond_mask_"
   [(set_attr "type" "vector")]
)
+(define_expand "vcond_mask_len_"
+  [(match_operand:V_VLS 0 "register_operand")
+(match_operand: 3 "nonmemory_operand")
+(match_operand:V_VLS 1 "nonmemory_operand")
+(match_operand:V_VLS 2 "autovec_else_operand")
+(match_operand 4 "autovec_length_operand")
+(match_operand 5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+if (satisfies_constraint_Wc1 (operands[3]))
+  {
+ rtx ops[] = {operands[0], operands[2], operands[1]};
+ riscv_vector::emit_nonvlmax_insn (code_for_pred_mov (mode),
+   riscv_vector::UNARY_OP_TUMA,
+   ops, operands[4]);
+  }
+else if (satisfies_constraint_Wc0 (operands[3]))
+  {
+ rtx ops[] = {operands[0], operands[2], operands[2]};
+ riscv_vector::emit_nonvlmax_insn (code_for_pred_mov (mode),
+   riscv_vector::UNARY_OP_TUMA,
+   ops, operands[4]);
+  }
+else
+  {
+ /* The order of vcond_mask is opposite to pred_merge.  */
+ rtx ops[] = {operands[0], operands[2], operands[2], operands[1],
+  operands[3]};
+ riscv_vector::emit_nonvlmax_insn (code_for_pred_merge (mode),
+   riscv_vector::MERGE_OP_TUMA,
+   ops, operands[4]);
+  }
+DONE;
+  }
+  [(set_attr "type" "vector")]
+)
+
;; -
;;  [BOOL] Select based on masks
;; 

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Qing Zhao


> On Oct 26, 2023, at 1:21 AM, Jakub Jelinek  wrote:
> 
> On Wed, Oct 25, 2023 at 07:03:43PM +, Qing Zhao wrote:
>> For the code generation impact:
>> 
>> turning the original  x.buf 
>> to a builtin function call
>> __builtin_with_access_and_size(x,buf, x.L,-1)
>> 
>> might inhibit some optimizations from happening before the builtin is
>> evaluated into object size info (phase  .objsz1).  I guess there might be
>> some performance impact.
>> 
>> However, if we mark this builtin as PURE, NOTRROW, etc, then the negative
>> performance impact will be reduced to minimum?
> 
> You can't drop it during objsz1 pass though, otherwise __bdos wouldn't
> be able to figure out the dynamic sizes in case of normal (non-early)
> inlining - caller takes address of a counted_by array, passes it down
> to callee which is only inlined late and uses __bdos, or callee takes address
> and returns it and caller uses __bdos, etc. - so it would need to be objsz2.

I guess that I didn’t say it very clear previously. Let me explain again:

My understanding is, there are “early_objsz” phase and then later “objsz1” 
phase for -O[1|2|3]. 
For -Og, there are “early_objsz” and then later “objsz2”. 

So, the “objsz1” I mentioned (for the case -O[1|2|3])  should be the same as 
the “objsz2” you mentioned above?  -:)
It’s the second objsz phase. 

In the second objsz phase, I believe that all the inlining (including early 
inlining and IPA inlining) are all applied?
> 
> And while the builtin (or if it is an internal detail rather than user
> accessible builtin an internal function)

Okay, will use an “internal function” instead of “ builtin function”. 

> could be even const/nothrow/leaf if
> the arguments contain the loads from the structure 2 fields, I'm afraid it
> will still have huge code generation impact, prevent tons of pre-IPA
> optimizations.  And it will need some work to handle it properly during
> inlining heuristics, because in GIMPLE the COMPONENT_REF loads aren't gimple
> values, so it wouldn't be just the builtin/internal-fn call to be ignored,
> but also the count load from memory.

Are you worrying about the potential additional LOADs will change the inlining 
decision
 since the inlining heuristic depends on the # of loads from memory? 

In additional to the # of loads, the # of instructions and the # of calls of 
the function 
might be increased too, will these have impact on inlining decision? 

In addition to inlining decision, any other impact to other IPA optimizations? 

thanks.

Qing


> 
>   Jakub
> 



RE: [PATCH v2] VECT: Remove the type size restriction of vectorizer

2023-10-26 Thread Li, Pan2
> But I think this shows we mid-selected the optab, a convert_move is certainly 
> not correct unconditionally here (the target might not support that)

Make sense, we can wait a while for the confirmation from Richard S.

If convert_move is not designed for Vector (looks like mostly up to a point), I 
am not sure if we can fix the assertion like below

...
else if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (lhs)))
  return;
else
  {
gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
convert_move (lhs_rtx, ops[0].value, 0);
  }

Aka bypass the vector here, but I am afraid this change may make the llrintf 
(SF => DI) not working on standard name.
Let me have a try and keep you posted.

Pan
  

-Original Message-
From: Richard Biener  
Sent: Thursday, October 26, 2023 10:00 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang 
; kito.ch...@gmail.com; Liu, Hongtao 
; Richard Sandiford 
Subject: Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer



> Am 26.10.2023 um 13:59 schrieb Li, Pan2 :
> 
> Thanks Richard for comments.
> 
>> Can you explain why this is necessary?  In particular what is lhs_rtx
>> mode vs ops[0].value mode?
> 
> For testcase gcc.target/aarch64/sve/popcount_1.c, the rtl are list as below.
> 
> The lhs_rtx is (reg:VNx2SI 98 [ vect__5.36 ]).
> The ops[0].value is (reg:VNx2DI 104).
> 
> The restriction removing make the vector rtl enter expand_fn_using_insn and 
> of course hit the INTEGER_P assertion.

But I think this shows we mid-selected the optab, a convert_move is certainly 
not correct unconditionally here (the target might not support that)

> Pan
> 
> -Original Message-
> From: Richard Biener  
> Sent: Thursday, October 26, 2023 4:38 PM
> To: Li, Pan2 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang 
> ; kito.ch...@gmail.com; Liu, Hongtao 
> ; Richard Sandiford 
> Subject: Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer
> 
>> On Thu, Oct 26, 2023 at 4:18 AM  wrote:
>> 
>> From: Pan Li 
>> 
>> Update in v2:
>> 
>> * Fix one ICE of type assertion.
>> * Adjust some test cases for aarch64 sve and riscv vector.
>> 
>> Original log:
>> 
>> The vectoriable_call has one restriction of the size of data type.
>> Aka DF to DI is allowed but SF to DI isn't. You may see below message
>> when try to vectorize function call like lrintf.
>> 
>> void
>> test_lrintf (long *out, float *in, unsigned count)
>> {
>>  for (unsigned i = 0; i < count; i++)
>>out[i] = __builtin_lrintf (in[i]);
>> }
>> 
>> lrintf.c:5:26: missed: couldn't vectorize loop
>> lrintf.c:5:26: missed: not vectorized: unsupported data-type
>> 
>> Then the standard name pattern like lrintmn2 cannot work for different
>> data type size like SF => DI. This patch would like to remove this data
>> type size check and unblock the standard name like lrintmn2.
>> 
>> The below test are passed for this patch.
>> 
>> * The x86 bootstrap and regression test.
>> * The aarch64 regression test.
>> * The risc-v regression tests.
>> 
>> gcc/ChangeLog:
>> 
>>* internal-fn.cc (expand_fn_using_insn): Add vector int assertion.
>>* tree-vect-stmts.cc (vectorizable_call): Remove size check.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>* gcc.target/aarch64/sve/clrsb_1.c: Adjust checker.
>>* gcc.target/aarch64/sve/clz_1.c: Ditto.
>>* gcc.target/aarch64/sve/popcount_1.c: Ditto.
>>* gcc.target/riscv/rvv/autovec/unop/popcount.c: Ditto.
>> 
>> Signed-off-by: Pan Li 
>> ---
>> gcc/internal-fn.cc  |  3 ++-
>> gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c  |  3 +--
>> gcc/testsuite/gcc.target/aarch64/sve/clz_1.c|  3 +--
>> gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c   |  3 +--
>> .../gcc.target/riscv/rvv/autovec/unop/popcount.c|  2 +-
>> gcc/tree-vect-stmts.cc  | 13 -
>> 6 files changed, 6 insertions(+), 21 deletions(-)
>> 
>> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
>> index 61d5a9e4772..17c0f4c3805 100644
>> --- a/gcc/internal-fn.cc
>> +++ b/gcc/internal-fn.cc
>> @@ -281,7 +281,8 @@ expand_fn_using_insn (gcall *stmt, insn_code icode, 
>> unsigned int noutputs,
>>emit_move_insn (lhs_rtx, ops[0].value);
>>   else
>>{
>> - gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
>> + gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>> +  || VECTOR_INTEGER_TYPE_P (TREE_TYPE (lhs)));
> 
> Can you explain why this is necessary?  In particular what is lhs_rtx
> mode vs ops[0].value mode?
> 
>>  convert_move (lhs_rtx, ops[0].value, 0);
> 
> I'm not sure convert_move handles vector modes correctly.  Richard
> probably added this code, CCed.
> 
> Richard.
> 
>>}
>> }
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c 
>> b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
>> index bdc9856faaf..940d08bbc7b 100644
>> -

Re: [PATCH V2 2/7] aarch64: Add support for aarch64-sys-regs.def

2023-10-26 Thread Victor Do Nascimento




On 10/18/23 22:07, Richard Sandiford wrote:

Victor Do Nascimento  writes:

This patch defines the structure of a new .def file used for
representing the aarch64 system registers, what information it should
hold and the basic framework in GCC to process this file.

Entries in the aarch64-system-regs.def file should be as follows:

   SYSREG (NAME, CPENC (sn,op1,cn,cm,op2), FLAG1 | ... | FLAGn, ARCH)

Where the arguments to SYSREG correspond to:
   - NAME:  The system register name, as used in the assembly language.
   - CPENC: The system register encoding, mapping to:

   s__c_c_

   - FLAG: The entries in the FLAGS field are bitwise-OR'd together to
  encode extra information required to ensure proper use of
  the system register.  For example, a read-only system
  register will have the flag F_REG_READ, while write-only
  registers will be labeled F_REG_WRITE.  Such flags are
  tested against at compile-time.
   - ARCH: The architectural features the system register is associated
  with.  This is encoded via one of three possible macros:
  1. When a system register is universally implemented, we say
  it has no feature requirements, so we tag it with the
  AARCH64_NO_FEATURES macro.
  2. When a register is only implemented for a single
  architectural extension EXT, the AARCH64_FEATURE (EXT), is
  used.
  3. When a given system register is made available by any of N
  possible architectural extensions, the AARCH64_FEATURES(N, ...)
  macro is used to combine them accordingly.

In order to enable proper interpretation of the SYSREG entries by the
compiler, flags defining system register behavior such as `F_REG_READ'
and `F_REG_WRITE' are also defined here, so they can later be used for
the validation of system register properties.

Finally, any architectural feature flags from Binutils missing from GCC
have appropriate aliases defined here so as to ensure
cross-compatibility of SYSREG entries across the toolchain.

gcc/ChangeLog:

* gcc/config/aarch64/aarch64.cc (sysreg_t): New.
(sysreg_structs): Likewise.
(nsysreg): Likewise.
(AARCH64_FEATURE): Likewise.
(AARCH64_FEATURES): Likewise.
(AARCH64_NO_FEATURES): Likewise.
* gcc/config/aarch64/aarch64.h (AARCH64_ISA_V8A): Add missing
ISA flag.
(AARCH64_ISA_V8_1A): Likewise.
(AARCH64_ISA_V8_7A): Likewise.
(AARCH64_ISA_V8_8A): Likewise.
(AARCH64_NO_FEATURES): Likewise.
(AARCH64_FL_RAS): New ISA flag alias.
(AARCH64_FL_LOR): Likewise.
(AARCH64_FL_PAN): Likewise.
(AARCH64_FL_AMU): Likewise.
(AARCH64_FL_SCXTNUM): Likewise.
(AARCH64_FL_ID_PFR2): Likewise.
(F_DEPRECATED): New.
(F_REG_READ): Likewise.
(F_REG_WRITE): Likewise.
(F_ARCHEXT): Likewise.
(F_REG_ALIAS): Likewise.
---
  gcc/config/aarch64/aarch64.cc | 38 +++
  gcc/config/aarch64/aarch64.h  | 36 +
  2 files changed, 74 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9fbfc548a89..69de2366424 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2807,6 +2807,44 @@ static const struct processor all_cores[] =
{NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
  };
  
+typedef struct {

+  const char* name;
+  const char* encoding;


Formatting nit, but GCC style is:

   const char *foo

rather than:

   const char* foo;


+  const unsigned properties;
+  const unsigned long long arch_reqs;


I don't think these two should be const.  There's no reason in principle
why a sysreg_t can't be created and modified dynamically.

It would be useful to have some comments above the fields to say what
they represent.  E.g. the definition on its own doesn't make clear what
"properties" refers to.

arch_reqs should use aarch64_feature_flags rather than unsigned long long.
We're running out of feature flags in GCC too, so aarch64_feature_flags
is soon likely to be a C++ class.


+} sysreg_t;
+
+/* An aarch64_feature_set initializer for a single feature,
+   AARCH64_FEATURE_.  */
+#define AARCH64_FEATURE(FEAT) AARCH64_FL_##FEAT
+
+/* Used by AARCH64_FEATURES.  */
+#define AARCH64_OR_FEATURES_1(X, F1) \
+  AARCH64_FEATURE (F1)
+#define AARCH64_OR_FEATURES_2(X, F1, F2) \
+  (AARCH64_FEATURE (F1) | AARCH64_OR_FEATURES_1 (X, F2))
+#define AARCH64_OR_FEATURES_3(X, F1, ...) \
+  (AARCH64_FEATURE (F1) | AARCH64_OR_FEATURES_2 (X, __VA_ARGS__))
+
+/* An aarch64_feature_set initializer for the N features listed in "...".  */
+#define AARCH64_FEATURES(N, ...) \
+  AARCH64_OR_FEATURES_##N (0, __VA_ARGS__)
+
+/* Database of system registers, their encodings and architectural
+   requirements.  */
+const sysreg_t sysreg_structs[] =
+{
+#define CPENC(SN, OP1, CN, CM, OP2) "s"#SN"_

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Qing Zhao


> On Oct 26, 2023, at 4:56 AM, Richard Biener  
> wrote:
> 
> On Thu, Oct 26, 2023 at 7:22 AM Jakub Jelinek  wrote:
>> 
>> On Wed, Oct 25, 2023 at 07:03:43PM +, Qing Zhao wrote:
>>> For the code generation impact:
>>> 
>>> turning the original  x.buf
>>> to a builtin function call
>>> __builtin_with_access_and_size(x,buf, x.L,-1)
>>> 
>>> might inhibit some optimizations from happening before the builtin is
>>> evaluated into object size info (phase  .objsz1).  I guess there might be
>>> some performance impact.
>>> 
>>> However, if we mark this builtin as PURE, NOTRROW, etc, then the negative
>>> performance impact will be reduced to minimum?
>> 
>> You can't drop it during objsz1 pass though, otherwise __bdos wouldn't
>> be able to figure out the dynamic sizes in case of normal (non-early)
>> inlining - caller takes address of a counted_by array, passes it down
>> to callee which is only inlined late and uses __bdos, or callee takes address
>> and returns it and caller uses __bdos, etc. - so it would need to be objsz2.
>> 
>> And while the builtin (or if it is an internal detail rather than user
>> accessible builtin an internal function) could be even const/nothrow/leaf if
>> the arguments contain the loads from the structure 2 fields, I'm afraid it
>> will still have huge code generation impact, prevent tons of pre-IPA
>> optimizations.  And it will need some work to handle it properly during
>> inlining heuristics, because in GIMPLE the COMPONENT_REF loads aren't gimple
>> values, so it wouldn't be just the builtin/internal-fn call to be ignored,
>> but also the count load from memory.
> 
> I think we want to track the value, not the "memory" in the builtin call,
> so GIMPLE would be
> 
> _1 = x.L;
> .. = __builtin_with_access_and_size (&x.buf, _1, -1);

Before adding the __builtin_with_access_and_size, the code is:

&x.buf

After inserting the built-in, it becomes:

_1 = x.L;
__builtin_with_access_and_size (&x.buf, _1, -1).


So, the # of total instructions, the # of LOADs, and the # of calls will all be 
increased.
There will be impact to the inlining decision definitely.

> 
> also please make sure to use an internal function for
> __builtin_with_access_and_size,
> I don't think we want to expose this to users - it's an implementation detail.

Okay, will define it as an internal function (add it to internal-fn.def). -:)

Qing
> 
> Richard.
> 
>> 
>>Jakub
>> 



Re: [PATCH V2 5/7] aarch64: Implement system register r/w arm ACLE intrinsic functions

2023-10-26 Thread Victor Do Nascimento




On 10/18/23 21:39, Richard Sandiford wrote:

Victor Do Nascimento  writes:

Implement the aarch64 intrinsics for reading and writing system
registers with the following signatures:

uint32_t __arm_rsr(const char *special_register);
uint64_t __arm_rsr64(const char *special_register);
void* __arm_rsrp(const char *special_register);
float __arm_rsrf(const char *special_register);
double __arm_rsrf64(const char *special_register);
void __arm_wsr(const char *special_register, uint32_t value);
void __arm_wsr64(const char *special_register, uint64_t value);
void __arm_wsrp(const char *special_register, const void *value);
void __arm_wsrf(const char *special_register, float value);
void __arm_wsrf64(const char *special_register, double value);

gcc/ChangeLog:

* gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
Add enums for new builtins.
(aarch64_init_rwsr_builtins): New.
(aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
(aarch64_expand_rwsr_builtin):  New.
(aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
* gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
(write_sysregdi): Likewise.
* gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
(__arm_rsrp): Likewise.
(__arm_rsr64): Likewise.
(__arm_rsrf): Likewise.
(__arm_rsrf64): Likewise.
(__arm_wsr): Likewise.
(__arm_wsrp): Likewise.
(__arm_wsr64): Likewise.
(__arm_wsrf): Likewise.
(__arm_wsrf64): Likewise.

gcc/testsuite/ChangeLog:

* gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
* gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
---
  gcc/config/aarch64/aarch64-builtins.cc| 200 ++
  gcc/config/aarch64/aarch64.md |  17 ++
  gcc/config/aarch64/arm_acle.h |  30 +++
  .../gcc.target/aarch64/acle/rwsr-1.c  |  20 ++
  gcc/testsuite/gcc.target/aarch64/acle/rwsr.c  | 144 +
  5 files changed, 411 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 04f59fd9a54..d8bb2a989a5 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -808,6 +808,17 @@ enum aarch64_builtins
AARCH64_RBIT,
AARCH64_RBITL,
AARCH64_RBITLL,
+  /* System register builtins.  */
+  AARCH64_RSR,
+  AARCH64_RSRP,
+  AARCH64_RSR64,
+  AARCH64_RSRF,
+  AARCH64_RSRF64,
+  AARCH64_WSR,
+  AARCH64_WSRP,
+  AARCH64_WSR64,
+  AARCH64_WSRF,
+  AARCH64_WSRF64,
AARCH64_BUILTIN_MAX
  };
  
@@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)

   AARCH64_BUILTIN_RNG_RNDRRS);
  }
  
+/* Add builtins for reading system register.  */

+static void
+aarch64_init_rwsr_builtins (void)
+{
+  tree fntype = NULL;
+  tree const_char_ptr_type
+= build_pointer_type (build_type_variant (char_type_node, true, false));
+
+#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
+  aarch64_builtin_decls[AARCH64_##F] \
+= aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
+
+  fntype
+= build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
+
+  fntype
+= build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
+
+  fntype
+= build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
+
+  fntype
+= build_function_type_list (float_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
+
+  fntype
+= build_function_type_list (double_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   uint32_type_node, NULL);
+
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   const_ptr_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   uint64_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   float_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_cha

[PATCH v4] bpf: Improvements in CO-RE builtins implementation.

2023-10-26 Thread Cupertino Miranda

Changes from v1:
 - Fixed Davids remarks on initial patch.
 - Fixed mistake with deleted '*'.

Changes from v2:
 - Reversed return value for bpf_const_not_ok_for_debug_p function.

Changes from v3:
 - Fixed ICE in two bpf-next tests:
 -  if (!wi->is_lhs)
 -   core_mark_as_access_index (gimple_get_lhs (wi->stmt));
 +  tree lhs;
 +  if (!wi->is_lhs
 + && (lhs = gimple_get_lhs (wi->stmt)) != NULL_TREE)
 +   core_mark_as_access_index (lhs);

commit b525feaeb159f55c2a6db1cb4246bd027351f2c5
Author: Cupertino Miranda 
Date:   Tue Aug 8 09:22:41 2023 +0100

bpf: Improvements in CO-RE builtins implementation.

This patch moved the processing of attribute preserve_access_index to
its own independent pass in a gimple lowering pass.
This approach is more consistent with the implementation of the CO-RE
builtins when used explicitly in the code.  The attributed type accesses
are now early converted to __builtin_core_reloc builtin instead of being
kept as an expression in code through out all of the middle-end.
This prevents the compiler from optimizing out or manipulating the expression
using the locally defined type, forcing it to assume nothing is known about
this expression, as should be the case for all of the CO-RE
relocations.

In the process, also the __builtin_preserve_access_index has been
improved to generate code for more complex expressions that would
require more then one CO-RE relocation.
This turned out to be a requirement, since bpf-next selftests would rely on
loop unrolling in order to convert an undefined index array access into a
defined one. Expecting the unrolling to happen seemed unreasonable, so
GCC still generates correct code in such scenarios, even when the index
access is never predictable or unrolling does not occur.

gcc/ChangeLog:
* config/bpf/bpf-passes.def (pass_lower_bpf_core): Added pass.
* config/bpf/bpf-protos.h: Added prototype for new pass.
* config/bpf/bpf.cc (bpf_const_not_ok_for_debug_p): New function.
* config/bpf/bpf.md (mov_reloc_core): Prefixed
name with '*'.
* config/bpf/core-builtins.cc (cr_builtins) Added access_node to
struct.
(is_attr_preserve_access): Improved check.
(core_field_info): Make use of root_for_core_field_info
function.
(process_field_expr): Adapted to new functions.
(pack_type): Small improvement.
(bpf_handle_plugin_finish_type): Adapted to GTY(()).
(bpf_init_core_builtins): Changed to new function names.
(construct_builtin_core_reloc): Improved implementation.
(bpf_resolve_overloaded_core_builtin): Changed how
__builtin_preserve_access_index is converted.
(compute_field_expr): Corrected implementation. Added
access_node argument.
(bpf_core_get_index): Added valid argument.
(root_for_core_field_info, pack_field_expr)
(core_expr_with_field_expr_plus_base, make_core_safe_access_index)
(replace_core_access_index_comp_expr, maybe_get_base_for_field_expr)
(core_access_clean, core_is_access_index, core_mark_as_access_index)
(make_gimple_core_safe_access_index, execute_lower_bpf_core)
(make_pass_lower_bpf_core): Added functions.
(pass_data_lower_bpf_core): New pass struct.
(pass_lower_bpf_core): New gimple_opt_pass class.
(pack_field_expr_for_preserve_field)
(bpf_replace_core_move_operands): Removed function.
(bpf_enum_value_kind): Added GTY(()).
* config/bpf/core-builtins.h (bpf_field_info_kind, bpf_type_id_kind)
(bpf_type_info_kind, bpf_enum_value_kind): New enum.
* config/bpf/t-bpf: Added pass bpf-passes.def to PASSES_EXTRA.

gcc/testsuite/ChangeLog:
* gcc.target/bpf/core-attr-5.c: New test.
* gcc.target/bpf/core-attr-6.c: New test.
* gcc.target/bpf/core-builtin-1.c: Corrected
* gcc.target/bpf/core-builtin-enumvalue-opt.c: Corrected regular
expression.
* gcc.target/bpf/core-builtin-enumvalue.c: Corrected regular
expression.
* gcc.target/bpf/core-builtin-exprlist-1.c: New test.
* gcc.target/bpf/core-builtin-exprlist-2.c: New test.
* gcc.target/bpf/core-builtin-exprlist-3.c: New test.
* gcc.target/bpf/core-builtin-exprlist-4.c: New test.
* gcc.target/bpf/core-builtin-fieldinfo-offset-1.c: Extra tests

diff --git a/gcc/config/bpf/bpf-passes.def b/gcc/config/bpf/bpf-passes.def
new file mode 100644
index ..0ec20eac965d
--- /dev/null
+++ b/gcc/config/bpf/bpf-passes.def
@@ -0,0 +1,20 @@
+/* Declaration of target-specific passes for eBPF.
+   Copyright (C) 2

[pushed] Darwin: Make metadata symbol labels linker-visible for GNU objc.

2023-10-26 Thread Iain Sandoe
Tested on x86_64-darwin, x86_64-linux-gnu, pushed to trunk, thanks
Iain

--- 8< ---

Now we have shifted to using the same relocation mechanism as clang for
objective-c typeinfo the static linker needs to have a linker-visible
symbol for metadata names (this is only needed for GNU objective C, for
NeXT the names are in separate sections).

gcc/ChangeLog:

* config/darwin.h
(darwin_label_is_anonymous_local_objc_name): Make metadata names
linker-visible for GNU objective C.

Signed-off-by: Iain Sandoe 
---
 gcc/config/darwin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index b43c8ce97a8..5db64a1ad68 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -868,7 +868,7 @@ int darwin_label_is_anonymous_local_objc_name (const char 
*name);
else if (xname[0] == '+' || xname[0] == '-') \
  fprintf (FILE, "\"%s\"", xname);   \
else if (darwin_label_is_anonymous_local_objc_name (xname))  \
- fprintf (FILE, "L%s", xname);  \
+   fprintf (FILE, "%c%s", flag_next_runtime ? 'L' : 'l', xname);\
else if (xname[0] != '"' && name_needs_quotes (xname))   \
 asm_fprintf (FILE, "\"%U%s\"", xname);  \
else \
-- 
2.39.2 (Apple Git-143)



Re: [PATCH V2 7/7] aarch64: Add system register duplication check selftest

2023-10-26 Thread Victor Do Nascimento




On 10/18/23 22:30, Richard Sandiford wrote:

Victor Do Nascimento  writes:

Add a build-time test to check whether system register data, as
imported from `aarch64-sys-reg.def' has any duplicate entries.

Duplicate entries are defined as any two SYSREG entries in the .def
file which share the same encoding values (as specified by its `CPENC'
field) and where the relationship amongst the two does not fit into
one of the following categories:

* Simple aliasing: In some cases, it is observed that one
register name serves as an alias to another.  One example of
this is where TRCEXTINSELR aliases TRCEXTINSELR0.
* Expressing intent: It is possible that when a given register
serves two distinct functions depending on how it is used, it
is given two distinct names whose use should match the context
under which it is being used.  Example:  Debug Data Transfer
Register. When used to receive data, it should be accessed as
DBGDTRRX_EL0 while when transmitting data it should be
accessed via DBGDTRTX_EL0.
* Register deprecation: Some register names have been
deprecated and should no longer be used, but backwards-
compatibility requires that such names continue to be
recognized, as is the case for the SPSR_EL1 register, whose
access via the SPSR_SVC name is now deprecated.
* Same encoding different target: Some encodings are given
different meaning depending on the target architecture and, as
such, are given different names in each of these contexts.
We see an example of this for CPENC(3,4,2,0,0), which
corresponds to TTBR0_EL2 for Armv8-A targets and VSCTLR_EL2
in Armv8-R targets.

A consequence of these observations is that `CPENC' duplication is
acceptable iff at least one of the `properties' or `arch_reqs' fields
of the `sysreg_t' structs associated with the two registers in
question differ and it's this condition that is checked by the new
`aarch64_test_sysreg_encoding_clashes' function.

gcc/ChangeLog:

* gcc/config/aarch64/aarch64.cc
(aarch64_test_sysreg_encoding_clashes): New.
(aarch64_run_selftests): add call to
aarch64_test_sysreg_encoding_clashes selftest.
---
  gcc/config/aarch64/aarch64.cc | 53 +++
  1 file changed, 53 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d187e171beb..e0be2877ede 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22,6 +22,7 @@
  
  #define INCLUDE_STRING

  #define INCLUDE_ALGORITHM
+#define INCLUDE_VECTOR
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
@@ -28332,6 +28333,57 @@ aarch64_test_fractional_cost ()
ASSERT_EQ (cf (1, 2).as_double (), 0.5);
  }
  
+/* Calculate whether our system register data, as imported from

+   `aarch64-sys-reg.def' has any duplicate entries.  */
+static void
+aarch64_test_sysreg_encoding_clashes (void)
+{
+  using dup_counters_t = hash_map;
+  using dup_instances_t = hash_map>;
+
+  dup_counters_t duplicate_counts;
+  dup_instances_t duplicate_instances;
+
+  /* Every time an encoding is established to come up more than once
+  we add it to a "clash-analysis queue", which is then used to extract
+  necessary information from our hash map when establishing whether
+  repeated encodings are valid.  */


Formatting nit, sorry, but second and subsequent lines should be
indented to line up with the "E".


+
+  /* 1) Collect recurrence information.  */
+  std::vector testqueue;
+
+  for (unsigned i = 0; i < nsysreg; i++)
+{
+  const sysreg_t *reg = sysreg_structs + i;
+
+  unsigned *tbl_entry = &duplicate_counts.get_or_insert (reg->encoding);
+  *tbl_entry += 1;
+
+  std::vector *tmp
+   = &duplicate_instances.get_or_insert (reg->encoding);
+
+  tmp->push_back (reg);
+  if (*tbl_entry > 1)
+ testqueue.push_back (reg->encoding);
+}


Do we need two hash maps here?  It looks like the length of the vector
is always equal to the count.  Also...



You're right.  Addressed in next iteration of patch series.


+
+  /* 2) Carry out analysis on collected data.  */
+  for (auto enc : testqueue)


...hash_map itself is iterable.  We could iterate over that instead,
which would avoid the need for the queue.



My rationale here is that I prefer to take up the extra little bit of 
memory to save on execution time.


`duplicate_instances' is an iterable of vectors, with one such vector 
for each encountered encoding value, irrespective of whether or not that 
encoding is duplicated.  Thus to iterate over this, we'd have to 1. 
iterate through every possible vector and 2. check each one's length. 
By having our `testqueue', we know immediately which encodings have 
duplicate sysreg entries and thus we can jump immediately to analyzing 
those and only those.


Many thanks,
V.



+{
+   

Re: [PATCH V2 2/7] aarch64: Add support for aarch64-sys-regs.def

2023-10-26 Thread Richard Sandiford
Thanks for the updates.

Victor Do Nascimento  writes:
> On 10/18/23 22:07, Richard Sandiford wrote:
>> Victor Do Nascimento  writes:
>>> This patch defines the structure of a new .def file used for
>>> representing the aarch64 system registers, what information it should
>>> hold and the basic framework in GCC to process this file.
>>>
>>> Entries in the aarch64-system-regs.def file should be as follows:
>>>
>>>SYSREG (NAME, CPENC (sn,op1,cn,cm,op2), FLAG1 | ... | FLAGn, ARCH)
>>>
>>> Where the arguments to SYSREG correspond to:
>>>- NAME:  The system register name, as used in the assembly language.
>>>- CPENC: The system register encoding, mapping to:
>>>
>>>s__c_c_
>>>
>>>- FLAG: The entries in the FLAGS field are bitwise-OR'd together to
>>>   encode extra information required to ensure proper use of
>>>   the system register.  For example, a read-only system
>>>   register will have the flag F_REG_READ, while write-only
>>>   registers will be labeled F_REG_WRITE.  Such flags are
>>>   tested against at compile-time.
>>>- ARCH: The architectural features the system register is associated
>>>   with.  This is encoded via one of three possible macros:
>>>   1. When a system register is universally implemented, we say
>>>   it has no feature requirements, so we tag it with the
>>>   AARCH64_NO_FEATURES macro.
>>>   2. When a register is only implemented for a single
>>>   architectural extension EXT, the AARCH64_FEATURE (EXT), is
>>>   used.
>>>   3. When a given system register is made available by any of N
>>>   possible architectural extensions, the AARCH64_FEATURES(N, ...)
>>>   macro is used to combine them accordingly.
>>>
>>> In order to enable proper interpretation of the SYSREG entries by the
>>> compiler, flags defining system register behavior such as `F_REG_READ'
>>> and `F_REG_WRITE' are also defined here, so they can later be used for
>>> the validation of system register properties.
>>>
>>> Finally, any architectural feature flags from Binutils missing from GCC
>>> have appropriate aliases defined here so as to ensure
>>> cross-compatibility of SYSREG entries across the toolchain.
>>>
>>> gcc/ChangeLog:
>>>
>>> * gcc/config/aarch64/aarch64.cc (sysreg_t): New.
>>> (sysreg_structs): Likewise.
>>> (nsysreg): Likewise.
>>> (AARCH64_FEATURE): Likewise.
>>> (AARCH64_FEATURES): Likewise.
>>> (AARCH64_NO_FEATURES): Likewise.
>>> * gcc/config/aarch64/aarch64.h (AARCH64_ISA_V8A): Add missing
>>> ISA flag.
>>> (AARCH64_ISA_V8_1A): Likewise.
>>> (AARCH64_ISA_V8_7A): Likewise.
>>> (AARCH64_ISA_V8_8A): Likewise.
>>> (AARCH64_NO_FEATURES): Likewise.
>>> (AARCH64_FL_RAS): New ISA flag alias.
>>> (AARCH64_FL_LOR): Likewise.
>>> (AARCH64_FL_PAN): Likewise.
>>> (AARCH64_FL_AMU): Likewise.
>>> (AARCH64_FL_SCXTNUM): Likewise.
>>> (AARCH64_FL_ID_PFR2): Likewise.
>>> (F_DEPRECATED): New.
>>> (F_REG_READ): Likewise.
>>> (F_REG_WRITE): Likewise.
>>> (F_ARCHEXT): Likewise.
>>> (F_REG_ALIAS): Likewise.
>>> ---
>>>   gcc/config/aarch64/aarch64.cc | 38 +++
>>>   gcc/config/aarch64/aarch64.h  | 36 +
>>>   2 files changed, 74 insertions(+)
>>>
>>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
>>> index 9fbfc548a89..69de2366424 100644
>>> --- a/gcc/config/aarch64/aarch64.cc
>>> +++ b/gcc/config/aarch64/aarch64.cc
>>> @@ -2807,6 +2807,44 @@ static const struct processor all_cores[] =
>>> {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
>>>   };
>>>   
>>> +typedef struct {
>>> +  const char* name;
>>> +  const char* encoding;
>> 
>> Formatting nit, but GCC style is:
>> 
>>const char *foo
>> 
>> rather than:
>> 
>>const char* foo;
>> 
>>> +  const unsigned properties;
>>> +  const unsigned long long arch_reqs;
>> 
>> I don't think these two should be const.  There's no reason in principle
>> why a sysreg_t can't be created and modified dynamically.
>> 
>> It would be useful to have some comments above the fields to say what
>> they represent.  E.g. the definition on its own doesn't make clear what
>> "properties" refers to.
>> 
>> arch_reqs should use aarch64_feature_flags rather than unsigned long long.
>> We're running out of feature flags in GCC too, so aarch64_feature_flags
>> is soon likely to be a C++ class.
>> 
>>> +} sysreg_t;
>>> +
>>> +/* An aarch64_feature_set initializer for a single feature,
>>> +   AARCH64_FEATURE_.  */
>>> +#define AARCH64_FEATURE(FEAT) AARCH64_FL_##FEAT
>>> +
>>> +/* Used by AARCH64_FEATURES.  */
>>> +#define AARCH64_OR_FEATURES_1(X, F1) \
>>> +  AARCH64_FEATURE (F1)
>>> +#define AARCH64_OR_FEATURES_2(X, F1, F2) \
>>> +  (AARCH64_FEATURE (F1) | AARCH64_OR_FEATURES_1 (X, F2))
>>> +#define AARCH64_OR_FEATURES_3(X, F1, ...) \
>>> +  (AARCH64_FEATURE (F1) | AARCH64_OR_FEATURE

Re: [PATCH V2 5/7] aarch64: Implement system register r/w arm ACLE intrinsic functions

2023-10-26 Thread Richard Sandiford
Victor Do Nascimento  writes:
> On 10/18/23 21:39, Richard Sandiford wrote:
>> Victor Do Nascimento  writes:
>>> Implement the aarch64 intrinsics for reading and writing system
>>> registers with the following signatures:
>>>
>>> uint32_t __arm_rsr(const char *special_register);
>>> uint64_t __arm_rsr64(const char *special_register);
>>> void* __arm_rsrp(const char *special_register);
>>> float __arm_rsrf(const char *special_register);
>>> double __arm_rsrf64(const char *special_register);
>>> void __arm_wsr(const char *special_register, uint32_t value);
>>> void __arm_wsr64(const char *special_register, uint64_t value);
>>> void __arm_wsrp(const char *special_register, const void *value);
>>> void __arm_wsrf(const char *special_register, float value);
>>> void __arm_wsrf64(const char *special_register, double value);
>>>
>>> gcc/ChangeLog:
>>>
>>> * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
>>> Add enums for new builtins.
>>> (aarch64_init_rwsr_builtins): New.
>>> (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
>>> (aarch64_expand_rwsr_builtin):  New.
>>> (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
>>> * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
>>> (write_sysregdi): Likewise.
>>> * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
>>> (__arm_rsrp): Likewise.
>>> (__arm_rsr64): Likewise.
>>> (__arm_rsrf): Likewise.
>>> (__arm_rsrf64): Likewise.
>>> (__arm_wsr): Likewise.
>>> (__arm_wsrp): Likewise.
>>> (__arm_wsr64): Likewise.
>>> (__arm_wsrf): Likewise.
>>> (__arm_wsrf64): Likewise.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
>>> ---
>>>   gcc/config/aarch64/aarch64-builtins.cc| 200 ++
>>>   gcc/config/aarch64/aarch64.md |  17 ++
>>>   gcc/config/aarch64/arm_acle.h |  30 +++
>>>   .../gcc.target/aarch64/acle/rwsr-1.c  |  20 ++
>>>   gcc/testsuite/gcc.target/aarch64/acle/rwsr.c  | 144 +
>>>   5 files changed, 411 insertions(+)
>>>   create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
>>>   create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>>>
>>> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
>>> b/gcc/config/aarch64/aarch64-builtins.cc
>>> index 04f59fd9a54..d8bb2a989a5 100644
>>> --- a/gcc/config/aarch64/aarch64-builtins.cc
>>> +++ b/gcc/config/aarch64/aarch64-builtins.cc
>>> @@ -808,6 +808,17 @@ enum aarch64_builtins
>>> AARCH64_RBIT,
>>> AARCH64_RBITL,
>>> AARCH64_RBITLL,
>>> +  /* System register builtins.  */
>>> +  AARCH64_RSR,
>>> +  AARCH64_RSRP,
>>> +  AARCH64_RSR64,
>>> +  AARCH64_RSRF,
>>> +  AARCH64_RSRF64,
>>> +  AARCH64_WSR,
>>> +  AARCH64_WSRP,
>>> +  AARCH64_WSR64,
>>> +  AARCH64_WSRF,
>>> +  AARCH64_WSRF64,
>>> AARCH64_BUILTIN_MAX
>>>   };
>>>   
>>> @@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
>>>AARCH64_BUILTIN_RNG_RNDRRS);
>>>   }
>>>   
>>> +/* Add builtins for reading system register.  */
>>> +static void
>>> +aarch64_init_rwsr_builtins (void)
>>> +{
>>> +  tree fntype = NULL;
>>> +  tree const_char_ptr_type
>>> += build_pointer_type (build_type_variant (char_type_node, true, 
>>> false));
>>> +
>>> +#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
>>> +  aarch64_builtin_decls[AARCH64_##F] \
>>> += aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
>>> +
>>> +  fntype
>>> += build_function_type_list (uint32_type_node, const_char_ptr_type, 
>>> NULL);
>>> +  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
>>> +
>>> +  fntype
>>> += build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
>>> +  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
>>> +
>>> +  fntype
>>> += build_function_type_list (uint64_type_node, const_char_ptr_type, 
>>> NULL);
>>> +  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
>>> +
>>> +  fntype
>>> += build_function_type_list (float_type_node, const_char_ptr_type, 
>>> NULL);
>>> +  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
>>> +
>>> +  fntype
>>> += build_function_type_list (double_type_node, const_char_ptr_type, 
>>> NULL);
>>> +  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
>>> +
>>> +  fntype
>>> += build_function_type_list (void_type_node, const_char_ptr_type,
>>> +   uint32_type_node, NULL);
>>> +
>>> +  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
>>> +
>>> +  fntype
>>> += build_function_type_list (void_type_node, const_char_ptr_type,
>>> +   const_ptr_type_node, NULL);
>>> +  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
>>> +
>>> +  fntype
>>> += build_function_type_list (void_type_node, cons

Re: [PATCH] testsuite: Allow general skips/requires in PCH tests

2023-10-26 Thread Mike Stump
On Oct 26, 2023, at 5:34 AM, Richard Sandiford  
wrote:
> dg-pch.exp handled dg-require-effective-target pch_supported_debug
> as a special case, by grepping the source code.  This patch tries
> to generalise it to other dg-require-effective-targets, and to
> dg-skip-if.
> 
> There also seemed to be some errors in check-flags.  It used:
> 
>lappend $args [list ]
> 
> which treats the contents of args as a variable name.  I think
> it was supposed to be "lappend args" instead.  From the later
> code, the element was supposed to be  itself, rather than
> a singleton list containing .
> 
> We can also save some time by doing the common early-exit first.
> 
> Doing this removes the need to specify the dg-require-effective-target
> in both files.  Tested by faking unsupported debug and checking that
> the tests were still correctly skipped.
> 
> Tested on aarch64-linux-gnu.  OK to install?

Ok.

Re: [PATCH V2 7/7] aarch64: Add system register duplication check selftest

2023-10-26 Thread Richard Sandiford
Victor Do Nascimento  writes:
> On 10/18/23 22:30, Richard Sandiford wrote:
>> Victor Do Nascimento  writes:
>>> Add a build-time test to check whether system register data, as
>>> imported from `aarch64-sys-reg.def' has any duplicate entries.
>>>
>>> Duplicate entries are defined as any two SYSREG entries in the .def
>>> file which share the same encoding values (as specified by its `CPENC'
>>> field) and where the relationship amongst the two does not fit into
>>> one of the following categories:
>>>
>>> * Simple aliasing: In some cases, it is observed that one
>>> register name serves as an alias to another.  One example of
>>> this is where TRCEXTINSELR aliases TRCEXTINSELR0.
>>> * Expressing intent: It is possible that when a given register
>>> serves two distinct functions depending on how it is used, it
>>> is given two distinct names whose use should match the context
>>> under which it is being used.  Example:  Debug Data Transfer
>>> Register. When used to receive data, it should be accessed as
>>> DBGDTRRX_EL0 while when transmitting data it should be
>>> accessed via DBGDTRTX_EL0.
>>> * Register deprecation: Some register names have been
>>> deprecated and should no longer be used, but backwards-
>>> compatibility requires that such names continue to be
>>> recognized, as is the case for the SPSR_EL1 register, whose
>>> access via the SPSR_SVC name is now deprecated.
>>> * Same encoding different target: Some encodings are given
>>> different meaning depending on the target architecture and, as
>>> such, are given different names in each of these contexts.
>>> We see an example of this for CPENC(3,4,2,0,0), which
>>> corresponds to TTBR0_EL2 for Armv8-A targets and VSCTLR_EL2
>>> in Armv8-R targets.
>>>
>>> A consequence of these observations is that `CPENC' duplication is
>>> acceptable iff at least one of the `properties' or `arch_reqs' fields
>>> of the `sysreg_t' structs associated with the two registers in
>>> question differ and it's this condition that is checked by the new
>>> `aarch64_test_sysreg_encoding_clashes' function.
>>>
>>> gcc/ChangeLog:
>>>
>>> * gcc/config/aarch64/aarch64.cc
>>> (aarch64_test_sysreg_encoding_clashes): New.
>>> (aarch64_run_selftests): add call to
>>> aarch64_test_sysreg_encoding_clashes selftest.
>>> ---
>>>   gcc/config/aarch64/aarch64.cc | 53 +++
>>>   1 file changed, 53 insertions(+)
>>>
>>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
>>> index d187e171beb..e0be2877ede 100644
>>> --- a/gcc/config/aarch64/aarch64.cc
>>> +++ b/gcc/config/aarch64/aarch64.cc
>>> @@ -22,6 +22,7 @@
>>>   
>>>   #define INCLUDE_STRING
>>>   #define INCLUDE_ALGORITHM
>>> +#define INCLUDE_VECTOR
>>>   #include "config.h"
>>>   #include "system.h"
>>>   #include "coretypes.h"
>>> @@ -28332,6 +28333,57 @@ aarch64_test_fractional_cost ()
>>> ASSERT_EQ (cf (1, 2).as_double (), 0.5);
>>>   }
>>>   
>>> +/* Calculate whether our system register data, as imported from
>>> +   `aarch64-sys-reg.def' has any duplicate entries.  */
>>> +static void
>>> +aarch64_test_sysreg_encoding_clashes (void)
>>> +{
>>> +  using dup_counters_t = hash_map;
>>> +  using dup_instances_t = hash_map>> +  std::vector>;
>>> +
>>> +  dup_counters_t duplicate_counts;
>>> +  dup_instances_t duplicate_instances;
>>> +
>>> +  /* Every time an encoding is established to come up more than once
>>> +  we add it to a "clash-analysis queue", which is then used to extract
>>> +  necessary information from our hash map when establishing whether
>>> +  repeated encodings are valid.  */
>> 
>> Formatting nit, sorry, but second and subsequent lines should be
>> indented to line up with the "E".
>> 
>>> +
>>> +  /* 1) Collect recurrence information.  */
>>> +  std::vector testqueue;
>>> +
>>> +  for (unsigned i = 0; i < nsysreg; i++)
>>> +{
>>> +  const sysreg_t *reg = sysreg_structs + i;
>>> +
>>> +  unsigned *tbl_entry = &duplicate_counts.get_or_insert 
>>> (reg->encoding);
>>> +  *tbl_entry += 1;
>>> +
>>> +  std::vector *tmp
>>> +   = &duplicate_instances.get_or_insert (reg->encoding);
>>> +
>>> +  tmp->push_back (reg);
>>> +  if (*tbl_entry > 1)
>>> + testqueue.push_back (reg->encoding);
>>> +}
>> 
>> Do we need two hash maps here?  It looks like the length of the vector
>> is always equal to the count.  Also...
>> 
>
> You're right.  Addressed in next iteration of patch series.
>
>>> +
>>> +  /* 2) Carry out analysis on collected data.  */
>>> +  for (auto enc : testqueue)
>> 
>> ...hash_map itself is iterable.  We could iterate over that instead,
>> which would avoid the need for the queue.
>> 
>
> My rationale here is that I prefer to take up the extra little bit of 
> memory to save on execution time.
>
> `duplicate_instances' is an iterable of vectors, with one such vector 
> for each enc

Re: [PATCH] aarch64: Add basic target_print_operand support for CONST_STRING

2023-10-26 Thread Richard Sandiford
Victor Do Nascimento  writes:
> Motivated by the need to print system register names in output
> assembly, this patch adds the required logic to
> `aarch64_print_operand' to accept rtxs of type CONST_STRING and
> process these accordingly.
>
> Consequently, an rtx such as:
>
>   (set (reg/i:DI 0 x0)
>  (unspec:DI [(const_string ("s3_3_c13_c2_2"))])
>
> can now be output correctly using the following output pattern when
> composing `define_insn's:
>
>   "mrs\t%x0, %1"
>
> Testing:
>  - Bootstrap/regtest on aarch64-linux-gnu done.
>
> gcc/ChangeLog
>
>   * config/aarch64/aarch64.cc (aarch64_print_operand): Add
>   support for CONST_STRING.

OK, thanks.

Richard

> ---
>  gcc/config/aarch64/aarch64.cc | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 62b1ae0652f..c715f6369bc 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -12346,6 +12346,11 @@ aarch64_print_operand (FILE *f, rtx x, int code)
>  
>switch (GET_CODE (x))
>   {
> + case CONST_STRING:
> +   {
> + asm_fprintf (f, "%s", XSTR (x, 0));
> + break;
> +   }
>   case REG:
> if (aarch64_sve_data_mode_p (GET_MODE (x)))
>   {


hardcfr: support checking at abnormal edges [PR111943]

2023-10-26 Thread Alexandre Oliva


Control flow redundancy may choose abnormal edges for early checking,
but that breaks because we can't insert checks on such edges.

Introduce conditional checking on the dest block of abnormal edges,
and leave it for the optimizer to drop the conditional.

Also, oops, I noticed the new files went in with an incorrect copyright
notice, that this patch fixes.

Regstrapped on x86_64-linux-gnu.  Ok to install?


for  gcc/ChangeLog

PR tree-optimization/111943
* gimple-harden-control-flow.cc: Adjust copyright year.
(rt_bb_visited): Add vfalse and vtrue data members.
Zero-initialize them in the ctor.
(rt_bb_visited::insert_exit_check_on_edge): Upon encountering
abnormal edges, insert initializers for vfalse and vtrue on
entry, and insert the check sequence guarded by a conditional
in the dest block.

for  libgcc/ChangeLog

* hardcfr.c: Adjust copyright year.

for  gcc/testsuite/ChangeLog

PR tree-optimization/111943
* gcc.dg/harden-cfr-pr111943.c: New.
---
 gcc/gimple-harden-control-flow.cc  |   78 +++-
 gcc/testsuite/gcc.dg/harden-cfr-pr111943.c |   33 
 libgcc/hardcfr.c   |2 -
 3 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/harden-cfr-pr111943.c

diff --git a/gcc/gimple-harden-control-flow.cc 
b/gcc/gimple-harden-control-flow.cc
index 3711b25d09123..77c140178060e 100644
--- a/gcc/gimple-harden-control-flow.cc
+++ b/gcc/gimple-harden-control-flow.cc
@@ -1,5 +1,5 @@
 /* Control flow redundancy hardening.
-   Copyright (C) 2022 Free Software Foundation, Inc.
+   Copyright (C) 2022-2023 Free Software Foundation, Inc.
Contributed by Alexandre Oliva .
 
 This file is part of GCC.
@@ -460,6 +460,10 @@ class rt_bb_visited
  at the end of a block's predecessors or successors list.  */
   tree ckfail, ckpart, ckinv, ckblk;
 
+  /* If we need to deal with abnormal edges, we insert SSA_NAMEs for
+ boolean true and false.  */
+  tree vfalse, vtrue;
+
   /* Convert a block index N to a block vindex, the index used to
  identify it in the VISITED array.  Check that it's in range:
  neither ENTRY nor EXIT, but maybe one-past-the-end, to compute
@@ -596,7 +600,8 @@ public:
   /* Prepare to add control flow redundancy testing to CFUN.  */
   rt_bb_visited (int checkpoints)
 : nblocks (n_basic_blocks_for_fn (cfun)),
-  vword_type (NULL), ckseq (NULL), rtcfg (NULL)
+  vword_type (NULL), ckseq (NULL), rtcfg (NULL),
+  vfalse (NULL), vtrue (NULL)
   {
 /* If we've already added a declaration for the builtin checker,
extract vword_type and vword_bits from its declaration.  */
@@ -703,7 +708,74 @@ public:
   /* Insert SEQ on E.  */
   void insert_exit_check_on_edge (gimple_seq seq, edge e)
   {
-gsi_insert_seq_on_edge_immediate (e, seq);
+if (!(e->flags & EDGE_ABNORMAL))
+  {
+   gsi_insert_seq_on_edge_immediate (e, seq);
+   return;
+  }
+
+/* Initialize SSA boolean constants for use in abnormal PHIs.  */
+if (!vfalse)
+  {
+   vfalse = make_ssa_name (boolean_type_node);
+   vtrue = make_ssa_name (boolean_type_node);
+
+   gimple_seq vft_seq = NULL;
+   gassign *vfalse_init = gimple_build_assign (vfalse, boolean_false_node);
+   gimple_seq_add_stmt (&vft_seq, vfalse_init);
+   gassign *vtrue_init = gimple_build_assign (vtrue, boolean_true_node);
+   gimple_seq_add_stmt (&vft_seq, vtrue_init);
+
+   gsi_insert_seq_on_edge_immediate (single_succ_edge
+ (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
+ vft_seq);
+  }
+
+/* We can't insert on abnormal edges, but we can arrange for SEQ
+   to execute conditionally at dest.  Add a PHI boolean with TRUE
+   from E and FALSE from other preds, split the whole block, add a
+   test for the PHI to run a new block with SEQ or skip straight
+   to the original block.  If there are multiple incoming abnormal
+   edges, we'll do this multiple times.  ??? Unless there are
+   multiple abnormal edges with different postcheck status, we
+   could split the block and redirect other edges, rearranging the
+   PHI nodes.  Optimizers already know how to do this, so we can
+   keep things simple here.  */
+basic_block bb = e->dest;
+basic_block bb_postcheck = split_block_after_labels (bb)->dest;
+
+basic_block bb_check = create_empty_bb (e->dest);
+bb_check->count = e->count ();
+if (dom_info_available_p (CDI_DOMINATORS))
+  set_immediate_dominator (CDI_DOMINATORS, bb_check, bb);
+if (current_loops)
+  add_bb_to_loop (bb_check, current_loops->tree_root);
+
+gimple_stmt_iterator chkpt = gsi_after_labels (bb_check);
+gsi_insert_seq_before_without_update (&chkpt, seq, GSI_SAME_STMT);
+edge edge_postcheck = make_edge (bb_check, bb_postcheck,

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Richard Biener



> Am 26.10.2023 um 16:58 schrieb Qing Zhao :
> 
> 
> 
>> On Oct 26, 2023, at 4:56 AM, Richard Biener  
>> wrote:
>> 
>>> On Thu, Oct 26, 2023 at 7:22 AM Jakub Jelinek  wrote:
>>> 
>>> On Wed, Oct 25, 2023 at 07:03:43PM +, Qing Zhao wrote:
 For the code generation impact:
 
 turning the original  x.buf
 to a builtin function call
 __builtin_with_access_and_size(x,buf, x.L,-1)
 
 might inhibit some optimizations from happening before the builtin is
 evaluated into object size info (phase  .objsz1).  I guess there might be
 some performance impact.
 
 However, if we mark this builtin as PURE, NOTHROW, etc, then the negative
 performance impact will be reduced to minimum?
>>> 
>>> You can't drop it during objsz1 pass though, otherwise __bdos wouldn't
>>> be able to figure out the dynamic sizes in case of normal (non-early)
>>> inlining - caller takes address of a counted_by array, passes it down
>>> to callee which is only inlined late and uses __bdos, or callee takes 
>>> address
>>> and returns it and caller uses __bdos, etc. - so it would need to be objsz2.
>>> 
>>> And while the builtin (or if it is an internal detail rather than user
>>> accessible builtin an internal function) could be even const/nothrow/leaf if
>>> the arguments contain the loads from the structure 2 fields, I'm afraid it
>>> will still have huge code generation impact, prevent tons of pre-IPA
>>> optimizations.  And it will need some work to handle it properly during
>>> inlining heuristics, because in GIMPLE the COMPONENT_REF loads aren't gimple
>>> values, so it wouldn't be just the builtin/internal-fn call to be ignored,
>>> but also the count load from memory.
>> 
>> I think we want to track the value, not the "memory" in the builtin call,
>> so GIMPLE would be
>> 
>> _1 = x.L;
>> .. = __builtin_with_access_and_size (&x.buf, _1, -1);
> 
> Before adding the __builtin_with_access_and_size, the code is:
> 
> &x.buf
> 
> After inserting the built-in, it becomes:
> 
> _1 = x.L;
> __builtin_with_access_and_size (&x.buf, _1, -1).
> 
> 
> So, the # of total instructions, the # of LOADs, and the # of calls will all 
> be increased.
> There will be impact to the inlining decision definitely.

Note we have to make sure, if x is a pointer and we want to instrument &x->buf 
that we
can dereference x.  Possibly doing

_1 = x ? x->Len : -1;

I’m not sure the C standard makes accessing x->Len unconditionally not 
undefined behavior when &x->buf is computed.  Definitely it’s a violation of 
the abstract machine if Len is volatile qualified (but we can reject such 
counted_by or instantiations as volatile qualified types).

Richard 

> 
>> 
>> also please make sure to use an internal function for
>> __builtin_with_access_and_size,
>> I don't think we want to expose this to users - it's an implementation 
>> detail.
> 
> Okay, will define it as an internal function (add it to internal-fn.def). -:)
> 
> Qing
>> 
>> Richard.
>> 
>>> 
>>>   Jakub
>>> 
> 


Re: [PATCH] tree-optimization/109334: Improve computation for access attribute

2023-10-26 Thread Richard Biener



> Am 26.10.2023 um 13:51 schrieb Siddhesh Poyarekar :
> 
> On 2023-10-26 04:37, Martin Uecker wrote:
>> Hi Sid and Jakub,
>> here is the patch discussed in PR 109334.
> 
> I can't approve, but here's a review:

Ok

Thanks for the review,
Richard 

>> Martin
>> tree-optimization/109334: Improve computation for access attribute
>>  The fix for PR104970 restricted size computations to the case
>> where the access attribute was specified explicitly (no VLA).
>> It also restricted it to void pointers or elements with constant
>> sizes.  The second restriction is enough to fix the original bug.
>> Revert the first change to again allow size computations for VLA
>> parameters and for VLA parameters together with an explicit access
>> attribute.
>>  gcc/ChangeLog:
>>  PR tree-optimization/109334
>> * tree-object-size.cc (parm_object_size): Allow size
>> computation for explicit access attributes.
>>  gcc/testsuite/ChangeLog:
>>  PR tree-optimization/109334
>> * gcc.dg/builtin-dynamic-object-size-20.c
>> (test_parmsz_simple3): Supported again.
>> (test_parmsz_external4): New test.
>> * gcc.dg/builtin-dynamic-object-size-20.c: New test.
>> * gcc.dg/pr104970.c: New test.
>> diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c 
>> b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
>> index 6da04202ffe..07e3da6f254 100644
>> --- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
>> +++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
>> @@ -455,7 +455,6 @@ test_parmsz_simple2 (size_t sz, char obj[])
>>return __builtin_dynamic_object_size (obj, 0);
>>  }
>>  -/* Implicitly constructed access attributes not supported yet.  */
>>  size_t
>>  __attribute__ ((noinline))
>>  test_parmsz_simple3 (size_t sz, char obj[sz])
>> @@ -527,6 +526,13 @@ test_parmsz_internal3 (size_t sz1, size_t sz2, double 
>> obj[sz1][sz2])
>>return __builtin_dynamic_object_size (obj, 0);
>>  }
> 
> This test case now works.  OK.
> 
>>  +size_t
>> +__attribute__ ((noinline))
>> +test_parmsz_internal4 (size_t sz1, size_t sz2, double obj[sz1 + 1][4])
>> +{
>> +  return __builtin_dynamic_object_size (obj, 0);
>> +}
>> +
> 
> New test case that isn't supported yet.  OK.
> 
>>  /* Loops.  */
>>size_t
>> @@ -721,8 +727,8 @@ main (int argc, char **argv)
>>if (test_parmsz_simple2 (__builtin_strlen (argv[0]) + 1, argv[0])
>>!= __builtin_strlen (argv[0]) + 1)
>>  FAIL ();
>> -  /* Only explicitly added access attributes are supported for now.  */
>> -  if (test_parmsz_simple3 (__builtin_strlen (argv[0]) + 1, argv[0]) != -1)
>> +  if (test_parmsz_simple3 (__builtin_strlen (argv[0]) + 1, argv[0])
>> +  != __builtin_strlen (argv[0]) + 1)
>>  FAIL ();
>>int arr[42];
>>if (test_parmsz_scaled (arr, 42) != sizeof (arr))
>> @@ -759,6 +765,8 @@ main (int argc, char **argv)
>>  FAIL ();
>>if (test_parmsz_internal3 (4, 4, obj) != -1)
>>  FAIL ();
>> +  if (test_parmsz_internal4 (3, 4, obj) != -1)
>> +FAIL ();
>>if (test_loop (arr, 42, 0, 32, 1) != 10 * sizeof (int))
>>  FAIL ();
>>if (test_loop (arr, 42, 32, -1, -1) != 0)
>> diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c 
>> b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c
>> new file mode 100644
>> index 000..2c8e07dd98d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-20.c
>> @@ -0,0 +1,49 @@
>> +/* PR 109334
>> + * { dg-do run }
>> + * { dg-options "-O1" } */
>> +
>> +
>> +[[gnu::noinline,gnu::noipa]]
>> +int f(int n, int buf[n])
>> +[[gnu::access(read_only, 2, 1)]]
>> +{
>> +return __builtin_dynamic_object_size(buf, 0);
>> +}
>> +
>> +[[gnu::noinline,gnu::noipa]]
>> +int g(int n, int buf[])
>> +[[gnu::access(read_only, 2, 1)]]
>> +{
>> +return __builtin_dynamic_object_size(buf, 0);
>> +}
>> +
>> +[[gnu::noinline,gnu::noipa]]
>> +int h(int n, int buf[n])
>> +{
>> +return __builtin_dynamic_object_size(buf, 0);
>> +}
>> +
>> +int dummy(int x) { return x + 1; }
>> +
>> +[[gnu::noinline,gnu::noipa]]
>> +int i(int n, int buf[dummy(n)])
>> +{
>> +return __builtin_dynamic_object_size(buf, 0);
>> +}
>> +
>> +int main()
>> +{
>> +int n = 10;
>> +int buf[n];
>> +if (n * sizeof(int) != f(n, buf))
>> +__builtin_abort();
>> +if (n * sizeof(int) != g(n, buf))
>> +__builtin_abort();
>> +if (n * sizeof(int) != h(n, buf))
>> +__builtin_abort();
>> +
>> +(void)i(n, buf);
> 
> f(), g(), h() supported, but i() isn't.  OK.
> 
>> +
>> +return 0;
>> +}
>> +
>> diff --git a/gcc/testsuite/gcc.dg/pr104970.c 
>> b/gcc/testsuite/gcc.dg/pr104970.c
>> new file mode 100644
>> index 000..e24a7f22dfb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr104970.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O1 -D_FORTIFY_SO

Re: [PATCH v3] gcc: Introduce -fhardened

2023-10-26 Thread Richard Biener



> Am 24.10.2023 um 21:09 schrieb Marek Polacek :
> 
> On Tue, Oct 24, 2023 at 09:22:25AM +0200, Richard Biener wrote:
>>> On Mon, Oct 23, 2023 at 9:26 PM Marek Polacek  wrote:
>>> 
>>> On Thu, Oct 19, 2023 at 02:24:11PM +0200, Richard Biener wrote:
 On Wed, Oct 11, 2023 at 10:48 PM Marek Polacek  wrote:
> 
> On Tue, Sep 19, 2023 at 10:58:19AM -0400, Marek Polacek wrote:
>> On Mon, Sep 18, 2023 at 08:57:39AM +0200, Richard Biener wrote:
>>> On Fri, Sep 15, 2023 at 5:09 PM Marek Polacek via Gcc-patches
>>>  wrote:
 
 Bootstrapped/regtested on x86_64-pc-linux-gnu, 
 powerpc64le-unknown-linux-gnu,
 and aarch64-unknown-linux-gnu; ok for trunk?
 
 -- >8 --
 In 
 I proposed -fhardened, a new umbrella option that enables a reasonable 
 set
 of hardening flags.  The read of the room seems to be that the option
 would be useful.  So here's a patch implementing that option.
 
 Currently, -fhardened enables:
 
  -D_FORTIFY_SOURCE=3 (or =2 for older glibcs)
  -D_GLIBCXX_ASSERTIONS
  -ftrivial-auto-var-init=pattern
 
 I think =zero is much better here given the overhead is way
 cheaper and pointers get a more reliable behavior.
>>> 
>>> Ok, changed now.
>>> 
  -fPIE  -pie  -Wl,-z,relro,-z,now
  -fstack-protector-strong
  -fstack-clash-protection
  -fcf-protection=full (x86 GNU/Linux only)
 
 -fhardened will not override options that were specified on the 
 command line
 (before or after -fhardened).  For example,
 
 -D_FORTIFY_SOURCE=1 -fhardened
 
 means that _FORTIFY_SOURCE=1 will be used.  Similarly,
 
  -fhardened -fstack-protector
 
 will not enable -fstack-protector-strong.
 
 In DW_AT_producer it is reflected only as -fhardened; it doesn't expand
 to anything.  I think we need a better way to show what it actually
 enables.
>>> 
>>> I do think we need to find a solution here to solve asserting 
>>> compliance.
>> 
>> Fair enough.
>> 
>>> Maybe we can have -Whardened that will diagnose any altering of
>>> -fhardened by other options on the command-line or by missed target
>>> implementations?  People might for example use -fstack-protector
>>> but don't really want to make protection lower than requested with 
>>> -fhardened.
>>> 
>>> Any such conflict is much less appearant than when you use the
>>> flags -fhardened composes.
>> 
>> How about: --help=hardened says which options -fhardened attempts to
>> enable, and -Whardened warns when it didn't enable an option?  E.g.,
>> 
>>  -fstack-protector -fhardened -Whardened
>> 
>> would say that it didn't enable -fstack-protector-strong because
>> -fstack-protector was specified on the command line?
>> 
>> If !HAVE_LD_NOW_SUPPORT, --help=hardened probably doesn't even have to
>> list -z now, likewise for -z relro.
>> 
>> Unclear if -Whardened should be enabled by default, but probably yes?
> 
> Here's v2 which adds -Whardened (enabled by default).
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
 
 I think it's OK but I'd like to see a second ACK here.
>>> 
>>> Thanks!
>>> 
 Can you see how our
 primary and secondary targets (+ host OS) behave here?
>>> 
>>> That's very reasonable.  I tried to build gcc on Compile Farm 119 (AIX) but
>>> that fails with:
>>> 
>>> ar  -X64 x ../ppc64/libgcc/libgcc_s.a shr.o
>>> ar: 0707-100 ../ppc64/libgcc/libgcc_s.a does not exist.
>>> make[2]: *** [/home/polacek/gcc/libgcc/config/rs6000/t-slibgcc-aix:98: all] 
>>> Error 1
>>> make[2]: Leaving directory 
>>> '/home/polacek/x/trunk/powerpc-ibm-aix7.3.1.0/libgcc'
>>> 
>>> and I tried Darwin (104) and that fails with
>>> 
>>> *** Configuration aarch64-apple-darwin21.6.0 not supported
>>> 
>>> Is anyone else able to build gcc on those machines, or test the attached
>>> patch?
>>> 
 I think the
 documentation should elaborate a bit on expectations for non-Linux/GNU
 targets, specifically I think the default configuration for a target should
 with -fhardened _not_ have any -Whardened diagnostics.  Maybe we can
 have a testcase for this?
>>> 
>>> Sorry, I'm not sure how to test that.  I suppose if -fhardened enables
>>> something not supported on those systems, and it's something for which
>>> we have a configure test, then we shouldn't warn.  This is already the
>>> case for -pie, -z relro, and -z now.
>> 
>> I was thinking of
>> 
>> /* { dg-do compile } */
>> /* { dg-additional-options "-fhardened -Whardened" } */
>> 
>> int main () {}
>> 
>> and excess errors should catch "misconfigurations"?
> 
> I see.  fhardene

Re: [PATCH V2 5/7] aarch64: Implement system register r/w arm ACLE intrinsic functions

2023-10-26 Thread Victor Do Nascimento




On 10/26/23 16:23, Richard Sandiford wrote:

Victor Do Nascimento  writes:

On 10/18/23 21:39, Richard Sandiford wrote:

Victor Do Nascimento  writes:

Implement the aarch64 intrinsics for reading and writing system
registers with the following signatures:

uint32_t __arm_rsr(const char *special_register);
uint64_t __arm_rsr64(const char *special_register);
void* __arm_rsrp(const char *special_register);
float __arm_rsrf(const char *special_register);
double __arm_rsrf64(const char *special_register);
void __arm_wsr(const char *special_register, uint32_t value);
void __arm_wsr64(const char *special_register, uint64_t value);
void __arm_wsrp(const char *special_register, const void *value);
void __arm_wsrf(const char *special_register, float value);
void __arm_wsrf64(const char *special_register, double value);

gcc/ChangeLog:

* gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
Add enums for new builtins.
(aarch64_init_rwsr_builtins): New.
(aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
(aarch64_expand_rwsr_builtin):  New.
(aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
* gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
(write_sysregdi): Likewise.
* gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
(__arm_rsrp): Likewise.
(__arm_rsr64): Likewise.
(__arm_rsrf): Likewise.
(__arm_rsrf64): Likewise.
(__arm_wsr): Likewise.
(__arm_wsrp): Likewise.
(__arm_wsr64): Likewise.
(__arm_wsrf): Likewise.
(__arm_wsrf64): Likewise.

gcc/testsuite/ChangeLog:

* gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
* gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
---
   gcc/config/aarch64/aarch64-builtins.cc| 200 ++
   gcc/config/aarch64/aarch64.md |  17 ++
   gcc/config/aarch64/arm_acle.h |  30 +++
   .../gcc.target/aarch64/acle/rwsr-1.c  |  20 ++
   gcc/testsuite/gcc.target/aarch64/acle/rwsr.c  | 144 +
   5 files changed, 411 insertions(+)
   create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
   create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 04f59fd9a54..d8bb2a989a5 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -808,6 +808,17 @@ enum aarch64_builtins
 AARCH64_RBIT,
 AARCH64_RBITL,
 AARCH64_RBITLL,
+  /* System register builtins.  */
+  AARCH64_RSR,
+  AARCH64_RSRP,
+  AARCH64_RSR64,
+  AARCH64_RSRF,
+  AARCH64_RSRF64,
+  AARCH64_WSR,
+  AARCH64_WSRP,
+  AARCH64_WSR64,
+  AARCH64_WSRF,
+  AARCH64_WSRF64,
 AARCH64_BUILTIN_MAX
   };
   
@@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)

   AARCH64_BUILTIN_RNG_RNDRRS);
   }
   
+/* Add builtins for reading system register.  */

+static void
+aarch64_init_rwsr_builtins (void)
+{
+  tree fntype = NULL;
+  tree const_char_ptr_type
+= build_pointer_type (build_type_variant (char_type_node, true, false));
+
+#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
+  aarch64_builtin_decls[AARCH64_##F] \
+= aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
+
+  fntype
+= build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
+
+  fntype
+= build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
+
+  fntype
+= build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
+
+  fntype
+= build_function_type_list (float_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
+
+  fntype
+= build_function_type_list (double_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   uint32_type_node, NULL);
+
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   const_ptr_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   uint64_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
+
+  fntype
+= build_function_type_list (void_type_node, const_char_ptr_type,
+   float_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL 

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Kees Cook
On Thu, Oct 26, 2023 at 10:15:10AM +0200, Martin Uecker wrote:
> but not this:
> 
> char *p = &x->buf;
> x->count = 1;
> p[10] = 1; // !

This seems fine to me -- it's how I'd expect it to work: "10" is beyond
"1".

> (because the pointer is passed around the
> store to the counter)
> 
> and also here the second store is then irrelevant
> for the access:
> 
> x->count = 10;
> char* p = &x->buf;
> ...
> x->count = 1; // somewhere else
> 
> p[9] = 1; // ok, because count matter when buf was accessed.

This is less great, but I can understand why it happens. "p" loses the
association with "x". It'd be nice if "p" had to way to retain that it
was just an alias for x->buf, so future p access would check count.

But this appears to be an existing limitation in other areas where an
assignment will cause the loss of object association. (I've run into
this before.) It's just more surprising in the above example because in
the past the loss of association would cause __bdos() to revert back to
"SIZE_MAX" results ("I don't know the size") rather than an "outdated"
size, which may get us into unexpected places...

> IMHO this makes sense also from the user side and
> are the desirable semantics we discussed before.
> 
> But can you take a look at this?
> 
> 
> This should simulate it fairly well:
> https://godbolt.org/z/xq89aM7Gr
> 
> (the call to the noinline function would go away,
> but not necessarily its impact on optimization)

Yeah, this example should be a very rare situation: a leaf function is
changing the characteristics of the struct but returning a buffer within
it to the caller. The more likely glitch would be from:

int main()
{
struct foo *f = foo_alloc(7);
char *p = FAM_ACCESS(f, size, buf);

printf("%ld\n", __builtin_dynamic_object_size(p, 0));
test1(f); // or just "f->count = 10;" no function call needed
printf("%ld\n", __builtin_dynamic_object_size(p, 0));

return 0;
}

which reports:
7
7

instead of:
7
10

This kind of "get an alias" situation is pretty common in the kernel
as a way to have a convenient "handle" to the array. In the case of a
"fill the array without knowing the actual final size" code pattern,
things would immediately break:

struct foo *f;
char *p;
int i;

f = alloc(maximum_possible);
f->count = 0;
p = f->buf;

for (i; data_is_available() && i < maximum_possible; i++) {
f->count ++;
p[i] = next_data_item();
}

Now perhaps the problem here is that "count" cannot be used for a count
of "logically valid members in the array" but must always be a count of
"allocated member space in the array", which I guess is tolerable, but
isn't ideal -- I'd like to catch logic bugs in addition to allocation
bugs, but the latter is certainly much more important to catch.

-- 
Kees Cook


Re: [PATCH] Fortran: Fix incompatible types between INTEGER(8) and TYPE(c_ptr)

2023-10-26 Thread Thomas Schwinge
Hi!

On 2023-10-26T13:24:04+0200, Paul-Antoine Arras  wrote:
> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
> @@ -0,0 +1,57 @@
> +! { dg-do compile }
> +! { dg-additional-options "-fopenmp" }
> +[...]

> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
> @@ -0,0 +1,57 @@
> +! { dg-do compile }
> +! { dg-additional-options "-fopenmp" }
> +[...]

OpenMP is not universally supported across different GCC configurations,
so this will FAIL for some.  Therefore, please either guard with
effective target:

@item fopenmp
Target supports OpenMP via @option{-fopenmp}.

..., or move into 'gcc/testsuite/gfortran.dg/gomp/' (may then remove
explicit 'dg-additional-options "-fopenmp"').

I don't know which variant makes more sense, here.


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Martin Uecker
Am Donnerstag, dem 26.10.2023 um 17:48 +0200 schrieb Richard Biener:
> 
> > Am 26.10.2023 um 16:58 schrieb Qing Zhao :
> > 
> > 
> > 
> > > On Oct 26, 2023, at 4:56 AM, Richard Biener  
> > > wrote:
> > > 
> > > > On Thu, Oct 26, 2023 at 7:22 AM Jakub Jelinek  wrote:
> > > > 
> > > > On Wed, Oct 25, 2023 at 07:03:43PM +, Qing Zhao wrote:
> > > > > For the code generation impact:
> > > > > 
> > > > > turning the original  x.buf
> > > > > to a builtin function call
> > > > > __builtin_with_access_and_size(x,buf, x.L,-1)
> > > > > 
> > > > > might inhibit some optimizations from happening before the builtin is
> > > > > evaluated into object size info (phase  .objsz1).  I guess there 
> > > > > might be
> > > > > some performance impact.
> > > > > 
> > > > > However, if we mark this builtin as PURE, NOTRROW, etc, then the 
> > > > > negative
> > > > > performance impact will be reduced to minimum?
> > > > 
> > > > You can't drop it during objsz1 pass though, otherwise __bdos wouldn't
> > > > be able to figure out the dynamic sizes in case of normal (non-early)
> > > > inlining - caller takes address of a counted_by array, passes it down
> > > > to callee which is only inlined late and uses __bdos, or callee takes 
> > > > address
> > > > and returns it and caller uses __bdos, etc. - so it would need to be 
> > > > objsz2.
> > > > 
> > > > And while the builtin (or if it is an internal detail rather than user
> > > > accessible builtin an internal function) could be even 
> > > > const/nothrow/leaf if
> > > > the arguments contain the loads from the structure 2 fields, I'm afraid 
> > > > it
> > > > will still have huge code generation impact, prevent tons of pre-IPA
> > > > optimizations.  And it will need some work to handle it properly during
> > > > inlining heuristics, because in GIMPLE the COMPONENT_REF loads aren't 
> > > > gimple
> > > > values, so it wouldn't be just the builtin/internal-fn call to be 
> > > > ignored,
> > > > but also the count load from memory.
> > > 
> > > I think we want to track the value, not the "memory" in the builtin call,
> > > so GIMPLE would be
> > > 
> > > _1 = x.L;
> > > .. = __builtin_with_access_and_size (&x.buf, _1, -1);
> > 
> > Before adding the __builtin_with_access_and_size, the code is:
> > 
> > &x.buf
> > 
> > After inserting the built-in, it becomes:
> > 
> > _1 = x.L;
> > __builtin_with_access_and_size (&x.buf, _1, -1).
> > 
> > 
> > So, the # of total instructions, the # of LOADs, and the # of calls will 
> > all be increased.
> > There will be impact to the inlining decision definitely.
> 
> Note we have to make sure, if x is a pointer and we want to instrument 
> &x->buf that we
> Can dereference x.  Possibly doing
> 
> _1 = x ? x->Len : -1;
> 
> I’m not sure the C standard makes accessing x->Len unconditionally not 
> undefined behavior when &x->buf is computed.  Definitely it’s a violation of 
> the abstract machine if Len is volatile qualified (but we can reject such 
> counted_by or instantiations as volatile qualified types).

I believe it is implicit UB to do &x->buf if there is
no object *x because the wording assumes the existence
of an object.  In that case accessing x->L should
be fine too.  

In practice the access may trap  for other reasons 
(mprotect etc.),  but I guess this is acceptable,
but should probably be documented...

We might need the x?  to not run into trouble with
those offsetof  implementations written using null
pointer.  Although in this case maybe one could
hope that the load will get optimized anyway ...

Martin

> 
> Richard 
> 
> > 
> > > 
> > > also please make sure to use an internal function for
> > > __builtin_with_access_and_size,
> > > I don't think we want to expose this to users - it's an implementation 
> > > detail.
> > 
> > Okay, will define it as an internal function (add it to internal-fn.def). 
> > -:)
> > 
> > Qing
> > > 
> > > Richard.
> > > 
> > > > 
> > > >   Jakub
> > > > 
> > 



Re: [PATCH] libcpp: Improve the diagnostic for poisoned identifiers [PR36887]

2023-10-26 Thread Lewis Hyatt
On Thu, Oct 26, 2023 at 4:49 AM Christophe Lyon
 wrote:
> We have noticed that the new tests fail on aarch64 with:
> .../aarch64-unknown-linux-gnu/libc/usr/lib/crt1.o: in function `_start':
> .../sysdeps/aarch64/start.S:110:(.text+0x38): undefined reference to `main'
>
> Looking at the test, I'd say it lacks a dg-do compile (to avoid
> linking), but how does it work on other targets?

Thanks for pointing it out. I am definitely under the impression that
{ dg-do compile } is the default and doesn't need to be specified, I
have never seen it not be the case before... Is that just not correct?
I tried it out on the cfarm (gcc185) for aarch64-redhat-linux and it
works for me there too, I tried the test individually and also as part
of the whole check-gcc-c++ target.

I do see that there are target-dependent functions in
testsuite/lib/*.exp that will change dg-do-what-default under some
circumstances... but I also see in dg-pch.exp (which is the one
relevant for this test g++.dg/pch/pr36887.C) that dg-do-what-default
is set to compile explicitly.

Note sure what the best next step is, should I just add { dg-do
compile } since it's harmless in any case, or is there something else
worth looking into here? I'm not sure why I couldn't reproduce the
issue on the compile farm machine either, maybe you wouldn't mind
please check if adding this line fixes it for you anyway? Thanks...

-Lewis


[Committed] RISC-V: Pass abi to g++ rvv testsuite

2023-10-26 Thread Patrick O'Neill



On 10/26/23 06:30, Jeff Law wrote:



On 10/25/23 18:13, Patrick O'Neill wrote:

On rv32gcv testcases like g++.target/riscv/rvv/base/bug-22.C fail with:
FAIL: g++.target/riscv/rvv/base/bug-22.C (test for excess errors)
Excess errors:
cc1plus: error: ABI requires '-march=rv32'

This patch adds the -mabi argument to g++ rvv tests.

gcc/testsuite/ChangeLog:

 * g++.target/riscv/rvv/rvv.exp: Add -mabi argument to CFLAGS.

OK.
jeff


Committed, thanks.

Patrick



Re: [PATCH] Fortran: Fix incompatible types between INTEGER(8) and TYPE(c_ptr)

2023-10-26 Thread Paul-Antoine Arras

Hi Thomas,

On 26/10/2023 18:16, you wrote:

Hi!

On 2023-10-26T13:24:04+0200, Paul-Antoine Arras  wrote:

--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
@@ -0,0 +1,57 @@
+! { dg-do compile }
+! { dg-additional-options "-fopenmp" }
+[...]



--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
@@ -0,0 +1,57 @@
+! { dg-do compile }
+! { dg-additional-options "-fopenmp" }
+[...]


OpenMP is not universally supported across different GCC configurations,
so this will FAIL for some.  Therefore, please either guard with
effective target:

 @item fopenmp
 Target supports OpenMP via @option{-fopenmp}.



Would the following be enough?


diff --git gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90 
gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
index 7dd510400f3..131603d3819 100644
--- gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
+++ gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
@@ -1,4 +1,5 @@
 ! { dg-do compile }
+! { dg-require-effective-target fopenmp }
 ! { dg-additional-options "-fopenmp" }
 !
 ! This failed to compile the declare variant directive due to the C_PTR 
diff --git gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90 gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90

index 05ccb771eee..060d29d0275 100644
--- gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
+++ gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
@@ -1,4 +1,5 @@
 ! { dg-do compile }
+! { dg-require-effective-target fopenmp }
 ! { dg-additional-options "-fopenmp" }
 !
 ! Ensure that C_PTR and C_FUNPTR are reported as incompatible types in variant 


Thanks,
--
PA



Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Qing Zhao


> On Oct 26, 2023, at 5:20 AM, Martin Uecker  wrote:
> 
> Am Donnerstag, dem 26.10.2023 um 10:45 +0200 schrieb Richard Biener:
>> On Wed, Oct 25, 2023 at 8:16 PM Martin Uecker  wrote:
>>> 
>>> Am Mittwoch, dem 25.10.2023 um 13:13 +0200 schrieb Richard Biener:
 
> Am 25.10.2023 um 12:47 schrieb Martin Uecker :
> 
> Am Mittwoch, dem 25.10.2023 um 06:25 -0400 schrieb Siddhesh Poyarekar:
>>> On 2023-10-25 04:16, Martin Uecker wrote:
>>> Am Mittwoch, dem 25.10.2023 um 08:43 +0200 schrieb Richard Biener:
 
> Am 24.10.2023 um 22:38 schrieb Martin Uecker :
> 
> Am Dienstag, dem 24.10.2023 um 20:30 + schrieb Qing Zhao:
>> Hi, Sid,
>> 
>> Really appreciate for your example and detailed explanation. Very 
>> helpful.
>> I think that this example is an excellent example to show (almost) 
>> all the issues we need to consider.
>> 
>> I slightly modified this example to make it to be compilable and 
>> run-able, as following:
>> (but I still cannot make the incorrect reordering or DSE happening, 
>> anyway, the potential reordering possibility is there…)
>> 
>> 1 #include 
>> 2 struct A
>> 3 {
>> 4  size_t size;
>> 5  char buf[] __attribute__((counted_by(size)));
>> 6 };
>> 7
>> 8 static size_t
>> 9 get_size_from (void *ptr)
>> 10 {
>> 11  return __builtin_dynamic_object_size (ptr, 1);
>> 12 }
>> 13
>> 14 void
>> 15 foo (size_t sz)
>> 16 {
>> 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
>> sizeof(char));
>> 18  obj->size = sz;
>> 19  obj->buf[0] = 2;
>> 20  __builtin_printf ("%d\n", get_size_from (obj->buf));
>> 21  return;
>> 22 }
>> 23
>> 24 int main ()
>> 25 {
>> 26  foo (20);
>> 27  return 0;
>> 28 }
>> 
>> 
>> 
>> 
 When it’s set I suppose.  Turn
 
 X.l = n;
 
 Into
 
 X.l = __builtin_with_size (x.buf, n);
>>> 
>>> It would turn
>>> 
>>> some_variable = (&) x.buf
>>> 
>>> into
>>> 
>>> some_variable = __builtin_with_size ( (&) x.buf. x.len)
>>> 
>>> 
>>> So the later access to x.buf and not the initialization
>>> of a member of the struct (which is too early).
>>> 
>> 
>> Hmm, so with Qing's example above, are you suggesting the transformation
>> be to foo like so:
>> 
>> 14 void
>> 15 foo (size_t sz)
>> 16 {
>> 16.5  void * _1;
>> 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
>> sizeof(char));
>> 18  obj->size = sz;
>> 19  obj->buf[0] = 2;
>> 19.5  _1 = __builtin_with_size (obj->buf, obj->size);
>> 20  __builtin_printf (“%d\n", get_size_from (_1));
>> 21  return;
>> 22 }
>> 
>> If yes then this could indeed work.  I think I got thrown off by the
>> reference to __bdos.
> 
> Yes. I think it is important not to evaluate the size at the
> access to buf and not the allocation, because the point is to
> recover it from the size member even when the compiler can't
> see the original allocation.
 
 But if the access is through a pointer without the attribute visible
 even the Frontend cannot recover?
>>> 
>>> Yes, if the access is using a struct-with-FAM without the attribute
>>> the FE would not be insert the builtin.  BDOS could potentially
>>> still see the original allocation but if it doesn't, then there is
>>> no information.
>>> 
 We’d need to force type correctness and give up on indirecting
 through an int * when it can refer to two diffenent container types.
 The best we can do I think is mark allocation sites and hope for
 some basic code hygiene (not clobbering size or array pointer
 through pointers without the appropriately attributed type)
>>> 
>>> I am do not fully understand what you are referring to.
>> 
>> struct A { int n; int data[n]; };
>> struct B { long n; int data[n]; };
>> 
>> int *p = flag ? a->data : b->data;
>> 
>> access *p;
>> 
>> Since we need to allow interoperability of pointers (a->data is
>> convertible to a non-fat pointer of type int *) this leaves us with
>> ambiguity we need to conservatively handle to avoid false positives.
> 
> For BDOS, I would expect this to work exactly like:
> 
> char aa[n1];
> char bb[n2];
> char *p = flag ? aa : bb;
> 
> (or similar code with malloc). In fact it does:
> 
> https://godbolt.org/z/bK68YKqhe
> (cheating a bit and also the sub-object version of
> BDOS does not seem to work)
> 
>> 
>> We _might_ want to diagnose decay of a->data to int *, but IIRC
>> there's no way (or proposal) to allow declaring a corresponding
>> fat pointer, so it's not a good designed feature.
> 
> As a l

Re: [PATCH] Improve tree_expr_nonnegative_p by using the ranger [PR111959]

2023-10-26 Thread Mikael Morin

Le 26/10/2023 à 11:29, Richard Biener a écrit :

On Wed, Oct 25, 2023 at 5:51 AM Andrew Pinski  wrote:

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 40767736389..2a2a90230f5 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -15047,15 +15047,33 @@ tree_single_nonnegative_warnv_p (tree t, bool 
*strict_overflow_p, int depth)
return RECURSE (TREE_OPERAND (t, 1)) && RECURSE (TREE_OPERAND (t, 2));

  case SSA_NAME:
-  /* Limit the depth of recursion to avoid quadratic behavior.
-This is expected to catch almost all occurrences in practice.
-If this code misses important cases that unbounded recursion
-would not, passes that need this information could be revised
-to provide it through dataflow propagation.  */
-  return (!name_registered_for_update_p (t)
- && depth < param_max_ssa_name_query_depth
- && gimple_stmt_nonnegative_warnv_p (SSA_NAME_DEF_STMT (t),
- strict_overflow_p, depth));
+  {
+   /* For integral types, querry the global range if possible. */


query


+   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
+ {
+   value_range vr;
+   if (get_global_range_query ()->range_of_expr (vr, t)
+   && !vr.varying_p () && !vr.undefined_p ())
+ {
+   /* If the range is nonnegative, return true. */
+   if (vr.nonnegative_p ())
+ return true;
+
+   /* If the range is non-positive, then return false. */
+   if (vr.nonpositive_p ())
+ return false;


That's testing for <= 0, nonnegative for >= 0.  This means when
vr.nonpositive_p () the value could still be zero (and nonnegative),
possibly be figured out by the recursion below.

Since we don't have negative_p () do we want to test
nonpositive_p () && nonzero_p () instead?



Maybe !contains_zero_p () instead of nonzero_p () ?

nonzero_p seems to check that the range is exactly the "all but zero" 
range as visible in the implementation:


  inline bool
  irange::nonzero_p () const
  {
if (undefined_p ())
  return false;

wide_int zero = wi::zero (TYPE_PRECISION (type ()));
return *this == int_range<2> (type (), zero, zero, VR_ANTI_RANGE);
  }



Re: [PATCH v4] bpf: Improvements in CO-RE builtins implementation.

2023-10-26 Thread David Faust



On 10/26/23 08:08, Cupertino Miranda wrote:
> 
> Changes from v1:
>  - Fixed Davids remarks on initial patch.
>  - Fixed mistake with deleted '*'.
> 
> Changes from v2:
>  - Reversed return value for bpf_const_not_ok_for_debug_p function.

Hmm..

> +static bool
> +bpf_const_not_ok_for_debug_p (rtx p)
> +{
> +  if (GET_CODE (p) == UNSPEC
> +  && XINT (p, 1) == UNSPEC_CORE_RELOC)
> +return false;
> +
> +  return true;
> +}
> +
> +#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
> +#define TARGET_CONST_NOT_OK_FOR_DEBUG_P bpf_const_not_ok_for_debug_p

 -- Target Hook: bool TARGET_CONST_NOT_OK_FOR_DEBUG_P (rtx X)
 This hook should return true if X should not be emitted into debug
 sections.

As written now, won't this cause all ordinary (non-UNSPEC_CORE_RELOC)
consts to get rejected for debug? ("regular" debug i.e. DWARF, not to
be confused with the BTF.ext holding CO-RE relocs).

I see other targets implementing the hook returning true only in
specific cases and false otherwise.  The implementation in v1 makes
more sense to me.  Could you explain why flip the return value?

> 
> Changes from v3:
>  - Fixed ICE in two bpf-next tests:
>  -  if (!wi->is_lhs)
>  -   core_mark_as_access_index (gimple_get_lhs (wi->stmt));
>  +  tree lhs;
>  +  if (!wi->is_lhs
>  + && (lhs = gimple_get_lhs (wi->stmt)) != NULL_TREE)
>  +   core_mark_as_access_index (lhs);
> 


Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Martin Uecker
Am Donnerstag, dem 26.10.2023 um 09:13 -0700 schrieb Kees Cook:
> On Thu, Oct 26, 2023 at 10:15:10AM +0200, Martin Uecker wrote:
> > but not this:
> > 

x->count = 11;
> > char *p = &x->buf;
> > x->count = 1;
> > p[10] = 1; // !
> 
> This seems fine to me -- it's how I'd expect it to work: "10" is beyond
> "1".

Note that the store would be allowed.

> 
> > (because the pointer is passed around the
> > store to the counter)
> > 
> > and also here the second store is then irrelevant
> > for the access:
> > 
> > x->count = 10;
> > char* p = &x->buf;
> > ...
> > x->count = 1; // somewhere else
> > 
> > p[9] = 1; // ok, because count matter when buf was accesssed.
> 
> This is less great, but I can understand why it happens. "p" loses the
> association with "x". It'd be nice if "p" had to way to retain that it
> was just an alias for x->buf, so future p access would check count.

The problem is not to discover that p is an alias to x->buf, 
but that it seems difficult to make sure that stores to 
x->count are not reordered relative to the final access to
p[i] you want to check, so that you then get the right value.

> 
> But this appears to be an existing limitation in other areas where an
> assignment will cause the loss of object association. (I've run into
> this before.) It's just more surprising in the above example because in
> the past the loss of association would cause __bdos() to revert back to
> "SIZE_MAX" results ("I don't know the size") rather than an "outdated"
> size, which may get us into unexpected places...
> 
> > IMHO this makes sense also from the user side and
> > are the desirable semantics we discussed before.
> > 
> > But can you take a look at this?
> > 
> > 
> > This should simulate it fairly well:
> > https://godbolt.org/z/xq89aM7Gr
> > 
> > (the call to the noinline function would go away,
> > but not necessarily its impact on optimization)
> 
> Yeah, this example should be a very rare situation: a leaf function is
> changing the characteristics of the struct but returning a buffer within
> it to the caller. The more likely glitch would be from:
> 
> int main()
> {
>   struct foo *f = foo_alloc(7);
>   char *p = FAM_ACCESS(f, size, buf);
> 
>   printf("%ld\n", __builtin_dynamic_object_size(p, 0));
>   test1(f); // or just "f->count = 10;" no function call needed
>   printf("%ld\n", __builtin_dynamic_object_size(p, 0));
> 
>   return 0;
> }
> 
> which reports:
> 7
> 7
> 
> instead of:
> 7
> 10
> 
> This kind of "get an alias" situation is pretty common in the kernel
> as a way to have a convenient "handle" to the array. In the case of a
> "fill the array without knowing the actual final size" code pattern,
> things would immediately break:
> 
>   struct foo *f;
>   char *p;
>   int i;
> 
>   f = alloc(maximum_possible);
>   f->count = 0;
>   p = f->buf;
> 
>   for (i; data_is_available() && i < maximum_possible; i++) {
>   f->count ++;
>   p[i] = next_data_item();
>   }
> 
> Now perhaps the problem here is that "count" cannot be used for a count
> of "logically valid members in the array" but must always be a count of
> "allocated member space in the array", which I guess is tolerable, but
> isn't ideal -- I'd like to catch logic bugs in addition to allocation
> bugs, but the latter is certainly much more important to catch.

Maybe we could have a warning when f->buf is not directly
accessed.

Martin

> 



Re: [PATCH] libcpp: Improve the diagnostic for poisoned identifiers [PR36887]

2023-10-26 Thread Christophe Lyon
On Thu, 26 Oct 2023 at 18:18, Lewis Hyatt  wrote:
>
> On Thu, Oct 26, 2023 at 4:49 AM Christophe Lyon
>  wrote:
> > We have noticed that the new tests fail on aarch64 with:
> > .../aarch64-unknown-linux-gnu/libc/usr/lib/crt1.o: in function `_start':
> > .../sysdeps/aarch64/start.S:110:(.text+0x38): undefined reference to `main'
> >
> > Looking at the test, I'd say it lacks a dg-do compile (to avoid
> > linking), but how does it work on other targets?
>
> Thanks for pointing it out. I am definitely under the impression that
> { dg-do compile } is the default and doesn't need to be specified, I
> have never seen it not be the case before... Is that just not correct?
> I tried it out on the cfarm (gcc185) for aarch64-redhat-linux and it
> works for me there too, I tried the test individually and also as part
> of the whole check-gcc-c++ target.
>
> I do see that there are target-dependent functions in
> testsuite/lib/*.exp that will change dg-do-what-default under some
> circumstances... but I also see in dg-pch.exp (which is the one
> relevant for this test g++.dg/pch/pr36887.C) that dg-do-what-default
> is set to compile explicitly.

Indeed, thanks for checking.

> Note sure what the best next step is, should I just add { dg-do
> compile } since it's harmless in any case, or is there something else
> worth looking into here? I'm not sure why I couldn't reproduce the
> issue on the compile farm machine either, maybe you wouldn't mind
> please check if adding this line fixes it for you anyway? Thanks...

Can you share the compile line for this test in g++.log?

Actually I'm seeing several similar errors in our g++.log, not
reported before because they were "pre-existing" failures.
So something is confusing the testsuite and puts it into link mode.

I am currently building from scratch, without our CI scripts to get
some additional logs in a setup that probably matches yours. Then I
should be able to add more traces a dejagnu level to understand what's
happening.

Thanks,

Christophe


Re: [PATCH] Fortran: Fix incompatible types between INTEGER(8) and TYPE(c_ptr)

2023-10-26 Thread Thomas Schwinge
Hi PA!

On 2023-10-26T18:28:07+0200, Paul-Antoine Arras  wrote:
> On 26/10/2023 18:16, you wrote:
>> On 2023-10-26T13:24:04+0200, Paul-Antoine Arras  
>> wrote:
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
>>> @@ -0,0 +1,57 @@
>>> +! { dg-do compile }
>>> +! { dg-additional-options "-fopenmp" }
>>> +[...]
>>
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
>>> @@ -0,0 +1,57 @@
>>> +! { dg-do compile }
>>> +! { dg-additional-options "-fopenmp" }
>>> +[...]
>>
>> OpenMP is not universally supported across different GCC configurations,
>> so this will FAIL for some.  Therefore, please either guard with
>> effective target:
>>
>>  @item fopenmp
>>  Target supports OpenMP via @option{-fopenmp}.
>>
>
> Would the following be enough?
>
>> diff --git gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90 
>> gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
>> index 7dd510400f3..131603d3819 100644
>> --- gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
>> +++ gcc/testsuite/gfortran.dg/c_ptr_tests_20.f90
>> @@ -1,4 +1,5 @@
>>  ! { dg-do compile }
>> +! { dg-require-effective-target fopenmp }
>>  ! { dg-additional-options "-fopenmp" }
>>  !
>>  ! This failed to compile the declare variant directive due to the C_PTR
>> diff --git gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90 
>> gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
>> index 05ccb771eee..060d29d0275 100644
>> --- gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
>> +++ gcc/testsuite/gfortran.dg/c_ptr_tests_21.f90
>> @@ -1,4 +1,5 @@
>>  ! { dg-do compile }
>> +! { dg-require-effective-target fopenmp }
>>  ! { dg-additional-options "-fopenmp" }
>>  !
>>  ! Ensure that C_PTR and C_FUNPTR are reported as incompatible types in 
>> variant

Yes, that looks good to me -- you may push "as obvious".


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[COMMITTED] [range-ops] Remove unneeded parameters from rv_fold.

2023-10-26 Thread Aldy Hernandez
Now that the floating point version of rv_fold calculates its result
in an frange, we can remove the superfluous LB, UB, and MAYBE_NAN
arguments.

gcc/ChangeLog:

* range-op-float.cc (range_operator::fold_range): Remove
superfluous code.
(range_operator::rv_fold): Remove unneeded arguments.
(operator_plus::rv_fold): Same.
(operator_minus::rv_fold): Same.
(operator_mult::rv_fold): Same.
(operator_div::rv_fold): Same.
* range-op-mixed.h: Remove lb, ub, and maybe_nan arguments from
rv_fold methods.
* range-op.h: Same.
---
 gcc/range-op-float.cc | 72 +--
 gcc/range-op-mixed.h  |  6 
 gcc/range-op.h|  2 --
 3 files changed, 14 insertions(+), 66 deletions(-)

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index cb2c0a61a1a..ffa3dec133e 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -63,41 +63,14 @@ range_operator::fold_range (frange &r, tree type,
 }
 
   frange res;
-  REAL_VALUE_TYPE lb, ub;
-  bool maybe_nan;
-  rv_fold (res, type,
-  lb, ub, maybe_nan,
+  rv_fold (r, type,
   op1.lower_bound (), op1.upper_bound (),
   op2.lower_bound (), op2.upper_bound (), trio.op1_op2 ());
 
-  // Handle possible NANs by saturating to the appropriate INF if only
-  // one end is a NAN.  If both ends are a NAN, just return a NAN.
-  bool lb_nan = real_isnan (&lb);
-  bool ub_nan = real_isnan (&ub);
-  if (lb_nan && ub_nan)
-{
-  r.set_nan (type);
-  gcc_checking_assert (r == res);
-  return true;
-}
-  if (lb_nan)
-lb = dconstninf;
-  else if (ub_nan)
-ub = dconstinf;
-
-  r.set (type, lb, ub);
-
-  if (lb_nan || ub_nan || maybe_nan
-  || op1.maybe_isnan ()
-  || op2.maybe_isnan ())
-// Keep the default NAN (with a varying sign) set by the setter.
-;
-  else
-r.clear_nan ();
-
+  if (r.known_isnan ())
+return true;
   if (op1.maybe_isnan () || op2.maybe_isnan ())
-res.update_nan ();
-  gcc_checking_assert (r == res);
+r.update_nan ();
 
   // If the result has overflowed and flag_trapping_math, folding this
   // operation could elide an overflow or division by zero exception.
@@ -130,17 +103,11 @@ range_operator::fold_range (frange &r, tree type,
 // UB, the final range has the possibility of a NAN.
 void
 range_operator::rv_fold (frange &r, tree type,
-REAL_VALUE_TYPE &lb,
-REAL_VALUE_TYPE &ub,
-bool &maybe_nan,
 const REAL_VALUE_TYPE &,
 const REAL_VALUE_TYPE &,
 const REAL_VALUE_TYPE &,
 const REAL_VALUE_TYPE &, relation_kind) const
 {
-  lb = dconstninf;
-  ub = dconstinf;
-  maybe_nan = true;
   r.set (type, dconstninf, dconstinf, nan_state (true));
 }
 
@@ -2479,14 +2446,15 @@ operator_plus::op2_range (frange &r, tree type,
 
 void
 operator_plus::rv_fold (frange &r, tree type,
-   REAL_VALUE_TYPE &lb, REAL_VALUE_TYPE &ub,
-   bool &maybe_nan,
const REAL_VALUE_TYPE &lh_lb,
const REAL_VALUE_TYPE &lh_ub,
const REAL_VALUE_TYPE &rh_lb,
const REAL_VALUE_TYPE &rh_ub,
relation_kind) const
 {
+  REAL_VALUE_TYPE lb, ub;
+  bool maybe_nan = false;
+
   frange_arithmetic (PLUS_EXPR, type, lb, lh_lb, rh_lb, dconstninf);
   frange_arithmetic (PLUS_EXPR, type, ub, lh_ub, rh_ub, dconstinf);
 
@@ -2496,8 +2464,6 @@ operator_plus::rv_fold (frange &r, tree type,
   // [+INF] + [-INF] = NAN
   else if (real_isinf (&lh_ub, false) && real_isinf (&rh_lb, true))
 maybe_nan = true;
-  else
-maybe_nan = false;
 
   // Handle possible NANs by saturating to the appropriate INF if only
   // one end is a NAN.  If both ends are a NAN, just return a NAN.
@@ -2543,14 +2509,15 @@ operator_minus::op2_range (frange &r, tree type,
 
 void
 operator_minus::rv_fold (frange &r, tree type,
-REAL_VALUE_TYPE &lb, REAL_VALUE_TYPE &ub,
-bool &maybe_nan,
 const REAL_VALUE_TYPE &lh_lb,
 const REAL_VALUE_TYPE &lh_ub,
 const REAL_VALUE_TYPE &rh_lb,
 const REAL_VALUE_TYPE &rh_ub,
 relation_kind) const
 {
+  REAL_VALUE_TYPE lb, ub;
+  bool maybe_nan = false;
+
   frange_arithmetic (MINUS_EXPR, type, lb, lh_lb, rh_ub, dconstninf);
   frange_arithmetic (MINUS_EXPR, type, ub, lh_ub, rh_lb, dconstinf);
 
@@ -2560,8 +2527,6 @@ operator_minus::rv_fold (frange &r, tree type,
   // [-INF] - [-INF] = NAN
   else if (real_isinf (&lh_lb, true) && real_isinf (&rh_lb, true))
 maybe_nan = true;
-  else
-maybe_nan = false;
 
   // Handle possible NANs by saturating to the appropriate INF if only
   // one end is a NAN.  If both end

[COMMITTED] [range-ops] Add frange& argument to rv_fold.

2023-10-26 Thread Aldy Hernandez
The floating point version of rv_fold returns its result in 3 pieces:
the lower bound, the upper bound, and a maybe_nan bit.  It is cleaner
to return everything in an frange, thus bringing the floating point
version of rv_fold in line with the integer version.

This first patch adds an frange argument, while keeping the current
functionality, and asserting that we get the same results.  In a
follow-up patch I will nuke the now useless 3 arguments.  Splitting
this into two patches makes it easier to bisect any problems if any
should arise.

gcc/ChangeLog:

* range-op-float.cc (range_operator::fold_range): Pass frange
argument to rv_fold.
(range_operator::rv_fold): Add frange argument.
(operator_plus::rv_fold): Same.
(operator_minus::rv_fold): Same.
(operator_mult::rv_fold): Same.
(operator_div::rv_fold): Same.
* range-op-mixed.h: Add frange argument to rv_fold methods.
* range-op.h: Same.
---
 gcc/range-op-float.cc | 120 +-
 gcc/range-op-mixed.h  |  15 +++---
 gcc/range-op.h|   4 +-
 3 files changed, 107 insertions(+), 32 deletions(-)

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index 0951bd385a9..cb2c0a61a1a 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -62,9 +62,11 @@ range_operator::fold_range (frange &r, tree type,
   return true;
 }
 
+  frange res;
   REAL_VALUE_TYPE lb, ub;
   bool maybe_nan;
-  rv_fold (lb, ub, maybe_nan, type,
+  rv_fold (res, type,
+  lb, ub, maybe_nan,
   op1.lower_bound (), op1.upper_bound (),
   op2.lower_bound (), op2.upper_bound (), trio.op1_op2 ());
 
@@ -75,6 +77,7 @@ range_operator::fold_range (frange &r, tree type,
   if (lb_nan && ub_nan)
 {
   r.set_nan (type);
+  gcc_checking_assert (r == res);
   return true;
 }
   if (lb_nan)
@@ -92,6 +95,10 @@ range_operator::fold_range (frange &r, tree type,
   else
 r.clear_nan ();
 
+  if (op1.maybe_isnan () || op2.maybe_isnan ())
+res.update_nan ();
+  gcc_checking_assert (r == res);
+
   // If the result has overflowed and flag_trapping_math, folding this
   // operation could elide an overflow or division by zero exception.
   // Avoid returning a singleton +-INF, to keep the propagators (DOM
@@ -122,19 +129,19 @@ range_operator::fold_range (frange &r, tree type,
 // MAYBE_NAN is set to TRUE if, in addition to any result in LB or
 // UB, the final range has the possibility of a NAN.
 void
-range_operator::rv_fold (REAL_VALUE_TYPE &lb,
-  REAL_VALUE_TYPE &ub,
-  bool &maybe_nan,
-  tree type ATTRIBUTE_UNUSED,
-  const REAL_VALUE_TYPE &lh_lb ATTRIBUTE_UNUSED,
-  const REAL_VALUE_TYPE &lh_ub ATTRIBUTE_UNUSED,
-  const REAL_VALUE_TYPE &rh_lb ATTRIBUTE_UNUSED,
-  const REAL_VALUE_TYPE &rh_ub ATTRIBUTE_UNUSED,
-  relation_kind) const
+range_operator::rv_fold (frange &r, tree type,
+REAL_VALUE_TYPE &lb,
+REAL_VALUE_TYPE &ub,
+bool &maybe_nan,
+const REAL_VALUE_TYPE &,
+const REAL_VALUE_TYPE &,
+const REAL_VALUE_TYPE &,
+const REAL_VALUE_TYPE &, relation_kind) const
 {
   lb = dconstninf;
   ub = dconstinf;
   maybe_nan = true;
+  r.set (type, dconstninf, dconstinf, nan_state (true));
 }
 
 bool
@@ -2471,8 +2478,9 @@ operator_plus::op2_range (frange &r, tree type,
 }
 
 void
-operator_plus::rv_fold (REAL_VALUE_TYPE &lb, REAL_VALUE_TYPE &ub,
-   bool &maybe_nan, tree type,
+operator_plus::rv_fold (frange &r, tree type,
+   REAL_VALUE_TYPE &lb, REAL_VALUE_TYPE &ub,
+   bool &maybe_nan,
const REAL_VALUE_TYPE &lh_lb,
const REAL_VALUE_TYPE &lh_ub,
const REAL_VALUE_TYPE &rh_lb,
@@ -2490,6 +2498,21 @@ operator_plus::rv_fold (REAL_VALUE_TYPE &lb, 
REAL_VALUE_TYPE &ub,
 maybe_nan = true;
   else
 maybe_nan = false;
+
+  // Handle possible NANs by saturating to the appropriate INF if only
+  // one end is a NAN.  If both ends are a NAN, just return a NAN.
+  bool lb_nan = real_isnan (&lb);
+  bool ub_nan = real_isnan (&ub);
+  if (lb_nan && ub_nan)
+{
+  r.set_nan (type);
+  return;
+}
+  if (lb_nan)
+lb = dconstninf;
+  else if (ub_nan)
+ub = dconstinf;
+  r.set (type, lb, ub, nan_state (maybe_nan));
 }
 
 
@@ -2519,8 +2542,9 @@ operator_minus::op2_range (frange &r, tree type,
 }
 
 void
-operator_minus::rv_fold (REAL_VALUE_TYPE &lb, REAL_VALUE_TYPE &ub,
-bool &maybe_nan, tree type,
+operator_minus::rv_fold (frange &r, tree type,
+REAL_VALU

[PATCH] [range-op] Remove unused variable in fold_range.

2023-10-26 Thread Aldy Hernandez
Missed this...

gcc/ChangeLog:

* range-op-float.cc (range_operator::fold_range): Delete unused
variable.
---
 gcc/range-op-float.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index ffa3dec133e..75816942f8c 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -62,7 +62,6 @@ range_operator::fold_range (frange &r, tree type,
   return true;
 }
 
-  frange res;
   rv_fold (r, type,
   op1.lower_bound (), op1.upper_bound (),
   op2.lower_bound (), op2.upper_bound (), trio.op1_op2 ());
-- 
2.41.0



Re: [PATCH] Fortran: Fix incompatible types between INTEGER(8) and TYPE(c_ptr)

2023-10-26 Thread tobias.bur...@siemens.com
Missed that, sorry. Please move the test to gfortran.dg/gomp/ — those tests are 
automatically compiled with -fopenmp, hence, no need for 
dg-(additional-)options.

Not applicable here, but tests that use omp.h or "use omp_lib" or the runtime 
have to be under libgomp/testsuite - or, for compile checks (esp. for 
is-invalid diagnostic), the required decls have to be copied into the test file.

Tobias



[PING][PATCH] Include safe-ctype.h after C++ standard headers, to avoid over-poisoning

2023-10-26 Thread Dimitry Andric
Ping. It would be nice to get this QoL fix in.

-Dimitry

> On 28 Sep 2023, at 18:37, Dimitry Andric  wrote:
> 
> Ref: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111632
> 
> When building gcc's C++ sources against recent libc++, the poisoning of
> the ctype macros due to including safe-ctype.h before including C++
> standard headers such as , , etc, causes many compilation
> errors, similar to:
> 
> In file included from /home/dim/src/gcc/master/gcc/gensupport.cc:23:
> In file included from /home/dim/src/gcc/master/gcc/system.h:233:
> In file included from /usr/include/c++/v1/vector:321:
> In file included from
> /usr/include/c++/v1/__format/formatter_bool.h:20:
> In file included from
> /usr/include/c++/v1/__format/formatter_integral.h:32:
> In file included from /usr/include/c++/v1/locale:202:
> /usr/include/c++/v1/__locale:546:5: error: '__abi_tag__' attribute
> only applies to structs, variables, functions, and namespaces
>   546 | _LIBCPP_INLINE_VISIBILITY
>   | ^
> /usr/include/c++/v1/__config:813:37: note: expanded from macro
> '_LIBCPP_INLINE_VISIBILITY'
>   813 | #  define _LIBCPP_INLINE_VISIBILITY _LIBCPP_HIDE_FROM_ABI
>   | ^
> /usr/include/c++/v1/__config:792:26: note: expanded from macro
> '_LIBCPP_HIDE_FROM_ABI'
>   792 |
>   __attribute__((__abi_tag__(_LIBCPP_TOSTRING(
> _LIBCPP_VERSIONED_IDENTIFIER
>   |  ^
> In file included from /home/dim/src/gcc/master/gcc/gensupport.cc:23:
> In file included from /home/dim/src/gcc/master/gcc/system.h:233:
> In file included from /usr/include/c++/v1/vector:321:
> In file included from
> /usr/include/c++/v1/__format/formatter_bool.h:20:
> In file included from
> /usr/include/c++/v1/__format/formatter_integral.h:32:
> In file included from /usr/include/c++/v1/locale:202:
> /usr/include/c++/v1/__locale:547:37: error: expected ';' at end of
> declaration list
>   547 | char_type toupper(char_type __c) const
>   | ^
> /usr/include/c++/v1/__locale:553:48: error: too many arguments
> provided to function-like macro invocation
>   553 | const char_type* toupper(char_type* __low, const
>   char_type* __high) const
>   |^
> /home/dim/src/gcc/master/gcc/../include/safe-ctype.h:146:9: note:
> macro 'toupper' defined here
>   146 | #define toupper(c) do_not_use_toupper_with_safe_ctype
>   | ^
> 
> This is because libc++ uses different transitive includes than
> libstdc++, and some of those transitive includes pull in various ctype
> declarations (typically via ).
> 
> There was already a special case for including  before
> safe-ctype.h, so move the rest of the C++ standard header includes to
> the same location, to fix the problem.
> 
> Signed-off-by: Dimitry Andric 
> ---
> gcc/system.h | 39 ++-
> 1 file changed, 18 insertions(+), 21 deletions(-)
> 
> diff --git a/gcc/system.h b/gcc/system.h
> index e924152ad4c..7a516b11438 100644
> --- a/gcc/system.h
> +++ b/gcc/system.h
> @@ -194,27 +194,8 @@ extern int fprintf_unlocked (FILE *, const char *, ...);
> #undef fread_unlocked
> #undef fwrite_unlocked
> 
> -/* Include  before "safe-ctype.h" to avoid GCC poisoning
> -   the ctype macros through safe-ctype.h */
> -
> -#ifdef __cplusplus
> -#ifdef INCLUDE_STRING
> -# include 
> -#endif
> -#endif
> -
> -/* There are an extraordinary number of issues with .
> -   The last straw is that it varies with the locale.  Use libiberty's
> -   replacement instead.  */
> -#include "safe-ctype.h"
> -
> -#include 
> -
> -#include 
> -
> -#if !defined (errno) && defined (HAVE_DECL_ERRNO) && !HAVE_DECL_ERRNO
> -extern int errno;
> -#endif
> +/* Include C++ standard headers before "safe-ctype.h" to avoid GCC
> +   poisoning the ctype macros through safe-ctype.h */
> 
> #ifdef __cplusplus
> #if defined (INCLUDE_ALGORITHM) || !defined (HAVE_SWAP_IN_UTILITY)
> @@ -229,6 +210,9 @@ extern int errno;
> #ifdef INCLUDE_SET
> # include 
> #endif
> +#ifdef INCLUDE_STRING
> +# include 
> +#endif
> #ifdef INCLUDE_VECTOR
> # include 
> #endif
> @@ -245,6 +229,19 @@ extern int errno;
> # include 
> #endif
> 
> +/* There are an extraordinary number of issues with .
> +   The last straw is that it varies with the locale.  Use libiberty's
> +   replacement instead.  */
> +#include "safe-ctype.h"
> +
> +#include 
> +
> +#include 
> +
> +#if !defined (errno) && defined (HAVE_DECL_ERRNO) && !HAVE_DECL_ERRNO
> +extern int errno;
> +#endif
> +
> /* Some of glibc's string inlines cause warnings.  Plus we'd rather
>   rely on (and therefore test) GCC's string builtins.  */
> #define __NO_STRING_INLINES
> -- 
> 2.42.0
> 



Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Martin Uecker
Am Donnerstag, dem 26.10.2023 um 16:41 + schrieb Qing Zhao:
> 
> > On Oct 26, 2023, at 5:20 AM, Martin Uecker  wrote:
> > 
> > Am Donnerstag, dem 26.10.2023 um 10:45 +0200 schrieb Richard Biener:
> > > On Wed, Oct 25, 2023 at 8:16 PM Martin Uecker  wrote:
> > > > 
> > > > Am Mittwoch, dem 25.10.2023 um 13:13 +0200 schrieb Richard Biener:
> > > > > 
> > > > > > Am 25.10.2023 um 12:47 schrieb Martin Uecker :
> > > > > > 
> > > > > > Am Mittwoch, dem 25.10.2023 um 06:25 -0400 schrieb Siddhesh 
> > > > > > Poyarekar:
> > > > > > > > On 2023-10-25 04:16, Martin Uecker wrote:
> > > > > > > > Am Mittwoch, dem 25.10.2023 um 08:43 +0200 schrieb Richard 
> > > > > > > > Biener:
> > > > > > > > > 
> > > > > > > > > > Am 24.10.2023 um 22:38 schrieb Martin Uecker 
> > > > > > > > > > :
> > > > > > > > > > 
> > > > > > > > > > Am Dienstag, dem 24.10.2023 um 20:30 + schrieb Qing 
> > > > > > > > > > Zhao:
> > > > > > > > > > > Hi, Sid,
> > > > > > > > > > > 
> > > > > > > > > > > Really appreciate for your example and detailed 
> > > > > > > > > > > explanation. Very helpful.
> > > > > > > > > > > I think that this example is an excellent example to show 
> > > > > > > > > > > (almost) all the issues we need to consider.
> > > > > > > > > > > 
> > > > > > > > > > > I slightly modified this example to make it to be 
> > > > > > > > > > > compilable and run-able, as following:
> > > > > > > > > > > (but I still cannot make the incorrect reordering or DSE 
> > > > > > > > > > > happening, anyway, the potential reordering possibility 
> > > > > > > > > > > is there…)
> > > > > > > > > > > 
> > > > > > > > > > > 1 #include 
> > > > > > > > > > > 2 struct A
> > > > > > > > > > > 3 {
> > > > > > > > > > > 4  size_t size;
> > > > > > > > > > > 5  char buf[] __attribute__((counted_by(size)));
> > > > > > > > > > > 6 };
> > > > > > > > > > > 7
> > > > > > > > > > > 8 static size_t
> > > > > > > > > > > 9 get_size_from (void *ptr)
> > > > > > > > > > > 10 {
> > > > > > > > > > > 11  return __builtin_dynamic_object_size (ptr, 1);
> > > > > > > > > > > 12 }
> > > > > > > > > > > 13
> > > > > > > > > > > 14 void
> > > > > > > > > > > 15 foo (size_t sz)
> > > > > > > > > > > 16 {
> > > > > > > > > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + 
> > > > > > > > > > > sz * sizeof(char));
> > > > > > > > > > > 18  obj->size = sz;
> > > > > > > > > > > 19  obj->buf[0] = 2;
> > > > > > > > > > > 20  __builtin_printf (“%d\n", get_size_from (obj->buf));
> > > > > > > > > > > 21  return;
> > > > > > > > > > > 22 }
> > > > > > > > > > > 23
> > > > > > > > > > > 24 int main ()
> > > > > > > > > > > 25 {
> > > > > > > > > > > 26  foo (20);
> > > > > > > > > > > 27  return 0;
> > > > > > > > > > > 28 }
> > > > > > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > > > When it’s set I suppose.  Turn
> > > > > > > > > 
> > > > > > > > > X.l = n;
> > > > > > > > > 
> > > > > > > > > Into
> > > > > > > > > 
> > > > > > > > > X.l = __builtin_with_size (x.buf, n);
> > > > > > > > 
> > > > > > > > It would turn
> > > > > > > > 
> > > > > > > > some_variable = (&) x.buf
> > > > > > > > 
> > > > > > > > into
> > > > > > > > 
> > > > > > > > some_variable = __builtin_with_size ( (&) x.buf. x.len)
> > > > > > > > 
> > > > > > > > 
> > > > > > > > So the later access to x.buf and not the initialization
> > > > > > > > of a member of the struct (which is too early).
> > > > > > > > 
> > > > > > > 
> > > > > > > Hmm, so with Qing's example above, are you suggesting the 
> > > > > > > transformation
> > > > > > > be to foo like so:
> > > > > > > 
> > > > > > > 14 void
> > > > > > > 15 foo (size_t sz)
> > > > > > > 16 {
> > > > > > > 16.5  void * _1;
> > > > > > > 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
> > > > > > > sizeof(char));
> > > > > > > 18  obj->size = sz;
> > > > > > > 19  obj->buf[0] = 2;
> > > > > > > 19.5  _1 = __builtin_with_size (obj->buf, obj->size);
> > > > > > > 20  __builtin_printf (“%d\n", get_size_from (_1));
> > > > > > > 21  return;
> > > > > > > 22 }
> > > > > > > 
> > > > > > > If yes then this could indeed work.  I think I got thrown off by 
> > > > > > > the
> > > > > > > reference to __bdos.
> > > > > > 
> > > > > > Yes. I think it is important not to evaluate the size at the
> > > > > > access to buf and not the allocation, because the point is to
> > > > > > recover it from the size member even when the compiler can't
> > > > > > see the original allocation.
> > > > > 
> > > > > But if the access is through a pointer without the attribute visible
> > > > > even the Frontend cannot recover?
> > > > 
> > > > Yes, if the access is using a struct-with-FAM without the attribute
> > > > the FE would not insert the builtin.  BDOS could potentially
> > > > still see the original allocation but if it doesn't, then there is
> > > > no information.
> > > > 
> > > > > We’d need to force type correctness and give up on indirecting
> > > > > through an int * when it can refer to 

[PATCH htdocs] bugs: Mention -D_GLIBCXX_ASSERTIONS and -D_GLIBCXX_DEBUG

2023-10-26 Thread Sam James
These options both enable more checking within the C++ standard library
and can expose errors in submitted code.

-D_GLIBCXX_DEBUG is mentioned separately because while we want people to try it,
it's not always feasible because it requires the whole program and any used
libraries to also be built with it (as it breaks ABI).

Signed-off-by: Sam James 
---
 htdocs/bugs/index.html | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/htdocs/bugs/index.html b/htdocs/bugs/index.html
index da3d4c0d..a5a38f42 100644
--- a/htdocs/bugs/index.html
+++ b/htdocs/bugs/index.html
@@ -56,6 +56,13 @@ makes a difference, or if compiling with 
-fsanitize=undefined
 produces any run-time errors, then your code is probably not correct.
 
 
+We also ask that for C++ code, users test their programs with
+-D_GLIBCXX_ASSERTIONS. If you're able to rebuild the entire
+program (including any libraries it uses, because it breaks ABI), please do try
+-D_GLIBCXX_DEBUG which enables thorough checking throughout
+the C++ standard library. If either of these fail, this is a strong indicator
+of an error in your code.
+
 Summarized bug reporting instructions
 
 After this summary, you'll find detailed instructions that explain
-- 
2.42.0



[Patch, fortran] PR104625 ICE in fixup_array_ref, at fortran/resolve.cc:9275 since r10-2912-g70570ec192745095

2023-10-26 Thread Paul Richard Thomas
Hi All,

The attached patch fixes the original problem, in which parentheses around
the selector in select type constructs caused ICEs. Stacked parentheses
caused problems in trans-stmt.cc. Rather than tracking this down, the
redundant parentheses were removed on resolution of the selector
expression.

Fixing the primary problem revealed "Unclassifiable statement" errors when
using array references of the associate variable and this was fixed as
well. Finally, the error triggered by using associate variables associated
with non-variable selectors was corrected to ensure that only vector
indexed selectors were flagged up as such. The secondary error in
associate_55.f90 was corrected for this, since the selector might or might
not be vector indexed.

Regtests fine - OK for trunk?

Paul

Fortran: Fix some problems with SELECT TYPE selectors [PR104625].

2023-10-26  Paul Thomas  

gcc/fortran
PR fortran/104625
* expr.cc (gfc_check_vardef_context): Check that the target
does have a vector index before emitting the specific error.
* match.cc (copy_ts_from_selector_to_associate): Ensure that
class valued operator expressions set the selector rank and
use the rank to provide the associate variable with an
appropriate array spec.
* resolve.cc (resolve_operator): Reduce stacked parentheses to
a single pair.
(fixup_array_ref): Extract selector symbol from parentheses.

gcc/testsuite/
PR fortran/104625
* gfortran.dg/pr104625.f90: New test.
* gfortran.dg/associate_55.f90: Change error check text.
diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index 663fe63dea6..c668baeef8c 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -6474,7 +6474,8 @@ gfc_check_vardef_context (gfc_expr* e, bool pointer, bool alloc_obj,
 	{
 	  if (context)
 	{
-	  if (assoc->target->expr_type == EXPR_VARIABLE)
+	  if (assoc->target->expr_type == EXPR_VARIABLE
+		  && gfc_has_vector_index (assoc->target))
 		gfc_error ("%qs at %L associated to vector-indexed target"
 			   " cannot be used in a variable definition"
 			   " context (%s)",
diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc
index c926f38058f..05995c6f97f 100644
--- a/gcc/fortran/match.cc
+++ b/gcc/fortran/match.cc
@@ -6341,12 +6341,13 @@ copy_ts_from_selector_to_associate (gfc_expr *associate, gfc_expr *selector)
   else if (selector->ts.type == BT_CLASS
 	   && CLASS_DATA (selector)
 	   && CLASS_DATA (selector)->as
-	   && ref && ref->type == REF_ARRAY)
+	   && ((ref && ref->type == REF_ARRAY)
+	   || selector->expr_type == EXPR_OP))
 {
   /* Ensure that the array reference type is set.  We cannot use
 	 gfc_resolve_expr at this point, so the usable parts of
 	 resolve.cc(resolve_array_ref) are employed to do it.  */
-  if (ref->u.ar.type == AR_UNKNOWN)
+  if (ref && ref->u.ar.type == AR_UNKNOWN)
 	{
 	  ref->u.ar.type = AR_ELEMENT;
 	  for (int i = 0; i < ref->u.ar.dimen + ref->u.ar.codimen; i++)
@@ -6360,7 +6361,7 @@ copy_ts_from_selector_to_associate (gfc_expr *associate, gfc_expr *selector)
 	  }
 	}
 
-  if (ref->u.ar.type == AR_FULL)
+  if (!ref || ref->u.ar.type == AR_FULL)
 	selector->rank = CLASS_DATA (selector)->as->rank;
   else if (ref->u.ar.type == AR_SECTION)
 	selector->rank = ref->u.ar.dimen;
@@ -6372,12 +6373,15 @@ copy_ts_from_selector_to_associate (gfc_expr *associate, gfc_expr *selector)
 
   if (rank)
 {
-  for (int i = 0; i < ref->u.ar.dimen + ref->u.ar.codimen; i++)
-	if (ref->u.ar.dimen_type[i] == DIMEN_ELEMENT
-	|| (ref->u.ar.dimen_type[i] == DIMEN_UNKNOWN
-		&& ref->u.ar.end[i] == NULL
-		&& ref->u.ar.stride[i] == NULL))
-	  rank--;
+  if (ref)
+	{
+	  for (int i = 0; i < ref->u.ar.dimen + ref->u.ar.codimen; i++)
+	if (ref->u.ar.dimen_type[i] == DIMEN_ELEMENT
+	  || (ref->u.ar.dimen_type[i] == DIMEN_UNKNOWN
+		  && ref->u.ar.end[i] == NULL
+		  && ref->u.ar.stride[i] == NULL))
+	  rank--;
+	}
 
   if (rank)
 	{
diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 861f69ac20f..9f4dc072645 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -4138,6 +4138,16 @@ resolve_operator (gfc_expr *e)
   bool dual_locus_error;
   bool t = true;
 
+  /* Reduce stacked parentheses to single pair  */
+  while (e->expr_type == EXPR_OP
+	 && e->value.op.op == INTRINSIC_PARENTHESES
+	 && e->value.op.op1->expr_type == EXPR_OP
+	 && e->value.op.op1->value.op.op == INTRINSIC_PARENTHESES)
+{
+  gfc_expr *tmp = gfc_copy_expr (e->value.op.op1);
+  gfc_replace_expr (e, tmp);
+}
+
   /* Resolve all subnodes-- give them types.  */
 
   switch (e->value.op.op)
@@ -9451,8 +9461,25 @@ fixup_array_ref (gfc_expr **expr1, gfc_expr *expr2,
 {
   gfc_ref *nref = (*expr1)->ref;
   gfc_symbol *sym1 = (*expr1)->symtree->n.sym;
-  gfc_symbol *sym2 = expr2 ? expr2->symtree->n.sym : NULL;
+  gfc_symbol *sym2;
+  gfc_expr *selector = gfc_copy_expr (expr2);
+
   (*expr1)->rank = rank;
+  if (selector)
+{
+  gfc_resolve_expr (selector);
+

Re: HELP: Will the reordering happen? Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-10-26 Thread Richard Biener



> Am 26.10.2023 um 19:05 schrieb Martin Uecker :
> 
> Am Donnerstag, dem 26.10.2023 um 16:41 + schrieb Qing Zhao:
>> 
 On Oct 26, 2023, at 5:20 AM, Martin Uecker  wrote:
>>> 
>>> Am Donnerstag, dem 26.10.2023 um 10:45 +0200 schrieb Richard Biener:
 On Wed, Oct 25, 2023 at 8:16 PM Martin Uecker  wrote:
> 
> Am Mittwoch, dem 25.10.2023 um 13:13 +0200 schrieb Richard Biener:
>> 
>>> Am 25.10.2023 um 12:47 schrieb Martin Uecker :
>>> 
>>> Am Mittwoch, dem 25.10.2023 um 06:25 -0400 schrieb Siddhesh Poyarekar:
> On 2023-10-25 04:16, Martin Uecker wrote:
> Am Mittwoch, dem 25.10.2023 um 08:43 +0200 schrieb Richard Biener:
>> 
>>> Am 24.10.2023 um 22:38 schrieb Martin Uecker :
>>> 
>>> Am Dienstag, dem 24.10.2023 um 20:30 + schrieb Qing Zhao:
 Hi, Sid,
 
 Really appreciate for your example and detailed explanation. Very 
 helpful.
 I think that this example is an excellent example to show (almost) 
 all the issues we need to consider.
 
 I slightly modified this example to make it to be compilable and 
 run-able, as following:
 (but I still cannot make the incorrect reordering or DSE 
 happening, anyway, the potential reordering possibility is there…)
 
 1 #include 
 2 struct A
 3 {
 4  size_t size;
 5  char buf[] __attribute__((counted_by(size)));
 6 };
 7
 8 static size_t
 9 get_size_from (void *ptr)
 10 {
 11  return __builtin_dynamic_object_size (ptr, 1);
 12 }
 13
 14 void
 15 foo (size_t sz)
 16 {
 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
 sizeof(char));
 18  obj->size = sz;
 19  obj->buf[0] = 2;
 20  __builtin_printf (“%d\n", get_size_from (obj->buf));
 21  return;
 22 }
 23
 24 int main ()
 25 {
 26  foo (20);
 27  return 0;
 28 }
 
 
 
 
>> When it’s set I suppose.  Turn
>> 
>> X.l = n;
>> 
>> Into
>> 
>> X.l = __builtin_with_size (x.buf, n);
> 
> It would turn
> 
> some_variable = (&) x.buf
> 
> into
> 
> some_variable = __builtin_with_size ( (&) x.buf. x.len)
> 
> 
> So the later access to x.buf and not the initialization
> of a member of the struct (which is too early).
> 
 
 Hmm, so with Qing's example above, are you suggesting the 
 transformation
 be to foo like so:
 
 14 void
 15 foo (size_t sz)
 16 {
 16.5  void * _1;
 17  struct A *obj = __builtin_malloc (sizeof(struct A) + sz * 
 sizeof(char));
 18  obj->size = sz;
 19  obj->buf[0] = 2;
 19.5  _1 = __builtin_with_size (obj->buf, obj->size);
 20  __builtin_printf (“%d\n", get_size_from (_1));
 21  return;
 22 }
 
 If yes then this could indeed work.  I think I got thrown off by the
 reference to __bdos.
>>> 
>>> Yes. I think it is important not to evaluate the size at the
>>> access to buf and not the allocation, because the point is to
>>> recover it from the size member even when the compiler can't
>>> see the original allocation.
>> 
>> But if the access is through a pointer without the attribute visible
>> even the Frontend cannot recover?
> 
> Yes, if the access is using a struct-with-FAM without the attribute
> the FE would not insert the builtin.  BDOS could potentially
> still see the original allocation but if it doesn't, then there is
> no information.
> 
>> We’d need to force type correctness and give up on indirecting
>> through an int * when it can refer to two diffenent container types.
>> The best we can do I think is mark allocation sites and hope for
>> some basic code hygiene (not clobbering size or array pointer
>> through pointers without the appropriately attributed type)
> 
> I am do not fully understand what you are referring to.
 
 struct A { int n; int data[n]; };
 struct B { long n; int data[n]; };
 
 int *p = flag ? a->data : b->data;
 
 access *p;
 
 Since we need to allow interoperability of pointers (a->data is
 convertible to a non-fat pointer of type int *) this leaves us with
 ambiguity we need to conservatively handle to avoid false positives.
>>> 
>>> For BDOS, I would expect this to work exactly like:
>>> 
>>> char aa[n1];
>>> char bb[n2

Ping^2: [PATCH v2 0/2] Replace intl/ with out-of-tree GNU gettext

2023-10-26 Thread Arsen Arsenović
Ping^2

It'd be nice to get this patch into 14 (given the intended C strictness
changes, especially).

Have a lovely evening.
-- 
Arsen Arsenović


signature.asc
Description: PGP signature


Re: [PATCH] libcpp: Improve the diagnostic for poisoned identifiers [PR36887]

2023-10-26 Thread Lewis Hyatt
On Thu, Oct 26, 2023 at 12:48 PM Christophe Lyon
 wrote:
>
> On Thu, 26 Oct 2023 at 18:18, Lewis Hyatt  wrote:
> >
> > On Thu, Oct 26, 2023 at 4:49 AM Christophe Lyon
> >  wrote:
> > > We have noticed that the new tests fail on aarch64 with:
> > > .../aarch64-unknown-linux-gnu/libc/usr/lib/crt1.o: in function `_start':
> > > .../sysdeps/aarch64/start.S:110:(.text+0x38): undefined reference to 
> > > `main'
> > >
> > > Looking at the test, I'd say it lacks a dg-do compile (to avoid
> > > linking), but how does it work on other targets?
> >
> > Thanks for pointing it out. I am definitely under the impression that
> > { dg-do compile } is the default and doesn't need to be specified, I
> > have never seen it not be the case before... Is that just not correct?
> > I tried it out on the cfarm (gcc185) for aarch64-redhat-linux and it
> > works for me there too, I tried the test individually and also as part
> > of the whole check-gcc-c++ target.
> >
> > I do see that there are target-dependent functions in
> > testsuite/lib/*.exp that will change dg-do-what-default under some
> > circumstances... but I also see in dg-pch.exp (which is the one
> > relevant for this test g++.dg/pch/pr36887.C) that dg-do-what-default
> > is set to compile explicitly.
>
> Indeed, thanks for checking.
>
> > Note sure what the best next step is, should I just add { dg-do
> > compile } since it's harmless in any case, or is there something else
> > worth looking into here? I'm not sure why I couldn't reproduce the
> > issue on the compile farm machine either, maybe you wouldn't mind
> > please check if adding this line fixes it for you anyway? Thanks...
>
> Can you share the compile line for this test in g++.log?
>

Sure, here is what I got on aarch64 for

make RUNTESTFLAGS=pch.exp=pr36887.C check-gcc-c++

For making the PCH:

xg++ -B/dev/shm/lhyatt/build/gcc/testsuite/g++/../../ ./pr36887.H
-fdiagnostics-plain-output -nostdinc++
-I/dev/shm/lhyatt/build/aarch64-unknown-linux-gnu/libstdc++-v3/include/aarch64-unknown-linux-gnu
-I/dev/shm/lhyatt/build/aarch64-unknown-linux-gnu/libstdc++-v3/include
-I/dev/shm/lhyatt/src/libstdc++-v3/libsupc++
-I/dev/shm/lhyatt/src/libstdc++-v3/include/backward
-I/dev/shm/lhyatt/src/libstdc++-v3/testsuite/util -fmessage-length=0
-g -o pr36887.H.gch

For compiling the test:

xg++ -B/dev/shm/lhyatt/build/gcc/testsuite/g++/../../
/dev/shm/lhyatt/src/gcc/testsuite/g++.dg/pch/pr36887.C
-fdiagnostics-plain-output -nostdinc++
-I/dev/shm/lhyatt/build/aarch64-unknown-linux-gnu/libstdc++-v3/include/aarch64-unknown-linux-gnu
-I/dev/shm/lhyatt/build/aarch64-unknown-linux-gnu/libstdc++-v3/include
-I/dev/shm/lhyatt/src/libstdc++-v3/libsupc++
-I/dev/shm/lhyatt/src/libstdc++-v3/include/backward
-I/dev/shm/lhyatt/src/libstdc++-v3/testsuite/util -fmessage-length=0
-g -I. -Dwith_PCH -S -o pr36887.s

(and then it repeats with -O2 added, or with -g removed as well)

> Actually I'm seeing several similar errors in our g++.log, not
> reported before because they were "pre-existing" failures.
> So something is confusing the testsuite and puts it into link mode.
>
> I am currently building from scratch, without our CI scripts to get
> some additional logs in a setup that probably matches yours. Then I
> should be able to add more traces a dejagnu level to understand what's
> happening.
>
> Thanks,
>
> Christophe


Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer

2023-10-26 Thread Richard Sandiford
Richard Biener  writes:
>> Am 26.10.2023 um 13:59 schrieb Li, Pan2 :
>> 
>> Thanks Richard for comments.
>> 
>>> Can you explain why this is necessary?  In particular what is lhs_rtx
>>> mode vs ops[0].value mode?
>> 
>> For testcase gcc.target/aarch64/sve/popcount_1.c, the rtl are list as below.
>> 
>> The lhs_rtx is (reg:VNx2SI 98 [ vect__5.36 ]).
>> The ops[0].value is (reg:VNx2DI 104).
>> 
>> The restriction removing make the vector rtl enter expand_fn_using_insn and 
>> of course hit the INTEGER_P assertion.
>
> But I think this shows we mid-selected the optab, a convert_move is certainly 
> not correct unconditionally here (the target might not support that)

Agreed.  Allowing TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out)
makes sense if the called function allows the input and output modes
to vary.  That's true for internal functions that eventually map to
two-mode optabs.  But we can't remove the condition for calls to
other functions, at least not without some fix-ups.

ISTM that the problem being hit is the one described by the removed
comment.

In other words, I don't think simply removing the test from the vectoriser
is correct.  It needs to be replaced by something more selective.

Thanks,
Richard

>> Pan
>> 
>> -Original Message-
>> From: Richard Biener  
>> Sent: Thursday, October 26, 2023 4:38 PM
>> To: Li, Pan2 
>> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang 
>> ; kito.ch...@gmail.com; Liu, Hongtao 
>> ; Richard Sandiford 
>> Subject: Re: [PATCH v2] VECT: Remove the type size restriction of vectorizer
>> 
>>> On Thu, Oct 26, 2023 at 4:18 AM  wrote:
>>> 
>>> From: Pan Li 
>>> 
>>> Update in v2:
>>> 
>>> * Fix one ICE of type assertion.
>>> * Adjust some test cases for aarch64 sve and riscv vector.
>>> 
>>> Original log:
>>> 
>>> The vectoriable_call has one restriction of the size of data type.
>>> Aka DF to DI is allowed but SF to DI isn't. You may see below message
>>> when try to vectorize function call like lrintf.
>>> 
>>> void
>>> test_lrintf (long *out, float *in, unsigned count)
>>> {
>>>  for (unsigned i = 0; i < count; i++)
>>>out[i] = __builtin_lrintf (in[i]);
>>> }
>>> 
>>> lrintf.c:5:26: missed: couldn't vectorize loop
>>> lrintf.c:5:26: missed: not vectorized: unsupported data-type
>>> 
>>> Then the standard name pattern like lrintmn2 cannot work for different
>>> data type size like SF => DI. This patch would like to remove this data
>>> type size check and unblock the standard name like lrintmn2.
>>> 
>>> The below test are passed for this patch.
>>> 
>>> * The x86 bootstrap and regression test.
>>> * The aarch64 regression test.
>>> * The risc-v regression tests.
>>> 
>>> gcc/ChangeLog:
>>> 
>>>* internal-fn.cc (expand_fn_using_insn): Add vector int assertion.
>>>* tree-vect-stmts.cc (vectorizable_call): Remove size check.
>>> 
>>> gcc/testsuite/ChangeLog:
>>> 
>>>* gcc.target/aarch64/sve/clrsb_1.c: Adjust checker.
>>>* gcc.target/aarch64/sve/clz_1.c: Ditto.
>>>* gcc.target/aarch64/sve/popcount_1.c: Ditto.
>>>* gcc.target/riscv/rvv/autovec/unop/popcount.c: Ditto.
>>> 
>>> Signed-off-by: Pan Li 
>>> ---
>>> gcc/internal-fn.cc  |  3 ++-
>>> gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c  |  3 +--
>>> gcc/testsuite/gcc.target/aarch64/sve/clz_1.c|  3 +--
>>> gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c   |  3 +--
>>> .../gcc.target/riscv/rvv/autovec/unop/popcount.c|  2 +-
>>> gcc/tree-vect-stmts.cc  | 13 -
>>> 6 files changed, 6 insertions(+), 21 deletions(-)
>>> 
>>> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
>>> index 61d5a9e4772..17c0f4c3805 100644
>>> --- a/gcc/internal-fn.cc
>>> +++ b/gcc/internal-fn.cc
>>> @@ -281,7 +281,8 @@ expand_fn_using_insn (gcall *stmt, insn_code icode, 
>>> unsigned int noutputs,
>>>emit_move_insn (lhs_rtx, ops[0].value);
>>>   else
>>>{
>>> - gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
>>> + gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>>> +  || VECTOR_INTEGER_TYPE_P (TREE_TYPE (lhs)));
>> 
>> Can you explain why this is necessary?  In particular what is lhs_rtx
>> mode vs ops[0].value mode?
>> 
>>>  convert_move (lhs_rtx, ops[0].value, 0);
>> 
>> I'm not sure convert_move handles vector modes correctly.  Richard
>> probably added this code, CCed.
>> 
>> Richard.
>> 
>>>}
>>> }
>>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c 
>>> b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
>>> index bdc9856faaf..940d08bbc7b 100644
>>> --- a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
>>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
>>> @@ -18,5 +18,4 @@ clrsb_64 (unsigned int *restrict dst, uint64_t *restrict 
>>> src, int size)
>>> }
>>> 
>>> /* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, 
>>> z[0-9]+\.s\n} 1 } } */
>>> 

[PATCH] c++: simplify build_new_1 when in a template context

2023-10-26 Thread Patrick Palka
Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
OK for trunk?

-- >8 --

Since when in a template context we end up just discarding the result
of build_new_1, we don't have to bother with much of the code generation
it performs.  This patch makes the function exit early, returning a dummy
non-erroneous result, once we've done pretty much all ahead of time checks
that we could have.  In passing avoid building up 'outer_nelts_check' in
a template context too.

gcc/cp/ChangeLog:

* init.cc (build_new_1): Don't build 'outer_nelts_check' when
in a template context.  Exit early returning void_node when
in a template context.  Simplify the remainder of the function
accordingly.
---
 gcc/cp/init.cc | 36 +++-
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 6444f0a8518..fab1126ca65 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -3281,7 +3281,7 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
  return error_mark_node;
}
}
-  else
+  else if (!processing_template_decl)
{
  /* When a runtime check is necessary because the array size
 isn't constant, keep only the top-most seven bits (starting
@@ -3467,10 +3467,15 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
}
 }
 
+  if (processing_template_decl)
+/* We've done pretty much all we can to check this non-dependent new-expr
+   ahead of time.  Any further work is unnecessary since (build_new just
+   discards the result) and/or unsuitable inside a template context.  */
+return void_node;
+
   /* If we found a simple case of PLACEMENT_EXPR above, then copy it
  into a temporary variable.  */
-  if (!processing_template_decl
-  && TREE_CODE (alloc_call) == CALL_EXPR
+  if (TREE_CODE (alloc_call) == CALL_EXPR
   && call_expr_nargs (alloc_call) == 2
   && TREE_CODE (TREE_TYPE (CALL_EXPR_ARG (alloc_call, 0))) == INTEGER_TYPE
   && TYPE_PTR_P (TREE_TYPE (CALL_EXPR_ARG (alloc_call, 1
@@ -3609,25 +3614,7 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
  explicit_value_init_p = true;
}
 
-  if (processing_template_decl)
-   {
- /* Avoid an ICE when converting to a base in build_simple_base_path.
-We'll throw this all away anyway, and build_new will create
-a NEW_EXPR.  */
- tree t = fold_convert (build_pointer_type (elt_type), data_addr);
- /* build_value_init doesn't work in templates, and we don't need
-the initializer anyway since we're going to throw it away and
-rebuild it at instantiation time, so just build up a single
-constructor call to get any appropriate diagnostics.  */
- init_expr = cp_build_fold_indirect_ref (t);
- if (type_build_ctor_call (elt_type))
-   init_expr = build_special_member_call (init_expr,
-  complete_ctor_identifier,
-  init, elt_type,
-  LOOKUP_NORMAL,
-  complain);
-   }
-  else if (array_p)
+  if (array_p)
{
  tree vecinit = NULL_TREE;
  const size_t len = vec_safe_length (*init);
@@ -3721,8 +3708,7 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
 object being initialized, replace them now and don't try to
 preevaluate.  */
  bool had_placeholder = false;
- if (!processing_template_decl
- && TREE_CODE (init_expr) == INIT_EXPR)
+ if (TREE_CODE (init_expr) == INIT_EXPR)
TREE_OPERAND (init_expr, 1)
  = replace_placeholders (TREE_OPERAND (init_expr, 1),
  TREE_OPERAND (init_expr, 0),
@@ -3760,7 +3746,7 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
  alloc_fn,
  complain));
 
-  if (cleanup && init_expr && !processing_template_decl)
+  if (cleanup && init_expr)
/* Ack!  First we allocate the memory.  Then we set our sentry
   variable to true, and expand a cleanup that deletes the
   memory if sentry is true.  Then we run the constructor, and
-- 
2.42.0.482.g2e8e77cbac



  1   2   >