date:20210725

[Bug objc/101616] Objective-C frontend should not emit vtable/fixup messages (at least, not by default)

2021-07-25 Thread iains at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101616

Iain Sandoe  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Iain Sandoe  ---
(In reply to Matt Jacobson from comment #0)
> In 10.2.0, the Objective-C frontend (in NeXT v2 ABI mode) emits "fixup"
> messages for all message sends.

Please check 10.3, 11.(1,2rc) and master - I believe this is already fixed (and
back ported to 10.3).

I have not (yet) applied it to 9.x (so that would not appear until 9.5, if
done).

The changes are selective on the target OS version (since fixup messages _are_
emitted by the 'system' [i.e. last usable Xcode] compilers for earlier OS
versions).

So that 
gcc foo.m 
on a recent OS version should omit the fixup versions 
but with -mmacosx-version-min=10.5 the fixups versions should be emitted
(actually, with a few small changes as the OS version changes).

[Bug gcov-profile/101618] New: [GCOV] Wrong coverage caused by call site in a "for" statement

2021-07-25 Thread njuwy at smail dot nju.edu.cn via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101618

Bug ID: 101618
   Summary: [GCOV] Wrong coverage caused by call site in a "for"
statement
   Product: gcc
   Version: 10.2.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: gcov-profile
  Assignee: unassigned at gcc dot gnu.org
  Reporter: njuwy at smail dot nju.edu.cn
CC: marxin at gcc dot gnu.org
  Target Milestone: ---

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-pc-linux-gnu/10.2.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: ../configure -enable-checking=release -enable-languages=c,c++
-disable-multilib
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 10.2.0 (GCC) 

$ cat test.c
#include 
#include 
#include 
#include 
struct obstack {};
struct bitmap_head_def;
typedef struct bitmap_head_def *bitmap;
typedef const struct bitmap_head_def *const_bitmap;
typedef unsigned long BITMAP_WORD;
typedef struct bitmap_obstack {
  struct bitmap_element_def *elements;
  struct bitmap_head_def *heads;
  struct obstack obstack;
} bitmap_obstack;
typedef struct bitmap_element_def {
  struct bitmap_element_def *next;
  struct bitmap_element_def *prev;
  unsigned int indx;
  BITMAP_WORD bits[((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u))];
} bitmap_element;

struct bitmap_descriptor;

typedef struct bitmap_head_def {
  bitmap_element *first;
  bitmap_element *current;
  unsigned int indx;
  bitmap_obstack *obstack;
} bitmap_head;

bitmap_element bitmap_zero_bits;

typedef struct {
  bitmap_element *elt1;
  bitmap_element *elt2;
  unsigned word_no;
  BITMAP_WORD bits;
} bitmap_iterator;

static void __attribute__((noinline))
bmp_iter_set_init(bitmap_iterator *bi, const_bitmap map, unsigned start_bit,
  unsigned *bit_no) {
  bi->elt1 = map->first;
  bi->elt2 = ((void *)0);

  while (1) {
if (!bi->elt1) {
  bi->elt1 = &bitmap_zero_bits;
  break;
}

if (bi->elt1->indx >=
start_bit / (((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u)) * (8 * 8 * 1u)))
  break;
bi->elt1 = bi->elt1->next;
  }

  if (bi->elt1->indx !=
  start_bit / (((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u)) * (8 * 8 * 1u)))
start_bit = bi->elt1->indx *
(((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u)) * (8 * 8 * 1u));

  bi->word_no =
  start_bit / (8 * 8 * 1u) % ((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u));
  bi->bits = bi->elt1->bits[bi->word_no];
  bi->bits >>= start_bit % (8 * 8 * 1u);

  start_bit += !bi->bits;

  *bit_no = start_bit;
}

static void __attribute__((noinline))
bmp_iter_next(bitmap_iterator *bi, unsigned *bit_no) {
  bi->bits >>= 1;
  *bit_no += 1;
}

static unsigned char __attribute__((noinline))
bmp_iter_set_tail(bitmap_iterator *bi, unsigned *bit_no) {
  while (!(bi->bits & 1)) {
bi->bits >>= 1;
*bit_no += 1;
  }
  return 1;
}

static __inline__ unsigned char bmp_iter_set(bitmap_iterator *bi,
 unsigned *bit_no) {
  unsigned bno = *bit_no;
  BITMAP_WORD bits = bi->bits;
  bitmap_element *elt1;

  if (bits) {
while (!(bits & 1)) {
  bits >>= 1;
  bno += 1;
}
*bit_no = bno;
return 1;
  }

  *bit_no = ((bno + 64 - 1) / 64 * 64);
  bi->word_no++;

  elt1 = bi->elt1;
  while (1) {
while (bi->word_no != 2) {
  bi->bits = elt1->bits[bi->word_no];
  if (bi->bits) {
bi->elt1 = elt1;
return bmp_iter_set_tail(bi, bit_no);
  }
  *bit_no += 64;
  bi->word_no++;
}
elt1 = elt1->next;
if (!elt1) {
  bi->elt1 = elt1;
  return 0;
}
*bit_no = elt1->indx * (2 * 64);
bi->word_no = 0;
  }
}

extern void abort(void);

static void __attribute__((noinline)) catchme(int i) {
  if (i != 0 && i != 64)
abort();
}
static void __attribute__((noinline)) foobar(bitmap_head *chain) {
  bitmap_iterator rsi;
  unsigned int regno;
  for (bmp_iter_set_init(&(rsi), (chain), (0), &(regno));
   bmp_iter_set(&(rsi), &(regno)); bmp_iter_next(&(rsi), &(regno)))
catchme(regno);
}

int main() {
  bitmap_element elem = {(void *)0, (void *)0, 0, {1, 1}};
  bitmap_head live_throughout = {&elem, &elem, 0, (void *)0};
  foobar(&live_throughout);
  return 0;
}

$ gcc -O0 --coverage test.c;./a.out;gcov test;cat test.c.gcov
File 'test.c'
Lines executed:80.88% of 68
Creating 'test.c.gcov'

-:0:Source:test.c
-:0:Graph:test.gcno
-:0:Data:test.gcda
-:0:Runs:1
-:1:#include 
-:2:#include 
-:3:#include 
-:4:#include 
-:5:struct obstack {};
-:6:struct bitmap_head_def;
-:7:typedef struct bitmap_head_def *bitmap;
-:8:typedef const struct bitmap_head_def *const_bitmap;
-:9:typedef unsigned long BITMAP_WORD;
-:   10:typedef struct bitmap_obstack {

[Bug sanitizer/101111] xgcc cross-compiler for x86_64-apple-darwin in GCC 11.1 doesn't generate weak symbols, resulting in undefined reference to ___lsan_default_suppressions

2021-07-25 Thread iains at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101111

--- Comment #10 from Iain Sandoe  ---
(In reply to Mosè Giordano from comment #6)
> Created attachment 51038 [details]
> Patch to fix the reported issue
> 
> Please find attached a patch to fix the reported issue.  I replaced the
> bashism += with simple string interpolation, to make it compliant with
> strict POSIX shells.

This is OK for master and back-ports from the Darwin perspective (I guess
Martin plans to deal with this since he has assigned the PR, but if he does not
have time, I can apply this for you if you don't have write access).

[Bug sanitizer/101111] xgcc cross-compiler for x86_64-apple-darwin in GCC 11.1 doesn't generate weak symbols, resulting in undefined reference to ___lsan_default_suppressions

2021-07-25 Thread mose at gnu dot org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101111

--- Comment #11 from Mosè Giordano  ---
> This is OK for master and back-ports from the Darwin perspective

Thanks for the review and confirmation!

> (I guess Martin plans to deal with this since he has assigned the PR, but if 
> he does not have time, I can apply this for you if you don't have write 
> access).

Yes, I don't have write access, so someone else will need to apply the patch
:-)

[Bug bootstrap/100552] [11/12 Regression] configure: 32208: Syntax error: Bad substitution

2021-07-25 Thread ibuclaw at gdcproject dot org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100552

Iain Buclaw  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #4 from Iain Buclaw  ---
Given the two commits, I'm going to assume this is fixed.

[Bug d/101619] New: d: Change in DotTemplateExp type semantics leading to regression

2021-07-25 Thread ibuclaw at gdcproject dot org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101619

Bug ID: 101619
   Summary: d: Change in DotTemplateExp type semantics leading to
regression
   Product: gcc
   Version: 10.3.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: d
  Assignee: ibuclaw at gdcproject dot org
  Reporter: ibuclaw at gdcproject dot org
  Target Milestone: ---

A regression found in upstream was included in the fix for PR100999.
---
import std.range.primitives : isInputRange;
struct Slice
{
bool empty() const;
int front() const;
void popFront()()
{
}
}
static assert(isInputRange!(  Slice) == true);
static assert(isInputRange!(const Slice) == false);  // fails since PR100999

[Bug fortran/92482] BIND(C) with array-descriptor mishandled for type character

2021-07-25 Thread sandra at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92482

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 CC||sandra at gcc dot gnu.org

--- Comment #4 from sandra at gcc dot gnu.org ---
Tobias's recent commit (which he forgot to tag with this issue) changed the
"must be length 1" messages to something more descriptive, but the
functionality itself still isn't working.

commit b3d4011ba10275fbd5d6ec5a16d5aaebbdfb5d3c
Author: Tobias Burnus 
Date:   Wed Jul 21 09:36:48 2021 +0200

Fortran: Fix bind(C) character length checks

gcc/fortran/ChangeLog:

* decl.c (gfc_verify_c_interop_param): Update for F2008 + F2018
changes; reject unsupported bits with 'Error: Sorry,'.
* trans-expr.c (gfc_conv_procedure_call): Fix condition to
For using CFI descriptor with characters.

gcc/testsuite/ChangeLog:

* gfortran.dg/iso_c_binding_char_1.f90: Update dg-error.
* gfortran.dg/pr32599.f03: Use -std=-f2003 + update comment.
* gfortran.dg/bind_c_char_10.f90: New test.
* gfortran.dg/bind_c_char_6.f90: New test.
* gfortran.dg/bind_c_char_7.f90: New test.
* gfortran.dg/bind_c_char_8.f90: New test.
* gfortran.dg/bind_c_char_9.f90: New test.

[Bug tree-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617

--- Comment #1 from Andrew Pinski  ---
So it turns out you can make this generic and don't need to handle 1 specially
diff --git a/gcc/match.pd b/gcc/match.pd
index beb8d27535e..2af987278af 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3805,14 +3805,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (cond @0 INTEGER_CST@1 INTEGER_CST@2)
  (switch
+   /* a ? CST : -1 -> -(!a) | CST. */
+  (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
+   (with {
+  tree booltrue = constant_boolean_node (true, boolean_type_node);
+}
+(bit_ior (negate (convert (bit_xor (convert:boolean_type_node @0) {
booltrue; } ))) @2)))
+   /* a ? -1 : CST -> -(a) | CST. */
+  (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
+   (with {
+  tree booltrue = constant_boolean_node (true, boolean_type_node);
+}
+(bit_ior (negate (convert (convert:boolean_type_node @0))) @2)))
   (if (integer_zerop (@2))
(switch
 /* a ? 1 : 0 -> a if 0 and 1 are integral types. */
 (if (integer_onep (@1))
  (convert (convert:boolean_type_node @0)))
-/* a ? -1 : 0 -> -a. */
-(if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
- (negate (convert (convert:boolean_type_node @0
 /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
 (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
  (with {
@@ -3827,9 +3836,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  /* a ? 0 : 1 -> !a. */
  (if (integer_onep (@2))
   (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
- /* a ? -1 : 0 -> -(!a). */
- (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
-  (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; }

  /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */
  (if (INTEGRAL_TYPE_P (type) &&  integer_pow2p (@2))
   (with {

[Bug rtl-optimization/67382] RTL combiner is too eager to combine (plus (reg 92) (reg 92)) to (ashift (reg 92) (const_int 1))

2021-07-25 Thread segher at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67382

--- Comment #4 from Segher Boessenkool  ---
(In reply to Andrew Pinski from comment #3)
> Note combine is able to figure out the jump is unconditional but there is no
> "pattern" to match it:
> Trying 10 -> 17:
>10: r85:QI=0x1
>17: {flags:CCC=cmp(r85:QI-0x1,r85:QI);clobber scratch;}
>   REG_DEAD r85:QI
>   REG_EQUAL cmp(0,0x1)
> Failed to match this instruction:
> (parallel [
> (set (pc)
> (pc))
> (clobber (scratch:QI))
> ])
> Failed to match this instruction:
> (set (pc)
> (pc))

This is an other_insn, namely a cc_use_insn.  We currently use that for
changing the cc mode used.
update_cfg_for_uncondjump
There is code in combine for handling (set (pc) (pc)) in other_insn, in
fact (see where update_cfg_for_uncondjump is called).

There also is code (in recog_for_combine_1) that should handle noop sets
like this.  It does not print anything if that happens though.

Investigating.

[Bug c++/52099] Incorrectly applying conversion when catching pointer-to-members

2021-07-25 Thread redi at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52099

--- Comment #2 from Jonathan Wakely  ---
From the dup:


 Eric Fiselier 2016-01-20 03:50:56 UTC

Created attachment 37399 [details]
reproducer

I don't see where [except.handle] allows such a conversion.

Comment 1 Jonathan Wakely 2017-01-13 20:36:35 UTC

We're missing a check for cv-qualifiers in
__pointer_to_member_type_info::__pointer_catch that needs to be done before we
compare the pointees. Both pointees have type void() so we need to compare the
cv-quals before that info is lost.

Comment 2 Jonathan Wakely 2017-01-13 20:49:13 UTC

Hmm, we don't seem to have the cv-quals in __flags. That's a problem.

Comment 3 Jonathan Wakely 2017-01-13 21:08:10 UTC

When compiled with clang the pointees are different, so the match fails when
comparing them.

Using Clang:

(gdb) step
__cxxabiv1::__pbase_type_info::__pointer_catch (this=0x401cc0 , thrown_type=0x401d10 ,
thr_obj=0x7fffd220, outer=0)
at
/usr/lib/gcc/x86_64-redhat-linux/6.3.1/../../../../include/c++/6.3.1/cxxabi.h:309
(gdb) step
std::type_info::__do_catch (this=0x401c90 ,
thr_type=0x401cf8 ) at
../../../../libstdc++-v3/libsupc++/tinfo.cc:71
(gdb) p *this
$3 = {_vptr.type_info = 0x6030b0 , __name = 0x401c89  "KFvvE"}
(gdb) p *thr_type
$4 = {_vptr.type_info = 0x6030b0 , __name = 0x401cf0  "FvvE"}
(gdb) 


But using GCC the two pointee types are the same:

(gdb) p *this
$1 = {_vptr.type_info = 0x6030e8 , __name = 0x401c50  "FvvE"}
(gdb) p *thr_type
$2 = {_vptr.type_info = 0x6030e8 , __name = 0x401c50  "FvvE"}

So it looks like the problem is in the front-end where the typeinfo object for
a pointer to cv-qualified member function has the wrong pointee type.

Comment 4 Jonathan Wakely 2017-01-13 23:05:34 UTC

My front-end debugging skills are pitiful, but I've found something suspicious.
ptm_initializer uses TYPE_PTRMEM_POINTED_TO_TYPE to get that pointee type. For
this case that expands to TYPE_PTRMEMFUNC_FN_TYPE which is a call to
cp_build_qualified_type with the qualifiers from cp_type_quals.

But cp_type_quals tries pretty hard to ensure we never get cv-quals for a
function type. For the purposes of RTTI, where we really do care about the
difference between void() and void()const, do we want the memfn quals instead?

Comment 5 Jonathan Wakely 2017-01-13 23:20:33 UTC

For the attached reproducer this condition is never true in
cp_build_qualified_type_real

  /* But preserve any function-cv-quals on a FUNCTION_TYPE.  */
  if (TREE_CODE (type) == FUNCTION_TYPE)
type_quals |= type_memfn_quals (type);

As far as I can tell this is what's supposed to put the cv-quals back onto the
function type, so we'd have a pointee of type void() const not void().

[Bug c++/101620] New: gcc incorrectly makes concept checking in incomplete-class context

2021-07-25 Thread fchelnokov at gmail dot com via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101620

Bug ID: 101620
   Summary: gcc incorrectly makes concept checking in
incomplete-class context
   Product: gcc
   Version: 11.1.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: fchelnokov at gmail dot com
  Target Milestone: ---

Compilation of this program
```
struct A {};

template
concept DerivedOnceFromA = requires(T t) { { static_cast(t) }; };

template
struct B {};

struct C : A
{
B foo();
};
```
must fail, since B is checked in incomplete struct C context:
https://gcc.godbolt.org/z/ajh8MsY4n

[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617

Andrew Pinski  changed:

   What|Removed |Added

  Component|tree-optimization   |rtl-optimization

--- Comment #2 from Andrew Pinski  ---
I decided that this should really go on the RTL level

[Bug rtl-optimization/67382] RTL combiner is too eager to combine (plus (reg 92) (reg 92)) to (ashift (reg 92) (const_int 1))

2021-07-25 Thread segher at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67382

--- Comment #5 from Segher Boessenkool  ---
It turns out that noop other_insn is fine, and is accepted etc., but the
resulting i3 in this case is not.

[Bug d/101441] FUNCTION doesn't work in core.stdc.stdio functions without cast

2021-07-25 Thread ibuclaw at gdcproject dot org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101441

--- Comment #1 from Iain Buclaw  ---
Upstream dmd fixed this bug much later than 2.076.

https://github.com/dlang/dmd/pull/9920

[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617

--- Comment #3 from Andrew Pinski  ---
I have the ifcvt.c patch which adds this.

[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617

--- Comment #4 from Andrew Pinski  ---
Created attachment 51203
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51203&action=edit
ifcvt patch

Patch which will go into testing.

[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617

Andrew Pinski  changed:

   What|Removed |Added

  Attachment #51203|0   |1
is obsolete||

--- Comment #5 from Andrew Pinski  ---
Comment on attachment 51203
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51203
ifcvt patch

This patch is wrong if STORE_FLAG_VALUE == -1.

[Bug d/101490] ICE at convert_expr(tree_node, Type, Type*)

2021-07-25 Thread ibuclaw at gdcproject dot org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101490

--- Comment #1 from Iain Buclaw  ---
Reduced test
---
struct test
{
int[0] foo;
}

void main()
{
test* t;
auto a = cast(typeof(t.foo)[0])t.foo;
write(a);
}

void write(S)(S args)
{
foreach (arg; args)
{
}
}

[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617

--- Comment #6 from Andrew Pinski  ---
Thinking about this some more, there is a canonicalization issue. We need to
decide if we want to canonicalize to just a ? -1 : 1; or expand it out.
a ? 1 : 0 makes sense to do (cast) a;  So does "a ? 0 : 1".

Does the current a ? -1 : 0 make sense or just add that to ifcvt.

I am going to take a few days to think of this and such.

There are other issues that deal with this.  Even having a cmov existing makes
it harder to decide.  Even though for an example -(a == 0) can be optimized
nicely on x86, it might not be nicely on other targets.

[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617

--- Comment #7 from Andrew Pinski  ---
A few more canonicalization issues that need to be thought of:

"a >>u (bitsize-1)" and "a >>s (bitsize-1)" and "-(a < 0)" [the rest of this sentence and the reply attribution were lost in extraction]

> Thinking about this some more, there is a canonicalization issue. We need to
> decide if we want to canonicalization to just a ? -1 : 1; or expand it out.
> a ? 1 : 0 makes sense to do (cast) a;  So does "a ? 0 : 1".
> 
> Does the current a ? -1 : 0 make sense or just add that to ifcvt.

PR101339 is related to that canonicalization really.

There are others.

Even things like:
(a == 0) + 2
Should that be:
a == 0 ? 3 : 2
On the gimple level
and then do the correct thing on the RTL level?

[Bug target/101614] [s390] vec_signed requires z15, docs say z13

2021-07-25 Thread evan--- via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101614

Evan Nemerson  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |INVALID

--- Comment #1 from Evan Nemerson  ---
Never mind; the ARCH in the documentation refers to the same value as __ARCH__,
not -march=zN

[Bug tree-optimization/101621] New: gcc cannot optimize int8_t vector assign with subscription to shuffle

2021-07-25 Thread yumeyao at gmail dot com via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621

Bug ID: 101621
   Summary: gcc cannot optimize int8_t vector assign with
subscription to shuffle
   Product: gcc
   Version: 11.1.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: yumeyao at gmail dot com
  Target Milestone: ---

https://gcc.godbolt.org/z/91cqenf99

typedef char v16b __attribute__((vector_size(16)));

To sum it up, regarding optimizing v = { v[n] ...} into shuffle, targeting
Intel x86(x86_64):
There is a lack of optimization when there is a zero
There is some regression starting from gcc9.
so this might be 2 issues. But I think a proper fix could resolve both.


* gcc can optimize int8_t vector assign with subscription of the same vector to
shuffle, like this:
v16b gcc_can_shuffle(v16b b) {
return (v16b) {b[0], b[0], b[0], b[0], b[4], b[4], b[4], b[4], b[8], b[8],
b[8], b[8], b[12], b[12], b[12], b[12]};
}

* However, if there is a zero, gcc can't handle this. Actually this is
supported on Intel x86, with a negative subscription indicating the 'zero
value'.
Clang can do the optimization starting with clang 5.

* Furthermore, there is a regression:
gcc < 8 can always optimize it, but starting with gcc9, if there is a cast,
then the optimization fails:
typedef long v2si64 __attribute__((vector_size(16)));
v16b gcc_cannot_shuffle_with_cast(v2si64 x) {
v16b b = (v16b)x;
v16b b0 = {b[0], b[0], b[0], b[0], b[4], b[4], b[4], b[4], b[8], b[8],
b[8], b[8], b[12], b[12], b[12], b[12]};
return b0;
}
gcc 11 can optimize it on -O3, but not on -O1 or -O2.

[Bug target/18233] extraneous inc/dec pair

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=18233

Andrew Pinski  changed:

   What|Removed |Added

 Depends on||94956
 Resolution|--- |FIXED
   Target Milestone|--- |11.2
 Status|NEW |RESOLVED

--- Comment #4 from Andrew Pinski  ---
So this is fixed in a few different ways but fully with r11-194.

For x86 with cmov (!=i386), this was fixed in GCC 4.5.0 where the ffs is
expanded at expand time to use ctz and cmov.

without cmov, this was only fixed in GCC 11 with r11-194 which changes ffs to
ctz if ctz has a known 0 argument which x86 has.

So closing as fixed for GCC 11; There is already a testcase for this too;
gcc.target/i386/pr94956.c .


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94956
[Bug 94956] Unable to remove impossible ffs() test for zero

[Bug tree-optimization/101621] gcc cannot optimize int8_t vector assign with subscription to shuffle

2021-07-25 Thread yumeyao at gmail dot com via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621

--- Comment #1 from YumeYao  ---
https://gcc.godbolt.org/z/a47Enb9oK

16-bytes (AVX) version added.

[Bug target/19922] xor is enclosed in loop, and exectuted on each iteration of for statement

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19922

--- Comment #7 from Andrew Pinski  ---
So the question becomes do we care about this loop if 
-fno-tree-loop-distribute-patterns  is added?  Anyways we are able to detect
the loop is a memset for a while now and then expand that to have no xor inside
the loop.

[Bug tree-optimization/101621] gcc cannot optimize int8_t vector assign with subscription to shuffle

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621

Andrew Pinski  changed:

   What|Removed |Added

   Severity|normal  |enhancement

--- Comment #2 from Andrew Pinski  ---
The cast issue is because in GCC 9, it was not producing PERM at the gimple
level which was fixed correctly in GCC 11.

clang_shuffle_with_zero can easily be added.

[Bug tree-optimization/21712] missed optimization due with const function and pulling out of loops

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=21712

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |FIXED
   Target Milestone|--- |4.3.0
 Status|NEW |RESOLVED

--- Comment #26 from Andrew Pinski  ---
Fixed for GCC 4.3.0 and above.  Most likely by r0-86459 .

[Bug target/18562] SSE constant vector initialization produces dead constant values on stack

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=18562

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |4.9.0
 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #14 from Andrew Pinski  ---
Fixed fully in 4.9 and above.

[Bug target/28919] IV selection is messed up

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=28919

Andrew Pinski  changed:

   What|Removed |Added

   Last reconfirmed|2006-09-17 22:48:12 |2021-7-25

--- Comment #10 from Andrew Pinski  ---
Still happens.
__builtin_prefetch causes the issue.

[Bug tree-optimization/35344] Loop unswitching to produce perfect loop nest

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=35344

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
   Target Milestone|--- |6.0
 Status|UNCONFIRMED |RESOLVED

--- Comment #2 from Andrew Pinski  ---
Fixed:

  if (m_23(D) > 0)
goto ; [89.00%]
  else
goto ; [11.00%]

   [local count: 12992276]:
  p.0_1 = p;
  q.1_10 = q;
  if (n_24(D) > 0)
goto ; [89.00%]
  else
goto ; [11.00%]

So yes it is a dup.

*** This bug has been marked as a duplicate of bug 23855 ***

[Bug tree-optimization/23855] loop header should also be pulled out of the inner loop too

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=23855

Andrew Pinski  changed:

   What|Removed |Added

 CC||xinliangli at gmail dot com

--- Comment #33 from Andrew Pinski  ---
*** Bug 35344 has been marked as a duplicate of this bug. ***

[Bug target/23813] redundant register assignments not eliminated

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=23813

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED
   Target Milestone|--- |5.0

--- Comment #6 from Andrew Pinski  ---
So this has been fixed in GCC 5.0 and above as it is able to detect bswap and
do the correct thing there.

That is it is able to convert:
  REV64_STEP(n,  8, 0x00FF00FF00FF00FFULL); /* bytes */
  REV64_STEP(n, 16, 0x0000FFFF0000FFFFULL); /* halfwords */
  REV64_STEP(n, 32, 0x00000000FFFFFFFFULL); /* full words */

Into:
n = __builtin_bswap64 (n)

[Bug rtl-optimization/35309] Late struct expansion leads to missing PRE

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=35309

--- Comment #3 from Andrew Pinski  ---
The original testcase in comment #0 is now fixed but the following is not:
struct A {
  int f[16];
} ag, ag2,ag3;


struct A foo(int n)
{
   if (n)
   {
 ag2 = ag;
   }

   return ag;
}

[Bug tree-optimization/37810] Bad store sinking job

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37810

Andrew Pinski  changed:

   What|Removed |Added

   Last reconfirmed|2009-04-03 12:34:44 |2021-7-25

--- Comment #6 from Andrew Pinski  ---
For the reduced testcase in comment #2 I get now:
4.8.0+:
.L4:
addl$1, %eax
movl%eax, (%rbx)
cmpl4(%rbx), %eax
je  .L8
.L3:
testl   %eax, %eax
jne .L4

4.7.4 and before:
.L3:
testl   %eax, %eax
je  .L8
addl$1, %eax
cmpl4(%rbx), %eax
movl%eax, (%rbx)
jne .L3

Or on the trunk at the gimple level:
   [local count: 1014686025]:
  _1 = prephitmp_10 + 1;
  iter_6(D)->n = _1;
  _2 = iter_6(D)->m;
  if (_1 == _2)
goto ; [5.50%]
  else
goto ; [94.50%]

   [local count: 55807731]:
  g ();

   [local count: 114863530]:
  pretmp_11 = iter_6(D)->n;

   [local count: 1073741824]:
  # prephitmp_10 = PHI 
  if (prephitmp_10 != 0)
goto ; [94.50%]
  else
goto ; [5.50%]

Aka the store still happens inside the loop unconditionally.

[Bug tree-optimization/39761] data-flow analysis does not discover constant real/imaginary parts

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39761

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |8.0
 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #13 from Andrew Pinski  ---
Fixed in GCC 8, most likely by r8-5346 .  That is DOM is now able to do the
jump threading even at -Os.

[Bug tree-optimization/39761] data-flow analysis does not discover constant real/imaginary parts

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39761

--- Comment #14 from Andrew Pinski  ---
(In reply to Andrew Pinski from comment #13)
> Fixed in GCC 8, most likely by r8-5346 .  That is DOM is now able to do the
> jump threading even at -Os.

I should say DOM is doing the jump threading now which is why I think r8-5346
fixed this.

[Bug tree-optimization/30099] missed value numbering optimization (conditional-based assertions)

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=30099

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |8.0
 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #3 from Andrew Pinski  ---
Fixed in GCC 8 by r8-1633 .

[Bug tree-optimization/32226] Missed optimization caused by copy loop header (yes a weird case)

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=32226

--- Comment #3 from Andrew Pinski  ---
To do this optimization (the reduced testcase works right now), you have to
simulate each statement until the end with "width_5 == 0" (the opposite range
of the initial condition) to see if you get the other phi operand.

  if (width_5(D) != 0)
goto ; [89.00%]
  else
goto ; [11.00%]

   [local count: 105119325]:
  _1 = (long unsigned int) dir_8(D);
  _3 = width_5(D) + 4294967295;
  _14 = (sizetype) _3;
  _6 = _14 + 1;
  _17 = _1 * _6;
  _18 = _17 * 2;
  errorptr_4 = errorptr_7(D) + _18;

   [local count: 118111601]:
  # errorptr_16 = PHI 


I don't know if this optimization is that important, even clang does not do it.
It should most likely be only done if the branch is highly predicted taken down
the route of the longer path.

[Bug target/25230] __sync_add_and_fetch does not use condition flags from subl

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25230

Andrew Pinski  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |DUPLICATE
   Target Milestone|--- |4.7.0

--- Comment #3 from Andrew Pinski  ---
Dup of bug 48986 which was fixed for GCC 4.7.0.

*** This bug has been marked as a duplicate of bug 48986 ***

[Bug target/48986] Missed optimization in atomic decrement on x86/x64

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48986

Andrew Pinski  changed:

   What|Removed |Added

 CC||bcrl at kvack dot org

--- Comment #7 from Andrew Pinski  ---
*** Bug 25230 has been marked as a duplicate of this bug. ***

[Bug tree-optimization/101621] gcc cannot optimize int8_t vector assign with subscription to shuffle

2021-07-25 Thread yumeyao at gmail dot com via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621

--- Comment #3 from YumeYao  ---
(In reply to Andrew Pinski from comment #2)
> The cast issue is because in GCC 9, it was not producing PERM at the gimple
> level which was fixed correctly in GCC 11.
> 
> clang_shuffle_with_zero can easy be added.

Thanks for your insights.

Do you have any comment on the optimization flag part (gcc <=8 only needs -O1
to optimize the 'cast' case, but gcc 11 requires -O3)?
Is it due to some default optimization options change in -O1 between gcc 8 and
11, or it's something deeper?

[Bug tree-optimization/40170] redundant zero extensions

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=40170

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |11.0
 Resolution|--- |FIXED
  Component|target  |tree-optimization
 Status|UNCONFIRMED |RESOLVED

--- Comment #2 from Andrew Pinski  ---
Fixed for GCC 11, in EVRP.  I can't figure out which patch caused it but what
happens is the following:
We figure out the range of _3 to be [0, 255]
 _3 = (int) bit_16;

While processing:
  _4 = _2 >> _3;

We figure out the range of _4 is still [0, 255] as it is a right shift so we
cannot change any upper bits.

And then we match and simplify the following:
  _24 = _4 & 255;

to just:
 _24 = _4;

[Bug c++/101622] New: Type erasure (upcasting) in constexpr/consteval context

2021-07-25 Thread sjeltsch at gmail dot com via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101622

Bug ID: 101622
   Summary: Type erasure (upcasting) in constexpr/consteval
context
   Product: gcc
   Version: 10.2.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: sjeltsch at gmail dot com
  Target Milestone: ---

Source:

```
template  void fun() {} 

template inline constexpr bool var = false; 

consteval bool funC() { 
  void (*a)() = fun;   
  void (*b)() = fun; 
  return a == b;
}   

constexpr auto x = funC();  

consteval bool varC() { 
  const void* a = &var;
  const void* b = &var;  
  return a == b;
}   

constexpr auto y = varC();  

int main() { return 0; } 
```   

output:

Using built-in specs. 
COLLECT_GCC=g++ 
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/10/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa:hsa
OFFLOAD_TARGET_DEFAULT=1 
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 10.2.1-6'
--with-bugurl=file:///usr/share/doc/gcc-10/README.Bugs
--enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr
--with-gcc-major-version-only --program-suffix=-10 --program-prefix=x86_64-linux-gnu-
--enable-shared --enable-linker-build-id --libexecdir=/usr/lib
--without-included-gettext --enable-threads=posix --libdir=/usr/lib 
--enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug
--enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new
--enable-gnu-unique-object --disable-vtable-verify --enable-plugin
--enable-default-pie --with-system-zlib --enable-libphobos-checking=release
--with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch
--disable-werror --with-arch-32=i686 --with-abi=m64
--with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic
--enable-offload-targets=nvptx-none=/build/gcc-10-Km9U7s/gcc-10-10.2.1/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-10-Km9U7s/gcc-10-10.2.1/debian/tmp-gcn/usr,hsa
--without-cuda-driver --enable-checking=release
--build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
--with-build-config=bootstrap-lto-lean --enable-link-mutex
Thread model: posix 
Supported LTO compression algorithms: zlib zstd 
gcc version 10.2.1 20210110 (Debian 10.2.1-6)   
COLLECT_GCC_OPTIONS='-std=c++2a' '-v' '-o' 'foo' '-shared-libgcc'
'-mtune=generic' '-march=x86-64'
 /usr/lib/gcc/x86_64-linux-gnu/10/cc1plus -quiet -v -imultiarch
x86_64-linux-gnu -D_GNU_SOURCE foo.cc -quiet -dumpbase foo.cc -mtune=generic
-march=x86-64 -auxbase foo -std=c++2a -version
-fasynchronous-unwind-tables -o /tmp/ccp7WoOR.s
GNU C++17 (Debian 10.2.1-6) version 10.2.1 20210110 (x86_64-linux-gnu)
compiled by GNU C version 10.2.1 20210110, GMP version 6.2.1, MPFR
version 4.1.0, MPC version 1.2.0, isl version isl-0.23-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072  
ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/10"
ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/10/include-fixed"
ignoring nonexistent directory
"/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/include/c++/10
 /usr/include/x86_64-linux-gnu/c++/10
 /usr/include/c++/10/backward
 /usr/lib/gcc/x86_64-linux-gnu/10/include
 /usr/local/include

[Bug target/39821] 120% slowdown with vectorizer

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39821

Andrew Pinski  changed:

   What|Removed |Added

  Component|tree-optimization   |target

--- Comment #5 from Andrew Pinski  ---
The code generation for aarch64 looks fine:
dotproduct_order4:
.LFB1:
.cfi_startproc
ldr q1, [x0]
ldr q2, [x1]
smull   v0.2d, v2.2s, v1.2s
smlal2  v0.2d, v2.4s, v1.4s
addp    d0, v0.2d
fmov    x0, d0
ret
  vect__6.41_18 = MEM  [(int32_t *)v1_2(D)];
  vect__10.44_13 = MEM  [(int32_t *)v2_3(D)];
  vect_patt_25.45_8 = WIDEN_MULT_LO_EXPR ;
  vect_patt_25.45_4 = WIDEN_MULT_HI_EXPR ;
  vect_accum_14.46_31 = vect_patt_25.45_4 + vect_patt_25.45_8;
  _33 = .REDUC_PLUS (vect_accum_14.46_31); [tail call]
 CUT 
Even the gimple level for x86_64 looks ok:
  vect__6.41_18 = MEM  [(int32_t *)v1_2(D)];
  vect__10.44_13 = MEM  [(int32_t *)v2_3(D)];
  vect_patt_25.45_8 = WIDEN_MULT_LO_EXPR ;
  vect_patt_25.45_4 = WIDEN_MULT_HI_EXPR ;
  vect_accum_14.46_31 = vect_patt_25.45_4 + vect_patt_25.45_8;
  _33 = VEC_PERM_EXPR ;
  _34 = vect_accum_14.46_31 + _33;
  stmp_accum_14.47_35 = BIT_FIELD_REF <_34, 64, 0>;

But the expansion looks bad.

[Bug rtl-optimization/40361] Conditional return not always profitable with -Os

2021-07-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=40361

--- Comment #2 from Andrew Pinski  ---
So the cross jumping opportunity has been taken since at least 5.4, even with a
conditional return.

ldr r3, .L8
stmfd   sp!, {r4, lr}
ldr r3, [r3]
ldr r4, .L8+4
cmp r3, #0
bge .L2
bl  bar1
ldr r3, [r4]
cmp r3, #0
ldmgefd sp!, {r4, pc}
.L3:
mov r3, #0
str r3, [r4]
ldmfd   sp!, {r4, pc}
.L2:
bl  bar2
ldr r3, [r4]
cmp r3, #0
blt .L3
ldmfd   sp!, {r4, pc}

The trunk produces this:
push    {r4, lr}
ldr r4, .L9
ldr r3, [r4]
cmp r3, #0
bge .L2
bl  bar1
.L8:
ldr r3, [r4, #4]
cmp r3, #0
movlt   r3, #0
strlt   r3, [r4, #4]
pop {r4, pc}
.L2:
bl  bar2
b   .L8

Which is even more cross jumped; note that push and stmfd are the same here,
just written differently.

45 matches

Mail list logo