[Bug objc/101616] Objective-C frontend should not emit vtable/fixup messages (at least, not by default)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101616 Iain Sandoe changed: What|Removed |Added Status|UNCONFIRMED |RESOLVED Resolution|--- |FIXED --- Comment #2 from Iain Sandoe --- (In reply to Matt Jacobson from comment #0) > In 10.2.0, the Objective-C frontend (in NeXT v2 ABI mode) emits "fixup" > messages for all message sends. Please check 10.3, 11.(1,2rc) and master - I believe this is already fixed (and back ported to 10.3). I have not (yet) applied it to 9.x (so that would not appear until 9.5, if done). The changes are selective on the target OS version (since fixup messages _are_ emitted by the 'system' [i.e. last usable Xcode] compilers for earlier OS versions). So that gcc foo.m on a recent OS version should omit the fixup versions but with -mmacosx-version-min=10.5 the fixups versions should be emitted (actually, with a few small changes as the OS version changes).
[Bug gcov-profile/101618] New: [GCOV] Wrong coverage caused by call site in a "for" statement
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101618 Bug ID: 101618 Summary: [GCOV] Wrong coverage caused by call site in a "for" statement Product: gcc Version: 10.2.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: gcov-profile Assignee: unassigned at gcc dot gnu.org Reporter: njuwy at smail dot nju.edu.cn CC: marxin at gcc dot gnu.org Target Milestone: --- $ gcc -v Using built-in specs. COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-pc-linux-gnu/10.2.0/lto-wrapper Target: x86_64-pc-linux-gnu Configured with: ../configure -enable-checking=release -enable-languages=c,c++ -disable-multilib Thread model: posix Supported LTO compression algorithms: zlib gcc version 10.2.0 (GCC) $ cat test.c #include #include #include #include struct obstack {}; struct bitmap_head_def; typedef struct bitmap_head_def *bitmap; typedef const struct bitmap_head_def *const_bitmap; typedef unsigned long BITMAP_WORD; typedef struct bitmap_obstack { struct bitmap_element_def *elements; struct bitmap_head_def *heads; struct obstack obstack; } bitmap_obstack; typedef struct bitmap_element_def { struct bitmap_element_def *next; struct bitmap_element_def *prev; unsigned int indx; BITMAP_WORD bits[((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u))]; } bitmap_element; struct bitmap_descriptor; typedef struct bitmap_head_def { bitmap_element *first; bitmap_element *current; unsigned int indx; bitmap_obstack *obstack; } bitmap_head; bitmap_element bitmap_zero_bits; typedef struct { bitmap_element *elt1; bitmap_element *elt2; unsigned word_no; BITMAP_WORD bits; } bitmap_iterator; static void __attribute__((noinline)) bmp_iter_set_init(bitmap_iterator *bi, const_bitmap map, unsigned start_bit, unsigned *bit_no) { bi->elt1 = map->first; bi->elt2 = ((void *)0); while (1) { if (!bi->elt1) { bi->elt1 = &bitmap_zero_bits; break; } if (bi->elt1->indx >= start_bit / (((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u)) * (8 * 8 * 1u))) break; bi->elt1 = bi->elt1->next; } if (bi->elt1->indx != 
start_bit / (((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u)) * (8 * 8 * 1u))) start_bit = bi->elt1->indx * (((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u)) * (8 * 8 * 1u)); bi->word_no = start_bit / (8 * 8 * 1u) % ((128 + (8 * 8 * 1u) - 1) / (8 * 8 * 1u)); bi->bits = bi->elt1->bits[bi->word_no]; bi->bits >>= start_bit % (8 * 8 * 1u); start_bit += !bi->bits; *bit_no = start_bit; } static void __attribute__((noinline)) bmp_iter_next(bitmap_iterator *bi, unsigned *bit_no) { bi->bits >>= 1; *bit_no += 1; } static unsigned char __attribute__((noinline)) bmp_iter_set_tail(bitmap_iterator *bi, unsigned *bit_no) { while (!(bi->bits & 1)) { bi->bits >>= 1; *bit_no += 1; } return 1; } static __inline__ unsigned char bmp_iter_set(bitmap_iterator *bi, unsigned *bit_no) { unsigned bno = *bit_no; BITMAP_WORD bits = bi->bits; bitmap_element *elt1; if (bits) { while (!(bits & 1)) { bits >>= 1; bno += 1; } *bit_no = bno; return 1; } *bit_no = ((bno + 64 - 1) / 64 * 64); bi->word_no++; elt1 = bi->elt1; while (1) { while (bi->word_no != 2) { bi->bits = elt1->bits[bi->word_no]; if (bi->bits) { bi->elt1 = elt1; return bmp_iter_set_tail(bi, bit_no); } *bit_no += 64; bi->word_no++; } elt1 = elt1->next; if (!elt1) { bi->elt1 = elt1; return 0; } *bit_no = elt1->indx * (2 * 64); bi->word_no = 0; } } extern void abort(void); static void __attribute__((noinline)) catchme(int i) { if (i != 0 && i != 64) abort(); } static void __attribute__((noinline)) foobar(bitmap_head *chain) { bitmap_iterator rsi; unsigned int regno; for (bmp_iter_set_init(&(rsi), (chain), (0), &(regno)); bmp_iter_set(&(rsi), &(regno)); bmp_iter_next(&(rsi), &(regno))) catchme(regno); } int main() { bitmap_element elem = {(void *)0, (void *)0, 0, {1, 1}}; bitmap_head live_throughout = {&elem, &elem, 0, (void *)0}; foobar(&live_throughout); return 0; } $ gcc -O0 --coverage test.c;./a.out;gcov test;cat test.c.gcov File 'test.c' Lines executed:80.88% of 68 Creating 'test.c.gcov' -:0:Source:test.c -:0:Graph:test.gcno -:0:Data:test.gcda 
-:0:Runs:1 -:1:#include -:2:#include -:3:#include -:4:#include -:5:struct obstack {}; -:6:struct bitmap_head_def; -:7:typedef struct bitmap_head_def *bitmap; -:8:typedef const struct bitmap_head_def *const_bitmap; -:9:typedef unsigned long BITMAP_WORD; -: 10:typedef struct bitmap_obstack {
[Bug sanitizer/101111] xgcc cross-compiler for x86_64-apple-darwin in GCC 11.1 doesn't generate weak symbols, resulting in undefined reference to ___lsan_default_suppressions
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101111 --- Comment #10 from Iain Sandoe --- (In reply to Mosè Giordano from comment #6) > Created attachment 51038 [details] > Patch to fix the reported issue > > Please find attached a patch to fix the reported issue. I replaced the > bashism += with simple string interpolation, to make it complaint with > strict POSIX shells. This is OK for master and back-ports from the Darwin perspective (I guess Martin plans to deal with this since he has assigned the PR, but if he does not have time, I can apply this for you if you don't have write access).
[Bug sanitizer/101111] xgcc cross-compiler for x86_64-apple-darwin in GCC 11.1 doesn't generate weak symbols, resulting in undefined reference to ___lsan_default_suppressions
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101111 --- Comment #11 from Mosè Giordano --- > This is OK for master and back-ports from the Darwin perspective Thanks for the review and confirmation! > (I guess Martin plans to deal with this since he has assigned the PR, but if > he does not have time, I can apply this for you if you don't have write > access). Yes, I don't have write access, so someone else will need to apply the patch :-)
[Bug bootstrap/100552] [11/12 Regression] configure: 32208: Syntax error: Bad substitution
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100552 Iain Buclaw changed: What|Removed |Added Status|UNCONFIRMED |RESOLVED Resolution|--- |FIXED --- Comment #4 from Iain Buclaw --- Given the two commits, I'm going to assume this is fixed.
[Bug d/101619] New: d: Change in DotTemplateExp type semantics leading to regression
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101619 Bug ID: 101619 Summary: d: Change in DotTemplateExp type semantics leading to regression Product: gcc Version: 10.3.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: d Assignee: ibuclaw at gdcproject dot org Reporter: ibuclaw at gdcproject dot org Target Milestone: --- A regression found in upstream was included in the fix for PR100999. --- import std.range.primitives : isInputRange; struct Slice { bool empty() const; int front() const; void popFront()() { } } static assert(isInputRange!( Slice) == true); static assert(isInputRange!(const Slice) == false); // fails since PR100999
[Bug fortran/92482] BIND(C) with array-descriptor mishandled for type character
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92482 sandra at gcc dot gnu.org changed: What|Removed |Added CC||sandra at gcc dot gnu.org --- Comment #4 from sandra at gcc dot gnu.org --- Tobias's recent commit (which he forgot to tag with this issue) changed the "must be length 1" messages to something more descriptive, but the functionality itself still isn't working. commit b3d4011ba10275fbd5d6ec5a16d5aaebbdfb5d3c Author: Tobias Burnus Date: Wed Jul 21 09:36:48 2021 +0200 Fortran: Fix bind(C) character length checks gcc/fortran/ChangeLog: * decl.c (gfc_verify_c_interop_param): Update for F2008 + F2018 changes; reject unsupported bits with 'Error: Sorry,'. * trans-expr.c (gfc_conv_procedure_call): Fix condition to For using CFI descriptor with characters. gcc/testsuite/ChangeLog: * gfortran.dg/iso_c_binding_char_1.f90: Update dg-error. * gfortran.dg/pr32599.f03: Use -std=-f2003 + update comment. * gfortran.dg/bind_c_char_10.f90: New test. * gfortran.dg/bind_c_char_6.f90: New test. * gfortran.dg/bind_c_char_7.f90: New test. * gfortran.dg/bind_c_char_8.f90: New test. * gfortran.dg/bind_c_char_9.f90: New test.
[Bug tree-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617 --- Comment #1 from Andrew Pinski --- So it turns out you can make this generic and don't need to handle 1 specially diff --git a/gcc/match.pd b/gcc/match.pd index beb8d27535e..2af987278af 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3805,14 +3805,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (cond @0 INTEGER_CST@1 INTEGER_CST@2) (switch + /* a ? CST : -1 -> -(!a) | CST. */ + (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2)) + (with { + tree booltrue = constant_boolean_node (true, boolean_type_node); +} +(bit_ior (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))) @2))) + /* a ? -1 : CST -> -(a) | CST. */ + (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1)) + (with { + tree booltrue = constant_boolean_node (true, boolean_type_node); +} +(bit_ior (negate (convert (convert:boolean_type_node @0))) @2))) (if (integer_zerop (@2)) (switch /* a ? 1 : 0 -> a if 0 and 1 are integral types. */ (if (integer_onep (@1)) (convert (convert:boolean_type_node @0))) -/* a ? -1 : 0 -> -a. */ -(if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1)) - (negate (convert (convert:boolean_type_node @0 /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1)) (with { @@ -3827,9 +3836,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* a ? 0 : 1 -> !a. */ (if (integer_onep (@2)) (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))) - /* a ? -1 : 0 -> -(!a). */ - (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2)) - (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) (with {
[Bug rtl-optimization/67382] RTL combiner is too eager to combine (plus (reg 92) (reg 92)) to (ashift (reg 92) (const_int 1))
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67382 --- Comment #4 from Segher Boessenkool --- (In reply to Andrew Pinski from comment #3) > Note combine is able to figure out the jump is unconditional but there is no > "pattern" to match it: > Trying 10 -> 17: >10: r85:QI=0x1 >17: {flags:CCC=cmp(r85:QI-0x1,r85:QI);clobber scratch;} > REG_DEAD r85:QI > REG_EQUAL cmp(0,0x1) > Failed to match this instruction: > (parallel [ > (set (pc) > (pc)) > (clobber (scratch:QI)) > ]) > Failed to match this instruction: > (set (pc) > (pc)) This is an other_insn, namely a cc_use_insn. We currently use that for changing the cc mode used. update_cfg_for_uncondjump There is code in combine for handling (set (pc) (pc)) in other_insn, in fact (see where update_cfg_for_uncondjump is called). There also is code (in recog_for_combine_1) that should handle noop sets like this. It does not print anything if that happens though. Investigating.
[Bug c++/52099] Incorrectly applying conversion when catching pointer-to-members
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52099 --- Comment #2 from Jonathan Wakely --- >From the dup: Eric Fiselier 2016-01-20 03:50:56 UTC Created attachment 37399 [details] reproducer I don't see where [except.handle] allows such a conversion. Comment 1 Jonathan Wakely 2017-01-13 20:36:35 UTC We're missing a check for cv-qualifiers in __pointer_to_member_type_info::__pointer_catch that needs to be done before we compare the pointees. Both pointees have type void() so we need to compare the cv-quals before that info is lost. Comment 2 Jonathan Wakely 2017-01-13 20:49:13 UTC Hmm, we don't seem to have the cv-quals in __flags. That's a problem. Comment 3 Jonathan Wakely 2017-01-13 21:08:10 UTC When compiled with clang the pointees are different, so the match fails when comparing them. Using Clang: (gdb) step __cxxabiv1::__pbase_type_info::__pointer_catch (this=0x401cc0 , thrown_type=0x401d10 , thr_obj=0x7fffd220, outer=0) at /usr/lib/gcc/x86_64-redhat-linux/6.3.1/../../../../include/c++/6.3.1/cxxabi.h:309 (gdb) step std::type_info::__do_catch (this=0x401c90 , thr_type=0x401cf8 ) at ../../../../libstdc++-v3/libsupc++/tinfo.cc:71 (gdb) p *this $3 = {_vptr.type_info = 0x6030b0 , __name = 0x401c89 "KFvvE"} (gdb) p *thr_type $4 = {_vptr.type_info = 0x6030b0 , __name = 0x401cf0 "FvvE"} (gdb) But using GCC the two pointee types are the same: (gdb) p *this $1 = {_vptr.type_info = 0x6030e8 , __name = 0x401c50 "FvvE"} (gdb) p *thr_type $2 = {_vptr.type_info = 0x6030e8 , __name = 0x401c50 "FvvE"} So it looks like the problem is in the front-end where the typeinfo object for a pointer to cv-qualified member function has the wrong pointee type. Comment 4 Jonathan Wakely 2017-01-13 23:05:34 UTC My front-end debugging skills are pitiful, but I've found something suspicious. ptm_initializer uses TYPE_PTRMEM_POINTED_TO_TYPE to get that pointee type. 
For this case that expands to TYPE_PTRMEMFUNC_FN_TYPE which is a call to cp_build_qualified_type with the qualifiers from cp_type_quals. But cp_type_quals tries pretty hard to ensure we never get cv-quals for a function type. For the purposes of RTTI, where we really do care about the difference between void() and void()const, do we want the memfn quals instead? Comment 5 Jonathan Wakely 2017-01-13 23:20:33 UTC For the attached reproducer this condition is never true in cp_build_qualified_type_real /* But preserve any function-cv-quals on a FUNCTION_TYPE. */ if (TREE_CODE (type) == FUNCTION_TYPE) type_quals |= type_memfn_quals (type); As far as I can tell this is what's supposed to put the cv-quals back onto the function type, so we'd have a pointee of type void() const not void().
[Bug c++/101620] New: gcc incorrectly makes concept checking in incomplete-class context
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101620 Bug ID: 101620 Summary: gcc incorrectly makes concept checking in incomplete-class context Product: gcc Version: 11.1.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: fchelnokov at gmail dot com Target Milestone: --- Compilation of this program ``` struct A {}; template concept DerivedOnceFromA = requires(T t) { { static_cast(t) }; }; template struct B {}; struct C : A { B foo(); }; ``` must fail, since B is checked in incomplete struct C context: https://gcc.godbolt.org/z/ajh8MsY4n
[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617 Andrew Pinski changed: What|Removed |Added Component|tree-optimization |rtl-optimization --- Comment #2 from Andrew Pinski --- I decided that this should really go on the RTL level
[Bug rtl-optimization/67382] RTL combiner is too eager to combine (plus (reg 92) (reg 92)) to (ashift (reg 92) (const_int 1))
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67382 --- Comment #5 from Segher Boessenkool --- It turns out that noop other_insn is fine, and is accepted etc., but the resulting i3 in this case is not.
[Bug d/101441] __FUNCTION__ doesn't work in core.stdc.stdio functions without cast
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101441 --- Comment #1 from Iain Buclaw --- Upstream dmd fixed this bug much later than 2.076. https://github.com/dlang/dmd/pull/9920
[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617 --- Comment #3 from Andrew Pinski --- I have the ifcvt.c patch which adds this.
[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617 --- Comment #4 from Andrew Pinski --- Created attachment 51203 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51203&action=edit ifcvt patch Patch which will go into testing.
[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617 Andrew Pinski changed: What|Removed |Added Attachment #51203|0 |1 is obsolete|| --- Comment #5 from Andrew Pinski --- Comment on attachment 51203 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51203 ifcvt patch This patch is wrong if STORE_FLAG_VALUE == -1.
[Bug d/101490] ICE at convert_expr(tree_node*, Type*, Type*)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101490 --- Comment #1 from Iain Buclaw --- Reduced test --- struct test { int[0] foo; } void main() { test* t; auto a = cast(typeof(t.foo)[0])t.foo; write(a); } void write(S)(S args) { foreach (arg; args) { } }
[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617 --- Comment #6 from Andrew Pinski --- Thinking about this some more, there is a canonicalization issue. We need to decide if we want to canonicalize to just a ? -1 : 1; or expand it out. a ? 1 : 0 makes sense to do (cast) a; So does "a ? 0 : 1". Does the current a ? -1 : 0 make sense, or should we just add that to ifcvt? I am going to take a few days to think of this and such. There are other issues that deal with this. Even having a cmov existing makes it harder to decide. Even though for an example -(a == 0) can be optimized nicely on x86, it might not be nicely on other targets.
[Bug rtl-optimization/101617] a ? -1 : 1 -> (-(type)a) | 1
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101617 --- Comment #7 from Andrew Pinski --- A few more canonicalization issues that need to be thought of: "a >>u (bitsize-1)" and "a >s (bitsize-1)" and "-(a Thinking about this some more, there is a canonicalization issue. We need to > decide if we want to canonicalization to just a ? -1 : 1; or expand it out. > a ? 1 : 0 makes sense to do (cast) a; So does "a ? 0 : 1". > > Does the current a ? -1 : 0 make sense or just add that to ifcvt. PR101339 is related to that canonicalization really. There are others. Even things like: (a == 0) + 2 Should that be: a == 0 ? 3 : 2 On the gimple level and then do the correct thing on the RTL level?
[Bug target/101614] [s390] vec_signed requires z15, docs say z13
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101614 Evan Nemerson changed: What|Removed |Added Status|UNCONFIRMED |RESOLVED Resolution|--- |INVALID --- Comment #1 from Evan Nemerson --- Never mind; the ARCH in the documentation refers to the same value as __ARCH__, not -march=zN
[Bug tree-optimization/101621] New: gcc cannot optimize int8_t vector assign with subscription to shuffle
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621 Bug ID: 101621 Summary: gcc cannot optimize int8_t vector assign with subscription to shuffle Product: gcc Version: 11.1.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: yumeyao at gmail dot com Target Milestone: --- https://gcc.godbolt.org/z/91cqenf99 typedef char v16b __attribute__((vector_size(16))); To sum it up, regarding optimizing v = { v[n] ...} into shuffle, targeting Intel x86(x86_64): There is a lack of optimization when there is a zero. There is also some regression starting from gcc9, so this might be 2 issues. But I think a proper fix could resolve both. * gcc can optimize int8_t vector assign with subscription of the same vector to shuffle, like this: v16b gcc_can_shuffle(v16b b) { return (v16b) {b[0], b[0], b[0], b[0], b[4], b[4], b[4], b[4], b[8], b[8], b[8], b[8], b[12], b[12], b[12], b[12]}; } * However, if there is a zero, gcc can't handle this. Actually this is supported on Intel x86, with a negative subscription indicating the 'zero value'. Clang can do the optimization starting with clang 5. * Furthermore, there is a regression: gcc < 8 can always optimize it, but starting with gcc9, if there is a cast, then the optimization fails: typedef long v2si64 __attribute__((vector_size(16))); v16b gcc_cannot_shuffle_with_cast(v2si64 x) { v16b b = (v16b)x; v16b b0 = {b[0], b[0], b[0], b[0], b[4], b[4], b[4], b[4], b[8], b[8], b[8], b[8], b[12], b[12], b[12], b[12]}; return b0; } gcc 11 can optimize it on -O3, but not on -O1 or -O2.
[Bug target/18233] extraneous inc/dec pair
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=18233 Andrew Pinski changed: What|Removed |Added Depends on||94956 Resolution|--- |FIXED Target Milestone|--- |11.2 Status|NEW |RESOLVED --- Comment #4 from Andrew Pinski --- So this is fixed in a few different ways but fully with r11-194. For x86 with cmov (!=i386), this was fixed in GCC 4.5.0 where the ffs is expanded at expand time to use ctz and cmov. Without cmov, this was only fixed in GCC 11 with r11-194 which changes ffs to ctz if ctz has a known 0 argument which x86 has. So closing as fixed for GCC 11; There is already a testcase for this too; gcc.target/i386/pr94956.c . Referenced Bugs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94956 [Bug 94956] Unable to remove impossible ffs() test for zero
[Bug tree-optimization/101621] gcc cannot optimize int8_t vector assign with subscription to shuffle
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621 --- Comment #1 from YumeYao --- https://gcc.godbolt.org/z/a47Enb9oK 16-bytes (AVX) version added.
[Bug target/19922] xor is enclosed in loop, and exectuted on each iteration of for statement
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19922 --- Comment #7 from Andrew Pinski --- So the question becomes do we care about this loop if -fno-tree-loop-distribute-patterns is added? Anyways we have been able to detect the loop is a memset for a while now and then expand that to have no xor inside the loop.
[Bug tree-optimization/101621] gcc cannot optimize int8_t vector assign with subscription to shuffle
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621 Andrew Pinski changed: What|Removed |Added Severity|normal |enhancement --- Comment #2 from Andrew Pinski --- The cast issue is because in GCC 9, it was not producing PERM at the gimple level which was fixed correctly in GCC 11. clang_shuffle_with_zero can easily be added.
[Bug tree-optimization/21712] missed optimization due with const function and pulling out of loops
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=21712 Andrew Pinski changed: What|Removed |Added Resolution|--- |FIXED Target Milestone|--- |4.3.0 Status|NEW |RESOLVED --- Comment #26 from Andrew Pinski --- Fixed for GCC 4.3.0 and above. Most likely by r0-86459 .
[Bug target/18562] SSE constant vector initialization produces dead constant values on stack
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=18562 Andrew Pinski changed: What|Removed |Added Target Milestone|--- |4.9.0 Resolution|--- |FIXED Status|NEW |RESOLVED --- Comment #14 from Andrew Pinski --- Fixed fully in 4.9 and above.
[Bug target/28919] IV selection is messed up
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=28919 Andrew Pinski changed: What|Removed |Added Last reconfirmed|2006-09-17 22:48:12 |2021-7-25 --- Comment #10 from Andrew Pinski --- Still happens. __builtin_prefetch causes the issue.
[Bug tree-optimization/35344] Loop unswitching to produce perfect loop nest
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=35344 Andrew Pinski changed: What|Removed |Added Resolution|--- |DUPLICATE Target Milestone|--- |6.0 Status|UNCONFIRMED |RESOLVED --- Comment #2 from Andrew Pinski --- Fixed: if (m_23(D) > 0) goto ; [89.00%] else goto ; [11.00%] [local count: 12992276]: p.0_1 = p; q.1_10 = q; if (n_24(D) > 0) goto ; [89.00%] else goto ; [11.00%] So yes it is a dup. *** This bug has been marked as a duplicate of bug 23855 ***
[Bug tree-optimization/23855] loop header should also be pulled out of the inner loop too
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=23855 Andrew Pinski changed: What|Removed |Added CC||xinliangli at gmail dot com --- Comment #33 from Andrew Pinski --- *** Bug 35344 has been marked as a duplicate of this bug. ***
[Bug target/23813] redundant register assignments not eliminated
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=23813 Andrew Pinski changed: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED Target Milestone|--- |5.0 --- Comment #6 from Andrew Pinski --- So this has been fixed in GCC 5.0 and above as it is able to detect bswap and do the correct thing there. That is it is able to convert: REV64_STEP(n, 8, 0x00FF00FF00FF00FFULL); /* bytes */ REV64_STEP(n, 16, 0x0000FFFF0000FFFFULL); /* halfwords */ REV64_STEP(n, 32, 0x00000000FFFFFFFFULL); /* full words */ Into: n = __builtin_bswap64 (n)
[Bug rtl-optimization/35309] Late struct expansion leads to missing PRE
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=35309 --- Comment #3 from Andrew Pinski --- The original testcase in comment #0 is now fixed but the following is not: struct A { int f[16]; } ag, ag2,ag3; struct A foo(int n) { if (n) { ag2 = ag; } return ag; }
[Bug tree-optimization/37810] Bad store sinking job
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37810 Andrew Pinski changed: What|Removed |Added Last reconfirmed|2009-04-03 12:34:44 |2021-7-25 --- Comment #6 from Andrew Pinski --- For the reduced testcase in comment #2 I get now: 4.8.0+: .L4: addl$1, %eax movl%eax, (%rbx) cmpl4(%rbx), %eax je .L8 .L3: testl %eax, %eax jne .L4 4.7.4 and before: .L3: testl %eax, %eax je .L8 addl$1, %eax cmpl4(%rbx), %eax movl%eax, (%rbx) jne .L3 Or on the trunk at the gimple level: [local count: 1014686025]: _1 = prephitmp_10 + 1; iter_6(D)->n = _1; _2 = iter_6(D)->m; if (_1 == _2) goto ; [5.50%] else goto ; [94.50%] [local count: 55807731]: g (); [local count: 114863530]: pretmp_11 = iter_6(D)->n; [local count: 1073741824]: # prephitmp_10 = PHI if (prephitmp_10 != 0) goto ; [94.50%] else goto ; [5.50%] Aka the store still happens inside the loop unconditionally.
[Bug tree-optimization/39761] data-flow analysis does not discover constant real/imaginary parts
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39761 Andrew Pinski changed: What|Removed |Added Target Milestone|--- |8.0 Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #13 from Andrew Pinski --- Fixed in GCC 8, most likely by r8-5346 . That is DOM is now able to do the jump threading even at -Os.
[Bug tree-optimization/39761] data-flow analysis does not discover constant real/imaginary parts
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39761 --- Comment #14 from Andrew Pinski --- (In reply to Andrew Pinski from comment #13) > Fixed in GCC 8, most likely by r8-5346 . That is DOM is now able to do the > jump threading even at -Os. I should say DOM is doing the jump threading now which is why I think r8-5346 fixed this.
[Bug tree-optimization/30099] missed value numbering optimization (conditional-based assertions)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=30099 Andrew Pinski changed: What|Removed |Added Target Milestone|--- |8.0 Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #3 from Andrew Pinski --- Fixed in GCC 8 by r8-1633 .
[Bug tree-optimization/32226] Missed optimization caused by copy loop header (yes a weird case)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=32226 --- Comment #3 from Andrew Pinski --- To do this optimization (the reduced testcase works right now), you have to simulate each statement until the end with "width_5 == 0" (the opposite range of the initial condition) to see if you get the other phi operand. if (width_5(D) != 0) goto ; [89.00%] else goto ; [11.00%] [local count: 105119325]: _1 = (long unsigned int) dir_8(D); _3 = width_5(D) + 4294967295; _14 = (sizetype) _3; _6 = _14 + 1; _17 = _1 * _6; _18 = _17 * 2; errorptr_4 = errorptr_7(D) + _18; [local count: 118111601]: # errorptr_16 = PHI I don't know if this optimization is that important, even clang does not do it. It should most likely be only done if the branch is highly predicted taken down the route of the longer path.
[Bug target/25230] __sync_add_and_fetch does not use condition flags from subl
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25230 Andrew Pinski changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |DUPLICATE Target Milestone|--- |4.7.0 --- Comment #3 from Andrew Pinski --- Dup of bug 48986 which was fixed for GCC 4.7.0. *** This bug has been marked as a duplicate of bug 48986 ***
[Bug target/48986] Missed optimization in atomic decrement on x86/x64
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48986 Andrew Pinski changed: What|Removed |Added CC||bcrl at kvack dot org --- Comment #7 from Andrew Pinski --- *** Bug 25230 has been marked as a duplicate of this bug. ***
[Bug tree-optimization/101621] gcc cannot optimize int8_t vector assign with subscription to shuffle
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101621 --- Comment #3 from YumeYao --- (In reply to Andrew Pinski from comment #2) > The cast issue is because in GCC 9, it was not producing PERM at the gimple > level which was fixed correctly in GCC 11. > > clang_shuffle_with_zero can easy be added. Thanks for your insights. Do you have any comment on the optimization flag part (gcc <=8 only needs -O1 to optimize the 'cast' case, but gcc 11 requires -O3)? Is it due to some default optimization options change in -O1 between gcc 8 and 11, or it's something deeper?
[Bug tree-optimization/40170] redundant zero extensions
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=40170 Andrew Pinski changed: What|Removed |Added Target Milestone|--- |11.0 Resolution|--- |FIXED Component|target |tree-optimization Status|UNCONFIRMED |RESOLVED --- Comment #2 from Andrew Pinski --- Fixed for GCC 11, in EVRP. I can't figure out which patch caused it but what happens is the following: We figure out the range of _3 to be [0, 255] _3 = (int) bit_16; While processing: _4 = _2 >> _3; We figure out the range of _4 is still [0, 255] as it is a right shift so we cannot change any upper bits. And then we match and simplify the following: _24 = _4 & 255; to just: _24 = _4;
[Bug c++/101622] New: Type erasure (upcasting) in constexpr/consteval context
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101622 Bug ID: 101622 Summary: Type erasure (upcasting) in constexpr/consteval context Product: gcc Version: 10.2.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: sjeltsch at gmail dot com Target Milestone: --- Source: ``` template void fun() {} template inline constexpr bool var = false; consteval bool funC() { void (*a)() = fun; void (*b)() = fun; return a == b; } constexpr auto x = funC(); consteval bool varC() { const void* a = &var; const void* b = &var; return a == b; } constexpr auto y = varC(); int main() { return 0; } ``` output: Using built-in specs. COLLECT_GCC=g++ COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/10/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa:hsa OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Debian 10.2.1-6' --with-bugurl=file:///usr/share/doc/gcc-10/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with -gcc-major-version-only --program-suffix=-10 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --ena ble-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib- list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-10-Km9U7s/gcc-10-10.2.1/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-10-Km9U7s/gcc-10-10.2.1/debian/tmp-gcn /usr,hsa --without-cuda-driver --enable-checking=release 
--build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-mutex Thread model: posix Supported LTO compression algorithms: zlib zstd gcc version 10.2.1 20210110 (Debian 10.2.1-6) COLLECT_GCC_OPTIONS='-std=c++2a' '-v' '-o' 'foo' '-shared-libgcc' '-mtune=generic' '-march=x86-64' /usr/lib/gcc/x86_64-linux-gnu/10/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE foo.cc -quiet -dumpbase foo.cc -mtune=generic -march=x86-64 -auxbase foo -std=c++2a -version -fasynchronous-unwind-t ables -o /tmp/ccp7WoOR.s GNU C++17 (Debian 10.2.1-6) version 10.2.1 20210110 (x86_64-linux-gnu) compiled by GNU C version 10.2.1 20210110, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.0, isl version isl-0.23-GMP GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/10" ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu" ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/10/include-fixed" ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include" #include "..." search starts here: #include <...> search starts here: /usr/include/c++/10 /usr/include/x86_64-linux-gnu/c++/10 /usr/include/c++/10/backward /usr/lib/gcc/x86_64-linux-gnu/10/include /usr/local/include
[Bug target/39821] 120% slowdown with vectorizer
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39821 Andrew Pinski changed: What|Removed |Added Component|tree-optimization |target --- Comment #5 from Andrew Pinski --- The code generation for aarch64 looks fine: dotproduct_order4: .LFB1: .cfi_startproc ldr q1, [x0] ldr q2, [x1] smull v0.2d, v2.2s, v1.2s smlal2 v0.2d, v2.4s, v1.4s addpd0, v0.2d fmovx0, d0 ret vect__6.41_18 = MEM [(int32_t *)v1_2(D)]; vect__10.44_13 = MEM [(int32_t *)v2_3(D)]; vect_patt_25.45_8 = WIDEN_MULT_LO_EXPR ; vect_patt_25.45_4 = WIDEN_MULT_HI_EXPR ; vect_accum_14.46_31 = vect_patt_25.45_4 + vect_patt_25.45_8; _33 = .REDUC_PLUS (vect_accum_14.46_31); [tail call] CUT Even the gimple level for x86_64 looks ok: vect__6.41_18 = MEM [(int32_t *)v1_2(D)]; vect__10.44_13 = MEM [(int32_t *)v2_3(D)]; vect_patt_25.45_8 = WIDEN_MULT_LO_EXPR ; vect_patt_25.45_4 = WIDEN_MULT_HI_EXPR ; vect_accum_14.46_31 = vect_patt_25.45_4 + vect_patt_25.45_8; _33 = VEC_PERM_EXPR ; _34 = vect_accum_14.46_31 + _33; stmp_accum_14.47_35 = BIT_FIELD_REF <_34, 64, 0>; But the expansion looks bad.
[Bug rtl-optimization/40361] Conditional return not always profitable with -Os
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=40361 --- Comment #2 from Andrew Pinski --- So the cross jumping opportunity since at least 5.4 even with a conditional return. ldr r3, .L8 stmfd sp!, {r4, lr} ldr r3, [r3] ldr r4, .L8+4 cmp r3, #0 bge .L2 bl bar1 ldr r3, [r4] cmp r3, #0 ldmgefd sp!, {r4, pc} .L3: mov r3, #0 str r3, [r4] ldmfd sp!, {r4, pc} .L2: bl bar2 ldr r3, [r4] cmp r3, #0 blt .L3 ldmfd sp!, {r4, pc} The trunk produces this: push{r4, lr} ldr r4, .L9 ldr r3, [r4] cmp r3, #0 bge .L2 bl bar1 .L8: ldr r3, [r4, #4] cmp r3, #0 movlt r3, #0 strlt r3, [r4, #4] pop {r4, pc} .L2: bl bar2 b .L8 Which is even more cross jumped and note push and stmfd are the same here just written differently.