Committed at 6cec7b06b3c8187b36fc05cfd4dd38b42313d727. Thanks, Di
> -----Original Message----- > From: Richard Biener <richard.guent...@gmail.com> > Sent: Friday, December 22, 2023 11:40 PM > To: Di Zhao OS <diz...@os.amperecomputing.com> > Cc: Thomas Schwinge <tho...@codesourcery.com>; gcc-patches@gcc.gnu.org > Subject: Re: [PATCH v4] [tree-optimization/110279] Consider FMA in > get_reassociation_width > > > > > Am 22.12.2023 um 16:05 schrieb Di Zhao OS <diz...@os.amperecomputing.com>: > > > > Updated the fix in attachment. > > > > Is it OK for trunk? > > Ok > > > Tested on aarch64-unknown-linux-gnu and x86_64-pc-linux-gnu. > > > > Thanks, > > Di Zhao > > > >> -----Original Message----- > >> From: Di Zhao OS <diz...@os.amperecomputing.com> > >> Sent: Sunday, December 17, 2023 8:31 PM > >> To: Thomas Schwinge <tho...@codesourcery.com>; gcc-patches@gcc.gnu.org > >> Cc: Richard Biener <richard.guent...@gmail.com> > >> Subject: RE: [PATCH v4] [tree-optimization/110279] Consider FMA in > >> get_reassociation_width > >> > >> Hello Thomas, > >> > >>> -----Original Message----- > >>> From: Thomas Schwinge <tho...@codesourcery.com> > >>> Sent: Friday, December 15, 2023 5:46 PM > >>> To: Di Zhao OS <diz...@os.amperecomputing.com>; gcc-patches@gcc.gnu.org > >>> Cc: Richard Biener <richard.guent...@gmail.com> > >>> Subject: RE: [PATCH v4] [tree-optimization/110279] Consider FMA in > >>> get_reassociation_width > >>> > >>> Hi! 
> >>> > >>> On 2023-12-13T08:14:28+0000, Di Zhao OS <diz...@os.amperecomputing.com> > >> wrote: > >>>> --- /dev/null > >>>> +++ b/gcc/testsuite/gcc.dg/pr110279-2.c > >>>> @@ -0,0 +1,41 @@ > >>>> +/* PR tree-optimization/110279 */ > >>>> +/* { dg-do compile } */ > >>>> +/* { dg-options "-Ofast --param tree-reassoc-width=4 --param fully- > >>> pipelined-fma=1 -fdump-tree-reassoc2-details -fdump-tree-optimized" } */ > >>>> +/* { dg-additional-options "-march=armv8.2-a" { target aarch64-*-* } } > */ > >>>> + > >>>> +#define LOOP_COUNT 800000000 > >>>> +typedef double data_e; > >>>> + > >>>> +#include <stdio.h> > >>>> + > >>>> +__attribute_noinline__ data_e > >>>> +foo (data_e in) > >>> > >>> Pushed to master branch commit 91e9e8faea4086b3b8aef2355fc12c1559d425f6 > >>> "Fix 'gcc.dg/pr110279-2.c' syntax error due to '__attribute_noinline__'", > >>> see attached. > >>> > >>> However: > >>> > >>>> +{ > >>>> + data_e a1, a2, a3, a4; > >>>> + data_e tmp, result = 0; > >>>> + a1 = in + 0.1; > >>>> + a2 = in * 0.1; > >>>> + a3 = in + 0.01; > >>>> + a4 = in * 0.59; > >>>> + > >>>> + data_e result2 = 0; > >>>> + > >>>> + for (int ic = 0; ic < LOOP_COUNT; ic++) > >>>> + { > >>>> + /* Test that a complete FMA chain with length=4 is not broken. */ > >>>> + tmp = a1 + a2 * a2 + a3 * a3 + a4 * a4 ; > >>>> + result += tmp - ic; > >>>> + result2 = result2 / 2 - tmp; > >>>> + > >>>> + a1 += 0.91; > >>>> + a2 += 0.1; > >>>> + a3 -= 0.01; > >>>> + a4 -= 0.89; > >>>> + > >>>> + } > >>>> + > >>>> + return result + result2; > >>>> +} > >>>> + > >>>> +/* { dg-final { scan-tree-dump-not "was chosen for reassociation" > >>> "reassoc2"} } */ > >>>> +/* { dg-final { scan-tree-dump-times {\.FMA } 3 "optimized"} } */ > >> > >> Thank you for the fix. 
> >> > >>> ..., I still see these latter two tree dump scans FAIL, for GCN: > >>> > >>> $ grep -C2 'was chosen for reassociation' pr110279-2.c.197t.reassoc2 > >>> 2 *: a3_40 > >>> 2 *: a2_39 > >>> Width = 4 was chosen for reassociation > >>> Transforming _15 = powmult_1 + powmult_3; > >>> into _63 = powmult_1 + a1_38; > >>> $ grep -F .FMA pr110279-2.c.265t.optimized > >>> _63 = .FMA (a2_39, a2_39, a1_38); > >>> _64 = .FMA (a3_40, a3_40, powmult_5); > >>> > >>> ..., nvptx: > >>> > >>> $ grep -C2 'was chosen for reassociation' pr110279-2.c.197t.reassoc2 > >>> 2 *: a3_40 > >>> 2 *: a2_39 > >>> Width = 4 was chosen for reassociation > >>> Transforming _15 = powmult_1 + powmult_3; > >>> into _63 = powmult_1 + a1_38; > >>> $ grep -F .FMA pr110279-2.c.265t.optimized > >>> _63 = .FMA (a2_39, a2_39, a1_38); > >>> _64 = .FMA (a3_40, a3_40, powmult_5); > >> > >> For these 2 targets, the reassoc_width for FMUL is 1 (default value), > >> while the testcase assumes that to be 4. The bug was introduced when I > >> updated the patch but forgot to update the testcase. > >> > >>> ..., but also x86_64-pc-linux-gnu: > >>> > >>> $ grep -C2 'was chosen for reassociation' pr110279-2.c.197t.reassoc2 > >>> 2 *: a3_40 > >>> 2 *: a2_39 > >>> Width = 2 was chosen for reassociation > >>> Transforming _15 = powmult_1 + powmult_3; > >>> into _63 = powmult_1 + powmult_3; > >>> $ grep -cF .FMA pr110279-2.c.265t.optimized > >>> 0 > >> > >> For x86_64 this needs "-mfma". Sorry, the compile options missed that. > >> Can the change below fix these issues? I moved them into > >> testsuite/gcc.target/aarch64, since they rely on tunings. > >> > >> Tested on aarch64-unknown-linux-gnu. 
> >> > >>> > >>> Grüße > >>> Thomas > >>> > >>> > >>> ----------------- > >>> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, > >> 80634 > >>> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas > >>> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht > >>> München, HRB 106955 > >> > >> Thanks, > >> Di Zhao > >> > >> --- > >> gcc/testsuite/{gcc.dg => gcc.target/aarch64}/pr110279-1.c | 3 +-- > >> gcc/testsuite/{gcc.dg => gcc.target/aarch64}/pr110279-2.c | 3 +-- > >> 2 files changed, 2 insertions(+), 4 deletions(-) > >> rename gcc/testsuite/{gcc.dg => gcc.target/aarch64}/pr110279-1.c (83%) > >> rename gcc/testsuite/{gcc.dg => gcc.target/aarch64}/pr110279-2.c (78%) > >> > >> diff --git a/gcc/testsuite/gcc.dg/pr110279-1.c > >> b/gcc/testsuite/gcc.target/aarch64/pr110279-1.c > >> similarity index 83% > >> rename from gcc/testsuite/gcc.dg/pr110279-1.c > >> rename to gcc/testsuite/gcc.target/aarch64/pr110279-1.c > >> index f25b6aec967..97d693f56a5 100644 > >> --- a/gcc/testsuite/gcc.dg/pr110279-1.c > >> +++ b/gcc/testsuite/gcc.target/aarch64/pr110279-1.c > >> @@ -1,6 +1,5 @@ > >> /* { dg-do compile } */ > >> -/* { dg-options "-Ofast --param avoid-fma-max-bits=512 --param tree- > reassoc- > >> width=4 -fdump-tree-widening_mul-details" } */ > >> -/* { dg-additional-options "-march=armv8.2-a" { target aarch64-*-* } } */ > >> +/* { dg-options "-Ofast -mcpu=generic --param avoid-fma-max-bits=512 -- > param > >> tree-reassoc-width=4 -fdump-tree-widening_mul-details" } */ > >> > >> #define LOOP_COUNT 800000000 > >> typedef double data_e; > >> diff --git a/gcc/testsuite/gcc.dg/pr110279-2.c > >> b/gcc/testsuite/gcc.target/aarch64/pr110279-2.c > >> similarity index 78% > >> rename from gcc/testsuite/gcc.dg/pr110279-2.c > >> rename to gcc/testsuite/gcc.target/aarch64/pr110279-2.c > >> index b6b69969c6b..a88cb361fdc 100644 > >> --- a/gcc/testsuite/gcc.dg/pr110279-2.c > >> +++ 
b/gcc/testsuite/gcc.target/aarch64/pr110279-2.c > >> @@ -1,7 +1,6 @@ > >> /* PR tree-optimization/110279 */ > >> /* { dg-do compile } */ > >> -/* { dg-options "-Ofast --param tree-reassoc-width=4 --param fully- > pipelined- > >> fma=1 -fdump-tree-reassoc2-details -fdump-tree-optimized" } */ > >> -/* { dg-additional-options "-march=armv8.2-a" { target aarch64-*-* } } */ > >> +/* { dg-options "-Ofast -mcpu=generic --param tree-reassoc-width=4 --param > >> fully-pipelined-fma=1 -fdump-tree-reassoc2-details -fdump-tree-optimized" } > */ > >> > >> #define LOOP_COUNT 800000000 > >> typedef double data_e; > >> -- > >> 2.25.1 > > <0001-Fix-compile-options-of-pr110279-1.c-and-pr110279-2.c.patch>