Hi!

On 2022-04-07T15:04:15+0200, Richard Biener via Gcc-patches 
<gcc-patches@gcc.gnu.org> wrote:
> On Thu, 7 Apr 2022, Jan Hubicka wrote:
>> > On Thu, 7 Apr 2022, Jan Hubicka wrote:
>> > > this patch fixes miscompilation of gnatmake.  Modref attempts to track 
>> > > memory
>> > > accesses relative to the base pointers which are parameters of functions.
>> > > If it fails, it still makes difference between unknown memory access and
>> > > global memory access.  The second makes it possible to disambiguate with
>> > > memory that is not accessible from outside world (i.e. everything that 
>> > > does
>> > > not escape from the caller function).  This is useful so we do not punt
>> > > when unknown function is called.
>> > >
>> > > Now I added ref_may_access_global_memory_p to tree-ssa-alias whic is 
>> > > using
>> > > ptr_deref_may_alias_global_p.  There is however a shift in meaning of 
>> > > this
>> > > predicate: the second tests that the dereference may alias with global 
>> > > variable.
>> > >
>> > > In the testcase we are disambiguating heap allocated escaping memory 
>> > > which is
>> > > not a global variable but it is still a global memory in the modref's 
>> > > sense.
>> > > So we need to test in addition contains_escaped.
>> > >
>> > > The patch simply copies logic from the predicate and adds the check.
>> > > I am not sure if there is better way to handle this?
>> >
>> > I'm testing the following variant which exposes this detail
>> > (escaped local memory global or not) in the APIs that say "global"
>> > which allows to remove ref_may_access_global_memory_p.
>>
>> Thank you.  Indeed it is better to have an explicit flag, since the
>> clash of names is bit sensitive.
>
> OK - bootstrapped / tested on x86_64-unknown-linux-gnu including Ada
> and now pushed.

This commit r12-8048-g8c0ebaf9f586100920a3c0849fb10e9985d7ae58
"ipa/104303 - miscompilation of gnatmake" is causing one regression in
nvptx offload testing:

    [...]
    [-PASS:-]{+FAIL:+} libgomp.oacc-fortran/private-variables.f90 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O1   at 
line 142 (test for bogus messages, line 131)
    [...]

I've done a before/after 'diff' of
'-fdump-tree-all -foffload-options=nvptx-none=-fdump-tree-all'
with all functions and calls other than 't4' commented out.

For '-O0', there's no difference at all.

For '-O1', for host compilation we see:

    diff -ru 0-O1/a-private-variables.f90.117t.dce2 
./a-private-variables.f90.117t.dce2
    --- 0-O1/a-private-variables.f90.117t.dce2      2022-04-12 
08:36:54.525302868 +0200
    +++ ./a-private-variables.f90.117t.dce2 2022-04-12 12:51:43.726304109 +0200
    @@ -30,9 +30,13 @@

       <bb 3> [local count: 87490071]:
       # .offset.15_2 = PHI <0(2), .offset.15_63(5)>
    +  pt.x = .offset.15_2;
       _25 = .offset.15_2 * 2;
    +  pt.y = _25;
       _27 = .offset.15_2 * 4;
    +  pt.z = _27;
       _29 = .offset.15_2 * 6;
    +  pt.attr[4] = _29;

       <bb 4> [local count: 1073741824]:
       # .offset.10_4 = PHI <0(3), .offset.10_56(4)>
    diff -ru 0-O1/a-private-variables.f90.118t.stdarg 
./a-private-variables.f90.118t.stdarg
    --- 0-O1/a-private-variables.f90.118t.stdarg    2022-04-12 
08:36:54.525302868 +0200
    +++ ./a-private-variables.f90.118t.stdarg       2022-04-12 
12:51:43.726304109 +0200
    @@ -4,6 +4,7 @@
     __attribute__((oacc function (1, 1, 1), oacc parallel, omp target 
entrypoint))
     void t4_._omp_fn.0 (const struct .omp_data_t.1 & restrict .omp_data_i)
     {
    +  struct vec3 pt;
       integer(kind=4) .offset.15_2;
       integer(kind=4) .offset.10_4;
       integer(kind=4) _25;
    @@ -25,9 +26,13 @@

       <bb 3> [local count: 87490071]:
       # .offset.15_2 = PHI <0(2), .offset.15_63(5)>
    +  pt.x = .offset.15_2;
       _25 = .offset.15_2 * 2;
    +  pt.y = _25;
       _27 = .offset.15_2 * 4;
    +  pt.z = _27;
       _29 = .offset.15_2 * 6;
    +  pt.attr[4] = _29;

       <bb 4> [local count: 1073741824]:
       # .offset.10_4 = PHI <0(3), .offset.10_56(4)>
    [Similar for following passes/dumps.]
    diff -ru 0-O1/a-private-variables.f90.141t.lim2 
./a-private-variables.f90.141t.lim2
    --- 0-O1/a-private-variables.f90.141t.lim2      2022-04-12 
08:36:54.525302868 +0200
    +++ ./a-private-variables.f90.141t.lim2 2022-04-12 12:51:43.730304125 +0200
    @@ -24,11 +24,42 @@
     ;; 5 succs { 7 6 }
     ;; 7 succs { 3 }
     ;; 6 succs { 1 }
    +
    +Symbols to be put in SSA form
    +{ D.4340 D.4356 D.4357 D.4358 D.4359 D.4360 D.4361 D.4362 D.4363 }
    +Incremental SSA update started at block: 0
    +Number of blocks in CFG: 9
    +Number of blocks to update: 8 ( 89%)
    +
    +
    +
    +SSA replacement table
    +N_i -> { O_1 ... O_j } means that N_i replaces O_1, ..., O_j
    +
    +pt_x_lsm.22_1 -> { pt_x_lsm.22_72 }
    +pt_z_lsm.24_20 -> { pt_z_lsm.24_9 }
    +pt_attr_I_lsm.25_21 -> { pt_attr_I_lsm.25_10 }
    +pt_y_lsm.23_22 -> { pt_y_lsm.23_73 }
    +Incremental SSA update started at block: 3
    +Number of blocks in CFG: 9
    +Number of blocks to update: 3 ( 33%)
    +
    +
     __attribute__((oacc function (1, 1, 1), oacc parallel, omp target 
entrypoint))
     void t4_._omp_fn.0 (const struct .omp_data_t.1 & restrict .omp_data_i)
     {
    +  integer(kind=4) D.4363;
    +  integer(kind=4) pt_attr_I_lsm.25;
    +  integer(kind=4) D.4361;
    +  integer(kind=4) pt_z_lsm.24;
    +  integer(kind=4) D.4359;
    +  integer(kind=4) pt_y_lsm.23;
    +  integer(kind=4) D.4357;
    +  integer(kind=4) pt_x_lsm.22;
    +  struct vec3 pt;
       integer(kind=4) .offset.15_2;
       integer(kind=4) .offset.10_4;
    +  integer(kind=4) _7(D);
       integer(kind=4) _25;
       integer(kind=4) _27;
       integer(kind=4) _29;
    @@ -37,21 +68,32 @@
       integer(kind=8) _43;
       integer(kind=4)[1025] * _45;
       integer(kind=4) _46;
    +  integer(kind=4) _47(D);
       integer(kind=4) _48;
       integer(kind=4) _50;
    +  integer(kind=4) _51(D);
       integer(kind=4) _52;
       integer(kind=4) _54;
       integer(kind=4) .offset.10_56;
       integer(kind=4) .offset.15_63;
    +  integer(kind=4) _70(D);

       <bb 2> [local count: 7128820]:
    +  pt_x_lsm.22_8 = _7(D);
    +  pt_y_lsm.23_49 = _47(D);
    +  pt_z_lsm.24_53 = _51(D);
    +  pt_attr_I_lsm.25_71 = _70(D);
       _45 = *.omp_data_i_44(D).arr;

       <bb 3> [local count: 87490071]:
       # .offset.15_2 = PHI <0(2), .offset.15_63(7)>
    +  pt_x_lsm.22_72 = .offset.15_2;
       _25 = .offset.15_2 * 2;
    +  pt_y_lsm.23_73 = _25;
       _27 = .offset.15_2 * 4;
    +  pt_z_lsm.24_9 = _27;
       _29 = .offset.15_2 * 6;
    +  pt_attr_I_lsm.25_10 = _29;
       _41 = .offset.15_2 * 32;

       <bb 4> [local count: 1073741824]:
    @@ -84,6 +126,14 @@
       goto <bb 3>; [100.00%]

       <bb 6> [local count: 35644102]:
    +  # pt_z_lsm.24_20 = PHI <pt_z_lsm.24_9(5)>
    +  # pt_attr_I_lsm.25_21 = PHI <pt_attr_I_lsm.25_10(5)>
    +  # pt_x_lsm.22_1 = PHI <pt_x_lsm.22_72(5)>
    +  # pt_y_lsm.23_22 = PHI <pt_y_lsm.23_73(5)>
    +  pt.attr[4] = pt_attr_I_lsm.25_21;
    +  pt.z = pt_z_lsm.24_20;
    +  pt.y = pt_y_lsm.23_22;
    +  pt.x = pt_x_lsm.22_1;
       return;

     }
    [Similar for following passes/dumps.]
    diff -ru 0-O1/a-private-variables.f90.148t.dse3 
./a-private-variables.f90.148t.dse3
    --- 0-O1/a-private-variables.f90.148t.dse3      2022-04-12 
08:36:54.525302868 +0200
    +++ ./a-private-variables.f90.148t.dse3 2022-04-12 12:51:43.730304125 +0200
    @@ -1,11 +1,24 @@

     ;; Function t4_._omp_fn.0 (t4_._omp_fn.0, funcdef_no=3, decl_uid=4278, 
cgraph_uid=4, symbol_order=3)

    +Removing basic block 7
    +Removing basic block 8
    +Removing basic block 9
     __attribute__((oacc function (1, 1, 1), oacc parallel, omp target 
entrypoint))
     void t4_._omp_fn.0 (const struct .omp_data_t.1 & restrict .omp_data_i)
     {
    +  integer(kind=4) D.4363;
    +  integer(kind=4) pt_attr_I_lsm.25;
    +  integer(kind=4) D.4361;
    +  integer(kind=4) pt_z_lsm.24;
    +  integer(kind=4) D.4359;
    +  integer(kind=4) pt_y_lsm.23;
    +  integer(kind=4) D.4357;
    +  integer(kind=4) pt_x_lsm.22;
    +  struct vec3 pt;
       integer(kind=4) .offset.15_2;
       integer(kind=4) .offset.10_4;
    +  integer(kind=4) _7(D);
       integer(kind=4) _25;
       integer(kind=4) _27;
       integer(kind=4) _29;
    @@ -14,25 +27,28 @@
       integer(kind=8) _43;
       integer(kind=4)[1025] * _45;
       integer(kind=4) _46;
    +  integer(kind=4) _47(D);
       integer(kind=4) _48;
       integer(kind=4) _50;
    +  integer(kind=4) _51(D);
       integer(kind=4) _52;
       integer(kind=4) _54;
       integer(kind=4) .offset.10_56;
       integer(kind=4) .offset.15_63;
    +  integer(kind=4) _70(D);

       <bb 2> [local count: 7128820]:
       _45 = *.omp_data_i_44(D).arr;

       <bb 3> [local count: 87490071]:
    -  # .offset.15_2 = PHI <0(2), .offset.15_63(7)>
    +  # .offset.15_2 = PHI <0(2), .offset.15_63(5)>
       _25 = .offset.15_2 * 2;
       _27 = .offset.15_2 * 4;
       _29 = .offset.15_2 * 6;
       _41 = .offset.15_2 * 32;

       <bb 4> [local count: 1073741824]:
    -  # .offset.10_4 = PHI <0(3), .offset.10_56(8)>
    +  # .offset.10_4 = PHI <0(3), .offset.10_56(4)>
       _42 = .offset.10_4 + _41;
       _43 = (integer(kind=8)) _42;
       _46 = (*_45)[_43];
    @@ -43,23 +59,17 @@
       (*_45)[_43] = _54;
       .offset.10_56 = .offset.10_4 + 1;
       if (.offset.10_56 <= 31)
    -    goto <bb 8>; [89.00%]
    +    goto <bb 4>; [89.00%]
       else
         goto <bb 5>; [11.00%]

    -  <bb 8> [local count: 955630224]:
    -  goto <bb 4>; [100.00%]
    -
       <bb 5> [local count: 437450365]:
       .offset.15_63 = .offset.15_2 + 1;
       if (.offset.15_63 <= 31)
    -    goto <bb 7>; [89.00%]
    +    goto <bb 3>; [89.00%]
       else
         goto <bb 6>; [11.00%]

    -  <bb 7> [local count: 389330825]:
    -  goto <bb 3>; [100.00%]
    -
       <bb 6> [local count: 35644102]:
       return;

    [Similar for following passes/dumps.]

..., so in 'a-private-variables.f90.148t.dse3', the 'pt.{x,y,z,attr}'
assignments for the new 'struct vec3 pt;' get cleaned out, so that should
all be fine; no actual changes in the end.

Comparing '-O1' nvptx offload target compilation before/after, the first
difference is in 'a.xnvptx-none.mkoffload.117t.dce2': similar to host
compilation.  But then, in the following things do not get cleaned up as
they do for the host compilation; the 'pt.{x,y,z,attr}' assignments for
the new 'struct vec3 pt;' persist:

    diff -ru 0-O1/a.xnvptx-none.mkoffload.252t.optimized 
./a.xnvptx-none.mkoffload.252t.optimized
    --- 0-O1/a.xnvptx-none.mkoffload.252t.optimized 2022-04-12 
08:36:54.569303204 +0200
    +++ ./a.xnvptx-none.mkoffload.252t.optimized    2022-04-12 
12:51:43.774304292 +0200
    @@ -7,34 +7,36 @@
     __attribute__((oacc function (32, 8, 32), oacc parallel, omp target 
entrypoint))
     void t4_._omp_fn.0 (const struct .omp_data_t.1 & restrict .omp_data_i)
     {
    -  unsigned int ivtmp$6;
    +  unsigned int ivtmp$8;
       unsigned int ivtmp$5;
    +  unsigned int ivtmp$3;
       int D.1527;
       int D.1524;
    +  struct vec3 pt;
       int _2;
    +  int[1025] * _4;
    +  int _22;
       int _23;
       int _25;
       int _27;
       int _29;
       int _34;
    -  int[1025] * _43;
    -  sizetype _45;
    -  sizetype _46;
    -  int[1025] * _47;
    -  unsigned int _48;
    +  sizetype _41;
    +  unsigned int _42;
    +  unsigned int _43;
    +  unsigned int _45;
    +  int _46;
       int _49;
    -  unsigned int _50;
       int _51;
    -  int _52;
    -  int _53;
    -  int _63;
    -  int _82;
    -  int _83;
    -  int _87;
    +  int _54;
    +  sizetype _63;
       int _95;
    +  int _96;
    +  int _99;
       int _102;
    -  int _103;
    +  int[1025] * _103;
       int _104;
    +  int _105;
       int _107;

       <bb 2> [local count: 7128820]:
    @@ -47,43 +49,50 @@
         goto <bb 7>; [73.00%]

       <bb 3> [local count: 1924781]:
    -  _87 = _104 * 32;
    -  ivtmp$5_80 = (unsigned int) _87;
    -  _52 = _104 * 2;
    -  ivtmp$6_54 = (unsigned int) _52;
    +  ivtmp$3_61 = (unsigned int) _104;
    +  _54 = _104 * 2;
    +  ivtmp$5_56 = (unsigned int) _54;
    +  _46 = _104 * 32;
    +  ivtmp$8_48 = (unsigned int) _46;
    +  _42 = (unsigned int) _23;

       <bb 4> [local count: 87490071]:
    -  # _2 = PHI <_104(3), _63(6)>
    -  # ivtmp$5_22 = PHI <ivtmp$5_80(3), ivtmp$5_81(6)>
    -  # ivtmp$6_72 = PHI <ivtmp$6_54(3), ivtmp$6_56(6)>
    -  _25 = (int) ivtmp$6_72;
    -  _50 = ivtmp$6_72 * 2;
    -  _27 = (int) _50;
    -  _48 = ivtmp$6_72 * 3;
    -  _29 = (int) _48;
    +  # ivtmp$3_106 = PHI <ivtmp$3_61(3), ivtmp$3_47(6)>
    +  # ivtmp$5_87 = PHI <ivtmp$5_56(3), ivtmp$5_72(6)>
    +  # ivtmp$8_52 = PHI <ivtmp$8_48(3), ivtmp$8_50(6)>
    +  _2 = (int) ivtmp$3_106;
    +  pt.x = _2;
    +  _25 = (int) ivtmp$5_87;
    +  pt.y = _25;
    +  _45 = ivtmp$5_87 * 2;
    +  _27 = (int) _45;
    +  pt.z = _27;
    +  _43 = ivtmp$5_87 * 3;
    +  _29 = (int) _43;
    +  pt.attr[4] = _29;
       _34 = .UNIQUE (OACC_FORK, 0, 2);

       <bb 5> [local count: 437450365]:
       _107 = .GOACC_DIM_POS (2);
    -  _82 = (int) ivtmp$5_22;
    -  _83 = _82 + _107;
    -  _47 = *.omp_data_i_44(D).arr;
    -  _46 = (sizetype) _83;
    -  _45 = _46 * 4;
    -  _43 = _47 + _45;
    -  _49 = MEM <int> [(int[1025] *)_43];
    -  _51 = _2 + _49;
    -  _53 = _25 + _51;
    -  _103 = _27 + _53;
    -  _95 = _29 + _103;
    -  MEM <int> [(int[1025] *)_43] = _95;
    +  _49 = (int) ivtmp$8_52;
    +  _51 = _49 + _107;
    +  _103 = *.omp_data_i_44(D).arr;
    +  _63 = (sizetype) _51;
    +  _41 = _63 * 4;
    +  _4 = _103 + _41;
    +  _95 = MEM <int> [(int[1025] *)_4];
    +  _96 = _2 + _95;
    +  _22 = _25 + _96;
    +  _105 = _22 + _27;
    +  _99 = _29 + _105;
    +  MEM <int> [(int[1025] *)_4] = _99;
       .UNIQUE (OACC_JOIN, _34, 2);

       <bb 6> [local count: 87490071]:
    -  _63 = _2 + 1;
    -  ivtmp$5_81 = ivtmp$5_22 + 32;
    -  ivtmp$6_56 = ivtmp$6_72 + 2;
    -  if (_23 != _63)
    +  ivtmp$3_47 = ivtmp$3_106 + 1;
    +  ivtmp$5_72 = ivtmp$5_87 + 2;
    +  ivtmp$8_50 = ivtmp$8_52 + 32;
    +  if (_42 != ivtmp$3_47)
         goto <bb 4>; [89.00%]
       else
         goto <bb 7>; [11.00%]

..., and thus the change in diagnostics:

     [...]
     
source-gcc/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90:131:63: 
note: variable ‘pt’ ought to be adjusted for OpenACC privatization level: ‘gang’
     
source-gcc/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90:131:63: 
note: variable ‘pt’ ought to be adjusted for OpenACC privatization level: ‘gang’
    +source-gcc/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90: 
In function ‘t4_._omp_fn.0’:
    
+source-gcc/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90:131:63:
 note: variable ‘pt’ adjusted for OpenACC privatization level: ‘gang’
    +  131 |   !$acc loop gang private(pt) ! { dg-line l_loop[incr c_loop] }
    +      |                                                               ^

For '-O2', host compilation begins same as '-O1', and again in
'a-private-variables.f90.148t.dse3', the 'pt.{x,y,z,attr}' assignments
for the new 'struct vec3 pt;' get cleaned out:

    --- ./a-private-variables.f90.144t.sink1        2022-04-12 
14:28:19.173520425 +0200
    +++ ./a-private-variables.f90.148t.dse3 2022-04-12 14:28:19.173520425 +0200
    [...]
     __attribute__((oacc function (1, 1, 1), oacc parallel, omp target 
entrypoint))
     void t4_._omp_fn.0 (const struct .omp_data_t.1 & restrict .omp_data_i)
     {
    @@ -97,10 +74,6 @@
       goto <bb 3>; [100.00%]

       <bb 6> [local count: 35644102]:
    -  pt.attr[4] = _29;
    -  pt.z = _27;
    -  pt.y = _25;
    -  pt.x = .offset.15_2;
       return;

     }
    [...]

For '-O2', nvptx offload target compilation looks very similar to host
compilation, and again in 'a.xnvptx-none.mkoffload.148t.dse3', the
'pt.{x,y,z,attr}' assignments for the new 'struct vec3 pt;' get cleaned
out:

    --- ./a.xnvptx-none.mkoffload.144t.sink1        2022-04-12 
14:28:19.213520366 +0200
    +++ ./a.xnvptx-none.mkoffload.148t.dse3 2022-04-12 14:28:19.213520366 +0200
    [...]
     __attribute__((oacc function (32, 8, 32), oacc parallel, omp target 
entrypoint))
     void t4_._omp_fn.0 (const struct .omp_data_t.1 & restrict .omp_data_i)
     {
    @@ -34,13 +25,9 @@

       <bb 2> [local count: 7128820]:
       _104 = .GOACC_DIM_POS (0);
    -  pt.x = _104;
       _25 = _104 * 2;
    -  pt.y = _25;
       _27 = _104 * 4;
    -  pt.z = _27;
       _29 = _104 * 6;
    -  pt.attr[4] = _29;
       _34 = .UNIQUE (OACC_FORK, 0, 2);

       <bb 3> [local count: 437450365]:

..., so no actual changes in the end.

I have not verified other ("higher") optimization levels, but given no
change in diagnostics, I suppose the same ("no actual changes") happens
for those.

Is the '-O1' change/regression unexpected, and should be analyzed, or
should we just accept the slightly worse code generation (for '-O1'
only), and I accordingly adjust the test case for the change in
diagnostics?


Grüße
 Thomas
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Reply via email to