https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118189

            Bug ID: 118189
           Summary: Weird vec_construct of elements that come from contiguous
                    memory
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: liuhongt at gcc dot gnu.org
            Blocks: 53947
  Target Milestone: ---

double  /* Returns c plus the sum of (a[i] - b[i]) * (a[i] - b[i]) for i = 0, 1, 2. */
foo (double* a, double* b, double c)  /* a and b are each read at indices 0..2 */
{
    c += (a[0] - b[0]) * (a[0] - b[0]);  /* one accumulation statement per element, no loop */
    c += (a[1] - b[1]) * (a[1] - b[1]);
    c += (a[2] - b[2]) * (a[2] - b[2]);  /* presumably the example.cpp:6 statement cited in the dump notes -- confirm */
    return c;
}

gcc -O2 -march=x86-64-v3 


/app/example.cpp:6:7: note: node (external) 0x25c51bd0 (max_nunits=2, refcnt=1)
vector(2) double
/app/example.cpp:6:7: note:     stmt 0 _9 = MEM[(double *)a_14(D) + 16B];
/app/example.cpp:6:7: note:     stmt 1 _5 = MEM[(double *)a_14(D) + 8B];
/app/example.cpp:6:7: note: node (external) 0x25c51c60 (max_nunits=2, refcnt=1)
vector(2) double
/app/example.cpp:6:7: note:     stmt 0 _10 = MEM[(double *)b_15(D) + 16B];
/app/example.cpp:6:7: note:     stmt 1 _6 = MEM[(double *)b_15(D) + 8B];
/app/example.cpp:6:7: note: Cost model analysis: 
powmult_4 + c_18 3 times scalar_stmt costs 12 in body
_11 * _11 1 times scalar_stmt costs 20 in body
_7 * _7 1 times scalar_stmt costs 20 in body
_9 - _10 1 times scalar_stmt costs 12 in body
_5 - _6 1 times scalar_stmt costs 12 in body
_9 - _10 1 times vector_stmt costs 12 in body
node 0x25c51bd0 1 times vec_construct costs 4 in prologue
node 0x25c51c60 1 times vec_construct costs 4 in prologue
_11 * _11 1 times vector_stmt costs 20 in body
powmult_4 + c_18 1 times vector_stmt costs 12 in body
powmult_4 + c_18 1 times vec_perm costs 4 in body
powmult_4 + c_18 1 times vec_to_scalar costs 4 in body
powmult_4 + c_18 2 times scalar_stmt costs 8 in body
/app/example.cpp:6:7: note: Cost model analysis for part in loop 0:
  Vector cost: 68
  Scalar cost: 76

...

  <bb 2> [local count: 1073741824]:
  # DEBUG BEGIN_STMT
  _1 = *a_14(D);
  _2 = *b_15(D);
  _3 = _1 - _2;
  powmult_12 = _3 * _3;
  # DEBUG c => powmult_12 + c_16(D)
  # DEBUG BEGIN_STMT
  _5 = MEM[(double *)a_14(D) + 8B];
  _6 = MEM[(double *)b_15(D) + 8B];
  _7 = _5 - _6;
  powmult_8 = _7 * _7;
  _20 = powmult_8 + powmult_12;
  c_18 = c_16(D) + _20;
  # DEBUG c => c_18
  # DEBUG BEGIN_STMT
  _9 = MEM[(double *)a_14(D) + 16B];
  _23 = {_9, _5}; <-- vec_construct from the loads at a + 16 and a + 8, which are adjacent in memory
  _10 = MEM[(double *)b_15(D) + 16B];
  _22 = {_10, _6};
  vect__11.3_21 = _23 - _22;
  vect_powmult_4.4_17 = vect__11.3_21 * vect__11.3_21;
  _11 = _9 - _10;
  powmult_4 = _11 * _11;
  _24 = .REDUC_PLUS (vect_powmult_4.4_17);
  _25 = c_16(D) + powmult_12;
  _26 = _24 + _25;
  c_19 = _26;
  # DEBUG c => c_19
  # DEBUG BEGIN_STMT
  return c_19;

}


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53947
[Bug 53947] [meta-bug] vectorizer missed-optimizations

Reply via email to