https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115843

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
The loops are

    for (i = 0; i < 64; i++) {
        KnightMoves[i] = 0;

        if (Rank(i) > 0) { 
            if (Rank(i) > 1) {
                if (File(i) > 0) KnightMoves[i] |= Mask[i-17];
                if (File(i) < 7) KnightMoves[i] |= Mask[i-15];
            }
            if (File(i) > 1) KnightMoves[i] |= Mask[i-10];
            if (File(i) < 6) KnightMoves[i] |= Mask[i-6];
        }

        if (Rank(i) < 7) {
            if (Rank(i) < 6) {
                if (File(i) > 0) KnightMoves[i] |= Mask[i+15];
                if (File(i) < 7) KnightMoves[i] |= Mask[i+17];
            }        
            if (File(i) > 1) KnightMoves[i] |= Mask[i+6];
            if (File(i) < 6) KnightMoves[i] |= Mask[i+10];
        }
    }

    for (i = 0; i < 64; i++) {
        if (File(i) == FileA) {
            KingPressureMask[i] = KingSafetyMask[i + 1];
        } else if (File(i) == FileH) {
            KingPressureMask[i] = KingSafetyMask[i - 1];
        } else {
            KingPressureMask[i] = KingSafetyMask[i];
        }   
    }     

    for (i = 0; i < 64; i++) {
        if (File(i) == FileA) {
            KingPressureMask1[i] = KingSafetyMask1[i + 1];
        } else if (File(i) == FileH) {
            KingPressureMask1[i] = KingSafetyMask1[i - 1];
        } else {
            KingPressureMask1[i] = KingSafetyMask1[i];
        }   
    }          

The last loop, as it looks before vectorization, is

  <bb 302> [local count: 145013]:

  <bb 183> [local count: 9271420]:
  # i_38 = PHI <_1526(215), 0(302)>
  # ivtmp_1427 = PHI <ivtmp_1430(215), 64(302)>
  _296 = i_38 & 7;
  _1526 = i_38 + 1;
  _380 = _296 == 0;
  _1371 = &KingSafetyMask1[_1526];
  _298 = .MASK_LOAD (_1371, 64B, _380);
  _804 = _296 == 7;
  _1370 = (unsigned int) i_38;
  _1369 = _1370 + 4294967295;
  _299 = (int) _1369;
  _1368 = &KingSafetyMask1[_299];
  _300 = .MASK_LOAD (_1368, 64B, _804);
  _301 = KingSafetyMask1[i_38];
  _ifc__1431 = _804 ? _300 : _301;
  _336 = _380 ? _298 : _ifc__1431;
  KingPressureMask1[i_38] = _336;
  ivtmp_1430 = ivtmp_1427 - 1;
  if (ivtmp_1430 != 0)
    goto <bb 215>; [98.44%]
  else
    goto <bb 189>; [1.56%]

  <bb 215> [local count: 9126407]:
  goto <bb 183>; [100.00%]

This is vectorized as

  <bb 183> [local count: 579464]:
  # vect_vec_iv_.194_1737 = PHI <_1915(215), { -15, -14, -13, -12, -11, -10,
-9, -8, -7, -6, -5, -4, -3, -2, -1, 0 }(198)>
  # vectp_KingSafetyMask1.198_1768 = PHI <vectp_KingSafetyMask1.198_1859(215),
&MEM <BITBOARD[64]> [(void *)&KingSafetyMask1 + -112B](198)>
  # vectp_KingSafetyMask1.204_1878 = PHI <vectp_KingSafetyMask1.204_1879(215),
&MEM <BITBOARD[64]> [(void *)&KingSafetyMask1 + -128B](198)>
  # vectp_KingSafetyMask1.208_2015 = PHI <vectp_KingSafetyMask1.208_2017(215),
&MEM <BITBOARD[64]> [(void *)&KingSafetyMask1 + -120B](198)>
  # vectp_KingPressureMask1.216_2023 = PHI
<vectp_KingPressureMask1.216_2025(215), &MEM <BITBOARD[64]> [(void
*)&KingPressureMask1 + -120B](198)>
  # ivtmp_2028 = PHI <ivtmp_2030(215), 79(198)>
  # loop_mask_1995 = PHI <_1989(215), { 0, 0, 0, 0, 0, 0, 0, 0 }(198)>
  # loop_mask_1860 = PHI <_1990(215), { 0, 0, 0, 0, 0, 0, 0, 0 }(198)>
  _1915 = vect_vec_iv_.194_1737 + { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16 };
  vect__296.195_1901 = vect_vec_iv_.194_1737 & { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7 };
  mask__380.196_1920 = vect__296.195_1901 == { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0 };
  mask_patt_1854.197_1855 = [vec_unpack_lo_expr] mask__380.196_1920;
  mask_patt_1854.197_1733 = [vec_unpack_hi_expr] mask__380.196_1920;
  vec_mask_and_1997 = mask_patt_1854.197_1855 & loop_mask_1860;
  vect_patt_1732.200_1998 = .MASK_LOAD (vectp_KingSafetyMask1.198_1768, 128B,
vec_mask_and_1997);
  vectp_KingSafetyMask1.198_1865 = vectp_KingSafetyMask1.198_1768 + 64;
  vec_mask_and_2002 = mask_patt_1854.197_1733 & loop_mask_1995;
  vect_patt_1732.201_2003 = .MASK_LOAD (vectp_KingSafetyMask1.198_1865, 128B,
vec_mask_and_2002);
  mask__804.202_1876 = vect__296.195_1901 == { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7 };
  mask_patt_1734.203_2005 = [vec_unpack_lo_expr] mask__804.202_1876;
  mask_patt_1734.203_2007 = [vec_unpack_hi_expr] mask__804.202_1876;
  vec_mask_and_2010 = mask_patt_1734.203_2005 & loop_mask_1860;
  vect_patt_1772.206_2012 = .MASK_LOAD (vectp_KingSafetyMask1.204_1878, 512B,
vec_mask_and_2010);
  vectp_KingSafetyMask1.204_2013 = vectp_KingSafetyMask1.204_1878 + 64;
  vec_mask_and_1980 = mask_patt_1734.203_2007 & loop_mask_1995;
  vect_patt_1772.207_1981 = .MASK_LOAD (vectp_KingSafetyMask1.204_2013, 512B,
vec_mask_and_1980);
  vect__301.210_1882 = .MASK_LOAD (vectp_KingSafetyMask1.208_2015, 64B,
loop_mask_1860);
  vectp_KingSafetyMask1.208_2018 = vectp_KingSafetyMask1.208_2015 + 64;
  vect__301.211_2019 = .MASK_LOAD (vectp_KingSafetyMask1.208_2018, 64B,
loop_mask_1995);
  vect_patt_1775.213_2021 = VEC_COND_EXPR <mask_patt_1734.203_2005,
vect_patt_1772.206_2012, vect__301.210_1882>;
  vect_patt_1775.213_2022 = VEC_COND_EXPR <mask_patt_1734.203_2007,
vect_patt_1772.207_1981, vect__301.211_2019>;
  vect_patt_1897.215_1984 = VEC_COND_EXPR <mask_patt_1854.197_1855,
vect_patt_1732.200_1998, vect_patt_1775.213_2021>;
  vect_patt_1897.215_1985 = VEC_COND_EXPR <mask_patt_1854.197_1733,
vect_patt_1732.201_2003, vect_patt_1775.213_2022>;
  .MASK_STORE (vectp_KingPressureMask1.216_2023, 64B, loop_mask_1860,
vect_patt_1897.215_1984);
  vectp_KingPressureMask1.216_2026 = vectp_KingPressureMask1.216_2023 + 64;
  .MASK_STORE (vectp_KingPressureMask1.216_2026, 64B, loop_mask_1995,
vect_patt_1897.215_1985);
  vectp_KingSafetyMask1.198_1859 = vectp_KingSafetyMask1.198_1865 + 64;
  vectp_KingSafetyMask1.204_1879 = vectp_KingSafetyMask1.204_2013 + 64;
  vectp_KingSafetyMask1.208_2017 = vectp_KingSafetyMask1.208_2018 + 64;
  vectp_KingPressureMask1.216_2025 = vectp_KingPressureMask1.216_2026 + 64;
  ivtmp_2030 = ivtmp_2028 - 16;
  _2031 = (unsigned short) ivtmp_2030;
  _1988 = {_2031, _2031, _2031, _2031, _2031, _2031, _2031, _2031};
  _1989 = { 8, 9, 10, 11, 12, 13, 14, 15 } < _1988;
  _1990 = { 0, 1, 2, 3, 4, 5, 6, 7 } < _1988;
  if (ivtmp_2028 > 16)
    goto <bb 215>; [74.97%]
  else
    goto <bb 529>; [25.03%]

  <bb 215> [local count: 434451]:
  goto <bb 183>; [100.00%]

With -mtune=cascadelake -mprefer-vector-width=512 we avoid the failure,
generating instead

  <bb 183> [local count: 435039]:
  # i_38 = PHI <_1526(215), 0(198)>
  # ivtmp_1427 = PHI <ivtmp_1430(215), 64(198)>
  # vect_vec_iv_.194_1737 = PHI <_1915(215), { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15 }(198)>
  # vectp_KingSafetyMask1.198_1768 = PHI <vectp_KingSafetyMask1.198_1859(215),
&MEM <BITBOARD[64]> [(void *)&KingSafetyMask1 + 8B](198)>
  # vectp_KingSafetyMask1.204_1876 = PHI <vectp_KingSafetyMask1.204_2005(215),
&MEM <BITBOARD[64]> [(void *)&KingSafetyMask1 + -8B](198)>
  # vectp_KingSafetyMask1.208_1879 = PHI <vectp_KingSafetyMask1.208_2010(215),
&KingSafetyMask1(198)>
  # vectp_KingPressureMask1.216_2020 = PHI
<vectp_KingPressureMask1.216_2021(215), &KingPressureMask1(198)>
  # ivtmp_1984 = PHI <ivtmp_1985(215), 0(198)>
  _1915 = vect_vec_iv_.194_1737 + { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16 };
  vect__296.195_1901 = vect_vec_iv_.194_1737 & { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7 };
  mask__380.196_1920 = vect__296.195_1901 == { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0 };
...

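The difference is peeling for alignment: the failing variant starts its vector
IV at { -15, ..., 0 } with the data pointers offset below the start of the
arrays, while the working variant starts at element 0 (peeling is an odd thing
to do here, but ...).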

Reply via email to