On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> After this update, the test fails on arm and aarch64: according to the
> logs, the optimization is still performed 14 times.

Seems this is because
              if (change
                  && !flag_syntax_only
                  && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
                      == ZERO_EXTEND))
                {
                  tree uns = unsigned_type_for (TREE_TYPE (and0));
                  and0 = fold_convert_loc (loc, uns, and0);
                  and1 = fold_convert_loc (loc, uns, and1);
                }
in fold-const.c adds extra casts on these targets which prevent the
optimization.  Instead of the GIMPLE we get on x86_64:
      _1 = __v._M_value;
      _2 = (int) _1;
      _3 = _2 & 1;
      _4 = __v._M_value;
      _5 = (int) _4;
      D.8503 = _3 == _5;
on arm/aarch64 it is:
      _1 = __v._M_value;
      _2 = (unsigned char) _1;
      _3 = (int) _2;
      _4 = _3 & 1;
      _5 = __v._M_value;
      _6 = (int) _5;
      D.10471 = _4 == _6;
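At the source level this corresponds to something like the following
reduced testcase (my own sketch with made-up names, not the code from
the testsuite):

  /* Hypothetical reduction: a narrow signed load compared against
     itself masked with 1.  On arm/aarch64 load_extend_op (QImode)
     returns ZERO_EXTEND, so the hunk above reroutes the cast of the
     load through unsigned char; on x86_64 it does not.  */
  signed char v;

  int
  f (void)
  {
    return ((int) v & 1) == (int) v;
  }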
On x86_64, before fre1 it is
  __v$_M_value_9 = __v._M_value;
  _1 = __v$_M_value_9;
  _2 = (int) _1;
  _8 = _1 & 1;
  _3 = (int) _8;
  _4 = __v$_M_value_9;
  _5 = (int) _4;
  _7 = _4 == _8;
which fre1, using match.pd, optimizes into
  __v$_M_value_9 = __v._M_value;
  _10 = __v$_M_value_9 & -2;
  _7 = _10 == 0;
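The two forms are the same predicate, since (x & 1) == x holds exactly
when all bits of x other than bit 0 are clear, i.e. (x & ~1) == 0 and
~1 is -2.  A quick standalone check (my sketch, not part of the mail or
the testsuite):

  /* Exhaustively verify over all signed char values that
     ((x & 1) == x) == ((x & -2) == 0); both accept exactly 0 and 1.  */
  #include <assert.h>

  int
  main (void)
  {
    for (int x = -128; x <= 127; x++)
      assert (((x & 1) == x) == ((x & -2) == 0));
    return 0;
  }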
On aarch64, however, before fre1 we have:
  __v$_M_value_10 = __v._M_value;
  _1 = __v$_M_value_10;
  _2 = (unsigned char) _1;
  _3 = (int) _2;
  _9 = _2 & 1;
  _4 = (int) _9;
  _5 = __v$_M_value_10;
  _6 = (int) _5;
  _8 = _4 == _6;
which is not optimized, because the extra (unsigned char) casts keep the
masked operand and the unmasked operand from being recognized as the
same value.
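Written back as C (again my own illustration, with made-up names), the
aarch64 form is roughly:

  /* The masked side is computed from (unsigned char) v while the other
     side is computed from v directly, so the two sides of the
     comparison are no longer the same operand and the (X & 1) == X
     simplification cannot match.  */
  signed char v;

  int
  g (void)
  {
    return ((int) (unsigned char) v & 1) == (int) v;
  }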

        Jakub
