https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104268

            Bug ID: 104268
           Summary: 390: inefficient vec_popcnt for 16-bit for z13
           Product: gcc
           Version: 10.2.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jens.seifert at de dot ibm.com
  Target Milestone: ---

#include <vecintrin.h>

/* Reproducer for this report: population count on 16-bit vector elements.
   Passes the vector unsigned short argument straight to vec_popcnt and
   returns the result unchanged.  */
vector unsigned short popcnt(vector unsigned short a)
{
   return vec_popcnt(a);
}

With -march=z13, GCC generates:

_Z6popcntDv8_t:
.LFB1:
        .cfi_startproc
        vzero   %v0
        vpopct  %v24,%v24,0
        vleib   %v0,8,7
        vsrlb   %v0,%v24,%v0
        vab     %v24,%v24,%v0
        vgbm    %v0,21845
        vn      %v24,%v24,%v0
        br      %r14
        .cfi_endproc


The optimal sequence would be:
/* Proposed cheaper form of the 16-bit population count: count bits per
   byte, then add the two byte counts that belong to each 16-bit element.
   Takes and returns vector unsigned short, like popcnt.  */
vector unsigned short popcnt_opt(vector unsigned short a)
{
   /* Call vec_popcnt on the byte view of a, then reinterpret the result
      as 16-bit elements: each element now carries two per-byte counts.  */
   vector unsigned short r = (vector unsigned short)vec_popcnt((vector unsigned
char)a);
   /* Rotate every 16-bit element left by 8 bits, which swaps its two bytes.  */
   vector unsigned short b = vec_rli(r, 8);
   /* Each byte of r is now (high count + low count).  A per-byte count is
      at most 8, so the low-byte sum cannot carry into the high byte.  */
   r = r + b;
   /* Move the sum from the high byte down to the low byte; the unsigned
      shift leaves the high byte zero.  */
   r = r >> 8;
   return r;
}

_Z10popcnt_optDv8_t:
.LFB3:
        .cfi_startproc
        vpopct  %v24,%v24,0
        verllh  %v0,%v24,8
        vah     %v24,%v0,%v24
        vesrlh  %v24,%v24,8
        br      %r14
        .cfi_endproc
  • [Bug target/104268] New: 390: ... jens.seifert at de dot ibm.com via Gcc-bugs

Reply via email to