As suggested in PR 91201, this patch adds a new insn pattern to avoid a separate zero-extension to HImode for SSE4.1 targets.
2019-08-02  Uroš Bizjak  <ubiz...@gmail.com>

    PR target/91201
    * config/i386/sse.md (*vec_extractv16qi_zext): New insn pattern.

testsuite/ChangeLog:

2019-08-02  Uroš Bizjak  <ubiz...@gmail.com>

    PR target/91201
    * gcc.target/i386/sse4_1-pr91201.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 274008)
+++ config/i386/sse.md (working copy)
@@ -14970,6 +14970,25 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
+(define_insn "*vec_extractv16qi_zext"
+  [(set (match_operand:HI 0 "register_operand" "=r,r")
+        (zero_extend:HI
+          (vec_select:QI
+            (match_operand:V16QI 1 "register_operand" "x,v")
+            (parallel
+              [(match_operand:SI 2 "const_0_to_15_operand")]))))]
+  "TARGET_SSE4_1"
+  "@
+   %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
+   vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,avx512bw")
+   (set_attr "type" "sselog1")
+   (set_attr "prefix_data16" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "*vec_extract<mode>_mem"
   [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
         (vec_select:<ssescalarmode>
Index: testsuite/gcc.target/i386/sse4_1-pr91201.c
===================================================================
--- testsuite/gcc.target/i386/sse4_1-pr91201.c (nonexistent)
+++ testsuite/gcc.target/i386/sse4_1-pr91201.c (working copy)
@@ -0,0 +1,12 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse4.1 -masm=att" } */
+/* { dg-final { scan-assembler-not "\tmovzb(w|l)" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned short
+foo (V x)
+{
+  return x[0];
+}