On Wed, Dec 14, 2016 at 4:39 PM, Uros Bizjak <ubiz...@gmail.com> wrote: > On Wed, Dec 14, 2016 at 4:33 PM, Jakub Jelinek <ja...@redhat.com> wrote: >> On Wed, Dec 14, 2016 at 04:30:39PM +0100, Uros Bizjak wrote: >>> Apparently, the Qt source uses these two builtins. As the removal brings >>> much pain, I'd like to commit the following patch to restore >>> __builtin_clzs and __builtin_ctzs on the gcc-6 branch. >>> >>> 2016-12-14 Uros Bizjak <ubiz...@gmail.com> >>> >>> PR target/59874 >>> * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_CLZS >>> and IX86_BUILTIN_CTZS. >>> (bdesc_args): Add __builtin_clzs and __builtin_ctzs. >>> (ix86_fold_builtin): Handle IX86_BUILTIN_CTZS and IX86_BUILTIN_CLZS. >>> >>> Bootstrapped and regression tested on x86_64-linux-gnu. >> >> Ok for 6. For 7 we ask them to change it to the new builtins? > > I'm investigating automatic conversion to an HImode insn, something along > these lines: > > +(define_insn_and_split "*ctzhi2" > + [(set (match_operand:SI 0 "register_operand") > + (ctz:SI > + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand")))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_BMI > + && can_create_pseudo_p ()" > + "#" > + "&& 1" > + [(const_int 0)] > +{ > + rtx tmp = gen_reg_rtx (HImode); > + > + emit_insn (gen_bmi_tzcnt_hi (tmp, operands[1])); > + emit_insn (gen_zero_extendhisi2 (operands[0], tmp)); > + DONE; > +}) > > But it looks like __builtin_clzs and __builtin_ctzs will also have to stay.
The attached patch is what was committed to mainline SVN. As shown in the attached testcases, we can use the generic __builtin_c{l,t}z and merge the zero-extension of the operand to synthesize an HImode {l,t}zcntw instruction. Alternatively, __builtin_c{l,t}zs are still available, in the hope that some day these will be substituted with a documented generic 16-bit builtin with the same name. 2016-12-14 Uros Bizjak <ubiz...@gmail.com> PR target/59874 * config/i386/i386-builtin.def: Add __builtin_clzs and __builtin_ctzs. * config/i386/i386.c (ix86_fold_builtin): Handle IX86_BUILTIN_CTZS and IX86_BUILTIN_CLZS. * config/i386/i386.md (*ctzhi2): New insn_and_split pattern. (*clzhi2): Ditto. testsuite/ChangeLog 2016-12-14 Uros Bizjak <ubiz...@gmail.com> PR target/59874 * gcc.target/i386/pr59874-1.c: New test. * gcc.target/i386/pr59874-2.c: Ditto. The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros.
Index: config/i386/i386-builtin.def =================================================================== --- config/i386/i386-builtin.def (revision 243651) +++ config/i386/i386-builtin.def (working copy) @@ -1188,6 +1188,8 @@ BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4 /* LZCNT */ BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_ia32_lzcnt_u16", IX86_BUILTIN_LZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16) +/* Same as above, for backward compatibility. */ +BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16) BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_si, "__builtin_ia32_lzcnt_u32", IX86_BUILTIN_LZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT) BDESC (OPTION_MASK_ISA_LZCNT | OPTION_MASK_ISA_64BIT, CODE_FOR_lzcnt_di, "__builtin_ia32_lzcnt_u64", IX86_BUILTIN_LZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64) @@ -1196,6 +1198,8 @@ BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, BDESC (OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64) BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ia32_tzcnt_u16", IX86_BUILTIN_TZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16) +/* Same as above, for backward compatibility. 
*/ +BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16) BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_si, "__builtin_ia32_tzcnt_u32", IX86_BUILTIN_TZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT) BDESC (OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_tzcnt_di, "__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64) Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 243651) +++ config/i386/i386.c (working copy) @@ -33573,6 +33573,7 @@ ix86_fold_builtin (tree fndecl, int n_args, } case IX86_BUILTIN_TZCNT16: + case IX86_BUILTIN_CTZS: case IX86_BUILTIN_TZCNT32: case IX86_BUILTIN_TZCNT64: gcc_assert (n_args == 1); @@ -33580,7 +33581,8 @@ ix86_fold_builtin (tree fndecl, int n_args, { tree type = TREE_TYPE (TREE_TYPE (fndecl)); tree arg = args[0]; - if (fn_code == IX86_BUILTIN_TZCNT16) + if (fn_code == IX86_BUILTIN_TZCNT16 + || fn_code == IX86_BUILTIN_CTZS) arg = fold_convert (short_unsigned_type_node, arg); if (integer_zerop (arg)) return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); @@ -33590,6 +33592,7 @@ ix86_fold_builtin (tree fndecl, int n_args, break; case IX86_BUILTIN_LZCNT16: + case IX86_BUILTIN_CLZS: case IX86_BUILTIN_LZCNT32: case IX86_BUILTIN_LZCNT64: gcc_assert (n_args == 1); @@ -33597,7 +33600,8 @@ ix86_fold_builtin (tree fndecl, int n_args, { tree type = TREE_TYPE (TREE_TYPE (fndecl)); tree arg = args[0]; - if (fn_code == IX86_BUILTIN_LZCNT16) + if (fn_code == IX86_BUILTIN_LZCNT16 + || fn_code == IX86_BUILTIN_CLZS) arg = fold_convert (short_unsigned_type_node, arg); if (integer_zerop (arg)) return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 243651) +++ config/i386/i386.md (working copy) @@ -12606,6 +12606,24 @@ (match_operand:SWI48 
1 "nonimmediate_operand"))) (clobber (reg:CC FLAGS_REG))])]) +(define_insn_and_split "*ctzhi2" + [(set (match_operand:SI 0 "register_operand") + (ctz:SI + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (HImode); + + emit_insn (gen_bmi_tzcnt_hi (tmp, operands[1])); + emit_insn (gen_zero_extendhisi2 (operands[0], tmp)); + DONE; +}) + ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. @@ -12768,6 +12786,27 @@ (clobber (reg:CC FLAGS_REG))])] "TARGET_LZCNT") +(define_insn_and_split "*clzhi2" + [(set (match_operand:SI 0 "register_operand") + (clz:SI + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (HImode); + + emit_insn (gen_lzcnt_hi (tmp, operands[1])); + emit_insn (gen_zero_extendhisi2 (operands[0], tmp)); + DONE; +}) + +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. (define_insn_and_split "*clz<mode>2_lzcnt_falsedep_1" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 Index: testsuite/gcc.target/i386/pr59874-1.c =================================================================== --- testsuite/gcc.target/i386/pr59874-1.c (nonexistent) +++ testsuite/gcc.target/i386/pr59874-1.c (working copy) @@ -0,0 +1,10 @@ +/* PR target/59874 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi -masm=att" } */ +/* { dg-final { scan-assembler "tzcntw" } } */ + +unsigned int +foo (unsigned short x) +{ + return x ? 
__builtin_ctz (x) : 16U; +} Index: testsuite/gcc.target/i386/pr59874-2.c =================================================================== --- testsuite/gcc.target/i386/pr59874-2.c (nonexistent) +++ testsuite/gcc.target/i386/pr59874-2.c (working copy) @@ -0,0 +1,10 @@ +/* PR target/59874 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlzcnt -masm=att" } */ +/* { dg-final { scan-assembler "lzcntw" } } */ + +unsigned int +foo (unsigned short x) +{ + return x ? __builtin_clz (x) : 16U; +}