On Wed, Dec 14, 2016 at 4:39 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
> On Wed, Dec 14, 2016 at 4:33 PM, Jakub Jelinek <ja...@redhat.com> wrote:
>> On Wed, Dec 14, 2016 at 04:30:39PM +0100, Uros Bizjak wrote:
>>> Apparently, Qt source uses these two builtins. As the removal brings
>>> much pain, I'd like to commit the following patch to restore
>>> __builtin_clzs and __builtin_ctzs on gcc-6 branch.
>>>
>>> 2016-12-14  Uros Bizjak  <ubiz...@gmail.com>
>>>
>>>     PR target/59874
>>>     * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_CLZS
>>>     and IX86_BUILTIN_CTZS.
>>>     (bdesc_args): Add __builtin_clzs and __builtin_ctzs.
>>>     (ix86_fold_builtin): Handle IX86_BUILTIN_CTZS and IX86_BUILTIN_CLZS.
>>>
>>> Bootstrapped and regression tested on x86_64-linux-gnu.
>>
>> Ok for 6.  For 7 we ask them to change it to the new builtins?
>
> I'm investigating automatic conversion to HImode insn, something along
> the lines:
>
> +(define_insn_and_split "*ctzhi2"
> +  [(set (match_operand:SI 0 "register_operand")
> +       (ctz:SI
> +         (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_BMI
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  rtx tmp = gen_reg_rtx (HImode);
> +
> +  emit_insn (gen_bmi_tzcnt_hi (tmp, operands[1]));
> +  emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
> +  DONE;
> +})
>
> But it looks like __builtin_clzs and __builtin_ctzs will also have to stay.

The attached patch is what was committed to mainline SVN.

As shown in the attached testcases, we can use the generic
__builtin_c{l,t}z and merge the zero-extension of the operand to
synthesize the HImode {l,t}zcntw instruction. Alternatively,
__builtin_c{l,t}zs are still available, in the hope that some day
they will be replaced by a documented generic 16-bit builtin
with the same name.

2016-12-14  Uros Bizjak  <ubiz...@gmail.com>

    PR target/59874
    * config/i386/i386-builtin.def: Add __builtin_clzs and __builtin_ctzs.
    * config/i386/i386.c (ix86_fold_builtin): Handle IX86_BUILTIN_CTZS
    and IX86_BUILTIN_CLZS.
    * config/i386/i386.md (*ctzhi2): New insn_and_split pattern.
    (*clzhi2): Ditto.

testsuite/ChangeLog

2016-12-14  Uros Bizjak  <ubiz...@gmail.com>

    PR target/59874
    * gcc.target/i386/pr59874-1.c: New test.
    * gcc.target/i386/pr59874-2.c: Ditto.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386-builtin.def
===================================================================
--- config/i386/i386-builtin.def        (revision 243651)
+++ config/i386/i386-builtin.def        (working copy)
@@ -1188,6 +1188,8 @@ BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4
 
 /* LZCNT */
 BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_ia32_lzcnt_u16", 
IX86_BUILTIN_LZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16)
+/* Same as above, for backward compatibility.  */
+BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_clzs", 
IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16)
 BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_si, "__builtin_ia32_lzcnt_u32", 
IX86_BUILTIN_LZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT)
 BDESC (OPTION_MASK_ISA_LZCNT | OPTION_MASK_ISA_64BIT, CODE_FOR_lzcnt_di, 
"__builtin_ia32_lzcnt_u64", IX86_BUILTIN_LZCNT64, UNKNOWN, (int) 
UINT64_FTYPE_UINT64)
 
@@ -1196,6 +1198,8 @@ BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si,
 BDESC (OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, 
"__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) 
UINT64_FTYPE_UINT64_UINT64)
 
 BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ia32_tzcnt_u16", 
IX86_BUILTIN_TZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16)
+/* Same as above, for backward compatibility.  */
+BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ctzs", 
IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16)
 BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_si, "__builtin_ia32_tzcnt_u32", 
IX86_BUILTIN_TZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT)
 BDESC (OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_tzcnt_di, 
"__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int) 
UINT64_FTYPE_UINT64)
 
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 243651)
+++ config/i386/i386.c  (working copy)
@@ -33573,6 +33573,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
          }
 
        case IX86_BUILTIN_TZCNT16:
+       case IX86_BUILTIN_CTZS:
        case IX86_BUILTIN_TZCNT32:
        case IX86_BUILTIN_TZCNT64:
          gcc_assert (n_args == 1);
@@ -33580,7 +33581,8 @@ ix86_fold_builtin (tree fndecl, int n_args,
            {
              tree type = TREE_TYPE (TREE_TYPE (fndecl));
              tree arg = args[0];
-             if (fn_code == IX86_BUILTIN_TZCNT16)
+             if (fn_code == IX86_BUILTIN_TZCNT16
+                 || fn_code == IX86_BUILTIN_CTZS)
                arg = fold_convert (short_unsigned_type_node, arg);
              if (integer_zerop (arg))
                return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
@@ -33590,6 +33592,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
          break;
 
        case IX86_BUILTIN_LZCNT16:
+       case IX86_BUILTIN_CLZS:
        case IX86_BUILTIN_LZCNT32:
        case IX86_BUILTIN_LZCNT64:
          gcc_assert (n_args == 1);
@@ -33597,7 +33600,8 @@ ix86_fold_builtin (tree fndecl, int n_args,
            {
              tree type = TREE_TYPE (TREE_TYPE (fndecl));
              tree arg = args[0];
-             if (fn_code == IX86_BUILTIN_LZCNT16)
+             if (fn_code == IX86_BUILTIN_LZCNT16
+                 || fn_code == IX86_BUILTIN_CLZS)
                arg = fold_convert (short_unsigned_type_node, arg);
              if (integer_zerop (arg))
                return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 243651)
+++ config/i386/i386.md (working copy)
@@ -12606,6 +12606,24 @@
            (match_operand:SWI48 1 "nonimmediate_operand")))
      (clobber (reg:CC FLAGS_REG))])])
 
+(define_insn_and_split "*ctzhi2"
+  [(set (match_operand:SI 0 "register_operand")
+       (ctz:SI
+         (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (HImode);
+
+  emit_insn (gen_bmi_tzcnt_hi (tmp, operands[1]));
+  emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
+  DONE;
+})
+
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
@@ -12768,6 +12786,27 @@
      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_LZCNT")
 
+(define_insn_and_split "*clzhi2"
+  [(set (match_operand:SI 0 "register_operand")
+       (clz:SI
+         (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_LZCNT
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (HImode);
+
+  emit_insn (gen_lzcnt_hi (tmp, operands[1]));
+  emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
+  DONE;
+})
+
+; False dependency happens when destination is only updated by tzcnt,
+; lzcnt or popcnt.  There is no false dependency when destination is
+; also used in source.
 (define_insn_and_split "*clz<mode>2_lzcnt_falsedep_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
        (clz:SWI48
Index: testsuite/gcc.target/i386/pr59874-1.c
===================================================================
--- testsuite/gcc.target/i386/pr59874-1.c       (nonexistent)
+++ testsuite/gcc.target/i386/pr59874-1.c       (working copy)
@@ -0,0 +1,10 @@
+/* PR target/59874 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -masm=att" } */
+/* { dg-final { scan-assembler "tzcntw" } } */
+
+unsigned int
+foo (unsigned short x)
+{
+  return x ? __builtin_ctz (x) : 16U;
+}
Index: testsuite/gcc.target/i386/pr59874-2.c
===================================================================
--- testsuite/gcc.target/i386/pr59874-2.c       (nonexistent)
+++ testsuite/gcc.target/i386/pr59874-2.c       (working copy)
@@ -0,0 +1,10 @@
+/* PR target/59874 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlzcnt -masm=att" } */
+/* { dg-final { scan-assembler "lzcntw" } } */
+
+unsigned int
+foo (unsigned short x)
+{
+  return x ? __builtin_clz (x) : 16U;
+}

Reply via email to