Hello! 2016-10-25 Uros Bizjak <ubiz...@gmail.com>
Backport from mainline 2016-10-21 Jakub Jelinek <ja...@redhat.com> PR target/78057 * config/i386/i386.c: Include fold-const-call.h and tree-ssanames.h. (ix86_fold_builtin): Fold IX86_BUILTIN_[LT]ZCNT{16,32,64} with INTEGER_CST argument. (ix86_gimple_fold_builtin): New function. (TARGET_GIMPLE_FOLD_BUILTIN): Define. Backport from mainline 2016-10-20 Uros Bizjak <ubiz...@gmail.com> PR target/78037 * config/i386/bmiintrin.h (__tzcnt_u16): Call __builtin_ia32_tzcnt_u16. (__tzcnt_u32, _tzcnt_u32): Call __builtin_ia32_tzcnt_u32. (__tzcnt_u64, _tzcnt_u64): Call __builtin_ia32_tzcnt_u64. * config/i386/lzcntintrin.h (__lzcnt16): Call __builtin_ia32_lzcnt_u16. (__lzcnt32, _lzcnt_u32): Call __builtin_ia32_lzcnt_u32. (__lzcnt64, _lzcnt_u64): Call __builtin_ia32_lzcnt_u64. * config/i386/i386.md (UNSPEC_LZCNT, UNSPEC_TZCNT): New unspecs. (ctz<mode>2, *ctz<mode>2): Use SWI48 mode iterator. (bmi_tzcnt_<mode>): New expander. (*bmi_tzcnt_<mode>_falsedep_1): New define_insn_and_split pattern. (*bmi_tzcnt_<mode>_falsedep, *bmi_tzcnt_<mode>): New insn patterns. (clz<mode>2, clz<mode>2_lzcnt, *clz<mode>2_lzcnt): Use SWI48 mode iterator. (lzcnt_<mode>): New expander. (*lzcnt_<mode>_falsedep_1): New define_insn_and_split pattern. (*lzcnt_<mode>_falsedep, *lzcnt_<mode>): New insn patterns. * config/i386/i386-builtin-types.def (UINT_FTYPE_UINT): New. (UINT64_FTYPE_UINT64): New. * config/i386/i386-builtin.def (__builtin_clzs): Remove description. (__builtin_ia32_lzcnt_u16): New description. (__builtin_ia32_lzcnt_u32): Ditto. (__builtin_ia32_lzcnt_u64): Ditto. (__builtin_ctzs): Remove description. (__builtin_ia32_tzcnt_u16): New description. (__builtin_ia32_tzcnt_u32): Ditto. (__builtin_ia32_tzcnt_u64): Ditto. * config/i386/i386.c (ix86_expand_args_builtin): Handle UINT_FTYPE_UINT and UINT64_FTYPE_UINT64. testsuite/ChangeLog: 2016-10-25 Uros Bizjak <ubiz...@gmail.com> * gcc.target/i386/bmi-6.c: XFAIL. 
Backport from mainline 2016-10-21 Jakub Jelinek <ja...@redhat.com> PR target/78057 * gcc.target/i386/pr78057.c: New test. Backport from mainline 2016-10-20 Uros Bizjak <ubiz...@gmail.com> PR target/78037 * gcc.target/i386/pr78037.c: New test. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Please note that I have to XFAIL bmi-6.c due to insufficient inlining on the gcc-6 branch. If there are no objections, I plan to commit the patch tomorrow. Uros.
Index: config/i386/bmiintrin.h =================================================================== --- config/i386/bmiintrin.h (revision 241451) +++ config/i386/bmiintrin.h (working copy) @@ -37,7 +37,7 @@ extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u16 (unsigned short __X) { - return __builtin_ctzs (__X); + return __builtin_ia32_tzcnt_u16 (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -97,13 +97,13 @@ _blsr_u32 (unsigned int __X) extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u32 (unsigned int __X) { - return __builtin_ctz (__X); + return __builtin_ia32_tzcnt_u32 (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tzcnt_u32 (unsigned int __X) { - return __builtin_ctz (__X); + return __builtin_ia32_tzcnt_u32 (__X); } @@ -165,13 +165,13 @@ _blsr_u64 (unsigned long long __X) extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u64 (unsigned long long __X) { - return __builtin_ctzll (__X); + return __builtin_ia32_tzcnt_u64 (__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tzcnt_u64 (unsigned long long __X) { - return __builtin_ctzll (__X); + return __builtin_ia32_tzcnt_u64 (__X); } #endif /* __x86_64__ */ Index: config/i386/i386-builtin-types.def =================================================================== --- config/i386/i386-builtin-types.def (revision 241451) +++ config/i386/i386-builtin-types.def (working copy) @@ -201,9 +201,11 @@ DEF_FUNCTION_TYPE (INT, PCCHAR) DEF_FUNCTION_TYPE (INT64, INT64) DEF_FUNCTION_TYPE (INT64, V2DF) DEF_FUNCTION_TYPE (INT64, V4SF) +DEF_FUNCTION_TYPE (UINT, UINT) +DEF_FUNCTION_TYPE (UINT16, UINT16) DEF_FUNCTION_TYPE (UINT64, INT) -DEF_FUNCTION_TYPE (UINT16, UINT16) DEF_FUNCTION_TYPE 
(UINT64, PUNSIGNED) +DEF_FUNCTION_TYPE (UINT64, UINT64) DEF_FUNCTION_TYPE (V16QI, PCCHAR) DEF_FUNCTION_TYPE (V16QI, V16QI) DEF_FUNCTION_TYPE (V2DF, PCDOUBLE) Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 241451) +++ config/i386/i386.c (working copy) @@ -76,6 +76,8 @@ along with GCC; see the file COPYING3. If not see #include "case-cfn-macros.h" #include "regrename.h" #include "dojump.h" +#include "fold-const-call.h" +#include "tree-ssanames.h" /* This file should be included last. */ #include "target-def.h" @@ -32356,7 +32358,10 @@ enum ix86_builtins IX86_BUILTIN_LWPINS32, IX86_BUILTIN_LWPINS64, - IX86_BUILTIN_CLZS, + /* LZCNT */ + IX86_BUILTIN_LZCNT16, + IX86_BUILTIN_LZCNT32, + IX86_BUILTIN_LZCNT64, /* RTM */ IX86_BUILTIN_XBEGIN, @@ -32380,7 +32385,9 @@ enum ix86_builtins /* BMI instructions. */ IX86_BUILTIN_BEXTR32, IX86_BUILTIN_BEXTR64, - IX86_BUILTIN_CTZS, + IX86_BUILTIN_TZCNT16, + IX86_BUILTIN_TZCNT32, + IX86_BUILTIN_TZCNT64, /* TBM instructions. 
*/ IX86_BUILTIN_BEXTRI32, @@ -33768,13 +33775,19 @@ static const struct builtin_description bdesc_args { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, + /* LZCNT */ + { OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_ia32_lzcnt_u16", IX86_BUILTIN_LZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, + { OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_si, "__builtin_ia32_lzcnt_u32", IX86_BUILTIN_LZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_LZCNT | OPTION_MASK_ISA_64BIT, CODE_FOR_lzcnt_di, "__builtin_ia32_lzcnt_u64", IX86_BUILTIN_LZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, /* BMI */ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, - { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ia32_tzcnt_u16", IX86_BUILTIN_TZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_si, "__builtin_ia32_tzcnt_u32", IX86_BUILTIN_TZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_tzcnt_di, "__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + /* TBM */ { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, 
CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, @@ -37546,11 +37559,49 @@ ix86_fold_builtin (tree fndecl, int n_args, { enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl); - if (fn_code == IX86_BUILTIN_CPU_IS - || fn_code == IX86_BUILTIN_CPU_SUPPORTS) + switch (fn_code) { + case IX86_BUILTIN_CPU_IS: + case IX86_BUILTIN_CPU_SUPPORTS: gcc_assert (n_args == 1); return fold_builtin_cpu (fndecl, args); + + case IX86_BUILTIN_TZCNT16: + case IX86_BUILTIN_TZCNT32: + case IX86_BUILTIN_TZCNT64: + gcc_assert (n_args == 1); + if (TREE_CODE (args[0]) == INTEGER_CST) + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + tree arg = args[0]; + if (fn_code == IX86_BUILTIN_TZCNT16) + arg = fold_convert (short_unsigned_type_node, arg); + if (integer_zerop (arg)) + return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); + else + return fold_const_call (CFN_CTZ, type, arg); + } + break; + + case IX86_BUILTIN_LZCNT16: + case IX86_BUILTIN_LZCNT32: + case IX86_BUILTIN_LZCNT64: + gcc_assert (n_args == 1); + if (TREE_CODE (args[0]) == INTEGER_CST) + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + tree arg = args[0]; + if (fn_code == IX86_BUILTIN_LZCNT16) + arg = fold_convert (short_unsigned_type_node, arg); + if (integer_zerop (arg)) + return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); + else + return fold_const_call (CFN_CLZ, type, arg); + } + break; + + default: + break; } } @@ -37561,6 +37612,70 @@ ix86_fold_builtin (tree fndecl, int n_args, return NULL_TREE; } +/* Fold a MD builtin (use ix86_fold_builtin for folding into + constant) in GIMPLE. 
*/ + +bool +ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + gimple *stmt = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (stmt); + gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); + int n_args = gimple_call_num_args (stmt); + enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl); + tree decl = NULL_TREE; + tree arg0; + + switch (fn_code) + { + case IX86_BUILTIN_TZCNT32: + decl = builtin_decl_implicit (BUILT_IN_CTZ); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_TZCNT64: + decl = builtin_decl_implicit (BUILT_IN_CTZLL); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_LZCNT32: + decl = builtin_decl_implicit (BUILT_IN_CLZ); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_LZCNT64: + decl = builtin_decl_implicit (BUILT_IN_CLZLL); + goto fold_tzcnt_lzcnt; + + fold_tzcnt_lzcnt: + gcc_assert (n_args == 1); + arg0 = gimple_call_arg (stmt, 0); + if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt)) + { + int prec = TYPE_PRECISION (TREE_TYPE (arg0)); + /* If arg0 is provably non-zero, optimize into generic + __builtin_c[tl]z{,ll} function the middle-end handles + better. */ + if (!expr_not_equal_to (arg0, wi::zero (prec))) + return false; + + location_t loc = gimple_location (stmt); + gimple *g = gimple_build_call (decl, 1, arg0); + gimple_set_location (g, loc); + tree lhs = make_ssa_name (integer_type_node); + gimple_call_set_lhs (g, lhs); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + break; + + default: + break; + } + + return false; +} + /* Make builtins to detect cpu type and features supported. NAME is the builtin name, CODE is the builtin code, and FTYPE is the function type of the builtin. 
*/ @@ -38522,8 +38637,10 @@ ix86_expand_args_builtin (const struct builtin_des case FLOAT128_FTYPE_FLOAT128: case FLOAT_FTYPE_FLOAT: case INT_FTYPE_INT: + case UINT_FTYPE_UINT: + case UINT16_FTYPE_UINT16: case UINT64_FTYPE_INT: - case UINT16_FTYPE_UINT16: + case UINT64_FTYPE_UINT64: case INT64_FTYPE_INT64: case INT64_FTYPE_V4SF: case INT64_FTYPE_V2DF: @@ -54588,6 +54705,9 @@ ix86_addr_space_zero_address_valid (addr_space_t a #undef TARGET_FOLD_BUILTIN #define TARGET_FOLD_BUILTIN ix86_fold_builtin +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin + #undef TARGET_COMPARE_VERSION_PRIORITY #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 241451) +++ config/i386/i386.md (working copy) @@ -174,7 +174,11 @@ ;; For CRC32 support UNSPEC_CRC32 + ;; For LZCNT support + UNSPEC_LZCNT + ;; For BMI support + UNSPEC_TZCNT UNSPEC_BEXTR ;; For BMI2 support @@ -12810,9 +12814,9 @@ (define_expand "ctz<mode>2" [(parallel - [(set (match_operand:SWI248 0 "register_operand") - (ctz:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand"))) + [(set (match_operand:SWI48 0 "register_operand") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand"))) (clobber (reg:CC FLAGS_REG))])]) ; False dependency happens when destination is only updated by tzcnt, @@ -12860,8 +12864,8 @@ (set_attr "mode" "<MODE>")]) (define_insn "*ctz<mode>2" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "" { @@ -12886,15 +12890,78 @@ (const_string "0"))) (set_attr "mode" "<MODE>")]) +;; Version of tzcnt that is expanded from intrinsics. 
This version provides +;; operand size as output when source operand is zero. + +(define_expand "bmi_tzcnt_<mode>" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (unspec:SWI248 + [(match_operand:SWI248 1 "nonimmediate_operand")] + UNSPEC_TZCNT)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_BMI") + +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. +(define_insn_and_split "*bmi_tzcnt_<mode>_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_TZCNT)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (unspec:SWI48 [(match_dup 1)] UNSPEC_TZCNT)) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!reg_mentioned_p (operands[0], operands[1])) + ix86_expand_clear (operands[0]); +}) + +(define_insn "*bmi_tzcnt_<mode>_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_TZCNT)) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + +(define_insn "*bmi_tzcnt_<mode>" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (unspec:SWI248 + [(match_operand:SWI248 1 "nonimmediate_operand" "rm")] + UNSPEC_TZCNT)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_expand "clz<mode>2" 
[(parallel - [(set (match_operand:SWI248 0 "register_operand") - (minus:SWI248 + [(set (match_operand:SWI48 0 "register_operand") + (minus:SWI48 (match_dup 2) - (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")))) + (clz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")))) (clobber (reg:CC FLAGS_REG))]) (parallel - [(set (match_dup 0) (xor:SWI248 (match_dup 0) (match_dup 2))) + [(set (match_dup 0) (xor:SWI48 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "" { @@ -12908,9 +12975,9 @@ (define_expand "clz<mode>2_lzcnt" [(parallel - [(set (match_operand:SWI248 0 "register_operand") - (clz:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand"))) + [(set (match_operand:SWI48 0 "register_operand") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand"))) (clobber (reg:CC FLAGS_REG))])] "TARGET_LZCNT") @@ -12947,8 +13014,8 @@ (set_attr "mode" "<MODE>")]) (define_insn "*clz<mode>2_lzcnt" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_LZCNT" "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" @@ -12956,6 +13023,69 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +;; Version of lzcnt that is expanded from intrinsics. This version provides +;; operand size as output when source operand is zero. + +(define_expand "lzcnt_<mode>" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (unspec:SWI248 + [(match_operand:SWI248 1 "nonimmediate_operand")] + UNSPEC_LZCNT)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_LZCNT") + +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. 
+(define_insn_and_split "*lzcnt_<mode>_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_LZCNT)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (unspec:SWI48 [(match_dup 1)] UNSPEC_LZCNT)) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!reg_mentioned_p (operands[0], operands[1])) + ix86_expand_clear (operands[0]); +}) + +(define_insn "*lzcnt_<mode>_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_LZCNT)) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT" + "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + +(define_insn "*lzcnt_<mode>" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (unspec:SWI248 + [(match_operand:SWI248 1 "nonimmediate_operand" "rm")] + UNSPEC_LZCNT)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT" + "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + ;; BMI instructions. 
(define_insn "*bmi_andn_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r,r") Index: config/i386/lzcntintrin.h =================================================================== --- config/i386/lzcntintrin.h (revision 241451) +++ config/i386/lzcntintrin.h (working copy) @@ -38,19 +38,19 @@ extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt16 (unsigned short __X) { - return __builtin_clzs (__X); + return __builtin_ia32_lzcnt_u16 (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt32 (unsigned int __X) { - return __builtin_clz (__X); + return __builtin_ia32_lzcnt_u32 (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _lzcnt_u32 (unsigned int __X) { - return __builtin_clz (__X); + return __builtin_ia32_lzcnt_u32 (__X); } #ifdef __x86_64__ @@ -57,13 +57,13 @@ _lzcnt_u32 (unsigned int __X) extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt64 (unsigned long long __X) { - return __builtin_clzll (__X); + return __builtin_ia32_lzcnt_u64 (__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _lzcnt_u64 (unsigned long long __X) { - return __builtin_clzll (__X); + return __builtin_ia32_lzcnt_u64 (__X); } #endif Index: testsuite/gcc.target/i386/bmi-6.c =================================================================== --- testsuite/gcc.target/i386/bmi-6.c (revision 241451) +++ testsuite/gcc.target/i386/bmi-6.c (working copy) @@ -1,4 +1,5 @@ /* { dg-do link } */ +/* { dg-xfail-if "PR 78057" { "*-*-*" } { "*" } { "" } } */ /* { dg-options "-O2 -mbmi" } */ #include <x86intrin.h> Index: testsuite/gcc.target/i386/pr78037.c =================================================================== --- testsuite/gcc.target/i386/pr78037.c (nonexistent) +++ testsuite/gcc.target/i386/pr78037.c (working 
copy) @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-require-effective-target bmi } */ +/* { dg-options "-O2 -mbmi" } */ + +#include <x86intrin.h> + +#include "bmi-check.h" + +int +__attribute__((noinline, noclone)) +foo (int x) +{ + return __tzcnt_u32 (x) & 0x1f; +} + +static void +bmi_test () +{ + if (foo (0) != 0) + abort (); +} Index: testsuite/gcc.target/i386/pr78057.c =================================================================== --- testsuite/gcc.target/i386/pr78057.c (nonexistent) +++ testsuite/gcc.target/i386/pr78057.c (working copy) @@ -0,0 +1,42 @@ +/* PR target/78057 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi -mlzcnt -fdump-tree-optimized" } */ + +extern void link_error (void); + +int +foo (int x) +{ + if (__builtin_ia32_tzcnt_u16 (16) != 4 + || __builtin_ia32_tzcnt_u16 (0) != 16 + || __builtin_ia32_lzcnt_u16 (0x1ff) != 7 + || __builtin_ia32_lzcnt_u16 (0) != 16 + || __builtin_ia32_tzcnt_u32 (8) != 3 + || __builtin_ia32_tzcnt_u32 (0) != 32 + || __builtin_ia32_lzcnt_u32 (0x3fffffff) != 2 + || __builtin_ia32_lzcnt_u32 (0) != 32 +#ifdef __x86_64__ + || __builtin_ia32_tzcnt_u64 (4) != 2 + || __builtin_ia32_tzcnt_u64 (0) != 64 + || __builtin_ia32_lzcnt_u64 (0x1fffffff) != 35 + || __builtin_ia32_lzcnt_u64 (0) != 64 +#endif + ) + link_error (); + x += 2; + if (x == 0) + return 5; + return __builtin_ia32_tzcnt_u32 (x) + + __builtin_ia32_lzcnt_u32 (x) +#ifdef __x86_64__ + + __builtin_ia32_tzcnt_u64 (x) + + __builtin_ia32_lzcnt_u64 (x) +#endif + ; +} + +/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "__builtin_ia32_\[lt]zcnt" "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_ctz " 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_clz " 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_ctzll " 1 "optimized" { target lp64 } } } */