Hi, This patch adds the half-to-double conversions, both as library functions, or when supported in hardware, using the appropriate instructions.
That means adding support for the __gnu_d2h_{ieee/alternative} library calls added in patch 2/4, and providing a more aggressive truncdfhf2 where we can. This also lets us remove the implementation of TARGET_CONVERT_TO_TYPE. Bootstrapped on an ARMv8-A machine,and crosstested with no issues. OK? Thanks, James --- gcc/ 2016-11-09 James Greenhalgh <james.greenha...@arm.com> * config/arm/arm.c (arm_convert_to_type): Delete. (TARGET_CONVERT_TO_TYPE): Delete. (arm_init_libfuncs): Enable trunc_optab from DFmode to HFmode. (arm_libcall_uses_aapcs_base): Add trunc_optab from DF- to HFmode. * config/arm/arm.h (TARGET_FP16_TO_DOUBLE): New. * config/arm/arm.md (truncdfhf2): Only convert through SFmode if we are in fast math mode, and have no single step hardware instruction. (extendhfdf2): Only expand through SFmode if we don't have a single-step hardware instruction. * config/arm/vfp.md (*truncdfhf2): New. (extendhfdf2): Likewise. gcc/testsuite/ 2016-11-09 James Greenhalgh <james.greenha...@arm.com> * gcc.target/arm/fp16-rounding-alt-1.c (ROUNDED): Change expected result. * gcc.target/arm/fp16-rounding-ieee-1.c (ROUNDED): Change expected result.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 239117f..b9097c5 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -245,7 +245,6 @@ static bool arm_output_addr_const_extra (FILE *, rtx); static bool arm_allocate_stack_slots_for_args (void); static bool arm_warn_func_return (tree); static tree arm_promoted_type (const_tree t); -static tree arm_convert_to_type (tree type, tree expr); static bool arm_scalar_mode_supported_p (machine_mode); static bool arm_frame_pointer_required (void); static bool arm_can_eliminate (const int, const int); @@ -654,9 +653,6 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_PROMOTED_TYPE #define TARGET_PROMOTED_TYPE arm_promoted_type -#undef TARGET_CONVERT_TO_TYPE -#define TARGET_CONVERT_TO_TYPE arm_convert_to_type - #undef TARGET_SCALAR_MODE_SUPPORTED_P #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p @@ -2535,6 +2531,11 @@ arm_init_libfuncs (void) ? "__gnu_h2f_ieee" : "__gnu_h2f_alternative")); + set_conv_libfunc (trunc_optab, HFmode, DFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_d2h_ieee" + : "__gnu_d2h_alternative")); + /* Arithmetic. */ set_optab_libfunc (add_optab, HFmode, NULL); set_optab_libfunc (sdiv_optab, HFmode, NULL); @@ -5262,6 +5263,8 @@ arm_libcall_uses_aapcs_base (const_rtx libcall) SFmode)); add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (trunc_optab, HFmode, DFmode)); } return libcall && libcall_htab->find (libcall) != NULL; @@ -22517,23 +22520,6 @@ arm_promoted_type (const_tree t) return NULL_TREE; } -/* Implement TARGET_CONVERT_TO_TYPE. - Specifically, this hook implements the peculiarity of the ARM - half-precision floating-point C semantics that requires conversions between - __fp16 to or from double to do an intermediate conversion to float. */ - -static tree -arm_convert_to_type (tree type, tree expr) -{ - tree fromtype = TREE_TYPE (expr); - if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) - return NULL_TREE; - if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) - || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) - return convert (type, convert (float_type_node, expr)); - return NULL_TREE; -} - /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. This simply adds HFmode as a supported mode; even though we don't implement arithmetic on this type directly, it's supported by diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index c8d7462..e759720 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -179,6 +179,11 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_FP16 \ (ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_FP16)) +/* FPU supports converting between HFmode and DFmode in a single hardware + step. */ +#define TARGET_FP16_TO_DOUBLE \ + (TARGET_HARD_FLOAT && (TARGET_FP16 && TARGET_VFP5)) + /* FPU supports fused-multiply-add operations. */ #define TARGET_FMA (TARGET_FPU_REV >= 4) diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 8393f65..4074773 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5177,20 +5177,35 @@ "" ) -;; DFmode to HFmode conversions have to go through SFmode. +;; DFmode to HFmode conversions on targets without a single-step hardware +;; instruction for it would have to go through SFmode. This is dangerous +;; as it introduces double rounding. +;; +;; Disable this pattern unless we are in an unsafe math mode, or we have +;; a single-step instruction. + (define_expand "truncdfhf2" - [(set (match_operand:HF 0 "general_operand" "") + [(set (match_operand:HF 0 "s_register_operand" "") (float_truncate:HF - (match_operand:DF 1 "general_operand" "")))] - "TARGET_EITHER" - " - { - rtx op1; - op1 = convert_to_mode (SFmode, operands[1], 0); - op1 = convert_to_mode (HFmode, op1, 0); - emit_move_insn (operands[0], op1); - DONE; - }" + (match_operand:DF 1 "s_register_operand" "")))] + "(TARGET_EITHER && flag_unsafe_math_optimizations) + || (TARGET_32BIT && TARGET_FP16_TO_DOUBLE)" +{ + /* We don't have a direct instruction for this, so we must be in + an unsafe math mode, and going via SFmode. */ + + if (!(TARGET_32BIT && TARGET_FP16_TO_DOUBLE)) + { + rtx op1; + gcc_assert (flag_unsafe_math_optimizations); + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + } + /* Otherwise, we will pick this up as a single instruction with + no intermediary rounding. */ +} ) ;; Zero and sign extension instructions. @@ -5684,19 +5699,28 @@ "" ) -;; HFmode -> DFmode conversions have to go through SFmode. +;; HFmode -> DFmode conversions where we don't have an instruction for it +;; must go through SFmode. +;; +;; This is always safe for an extend. + (define_expand "extendhfdf2" - [(set (match_operand:DF 0 "general_operand" "") - (float_extend:DF (match_operand:HF 1 "general_operand" "")))] + [(set (match_operand:DF 0 "s_register_operand" "") + (float_extend:DF (match_operand:HF 1 "s_register_operand" "")))] "TARGET_EITHER" - " - { - rtx op1; - op1 = convert_to_mode (SFmode, operands[1], 0); - op1 = convert_to_mode (DFmode, op1, 0); - emit_insn (gen_movdf (operands[0], op1)); - DONE; - }" +{ + /* We don't have a direct instruction for this, so go via SFmode. */ + if (!(TARGET_32BIT && TARGET_FP16_TO_DOUBLE)) + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (DFmode, op1, 0); + emit_insn (gen_movdf (operands[0], op1)); + DONE; + } + /* Otherwise, we're done producing RTL and will pick up the correct + pattern to do this with one rounding-step in a single instruction. */ +} ) ;; Move insns (including loads and stores) diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 2051f10..d417e92 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -1507,6 +1507,26 @@ (set_attr "type" "f_cvt")] ) +(define_insn "*truncdfhf2" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (float_truncate:HF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_FP16_TO_DOUBLE" + "vcvtb%?.f16.f64\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "*extendhfdf2" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (float_extend:DF (match_operand:HF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_FP16_TO_DOUBLE" + "vcvtb%?.f64.f16\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + (define_insn "truncsfhf2" [(set (match_operand:HF 0 "s_register_operand" "=t") (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] diff --git a/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c b/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c index 1c15b61..27bb40d 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c +++ b/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c @@ -1,6 +1,6 @@ -/* Test intermediate rounding of double to float and then to __fp16, using - an example of a number that would round differently if it went directly - from double to __fp16. */ +/* Test that rounding double to __fp16 happens directly, using an example + of a number that would round differently if it went from double to + __fp16 via float. */ /* { dg-do run } */ /* { dg-require-effective-target arm_fp16_alternative_ok } */ @@ -11,8 +11,8 @@ /* The original double value. */ #define ORIG 0x1.0020008p0 -/* The expected (double)((__fp16)((float)ORIG)) value. */ -#define ROUNDED 0x1.0000000p0 +/* The expected (double)((__fp16)ORIG) value. */ +#define ROUNDED 0x1.0040000p0 typedef union u { __fp16 f; diff --git a/gcc/testsuite/gcc.target/arm/fp16-rounding-ieee-1.c b/gcc/testsuite/gcc.target/arm/fp16-rounding-ieee-1.c index 866d4d8..194dc9d 100644 --- a/gcc/testsuite/gcc.target/arm/fp16-rounding-ieee-1.c +++ b/gcc/testsuite/gcc.target/arm/fp16-rounding-ieee-1.c @@ -1,6 +1,6 @@ -/* Test intermediate rounding of double to float and then to __fp16, using - an example of a number that would round differently if it went directly - from double to __fp16. */ +/* Test that rounding double to __fp16 happens directly, using an example + of a number that would round differently if it went from double to + __fp16 via float. */ /* { dg-do run } */ /* { dg-options "-mfp16-format=ieee" } */ @@ -10,8 +10,8 @@ /* The original double value. */ #define ORIG 0x1.0020008p0 -/* The expected (double)((__fp16)((float)ORIG)) value. */ -#define ROUNDED 0x1.0000000p0 +/* The expected (double)((__fp16)ORIG) value. */ +#define ROUNDED 0x1.0040000p0 typedef union u { __fp16 f;