> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 2cecf45..9d6983b 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -7131,6 +7131,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int*
> total, bool speed)
> *total = COSTS_N_INSNS (2);
> else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
> *total = COSTS_N_INSNS (4);
> + else if (mode == DImode)
> + *total = COSTS_N_INSNS (50);
> else
> *total = COSTS_N_INSNS (20);
> return false;
> diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
> index d48a465..b5627c2 100644
> --- a/gcc/config/mips/mips.c
> +++ b/gcc/config/mips/mips.c
> @@ -3846,7 +3846,13 @@ mips_rtx_costs (rtx x, int code, int outer_code, int
> opno ATTRIBUTE_UNUSED,
> *total = COSTS_N_INSNS (mips_idiv_insns ());
> }
> else if (mode == DImode)
> - *total = mips_cost->int_div_di;
> + {
> + if (!TARGET_64BIT)
> + /* Divide double integer library call is expensive. */
> + *total = COSTS_N_INSNS (200);
> + else
> + *total = mips_cost->int_div_di;
> + }
> else
> *total = mips_cost->int_div_si;
> return false;
> diff --git a/gcc/expmed.c b/gcc/expmed.c
> index aa24fbf..5f4c921 100644
> --- a/gcc/expmed.c
> +++ b/gcc/expmed.c
> @@ -3523,6 +3523,105 @@ expand_mult_highpart_optab (enum machine_mode mode,
> rtx op0, rtx op1,
> }
> }
>
> + if (unsignedp && (!optimize_size && (optimize>1))
> + && (size - 1 > BITS_PER_WORD
> + && BITS_PER_WORD == 32 && GET_MODE_BITSIZE (mode) ==
> 2*BITS_PER_WORD)
These references to 32 bits are still wrong (and unnecessary; just
remove them).
> + && (4 * mul_cost[speed][mode] + 4 * add_cost[speed][mode]
> + + shift_cost[speed][mode][31] < max_cost))
> + {
> + unsigned HOST_WIDE_INT d;
> + rtx x1, x0, y1, y0, z2, z0, tmp, u0, u0tmp, u1, c, c1, ccst, cres,
> result;
> +
> + d = (INTVAL (op1) & GET_MODE_MASK (mode));
This could be a CONST_DOUBLE. But you don't need "d", because you can...
> + /* Extracting the higher part of the 64-bit multiplier. */
> + x1 = gen_highpart (word_mode, op0);
> + x1 = force_reg (word_mode, x1);
> +
> + /* Extracting the lower part of the 64-bit multiplier. */
> + x0 = gen_lowpart (word_mode, op0);
> + x0 = force_reg (word_mode, x0);
> +
> + /* Splitting the 64-bit constant for the higher and the lower parts.
> */
> + y0 = gen_int_mode(d & UINT32_MAX, word_mode);
> + y1 = gen_int_mode(d >> 32, word_mode);
... use gen_lowpart and gen_highpart directly on op1.
> +
> + z2 = gen_reg_rtx (mode);
> + u0 = gen_reg_rtx (mode);
> +
> + /* Unsigned multiplication of the higher multiplier part
> + and the higher constant part. */
> + z2 = expand_widening_mult (mode, x1, y1, z2, 1, umul_widen_optab);
> + /* Unsigned multiplication of the lower multiplier part
> + and the higher constant part. */
> + u0 = expand_widening_mult (mode, x0, y1, u0, 1, umul_widen_optab);
> +
> + z0 = gen_reg_rtx (mode);
> + u1 = gen_reg_rtx (mode);
> +
> + /* Unsigned multiplication of the lower multiplier part
> + and the lower constant part. */
> + z0 = expand_widening_mult (mode, x0, y0, z0, 1, umul_widen_optab);
> +
> + /* Unsigned multiplication of the higher multiplier part
> + the lower constant part. */
> + u1 = expand_widening_mult (mode, x1, y0, u1, 1, umul_widen_optab);
Up to here the comments are not necessary.
> + /* Getting the higher part of multiplication between the lower
> multiplier
> + part and the lower constant part, the lower part is not interesting
> + for the final result. */
> + u0tmp = gen_highpart (word_mode, z0);
> + u0tmp = force_reg (word_mode, u0tmp);
> + u0tmp = convert_to_mode (mode, u0tmp, 1);
> +
> + /* Adding the higher part of multiplication between the lower
> multiplier
> + part and the lower constant part to the result of multiplication
> between
> + the lower multiplier part and the higher constant part. Please note,
> + that we couldn't get overflow here since in the worst case
> + (0xffffffff*0xffffffff)+0xffffffff we get 0xffffffff00000000L. */
The comment can simply be "compute the middle word of the three-word
intermediate result." Also it's not overflow, it's carry.
> + expand_inc (u0, u0tmp);
> + tmp = gen_reg_rtx (mode);
> +
> + /* Adding multiplication between the lower multiplier part and the
> higher
> + constant part with the higher part of multiplication between the lower
> + multiplier part and the lower constant part to the result of
> multiplication
> + between the higher multiplier part and the lower constant part. */
Here you have to explain:
/* We have to return
z2 + ((u0 + u1) >> GET_MODE_BITSIZE (word_mode)).
u0 + u1 are the upper two words of the three-word
intermediate result and they could have up to
2 * GET_MODE_BITSIZE (word_mode) + 1 bits of precision.
We compute the extra bit by checking for carry, and add
1 << GET_MODE_BITSIZE (word_mode) to z2 if there is carry. */
> + tmp = expand_binop (mode, add_optab, u0, u1, tmp, 1, OPTAB_LIB_WIDEN);
> + if (!tmp)
> + return 0;
/* We have to return z2 + (tmp >> 32). We need to account for the carry out of u0 + u1 as well. */
> + /* Checking for overflow. */
This is not overflow, it's carry (see above).
> + c = gen_reg_rtx (mode);
> + c1 = gen_reg_rtx (mode);
> + cres = gen_reg_rtx (mode);
> +
> + emit_store_flag_force (c, GT, u0, tmp, mode, 1, 1);
> + emit_store_flag_force (c1, GT, u1, tmp, mode, 1, 1);
> + result = expand_binop (mode, ior_optab, c, c1, cres, 1,
> OPTAB_LIB_WIDEN);
> + if (!result)
> + return 0;
> +
> + ccst = gen_reg_rtx (mode);
> + ccst = expand_shift (LSHIFT_EXPR, mode, cres, 32, ccst, 1);
This 32 should be GET_MODE_BITSIZE (word_mode).
> +
> + /* Adding 0x10000000 in case of overflow to the result of
> multiplication
One 0 is missing in the constant (it should be 0x100000000).
> + between the higher multiplier part and the higher constant part.
> Please note,
> + that we don't have to check for overflow here because in the worst case
> + (0xffffffff*0xffffffff) + 0x100000000 equals to 0xffffffff00000001L.
> */
Again, s/overflow/carry/.
> + expand_inc (z2, ccst);
> + /* Extracting the higher part of the sum. */
> + tmp = gen_highpart (word_mode, tmp);
> + tmp = force_reg (word_mode, tmp);
> + tmp = convert_to_mode (mode, tmp, 1);
> +
> + /* The final result, again we don't have to check for overflow here.
> */
> + expand_inc (z2, tmp);
> +
> + return z2;
> +
> + }
> +
> /* Try widening multiplication of opposite signedness, and adjust. */
> moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
> if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
>