On 2/6/18 6:42 AM, Peter Bergner wrote:
> On 2/5/18 10:48 PM, David Edelsohn wrote:
>> On Mon, Feb 5, 2018 at 9:43 PM, Peter Bergner <berg...@vnet.ibm.com> wrote:
>>> I did also try calling expand_divmod() here which did generate correct
>>> code, the problem was that it wasn't as clean/optimized as the change
>>> to gen_divdi3.
>>
>> Why not fix it at the site of the call to gen_divdi3 instead of the
>> divdi3 pattern?
> 
> Well, as I said above, I did try that and we got worse code.  That said,
> I called expand_divmod() unconditionally, rather than still calling
> gen_divdi3() when we can (TARGET_POWERPC64).  Let me retry with that
> change and see what kind of code gen we get.

OK, calling expand_divmod() in the !TARGET_POWERPC64 case still generates
worse code.  However, if I instead explicitly generate the calls to the
div/udiv library functions, then I seem to get the same code as the gen_*
patch gave... at least for the unit tests I've been using.  Let me
bootstrap/regtest the following patch, which I assume you'll be happier with.

Peter

gcc/
        PR target/83926
        * config/rs6000/vsx.md (vsx_mul_v2di): Handle generating a 64-bit
        multiply in 32-bit mode.
        (vsx_div_v2di): Handle generating a 64-bit signed divide in 32-bit mode.
        (vsx_udiv_v2di): Handle generating a 64-bit unsigned divide in 32-bit
        mode.

gcc/testsuite/
        PR target/83926
        * gcc.target/powerpc/pr83926.c: New test.

Index: vsx.md
===================================================================
--- vsx.md      (revision 257390)
+++ vsx.md      (working copy)
@@ -1650,10 +1650,22 @@
   rtx op5 = gen_reg_rtx (DImode);
   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
-  emit_insn (gen_muldi3 (op5, op3, op4));
+  if (TARGET_POWERPC64)
+    emit_insn (gen_muldi3 (op5, op3, op4));
+  else
+    {
+      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
+      emit_move_insn (op5, ret);
+    }
   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
-  emit_insn (gen_muldi3 (op3, op3, op4));
+  if (TARGET_POWERPC64)
+    emit_insn (gen_muldi3 (op3, op3, op4));
+  else
+    {
+      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
+      emit_move_insn (op3, ret);
+    }
   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
   DONE;
 }"
@@ -1688,10 +1700,30 @@
   rtx op5 = gen_reg_rtx (DImode);
   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
-  emit_insn (gen_divdi3 (op5, op3, op4));
+  if (TARGET_POWERPC64)
+    emit_insn (gen_divdi3 (op5, op3, op4));
+  else
+    {
+      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
+      rtx target = emit_library_call_value (libfunc,
+                                           op5, LCT_NORMAL, DImode,
+                                           op3, DImode,
+                                           op4, DImode);
+      emit_move_insn (op5, target);
+    }
   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
-  emit_insn (gen_divdi3 (op3, op3, op4));
+  if (TARGET_POWERPC64)
+    emit_insn (gen_divdi3 (op3, op3, op4));
+  else
+    {
+      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
+      rtx target = emit_library_call_value (libfunc,
+                                           op3, LCT_NORMAL, DImode,
+                                           op3, DImode,
+                                           op4, DImode);
+      emit_move_insn (op3, target);
+    }
   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
   DONE;
 }"
@@ -1716,10 +1748,30 @@
   rtx op5 = gen_reg_rtx (DImode);
   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
-  emit_insn (gen_udivdi3 (op5, op3, op4));
+  if (TARGET_POWERPC64)
+    emit_insn (gen_udivdi3 (op5, op3, op4));
+  else
+    {
+      rtx libfunc = optab_libfunc (udiv_optab, DImode);
+      rtx target = emit_library_call_value (libfunc,
+                                           op5, LCT_NORMAL, DImode,
+                                           op3, DImode,
+                                           op4, DImode);
+      emit_move_insn (op5, target);
+    }
   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
-  emit_insn (gen_udivdi3 (op3, op3, op4));
+  if (TARGET_POWERPC64)
+    emit_insn (gen_udivdi3 (op3, op3, op4));
+  else
+    {
+      rtx libfunc = optab_libfunc (udiv_optab, DImode);
+      rtx target = emit_library_call_value (libfunc,
+                                           op3, LCT_NORMAL, DImode,
+                                           op3, DImode,
+                                           op4, DImode);
+      emit_move_insn (op3, target);
+    }
   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
   DONE;
 }"

Reply via email to