On 2/6/18 10:36 AM, Peter Bergner wrote:
> On 2/6/18 10:20 AM, David Edelsohn wrote:
>> Do the gen_XXXdi3 calls work if you use SDI iterator instead of GPR
>> iterator, as Segher suggested?
>
> Well it works _if_ we use the first patch that changes the gen_*
> patterns.  If we go this route, I agree we should use the SDI
> iterator instead of GPR.
Actually, my bad. While bootstrapping this on a BE system, we get an error when we attempt a 64-bit multiply in 32-bit mode. In this case, the gen_muldi3() pattern calls expand_mult(DImode, ...), and the automatic expand machinery notices that gen_muldi3() now allows DImode in the !TARGET_POWERPC64 case, so it calls gen_muldi3() again to emit the multiply, and we go into infinite recursion. We don't have that problem in the div/udiv case, because we call out to the lib routines, so there is no recursion. Given this, I think we should probably go with the patch that modifies vsx.md and guards the calls to gen_{div,udiv,mul}di3() with a TARGET_POWERPC64 test. For completeness, that patch is included again below, with one testsuite addition. The builtins-1-be.c test case must never have been tested in 32-bit mode, since it was always ICEing from the beginning. I've fixed it to run in both 32-bit and 64-bit modes; in 32-bit mode, it now correctly scans for the 64-bit div/udiv/mul cases this patch generates. Again, this passed bootstrap and regtesting on powerpc64le-linux as well as on powerpc64-linux, with the testsuite run in both 32-bit and 64-bit modes. Ok for trunk? Peter gcc/ PR target/83926 * config/rs6000/vsx.md (vsx_mul_v2di): Handle generating a 64-bit multiply in 32-bit mode. (vsx_div_v2di): Handle generating a 64-bit signed divide in 32-bit mode. (vsx_udiv_v2di): Handle generating a 64-bit unsigned divide in 32-bit mode. gcc/testsuite/ PR target/83926 * gcc.target/powerpc/pr83926.c: New test. * gcc.target/powerpc/builtins-1-be.c: Filter out gimple folding disabled message. Fix test for running in 32-bit mode. 
Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (revision 257390) +++ gcc/config/rs6000/vsx.md (working copy) @@ -1650,10 +1650,22 @@ rtx op5 = gen_reg_rtx (DImode); emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); - emit_insn (gen_muldi3 (op5, op3, op4)); + if (TARGET_POWERPC64) + emit_insn (gen_muldi3 (op5, op3, op4)); + else + { + rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); + emit_move_insn (op5, ret); + } emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); - emit_insn (gen_muldi3 (op3, op3, op4)); + if (TARGET_POWERPC64) + emit_insn (gen_muldi3 (op3, op3, op4)); + else + { + rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); + emit_move_insn (op3, ret); + } emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); DONE; }" @@ -1688,10 +1700,30 @@ rtx op5 = gen_reg_rtx (DImode); emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); - emit_insn (gen_divdi3 (op5, op3, op4)); + if (TARGET_POWERPC64) + emit_insn (gen_divdi3 (op5, op3, op4)); + else + { + rtx libfunc = optab_libfunc (sdiv_optab, DImode); + rtx target = emit_library_call_value (libfunc, + op5, LCT_NORMAL, DImode, + op3, DImode, + op4, DImode); + emit_move_insn (op5, target); + } emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); - emit_insn (gen_divdi3 (op3, op3, op4)); + if (TARGET_POWERPC64) + emit_insn (gen_divdi3 (op3, op3, op4)); + else + { + rtx libfunc = optab_libfunc (sdiv_optab, DImode); + rtx target = emit_library_call_value (libfunc, + op3, LCT_NORMAL, DImode, + op3, DImode, + op4, DImode); + emit_move_insn (op3, target); + } emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); DONE; }" @@ -1716,10 +1748,30 @@ rtx op5 = gen_reg_rtx (DImode); 
emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); - emit_insn (gen_udivdi3 (op5, op3, op4)); + if (TARGET_POWERPC64) + emit_insn (gen_udivdi3 (op5, op3, op4)); + else + { + rtx libfunc = optab_libfunc (udiv_optab, DImode); + rtx target = emit_library_call_value (libfunc, + op5, LCT_NORMAL, DImode, + op3, DImode, + op4, DImode); + emit_move_insn (op5, target); + } emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); - emit_insn (gen_udivdi3 (op3, op3, op4)); + if (TARGET_POWERPC64) + emit_insn (gen_udivdi3 (op3, op3, op4)); + else + { + rtx libfunc = optab_libfunc (udiv_optab, DImode); + rtx target = emit_library_call_value (libfunc, + op3, LCT_NORMAL, DImode, + op3, DImode, + op4, DImode); + emit_move_insn (op3, target); + } emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); DONE; }" Index: gcc/testsuite/gcc.target/powerpc/pr83926.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr83926.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr83926.c (working copy) @@ -0,0 +1,22 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-O2 -mcpu=power8 -mno-fold-gimple" } */ + +__attribute__ ((altivec(vector__))) long long +sdiv (__attribute__ ((altivec(vector__))) long long a, + __attribute__ ((altivec(vector__))) long long b) +{ + return __builtin_vsx_div_2di (a, b); +} +__attribute__ ((altivec(vector__))) unsigned long long +udiv (__attribute__ ((altivec(vector__))) unsigned long long a, + __attribute__ ((altivec(vector__))) unsigned long long b) +{ + return __builtin_vsx_udiv_2di (a, b); +} +__attribute__ ((altivec(vector__))) long long +smul (__attribute__ ((altivec(vector__))) long long a, + __attribute__ ((altivec(vector__))) long long b) +{ + return 
__builtin_vsx_mul_2di (a, b); +} Index: gcc/testsuite/gcc.target/powerpc/builtins-1-be.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/builtins-1-be.c (revision 257390) +++ gcc/testsuite/gcc.target/powerpc/builtins-1-be.c (working copy) @@ -1,6 +1,7 @@ /* { dg-do compile { target { powerpc64-*-* } } } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O0 -mno-fold-gimple" } */ +/* { dg-prune-output "gimple folding of rs6000 builtins has been disabled." } */ /* Test that a number of newly added builtin overloads are accepted by the compiler. */ @@ -22,10 +23,10 @@ vec_ctf xvmuldp vec_cts xvcvdpsxds, vctsxs vec_ctu xvcvdpuxds, vctuxs - vec_div divd, divdu + vec_div divd, divdu | __divdi3(), __udivdi3() vec_mergel vmrghb, vmrghh, xxmrghw vec_mergeh xxmrglw, vmrglh - vec_mul mulld + vec_mul mulld | mullw, mulhwu vec_nor xxlnor vec_or xxlor vec_packsu vpksdus @@ -49,21 +50,26 @@ /* { dg-final { scan-assembler-times "vctsxs" 1 } } */ /* { dg-final { scan-assembler-times "xvcvdpuxds" 1 } } */ /* { dg-final { scan-assembler-times "vctuxs" 1 } } */ -/* { dg-final { scan-assembler-times "divd" 4 } } */ -/* { dg-final { scan-assembler-times "divdu" 2 } } */ /* { dg-final { scan-assembler-times "vmrghb" 0 } } */ /* { dg-final { scan-assembler-times "vmrghh" 3 } } */ /* { dg-final { scan-assembler-times "xxmrghw" 1 } } */ /* { dg-final { scan-assembler-times "xxmrglw" 4 } } */ /* { dg-final { scan-assembler-times "vmrglh" 4 } } */ -/* { dg-final { scan-assembler-times "mulld" 4 } } */ -/* { dg-final { scan-assembler-times "xxlnor" 19 } } */ -/* { dg-final { scan-assembler-times "xxlor" 14 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ +/* { dg-final { scan-assembler-times {\mxxlor\M} 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mxxlor\M} 11 { target ilp32 } } } */ /* { dg-final { scan-assembler-times 
"vpksdus" 1 } } */ /* { dg-final { scan-assembler-times "vperm" 2 } } */ /* { dg-final { scan-assembler-times "xvrdpi" 1 } } */ /* { dg-final { scan-assembler-times "xxsel" 6 } } */ /* { dg-final { scan-assembler-times "xxlxor" 6 } } */ +/* { dg-final { scan-assembler-times {\mdivd\M} 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mdivdu\M} 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mmulld\M} 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mbl __divdi3\M} 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mbl __udivdi3\M} 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mmullw\M} 12 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mmulhwu\M} 4 { target ilp32 } } } */ /* The source code for the test is in builtins-1.h. */ #include "builtins-1.h"