Hi Dave,

I actually think using the plus_xor_ior_operator predicate is useful.  It means that if combine, inlining or some other RTL simplification generates one of these variants, the form will still be recognized by the backend.  It's more typing, but the compiler produces better code.
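For example (a quick illustrative sketch, not part of the patch itself), all three of the functions below compute the same rotate, and with the plus_xor_ior_operator patterns the backend should match whichever form the middle end happens to produce:

  unsigned long long rot4_ior(unsigned long long x) { return (x << 4) | (x >> 60); }
  unsigned long long rot4_xor(unsigned long long x) { return (x << 4) ^ (x >> 60); }
  unsigned long long rot4_add(unsigned long long x) { return (x << 4) + (x >> 60); }

Because the two shifted fields never overlap, ior, xor and plus all yield the same value, which is why a single predicate can cover them.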
Here's what I have so far, but please feel free to modify anything.  I'll leave the rest to you.  With this patch:

unsigned long long rotl4(unsigned long long x) { return (x<<4) | (x>>60); }
unsigned long long rotr4(unsigned long long x) { return (x<<60) | (x>>4); }

which previously generated:

rotl4:	depd,z %r26,59,60,%r28
	extrd,u %r26,3,4,%r26
	bve (%r2)
	or %r26,%r28,%r28

rotr4:	extrd,u %r26,59,60,%r28
	depd,z %r26,3,4,%r26
	bve (%r2)
	or %r26,%r28,%r28

now produces:

rotl4:	bve (%r2)
	shrpd %r26,%r26,60,%r28

rotr4:	bve (%r2)
	shrpd %r26,%r26,4,%r28

I'm guessing this is very similar to what you were thinking (or what I described previously).  Many thanks again for trying out these patches/suggestions for me.

Best regards,
Roger
--

-----Original Message-----
From: John David Anglin <dave.ang...@bell.net>
Sent: 22 August 2020 23:09
To: Roger Sayle <ro...@nextmovesoftware.com>; 'GCC Patches' <gcc-patches@gcc.gnu.org>
Cc: 'Jeff Law' <l...@redhat.com>
Subject: Re: [PATCH] hppa: Improve expansion of ashldi3 when !TARGET_64BIT

On 2020-08-22 12:01 p.m., Roger Sayle wrote:
> I suspect that the issue with the 64-bit patterns is that the second
> variant of pa.md's define_insn "shrpdi4" is unlikely ever to match, as
> (minus:DI (const_int 64) x) is never "canonical" when x is itself a
> CONST_INT.  Splitting this define_insn into two (or three, see below)
> separate forms, the first as it currently is and the second (as you
> suggest) with
>   "TARGET_64BIT
>    && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
> should do the trick.
I will go ahead and add the basic patterns.  It seems it would be best if I avoid using the "plus_xor_ior_operator".  It also seems the 32-bit patterns should avoid it.
>
> My first impression was that the DImode shrpd instructions would be
> most useful for implementing TI mode shifts, but that TI mode isn't
> supported by hppa64.  But then I noticed that the more immediate
> benefit would be in supporting rotrdi3 and rotldi3 on TARGET_64BIT,
> which currently have neither expanders nor insns defined.  Here GCC
> currently generates three instructions where a single shrpd would be
> optimal.
It turns out we now need to support TI mode and __int128 for libgomp.  The hppa64-hpux target won't boot without it.  I had just added a change to support TI mode but it's untested.

Regards,
Dave

--
John David Anglin  dave.ang...@bell.net
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 6350c68..5f04c02 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -6604,32 +6604,82 @@
     (set_attr "length" "4")])
 
 ; Shift right pair word 0 to 31 bits.
-(define_insn "shrpsi4"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
-			   (minus:SI (const_int 32)
-				     (match_operand:SI 3 "shift5_operand" "q,n")))
-		(lshiftrt:SI (match_operand:SI 2 "register_operand" "r,r")
-			     (match_dup 3))))]
+(define_insn "*shrpsi4_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(match_operator:SI 4 "plus_xor_ior_operator"
+	  [(ashift:SI (match_operand:SI 1 "register_operand" "r")
+		      (minus:SI (const_int 32)
+				(match_operand:SI 3 "register_operand" "q")))
+	   (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+			(match_dup 3))]))]
   ""
-  "@
-   {vshd %1,%2,%0|shrpw %1,%2,%%sar,%0}
-   {shd|shrpw} %1,%2,%3,%0"
+  "{vshd %1,%2,%0|shrpw %1,%2,%%sar,%0}"
+  [(set_attr "type" "shift")
+   (set_attr "length" "4")])
+
+(define_insn "*shrpsi4_2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(match_operator:SI 4 "plus_xor_ior_operator"
+	  [(lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+			(match_operand:SI 3 "register_operand" "q"))
+	   (ashift:SI (match_operand:SI 1 "register_operand" "r")
+		      (minus:SI (const_int 32)
+				(match_dup 3)))]))]
+  ""
+  "{vshd %1,%2,%0|shrpw %1,%2,%%sar,%0}"
   [(set_attr "type" "shift")
    (set_attr "length" "4")])
 
 ; Shift right pair doubleword 0 to 63 bits.
-(define_insn "shrpdi4"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(ior:DI (ashift:DI (match_operand:SI 1 "register_operand" "r,r")
-			   (minus:DI (const_int 64)
-				     (match_operand:DI 3 "shift6_operand" "q,n")))
-		(lshiftrt:DI (match_operand:DI 2 "register_operand" "r,r")
-			     (match_dup 3))))]
+(define_insn "*shrpdi4_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(match_operator:DI 4 "plus_xor_ior_operator"
+	  [(ashift:DI (match_operand:DI 1 "register_operand" "r")
+		      (minus:DI (const_int 64)
+				(match_operand:DI 3 "register_operand" "q")))
+	   (lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+			(match_dup 3))]))]
   "TARGET_64BIT"
-  "@
-   shrpd %1,%2,%%sar,%0
-   shrpd %1,%2,%3,%0"
+  "shrpd %1,%2,%%sar,%0"
+  [(set_attr "type" "shift")
+   (set_attr "length" "4")])
+
+(define_insn "*shrpdi4_2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(match_operator:DI 4 "plus_xor_ior_operator"
+	  [(lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+			(match_operand:DI 3 "shift6_operand" "q"))
+	   (ashift:DI (match_operand:SI 1 "register_operand" "r")
+		      (minus:DI (const_int 64)
+				(match_dup 3)))]))]
+  "TARGET_64BIT"
+  "shrpd %1,%2,%%sar,%0"
+  [(set_attr "type" "shift")
+   (set_attr "length" "4")])
+
+(define_insn "*shrpdi4_3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(match_operator:DI 5 "plus_xor_ior_operator"
+	  [(ashift:DI (match_operand:DI 1 "register_operand" "r")
+		      (match_operand:DI 3 "const_int_operand" "n"))
+	   (lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+			(match_operand:DI 4 "const_int_operand" "n"))]))]
+  "TARGET_64BIT
+   && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+  "shrpd %1,%2,%4,%0"
+  [(set_attr "type" "shift")
+   (set_attr "length" "4")])
+
+(define_insn "*shrpdi4_4"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(match_operator:DI 5 "plus_xor_ior_operator"
+	  [(lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+			(match_operand:DI 4 "const_int_operand" "n"))
+	   (ashift:DI (match_operand:DI 1 "register_operand" "r")
+		      (match_operand:DI 3 "const_int_operand" "n"))]))]
+  "TARGET_64BIT
+   && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+  "shrpd %1,%2,%4,%0"
   [(set_attr "type" "shift")
    (set_attr "length" "4")])
 
@@ -6668,7 +6718,7 @@
       /* Else expand normally.  */
 }")
 
-(define_insn ""
+(define_insn "*rotlsi3_internal"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(rotate:SI (match_operand:SI 1 "register_operand" "r")
 		   (match_operand:SI 2 "const_int_operand" "n")))]
@@ -6681,6 +6731,54 @@
   [(set_attr "type" "shift")
    (set_attr "length" "4")])
 
+(define_insn "rotrdi3"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(rotatert:DI (match_operand:DI 1 "register_operand" "r,r")
+		     (match_operand:DI 2 "shift6_operand" "q,n")))]
+  "TARGET_64BIT"
+  "*
+{
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      operands[2] = GEN_INT (INTVAL (operands[2]) & 63);
+      return \"shrpd %1,%1,%2,%0\";
+    }
+  else
+    return \"shrpd %1,%1,%%sar,%0\";
+}"
+  [(set_attr "type" "shift")
+   (set_attr "length" "4")])
+
+(define_expand "rotldi3"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(rotate:DI (match_operand:DI 1 "register_operand" "")
+		   (match_operand:DI 2 "arith32_operand" "")))]
+  "TARGET_64BIT"
+  "
+{
+  if (GET_CODE (operands[2]) != CONST_INT)
+    {
+      rtx temp = gen_reg_rtx (DImode);
+      emit_insn (gen_subdi3 (temp, GEN_INT (64), operands[2]));
+      emit_insn (gen_rotrdi3 (operands[0], operands[1], temp));
+      DONE;
+    }
+  /* Else expand normally.  */
+}")
+
+(define_insn "*rotldi3_internal"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(rotate:DI (match_operand:DI 1 "register_operand" "r")
+		   (match_operand:DI 2 "const_int_operand" "n")))]
+  "TARGET_64BIT"
+  "*
+{
+  operands[2] = GEN_INT ((64 - INTVAL (operands[2])) & 63);
+  return \"shrpd %1,%1,%2,%0\";
+}"
+  [(set_attr "type" "shift")
+   (set_attr "length" "4")])
+
 (define_insn ""
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(match_operator:SI 5 "plus_xor_ior_operator"