>> 2017-11-17 Kito Cheng <kito.ch...@gmail.com> >> >> * longlong.h [__riscv] (__umulsidi3): Define. >> [__riscv] (umul_ppmm) Likewise. >> [__riscv] (__muluw3) Likewise. > > > Apparently the point of this is that by defining __mulsi3/__muldi3 as an > extended asm, we get better register allocation, and hence better > performance. It would be nice if this info was provided when a patch was > submitted, so that we don't have to figure it out ourselves.
I prefer to write more comments in the code instead of the ChangeLog, so I added more comments in __muluw3. Btw, a long time ago, Vladimir told me[1] that it should contain what is done, but not why it is done. [1] https://gcc.gnu.org/ml/gcc-patches/2014-06/msg00604.html > I see one minor issue though > >> + (w0) = __muluw3 (__ll_lowpart (__x1), __ll_B) \ >> + + __ll_lowpart (__x0); \ > > > The multiply here is just a shift and should be written as a shift by > (W_TYPE_SIZE / 2). > > You copied this code from the default definition which uses *, and assumes > that the compiler will optimize the multiply into a shift. However, because > __muluw3 expands into a call to __mulsi3/__muldi3 for a risc-v part with no > multiply, the optimization will not take place here, and you end up with one > extra unnecessary multiply operation. You are right, I guess I just substituted all the multiplications before; thank you for pointing this out :) Updated patch: 2017-11-24 Kito Cheng <kito.ch...@gmail.com> * longlong.h [__riscv] (__umulsidi3): Define. [__riscv] (umul_ppmm): Likewise. [__riscv] (__muluw3): Likewise. --- include/longlong.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/include/longlong.h b/include/longlong.h index 9d3ab21..b76fa41 100644 --- a/include/longlong.h +++ b/include/longlong.h @@ -1086,6 +1086,56 @@ extern UDItype __umulsidi3 (USItype, USItype); } while (0) #endif +#if defined(__riscv) +#ifdef __riscv_mul +#define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v)) +#define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b)) +#else +#if __riscv_xlen == 32 + #define MULUW3 "call __mulsi3" +#elif __riscv_xlen == 64 + #define MULUW3 "call __muldi3" +#else +#error unsupport xlen +#endif /* __riscv_xlen */ +/* We rely on the fact that MULUW3 doesn't clobber the t-registers. + It can give a better register allocation result. 
*/ +#define __muluw3(a, b) \ + ({ \ + register UWtype __op0 asm ("a0") = a; \ + register UWtype __op1 asm ("a1") = b; \ + asm volatile (MULUW3 \ + : "+r" (__op0), "+r" (__op1) \ + : \ + : "ra", "a2", "a3"); \ + __op0; \ + }) +#endif /* __riscv_mul */ +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart (u); \ + __uh = __ll_highpart (u); \ + __vl = __ll_lowpart (v); \ + __vh = __ll_highpart (v); \ + \ + __x0 = __muluw3 (__ul, __vl); \ + __x1 = __muluw3 (__ul, __vh); \ + __x2 = __muluw3 (__uh, __vl); \ + __x3 = __muluw3 (__uh, __vh); \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ + } while (0) +#endif /* __riscv */ + #if defined(__sh__) && W_TYPE_SIZE == 32 #ifndef __sh1__ #define umul_ppmm(w1, w0, u, v) \ -- 2.7.4