 config_host.mk.in                                                             |   1
 configure.ac                                                                  |  16
 external/skia/0001-loong64-fix-some-bugs-in-type-conversion-on-Loongarc.patch | 119 ++
 external/skia/0002-loong64-Honor-existing-LASX-LSX-settings.patch             |  48 +
 external/skia/0003-loong64-Fix-missing-rounding-in-loong64-scaled_mult-.patch |  74 +
 external/skia/0004-loong64-Fix-the-remaining-implicit-vector-casts.patch      | 439 ++
 external/skia/Library_skia.mk                                                 |  15
 external/skia/UnpackedTarball_skia.mk                                         |   4
 8 files changed, 715 insertions(+), 1 deletion(-)
New commits:

commit be1b310125098ebd015c4838f1fdc8ccd50817c9
Author:     Bingwu Zhang <x...@aosc.io>
AuthorDate: Fri Feb 7 22:31:17 2025 +0800
Commit:     Noel Grandin <noel.gran...@collabora.co.uk>
CommitDate: Sat Mar 1 12:29:54 2025 +0100

    skia: enable LASX explicitly for LASX binary compatible objects

    Fix skia build on LA64 for the second time (:

    Enable LASX intrinsics explicitly when building with LASX binary
    compatibility:

        In function ‘__lasx_xvsub_h’,
            inlined from ‘S32_alpha_D32_filter_DX’ at .../workdir/UnpackedTarball/skia/src/opts/SkBitmapProcState_opts.h:323:43:
        /usr/lib/gcc/loongarch64-aosc-linux-gnu/14.2.0/include/lasxintrin.h:539:42: error: built-in function ‘__builtin_lasx_xvsub_h’ is not enabled
          539 |   return (__m256i)__builtin_lasx_xvsub_h ((v16i16)_1, (v16i16)_2);

    cf. https://buildit.aosc.io/logs/62784-libreoffice-25.2.0.3-loongarch64-loong13-2025-02-07-22:04:10.txt

    Link: https://skia-review.googlesource.com/c/skia/+/873776
    Fixes: I232b4656a87a4c3d87d669769c6483e01425c0e6
    Tested-by: Bingwu Zhang <x...@aosc.io>
    Change-Id: I9fbf0f24f28f71a76c10ee31e9a9da00ff350481
    Signed-off-by: Bingwu Zhang <x...@aosc.io>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181256
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>

diff --git a/config_host.mk.in b/config_host.mk.in
index 274e2759e186..2ab62f748bd6 100644
--- a/config_host.mk.in
+++ b/config_host.mk.in
@@ -100,6 +100,7 @@ export CPPU_ENV_FOR_BUILD=@CPPU_ENV_FOR_BUILD@
 export CPPUNIT_CFLAGS=$(gb_SPACE)@CPPUNIT_CFLAGS@
 export CPPUNIT_LIBS=$(gb_SPACE)@CPPUNIT_LIBS@
 export CPUNAME=@CPUNAME@
+ENABLE_LASX=@ENABLE_LASX@
 export CROSS_COMPILING=@CROSS_COMPILING@
 export CURL=@CURL@
 export CURL_CFLAGS=$(gb_SPACE)@CURL_CFLAGS@
diff --git a/configure.ac b/configure.ac
index 5ffe6c7f847b..b4c4cfecf2e2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5703,6 +5703,22 @@ emscripten*)
     ;;
 esac
 
+dnl ===================================================================
+dnl Target-specific options
+dnl ===================================================================
+
+ENABLE_LASX="TRUE"
+if test "$CPUNAME" == LOONGARCH64; then
+    AC_ARG_ENABLE(lasx,
+        AS_HELP_STRING([--enable-lasx],
+            [build with LASX vectorization])
+    )
+    if test "$enable_lasx" != yes; then
+        ENABLE_LASX=""
+    fi
+fi
+AC_SUBST(ENABLE_LASX)
+
 dnl ===================================================================
 dnl .NET support
 dnl ===================================================================
diff --git a/external/skia/Library_skia.mk b/external/skia/Library_skia.mk
index dd99e6f271ab..e9a43a35bbdf 100644
--- a/external/skia/Library_skia.mk
+++ b/external/skia/Library_skia.mk
@@ -1315,8 +1315,21 @@ $(eval $(call gb_Library_add_generated_exception_objects,skia,\
 	UnpackedTarball/skia/src/core/SkSwizzler_opts_lasx \
 	UnpackedTarball/skia/src/core/SkBlitRow_opts_lasx \
 	UnpackedTarball/skia/src/core/SkBitmapProcState_opts_lasx, \
-	$(LO_SKIA_AVOID_INLINE_COPIES) \
+	$(LO_SKIA_AVOID_INLINE_COPIES) -mlasx \
 ))
+# Always allow LASX builtin functions during linking
+# because the above SkOpts_lasx, etc., LASX binary compatibility
+# objects require LASX intrinsics to be available
+$(eval $(call gb_Library_add_ldflags,skia,\
+    -mlasx \
+))
+
+# Apply --enable-lasx to compiling
+ifeq ($(ENABLE_LASX),TRUE)
+$(eval $(call gb_Library_add_cxxflags,skia, \
+    -mlasx \
+))
+endif
 endif
 
 # Skcms code is used by png writer, which is used by SkiaHelper::dump().
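For context on the fix above: GCC only accepts the __lasx_* intrinsics from lasxintrin.h when LASX code generation is enabled, which is why the *_lasx objects now always get -mlasx while the rest of the library only gets it under --enable-lasx. A minimal sketch of the failure mode follows; the file and function names are hypothetical, for illustration only:

    // lasx_probe.cpp -- hypothetical standalone probe, not part of either tree.
    #include <lasxintrin.h>

    // __lasx_xvsub_h expands to __builtin_lasx_xvsub_h, the builtin named in
    // the build log quoted above (lane-wise subtraction of 16 packed int16s).
    __m256i sub_16xi16(__m256i a, __m256i b) {
        return __lasx_xvsub_h(a, b);
    }

On a LoongArch64 toolchain that does not enable LASX by default, `g++ -c lasx_probe.cpp` reproduces the "built-in function ... is not enabled" error, while `g++ -mlasx -c lasx_probe.cpp` succeeds. Compiling the SkOpts_lasx objects this way unconditionally is safe because, as the commit message puts it, they are "binary compatibility" objects: always built, only executed on CPUs that actually support LASX.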
commit ec5cd2a82dfb39fcb76eb42ef2ebb5be71e70007
Author:     Bingwu Zhang <x...@aosc.io>
AuthorDate: Fri Feb 7 09:10:19 2025 +0800
Commit:     Noel Grandin <noel.gran...@collabora.co.uk>
CommitDate: Sat Mar 1 12:29:42 2025 +0100

    skia: backport & cherry-pick fixes for LoongArch64 LSX vectorization

    This changeset backports/cherry-picks the following changes from Skia:

    - I5751928376acbe298d923b465cc9303bd5bac2bb
    - I1e6183b987002ed09621b111a6012a0f186233f1
    - I45e43a7a7e6d50b4c32c5e69a6d1d7de341eccf1
    - I3b4e9479cb6f9628b4cf796a0ac25098bc1836a2

    This should address compiler errors when building LibreOffice for
    LoongArch64 targets with LSX enabled and LASX disabled.

    Tested-by: Bingwu Zhang <x...@aosc.io>
    Link: https://skia-review.googlesource.com/c/skia/+/908136
    Link: https://skia-review.googlesource.com/c/skia/+/908137
    Link: https://skia-review.googlesource.com/c/skia/+/909436
    Link: https://skia-review.googlesource.com/c/skia/+/948976
    Change-Id: I87fa242c44296b222a8c70b19169b06d62f566b3
    Signed-off-by: Bingwu Zhang <x...@aosc.io>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181251
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>
    Tested-by: Jenkins

diff --git a/external/skia/0001-loong64-fix-some-bugs-in-type-conversion-on-Loongarc.patch b/external/skia/0001-loong64-fix-some-bugs-in-type-conversion-on-Loongarc.patch
new file mode 100644
index 000000000000..d1d9acda5643
--- /dev/null
+++ b/external/skia/0001-loong64-fix-some-bugs-in-type-conversion-on-Loongarc.patch
@@ -0,0 +1,119 @@
+From a65c19ee42e9c9a2728497d6ea42a32dd516ea93 Mon Sep 17 00:00:00 2001
+From: Kai Zou <double1...@gmail.com>
+Date: Fri, 18 Oct 2024 10:01:39 +0800
+Subject: [PATCH 1/4] [loong64] fix some bugs in type conversion on Loongarch
+X-Developer-Signature: v=1; a=openpgp-sha256; l=4201; i=x...@aosc.io;
+ h=from:subject; bh=XTSCL4cd9w9LM40ioj+FZRuks/XFKeHfRWGBQfQdrWE=;
+ b=owGbwMvMwCW2U4Ij7wZL9ETG02pJDOmr/sbfCshtPdTqW/9lttfaXbYdfb9WnqxiMDrPY27Ow
+ x4g9j+8o5SFQYyLQVZMkaXIsMGbVSedX3RZuSzMHFYmkCEMXJwCMJGz7xgZXvbdNJ0eeDbII2NR
+ fObeCr82UW7W//P0l7w8sqFmysuJVxj+CjaUTNptuKahJOrs7m/GPE9jLzpt2ipQ9phl9e+3P17
+ P5QcA
+X-Developer-Key: i=x...@aosc.io; a=openpgp;
+ fpr=7231804B052C670F15A6771DB918086ED8045B91
+
+Backport of 7e8c7385e673 ("fix some bugs in type conversion on
+Loongarch").
+
+Change-Id: I5751928376acbe298d923b465cc9303bd5bac2bb
+Reviewed-on: https://skia-review.googlesource.com/c/skia/+/909436
+Reviewed-by: Greg Daniel <egdan...@google.com>
+Reviewed-by: Ben Wagner <bunge...@google.com>
+Commit-Queue: Ben Wagner <bunge...@google.com>
+Signed-off-by: Bingwu Zhang <x...@aosc.io>
+---
+ src/opts/SkRasterPipeline_opts.h | 30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
+index e1783a589075..d12968d876b5 100644
+--- a/src/opts/SkRasterPipeline_opts.h
++++ b/src/opts/SkRasterPipeline_opts.h
+@@ -943,22 +943,22 @@ namespace SK_OPTS_NS {
+     SI F abs_ (F v) { return (F)__lasx_xvand_v((I32)v, (I32)(0-v)); }
+     SI I32 abs_(I32 v) { return max(v, -v); }
+     SI F rcp_approx(F v) { return __lasx_xvfrecip_s(v); }
+-    SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, 2.0f); }
++    SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
+     SI F rsqrt_approx (F v) { return __lasx_xvfrsqrt_s(v); }
+     SI F sqrt_(F v) { return __lasx_xvfsqrt_s(v); }
+
+     SI U32 iround(F v) {
+-        F t = F(0.5);
++        F t = F() + 0.5f;
+         return __lasx_xvftintrz_w_s(v + t);
+     }
+
+     SI U32 round(F v) {
+-        F t = F(0.5);
++        F t = F() + 0.5f;
+         return __lasx_xvftintrz_w_s(v + t);
+     }
+
+     SI U32 round(F v, F scale) {
+-        F t = F(0.5);
++        F t = F() + 0.5f;
+         return __lasx_xvftintrz_w_s(mad(v, scale, t));
+     }
+
+@@ -993,8 +993,8 @@ namespace SK_OPTS_NS {
+
+     template <typename T>
+     SI V<T> gather(const T* p, U32 ix) {
+-        return { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
+-                 p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], };
++        return V<T>{ p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
++                     p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], };
+     }
+
+     template <typename V, typename S>
+@@ -1147,20 +1147,20 @@ namespace SK_OPTS_NS {
+     SI F abs_(F v) { return (F)__lsx_vand_v((I32)v, (I32)(0-v)); }
+     SI I32 abs_(I32 v) { return max(v, -v); }
+     SI F rcp_approx (F v) { return __lsx_vfrecip_s(v); }
+-    SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, 2.0f); }
++    SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
+     SI F rsqrt_approx (F v) { return __lsx_vfrsqrt_s(v); }
+     SI F sqrt_(F v) { return __lsx_vfsqrt_s (v); }
+
+     SI U32 iround(F v) {
+-        F t = F(0.5);
++        F t = F() + 0.5f;
+         return __lsx_vftintrz_w_s(v + t); }
+
+     SI U32 round(F v) {
+-        F t = F(0.5);
++        F t = F() + 0.5f;
+         return __lsx_vftintrz_w_s(v + t); }
+
+     SI U32 round(F v, F scale) {
+-        F t = F(0.5);
++        F t = F() + 0.5f;
+         return __lsx_vftintrz_w_s(mad(v, scale, t)); }
+
+     SI U16 pack(U32 v) {
+@@ -1196,15 +1196,15 @@ namespace SK_OPTS_NS {
+
+     template <typename T>
+     SI V<T> gather(const T* p, U32 ix) {
+-        return {p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]]};
++        return V<T>{p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]]};
+     }
+     // Using 'int*' prevents data from passing through floating-point registers.
+     SI F gather(const int* p, int ix0, int ix1, int ix2, int ix3) {
+         F ret = {0.0};
+-        ret = __lsx_vinsgr2vr_w(ret, p[ix0], 0);
+-        ret = __lsx_vinsgr2vr_w(ret, p[ix1], 1);
+-        ret = __lsx_vinsgr2vr_w(ret, p[ix2], 2);
+-        ret = __lsx_vinsgr2vr_w(ret, p[ix3], 3);
++        ret = (F)__lsx_vinsgr2vr_w(ret, p[ix0], 0);
++        ret = (F)__lsx_vinsgr2vr_w(ret, p[ix1], 1);
++        ret = (F)__lsx_vinsgr2vr_w(ret, p[ix2], 2);
++        ret = (F)__lsx_vinsgr2vr_w(ret, p[ix3], 3);
+         return ret;
+     }
+
+
+base-commit: b988efa06f8aa3bfeaa18c5b8c716ff244ab43cf
+--
+2.48.1
+
diff --git a/external/skia/0002-loong64-Honor-existing-LASX-LSX-settings.patch b/external/skia/0002-loong64-Honor-existing-LASX-LSX-settings.patch
new file mode 100644
index 000000000000..2f5b2a3d87c4
--- /dev/null
+++ b/external/skia/0002-loong64-Honor-existing-LASX-LSX-settings.patch
@@ -0,0 +1,48 @@
+From d1168502a9872eb1a8de76a0414292e95c54eed9 Mon Sep 17 00:00:00 2001
+From: Bingwu Zhang <x...@aosc.io>
+Date: Sat, 8 Feb 2025 12:05:04 +0800
+Subject: [PATCH 2/4] [loong64] Honor existing LASX/LSX settings
+X-Developer-Signature: v=1; a=openpgp-sha256; l=1333; i=x...@aosc.io;
+ h=from:subject; bh=XsKjt2E4mmiE+K/HXjlVXAX8KruaZxFK1HhexJExG7M=;
+ b=owGbwMvMwCW2U4Ij7wZL9ETG02pJDOmr/iY0Tfy5rfxqeegfo8PK9U0bPFs45jMd/f4u99juw
+ keHY3786ihlYRDjYpAVU2QpMmzwZtVJ5xddVi4LM4eVCWQIAxenAEzk5A2Gf7rTeX/clQ99KPDu
+ xPsYVtY+icRvz2/9/Jr9ZaXaLrPl6nEMf/gFmgxyEkodXlYIBzTEzpvpu23Zj6zpG8xW2X0NO6L
+ WwAAA
+X-Developer-Key: i=x...@aosc.io; a=openpgp;
+ fpr=7231804B052C670F15A6771DB918086ED8045B91
+
+Change-Id: I1e6183b987002ed09621b111a6012a0f186233f1
+Signed-off-by: Bingwu Zhang <x...@aosc.io>
+---
+ AUTHORS                          | 1 +
+ src/opts/SkRasterPipeline_opts.h | 2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/AUTHORS b/AUTHORS
+index f533d9a8e5bf..03d0ac82a412 100755
+--- a/AUTHORS
++++ b/AUTHORS
+@@ -24,6 +24,7 @@ Amazon, Inc <*@amazon.com>
+ Anthony Catel <parab...@gmail.com>
+ Andrew Kurushin <ajax16...@gmail.com>
+ Bharat Ahuja <ahujabhara...@gmail.com>
++Bingwu Zhang <x...@aosc.io>
+ Biswapriyo Nath <nathbap...@gmail.com>
+ Brian Salomon <briansalo...@gmail.com>
+ Callum Moffat <smartercal...@gmail.com>
+diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
+index d12968d876b5..27a00474e07b 100644
+--- a/src/opts/SkRasterPipeline_opts.h
++++ b/src/opts/SkRasterPipeline_opts.h
+@@ -69,7 +69,7 @@ using NoCtx = const void*;
+
+ #if defined(SKRP_CPU_SCALAR) || defined(SKRP_CPU_NEON) || defined(SKRP_CPU_HSW) || \
+     defined(SKRP_CPU_SKX) || defined(SKRP_CPU_AVX) || defined(SKRP_CPU_SSE41) || \
+-    defined(SKRP_CPU_SSE2)
++    defined(SKRP_CPU_SSE2) || defined(SKRP_CPU_LASX) || defined(SKRP_CPU_LSX)
+     // Honor the existing setting
+ #elif !defined(__clang__) && !defined(__GNUC__)
+     #define SKRP_CPU_SCALAR
+--
+2.48.1
+
diff --git a/external/skia/0003-loong64-Fix-missing-rounding-in-loong64-scaled_mult-.patch b/external/skia/0003-loong64-Fix-missing-rounding-in-loong64-scaled_mult-.patch
new file mode 100644
index 000000000000..07640b3f24d2
--- /dev/null
+++ b/external/skia/0003-loong64-Fix-missing-rounding-in-loong64-scaled_mult-.patch
@@ -0,0 +1,74 @@
+From d83823a4366ba80d17c1cb8238a1f9341b4b7dbb Mon Sep 17 00:00:00 2001
+From: WANG Xuerui <g...@xen0n.name>
+Date: Tue, 8 Oct 2024 18:53:11 +0800
+Subject: [PATCH 3/4] [loong64] Fix missing rounding in loong64 scaled_mult
+ implementation
+X-Developer-Signature: v=1; a=openpgp-sha256; l=2720; i=x...@aosc.io;
+ h=from:subject; bh=kJJcHKrCRPKmlNuupz+tutR4ovZG2dr8WzFx7tRxsZ4=;
+ b=owGbwMvMwCW2U4Ij7wZL9ETG02pJDOmr/iYoOXn9Vw6eo9upkB98c86RSap2a5texns/mayXt
+ eBg1qtDHaUsDGJcDLJiiixFhg3erDrp/KLLymVh5rAygQxh4OIUgIncE2dkWN/jenqS27vvB5w2
+ BJocv8muYCVutbpokcN5C4XY0OSFQgz/zOaoHn6gdCY2UGm13i/tU1lls1f+NHq+S7mj12N5Ufc
+ MZgA=
+X-Developer-Key: i=x...@aosc.io; a=openpgp;
+ fpr=7231804B052C670F15A6771DB918086ED8045B91
+
+The reference semantics of scaled_mult include rounding, but the
+original implementation did not do so. This is triggering an SkAssert
+in the unit test case FilterResult_raster_RescaleWithTransform, from
+constrained_add's debug checks.
+
+The fixed implementation bumps the cost of each scaled_mult from 2
+LoongArch SIMD instructions to 8 (with 1 constant load that can be
+shared across unrolled calls to scaled_mult), but now matches the
+reference scalar semantics and unblocks testing of debug builds.
+
+Change-Id: I45e43a7a7e6d50b4c32c5e69a6d1d7de341eccf1
+Signed-off-by: Bingwu Zhang <x...@aosc.io>
+---
+ AUTHORS                          |  1 +
+ src/opts/SkRasterPipeline_opts.h | 16 ++++++++++++----
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/AUTHORS b/AUTHORS
+index 03d0ac82a412..ef0fdfa550d7 100755
+--- a/AUTHORS
++++ b/AUTHORS
+@@ -104,6 +104,7 @@ Sylvestre Ledru <sylvestre.le...@gmail.com>
+ The Chromium Authors <*@chromium.org>
+ Thiago Fransosi Farina <thiago.far...@gmail.com>
+ Vibe Inc <*@vibe.us>
++WANG Xuerui <g...@xen0n.name>
+ William Candillon <wcandil...@gmail.com>
+ Wonmin Park <satcom1...@hanmail.net>
+ Yandex LLC <*@yandex-team.ru>
+diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
+index 27a00474e07b..42e9cd8aa229 100644
+--- a/src/opts/SkRasterPipeline_opts.h
++++ b/src/opts/SkRasterPipeline_opts.h
+@@ -5543,11 +5543,19 @@ SI I16 scaled_mult(I16 a, I16 b) {
+ #elif defined(SKRP_CPU_NEON)
+     return vqrdmulhq_s16(a, b);
+ #elif defined(SKRP_CPU_LASX)
+-    I16 res = __lasx_xvmuh_h(a, b);
+-    return __lasx_xvslli_h(res, 1);
++    Vec<8, int32_t> even = (Vec<8, int32_t>)__lasx_xvmulwev_w_h((__m256i)a, (__m256i)b);
++    Vec<8, int32_t> odd  = (Vec<8, int32_t>)__lasx_xvmulwod_w_h((__m256i)a, (__m256i)b);
++    Vec<8, int32_t> roundingTerm = (Vec<8, int32_t>)__lasx_xvldi(-0xec0);  // v8i32(0x40 << 8)
++    even = (even + roundingTerm) >> 15;
++    odd  = (odd  + roundingTerm) >> 15;
++    return (I16)__lasx_xvpackev_h((__m256i)odd, (__m256i)even);
+ #elif defined(SKRP_CPU_LSX)
+-    I16 res = __lsx_vmuh_h(a, b);
+-    return __lsx_vslli_h(res, 1);
++    Vec<4, int32_t> even = (Vec<4, int32_t>)__lsx_vmulwev_w_h((__m128i)a, (__m128i)b);
++    Vec<4, int32_t> odd  = (Vec<4, int32_t>)__lsx_vmulwod_w_h((__m128i)a, (__m128i)b);
++    Vec<4, int32_t> roundingTerm = (Vec<4, int32_t>)__lsx_vldi(-0xec0);  // v4i32(0x40 << 8)
++    even = (even + roundingTerm) >> 15;
++    odd  = (odd  + roundingTerm) >> 15;
++    return (I16)__lsx_vpackev_h((__m128i)odd, (__m128i)even);
+ #else
+     const I32 roundingTerm = I32_(1 << 14);
+     return cast<I16>((cast<I32>(a) * cast<I32>(b) + roundingTerm) >> 15);
+--
+2.48.1
+
diff --git a/external/skia/0004-loong64-Fix-the-remaining-implicit-vector-casts.patch b/external/skia/0004-loong64-Fix-the-remaining-implicit-vector-casts.patch
new file mode 100644
index 000000000000..8b807ab76aa6
--- /dev/null
+++ b/external/skia/0004-loong64-Fix-the-remaining-implicit-vector-casts.patch
@@ -0,0 +1,439 @@
+From e1bf3ab137bc79eb724edb16eebcdbd035384314 Mon Sep 17 00:00:00 2001
+From: WANG Xuerui <g...@xen0n.name>
+Date: Sun, 6 Oct 2024 21:09:00 +0800
+Subject: [PATCH 4/4] [loong64] Fix the remaining implicit vector casts
+X-Developer-Signature: v=1; a=openpgp-sha256; l=20810; i=x...@aosc.io;
+ h=from:subject; bh=1UrATZzIPMUS1cxzVsIuckeoCgGcUcx1bqBHm1XVQFY=;
+ b=owGbwMvMwCW2U4Ij7wZL9ETG02pJDOmr/ib0cTzIc+w+/XJhdv+2X37JPt57bP6crPyXVGG3d
+ N7vvM8NHaUsDGJcDLJiiixFhg3erDrp/KLLymVh5rAygQxh4OIUgIlsqGL4wxXdeZD/v+775UcW
+ XfCd0pa0KljQ+9af08veRUT/kH5/sIOR4QKzwIsKh0L/xB6T1mdTjy5aWvl/umNU4LQHRhLes3r
+ r2AA=
+X-Developer-Key: i=x...@aosc.io; a=openpgp;
+ fpr=7231804B052C670F15A6771DB918086ED8045B91
+
+CL 909436 fixed a few implicit vector casts, but they are not tested
+with a proper Skia build (in fact it was mainly for fixing those errors
+that happen to show up in Debian packaging e.g. wpewebkit), so many of
+the implicit casts remain.
+
+The changes here are tested with GCC 14, and confirmed to not affect
+Clang builds. However, due to an issue with Clang headers [1],
+-fno-lax-vector-conversions cannot be enabled for LoongArch Clang
+builds yet, at least for Clang < 19.1.4 [2].
+
+[1]: https://github.com/llvm/llvm-project/issues/110834
+[2]: https://github.com/llvm/llvm-project/pull/114958
+
+Change-Id: I3b4e9479cb6f9628b4cf796a0ac25098bc1836a2
+Signed-off-by: Bingwu Zhang <x...@aosc.io>
+---
+ src/core/SkBlurEngine.cpp        |  36 +++----
+ src/opts/SkRasterPipeline_opts.h | 172 +++++++++++++++----------------
+ 2 files changed, 103 insertions(+), 105 deletions(-)
+
+diff --git a/src/core/SkBlurEngine.cpp b/src/core/SkBlurEngine.cpp
+index 2c18eb0fb2e7..1c8e9bdb7945 100644
+--- a/src/core/SkBlurEngine.cpp
++++ b/src/core/SkBlurEngine.cpp
+@@ -330,61 +330,61 @@ private:
+         skvx::Vec<4, uint32_t>* buffer0Cursor = fBuffer0Cursor;
+         skvx::Vec<4, uint32_t>* buffer1Cursor = fBuffer1Cursor;
+         skvx::Vec<4, uint32_t>* buffer2Cursor = fBuffer2Cursor;
+-        v4u32 sum0 = __lsx_vld(fSum0, 0); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
+-        v4u32 sum1 = __lsx_vld(fSum1, 0);
+-        v4u32 sum2 = __lsx_vld(fSum2, 0);
++        v4u32 sum0 = (v4u32)__lsx_vld(fSum0, 0); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
++        v4u32 sum1 = (v4u32)__lsx_vld(fSum1, 0);
++        v4u32 sum2 = (v4u32)__lsx_vld(fSum2, 0);
+
+         auto processValue = [&](v4u32& vLeadingEdge){
+             sum0 += vLeadingEdge;
+             sum1 += sum0;
+             sum2 += sum1;
+
+-            v4u32 divisorFactor = __lsx_vreplgr2vr_w(fDivider.divisorFactor());
+-            v4u32 blurred = __lsx_vmuh_w(divisorFactor, sum2);
++            v4u32 divisorFactor = (v4u32)__lsx_vreplgr2vr_w(fDivider.divisorFactor());
++            v4u32 blurred = (v4u32)__lsx_vmuh_w((__m128i)divisorFactor, (__m128i)sum2);
+
+-            v4u32 buffer2Value = __lsx_vld(buffer2Cursor, 0); //Not fBuffer0Cursor, out of bounds.
++            v4u32 buffer2Value = (v4u32)__lsx_vld(buffer2Cursor, 0); // Not fBuffer0Cursor, out of bounds.
+             sum2 -= buffer2Value;
+             __lsx_vst(sum1, (void *)buffer2Cursor, 0);
+             buffer2Cursor = (buffer2Cursor + 1) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2;
+-            v4u32 buffer1Value = __lsx_vld(buffer1Cursor, 0);
++            v4u32 buffer1Value = (v4u32)__lsx_vld(buffer1Cursor, 0);
+             sum1 -= buffer1Value;
+             __lsx_vst(sum0, (void *)buffer1Cursor, 0);
+             buffer1Cursor = (buffer1Cursor + 1) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1;
+-            v4u32 buffer0Value = __lsx_vld(buffer0Cursor, 0);
++            v4u32 buffer0Value = (v4u32)__lsx_vld(buffer0Cursor, 0);
+             sum0 -= buffer0Value;
+             __lsx_vst(vLeadingEdge, (void *)buffer0Cursor, 0);
+             buffer0Cursor = (buffer0Cursor + 1) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0;
+
+             v16u8 shuf = {0x0,0x4,0x8,0xc,0x0};
+-            v16u8 ret = __lsx_vshuf_b(blurred, blurred, shuf);
++            v16u8 ret = (v16u8)__lsx_vshuf_b((__m128i)blurred, (__m128i)blurred, (__m128i)shuf);
+             return ret;
+         };
+
+-        v4u32 zero = __lsx_vldi(0x0);
++        v4u32 zero = (v4u32)__lsx_vldi(0x0);
+         if (!src && !dst) {
+             while (n --> 0) {
+                 (void)processValue(zero);
+             }
+         } else if (src && !dst) {
+             while (n --> 0) {
+-                v4u32 edge = __lsx_vinsgr2vr_w(zero, *src, 0);
+-                edge = __lsx_vilvl_b(zero, edge);
+-                edge = __lsx_vilvl_h(zero, edge);
++                v4u32 edge = (v4u32)__lsx_vinsgr2vr_w((__m128i)zero, *src, 0);
++                edge = (v4u32)__lsx_vilvl_b((__m128i)zero, (__m128i)edge);
++                edge = (v4u32)__lsx_vilvl_h((__m128i)zero, (__m128i)edge);
+                 (void)processValue(edge);
+                 src += srcStride;
+             }
+         } else if (!src && dst) {
+             while (n --> 0) {
+-                v4u32 ret = processValue(zero);
++                v4u32 ret = (v4u32)processValue(zero);
+                 __lsx_vstelm_w(ret, dst, 0, 0); // 3rd is offset, 4th is idx.
+                 dst += dstStride;
+             }
+         } else if (src && dst) {
+             while (n --> 0) {
+-                v4u32 edge = __lsx_vinsgr2vr_w(zero, *src, 0);
+-                edge = __lsx_vilvl_b(zero, edge);
+-                edge = __lsx_vilvl_h(zero, edge);
+-                v4u32 ret = processValue(edge);
++                v4u32 edge = (v4u32)__lsx_vinsgr2vr_w(zero, *src, 0);
++                edge = (v4u32)__lsx_vilvl_b((__m128i)zero, (__m128i)edge);
++                edge = (v4u32)__lsx_vilvl_h((__m128i)zero, (__m128i)edge);
++                v4u32 ret = (v4u32)processValue(edge);
+                 __lsx_vstelm_w(ret, dst, 0, 0);
+                 src += srcStride;
+                 dst += dstStride;
+diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
+index 42e9cd8aa229..97dfe7213d42 100644
+--- a/src/opts/SkRasterPipeline_opts.h
++++ b/src/opts/SkRasterPipeline_opts.h
+@@ -931,16 +931,16 @@ namespace SK_OPTS_NS {
+                                                    sk_bit_cast<__m256i>(c)));
+     }
+
+-    SI F min(F a, F b) { return __lasx_xvfmin_s(a,b); }
+-    SI F max(F a, F b) { return __lasx_xvfmax_s(a,b); }
+-    SI I32 min(I32 a, I32 b) { return __lasx_xvmin_w(a,b); }
+-    SI U32 min(U32 a, U32 b) { return __lasx_xvmin_wu(a,b); }
+-    SI I32 max(I32 a, I32 b) { return __lasx_xvmax_w(a,b); }
+-    SI U32 max(U32 a, U32 b) { return __lasx_xvmax_wu(a,b); }
++    SI F min(F a, F b) { return (F)__lasx_xvfmin_s((__m256)a, (__m256)b); }
++    SI F max(F a, F b) { return (F)__lasx_xvfmax_s((__m256)a, (__m256)b); }
++    SI I32 min(I32 a, I32 b) { return (I32)__lasx_xvmin_w((__m256i)a, (__m256i)b); }
++    SI U32 min(U32 a, U32 b) { return (U32)__lasx_xvmin_wu((__m256i)a, (__m256i)b); }
++    SI I32 max(I32 a, I32 b) { return (I32)__lasx_xvmax_w((__m256i)a, (__m256i)b); }
++    SI U32 max(U32 a, U32 b) { return (U32)__lasx_xvmax_wu((__m256i)a, (__m256i)b); }
+
+     SI F mad(F f, F m, F a) { return __lasx_xvfmadd_s(f, m, a); }
+     SI F nmad(F f, F m, F a) { return __lasx_xvfmadd_s(-f, m, a); }
+-    SI F abs_ (F v) { return (F)__lasx_xvand_v((I32)v, (I32)(0-v)); }
++    SI F abs_(F v) { return (F)__lasx_xvand_v((__m256i)v, (__m256i)(I32)(0-v)); }
+     SI I32 abs_(I32 v) { return max(v, -v); }
+     SI F rcp_approx(F v) { return __lasx_xvfrecip_s(v); }
+     SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
+@@ -948,13 +948,11 @@ namespace SK_OPTS_NS {
+     SI F rsqrt_approx (F v) { return __lasx_xvfrsqrt_s(v); }
+     SI F sqrt_(F v) { return __lasx_xvfsqrt_s(v); }
+
+     SI U32 iround(F v) {
+-        F t = F() + 0.5f;
+-        return __lasx_xvftintrz_w_s(v + t);
++        return (U32)__lasx_xvftintrz_w_s(v + 0.5f);
+     }
+
+     SI U32 round(F v) {
+-        F t = F() + 0.5f;
+-        return __lasx_xvftintrz_w_s(v + t);
++        return (U32)__lasx_xvftintrz_w_s(v + 0.5f);
+     }
+
+     SI U32 round(F v, F scale) {
+@@ -963,8 +961,8 @@ namespace SK_OPTS_NS {
+     }
+
+
+     SI U16 pack(U32 v) {
+-        return __lsx_vpickev_h(__lsx_vsat_wu(emulate_lasx_d_xr2vr_h(v), 15),
+-                               __lsx_vsat_wu(emulate_lasx_d_xr2vr_l(v), 15));
++        return (U16)__lsx_vpickev_h(__lsx_vsat_wu(emulate_lasx_d_xr2vr_h((__m256i)v), 15),
++                                    __lsx_vsat_wu(emulate_lasx_d_xr2vr_l((__m256i)v), 15));
+     }
+
+     SI U8 pack(U16 v) {
+@@ -974,12 +972,12 @@ namespace SK_OPTS_NS {
+     }
+
+     SI bool any(I32 c){
+-        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), c));
++        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), (__m256i)c));
+         return (retv[0] | retv[4]) != 0b0000;
+     }
+
+     SI bool all(I32 c){
+-        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), c));
++        v8i32 retv = (v8i32)__lasx_xvmskltz_w(__lasx_xvslt_wu(__lasx_xvldi(0), (__m256i)c));
+         return (retv[0] & retv[4]) == 0b1111;
+     }
+
+@@ -1012,16 +1010,16 @@ namespace SK_OPTS_NS {
+     }
+
+     SI void load2(const uint16_t* ptr, U16* r, U16* g) {
+-        U16 _0123 = __lsx_vld(ptr, 0),
+-            _4567 = __lsx_vld(ptr, 16);
+-        *r = __lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_4567, 16), 16), 15),
+-                             __lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_0123, 16), 16), 15));
+-        *g = __lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(_4567, 16), 15),
+-                             __lsx_vsat_w(__lsx_vsrai_w(_0123, 16), 15));
++        U16 _0123 = (U16)__lsx_vld(ptr, 0),
++            _4567 = (U16)__lsx_vld(ptr, 16);
++        *r = (U16)__lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_4567, 16), 16), 15),
++                                  __lsx_vsat_w(__lsx_vsrai_w(__lsx_vslli_w(_0123, 16), 16), 15));
++        *g = (U16)__lsx_vpickev_h(__lsx_vsat_w(__lsx_vsrai_w(_4567, 16), 15),
++                                  __lsx_vsat_w(__lsx_vsrai_w(_0123, 16), 15));
+     }
+     SI void store2(uint16_t* ptr, U16 r, U16 g) {
+-        auto _0123 = __lsx_vilvl_h(g, r),
+-             _4567 = __lsx_vilvh_h(g, r);
++        auto _0123 = __lsx_vilvl_h((__m128i)g, (__m128i)r),
++             _4567 = __lsx_vilvh_h((__m128i)g, (__m128i)r);
+         __lsx_vst(_0123, ptr, 0);
+         __lsx_vst(_4567, ptr, 16);
+     }
+@@ -1042,17 +1040,17 @@ namespace SK_OPTS_NS {
+              rg4567 = __lsx_vilvl_h(_57, _46),
+              ba4567 = __lsx_vilvh_h(_57, _46);
+
+-        *r = __lsx_vilvl_d(rg4567, rg0123);
+-        *g = __lsx_vilvh_d(rg4567, rg0123);
+-        *b = __lsx_vilvl_d(ba4567, ba0123);
+-        *a = __lsx_vilvh_d(ba4567, ba0123);
++        *r = (U16)__lsx_vilvl_d(rg4567, rg0123);
++        *g = (U16)__lsx_vilvh_d(rg4567, rg0123);
++        *b = (U16)__lsx_vilvl_d(ba4567, ba0123);
++        *a = (U16)__lsx_vilvh_d(ba4567, ba0123);
+     }
+
+     SI void store4(uint16_t* ptr, U16 r, U16 g, U16 b, U16 a) {
+-        auto rg0123 = __lsx_vilvl_h(g, r), // r0 g0 r1 g1 r2 g2 r3 g3
+-             rg4567 = __lsx_vilvh_h(g, r), // r4 g4 r5 g5 r6 g6 r7 g7
+-             ba0123 = __lsx_vilvl_h(a, b),
+-             ba4567 = __lsx_vilvh_h(a, b);
++        auto rg0123 = __lsx_vilvl_h((__m128i)g, (__m128i)r), // r0 g0 r1 g1 r2 g2 r3 g3
++             rg4567 = __lsx_vilvh_h((__m128i)g, (__m128i)r), // r4 g4 r5 g5 r6 g6 r7 g7
++             ba0123 = __lsx_vilvl_h((__m128i)a, (__m128i)b),
++             ba4567 = __lsx_vilvh_h((__m128i)a, (__m128i)b);
+
+         auto _01 =__lsx_vilvl_w(ba0123, rg0123),
+              _23 =__lsx_vilvh_w(ba0123, rg0123),
+@@ -1135,29 +1133,29 @@ namespace SK_OPTS_NS {
+                                                   sk_bit_cast<__m128i>(c)));
+     }
+
+-    SI F min(F a, F b) { return __lsx_vfmin_s(a,b); }
+-    SI F max(F a, F b) { return __lsx_vfmax_s(a,b); }
+-    SI I32 min(I32 a, I32 b) { return __lsx_vmin_w(a,b); }
+-    SI U32 min(U32 a, U32 b) { return __lsx_vmin_wu(a,b); }
+-    SI I32 max(I32 a, I32 b) { return __lsx_vmax_w(a,b); }
+-    SI U32 max(U32 a, U32 b) { return __lsx_vmax_wu(a,b); }
++    SI F min(F a, F b) { return (F)__lsx_vfmin_s((__m128)a, (__m128)b); }
++    SI F max(F a, F b) { return (F)__lsx_vfmax_s((__m128)a, (__m128)b); }
++    SI I32 min(I32 a, I32 b) { return (I32)__lsx_vmin_w((__m128i)a, (__m128i)b); }
++    SI U32 min(U32 a, U32 b) { return (U32)__lsx_vmin_wu((__m128i)a, (__m128i)b); }
++    SI I32 max(I32 a, I32 b) { return (I32)__lsx_vmax_w((__m128i)a, (__m128i)b); }
++    SI U32 max(U32 a, U32 b) { return (U32)__lsx_vmax_wu((__m128i)a, (__m128i)b); }
+
+-    SI F mad(F f, F m, F a) { return __lsx_vfmadd_s(f, m, a); }
+-    SI F nmad(F f, F m, F a) { return __lsx_vfmadd_s(-f, m, a); }
+-    SI F abs_(F v) { return (F)__lsx_vand_v((I32)v, (I32)(0-v)); }
++    SI F mad(F f, F m, F a) { return (F)__lsx_vfmadd_s((__m128)f, (__m128)m, (__m128)a); }
++    SI F nmad(F f, F m, F a) { return (F)__lsx_vfmadd_s((__m128)(-f), (__m128)m, (__m128)a); }
++    SI F abs_(F v) { return (F)__lsx_vand_v((__m128i)(I32)v, (__m128i)(I32)(0-v)); }
+     SI I32 abs_(I32 v) { return max(v, -v); }
+-    SI F rcp_approx (F v) { return __lsx_vfrecip_s(v); }
++    SI F rcp_approx (F v) { return (F)__lsx_vfrecip_s((__m128)v); }
+     SI F rcp_precise (F v) { F e = rcp_approx(v); return e * nmad(v, e, F() + 2.0f); }
+-    SI F rsqrt_approx (F v) { return __lsx_vfrsqrt_s(v); }
+-    SI F sqrt_(F v) { return __lsx_vfsqrt_s (v); }
++    SI F rsqrt_approx (F v) { return (F)__lsx_vfrsqrt_s((__m128)v); }
++    SI F sqrt_(F v) { return (F)__lsx_vfsqrt_s ((__m128)v); }
+
+     SI U32 iround(F v) {
+-        F t = F() + 0.5f;
+-        return __lsx_vftintrz_w_s(v + t); }
++        return (U32)__lsx_vftintrz_w_s(v + 0.5f);
++    }
+
+     SI U32 round(F v) {
+-        F t = F() + 0.5f;
+-        return __lsx_vftintrz_w_s(v + t); }
++        return (U32)__lsx_vftintrz_w_s(v + 0.5f);
++    }
+
+     SI U32 round(F v, F scale) {
+         F t = F() + 0.5f;
+@@ -1177,12 +1175,12 @@ namespace SK_OPTS_NS {
+     }
+
+     SI bool any(I32 c){
+-        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), c));
++        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), (__m128i)c));
+         return retv[0] != 0b0000;
+     }
+
+     SI bool all(I32 c){
+-        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), c));
++        v4i32 retv = (v4i32)__lsx_vmskltz_w(__lsx_vslt_wu(__lsx_vldi(0), (__m128i)c));
+         return retv[0] == 0b1111;
+     }
+
+@@ -1229,7 +1227,7 @@ namespace SK_OPTS_NS {
+     }
+
+     SI void store2(uint16_t* ptr, U16 r, U16 g) {
+-        U32 rg = __lsx_vilvl_h(widen_cast<__m128i>(g), widen_cast<__m128i>(r));
++        U32 rg = (U32)__lsx_vilvl_h(widen_cast<__m128i>(g), widen_cast<__m128i>(r));
+         __lsx_vst(rg, ptr, 0);
+     }
+
+@@ -3402,35 +3400,35 @@ SI void gradient_lookup(const SkRasterPipeline_GradientCtx* c, U32 idx, F t,
+     } else
+ #elif defined(SKRP_CPU_LASX)
+     if (c->stopCount <= 8) {
+-        fr = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[0], 0), idx);
+-        br = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[0], 0), idx);
+-        fg = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[1], 0), idx);
+-        bg = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[1], 0), idx);
+-        fb = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[2], 0), idx);
+-        bb = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[2], 0), idx);
+-        fa = (__m256)__lasx_xvperm_w(__lasx_xvld(c->fs[3], 0), idx);
+-        ba = (__m256)__lasx_xvperm_w(__lasx_xvld(c->bs[3], 0), idx);
++        fr = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[0], 0), (__m256i)idx);
++        br = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[0], 0), (__m256i)idx);
++        fg = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[1], 0), (__m256i)idx);
++        bg = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[1], 0), (__m256i)idx);
++        fb = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[2], 0), (__m256i)idx);
++        bb = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[2], 0), (__m256i)idx);
++        fa = (F)__lasx_xvperm_w(__lasx_xvld(c->fs[3], 0), (__m256i)idx);
++        ba = (F)__lasx_xvperm_w(__lasx_xvld(c->bs[3], 0), (__m256i)idx);
+     } else
+ #elif defined(SKRP_CPU_LSX)
+     if (c->stopCount <= 4) {
+         __m128i zero = __lsx_vldi(0);
+-        fr = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[0], 0));
+-        br = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[0], 0));
+-        fg = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[1], 0));
+-        bg = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[1], 0));
+-        fb = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[2], 0));
+-        bb = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[2], 0));
+-        fa = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->fs[3], 0));
+-        ba = (__m128)__lsx_vshuf_w(idx, zero, __lsx_vld(c->bs[3], 0));
++        fr = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[0], 0));
++        br = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[0], 0));
++        fg = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[1], 0));
++        bg = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[1], 0));
++        fb = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[2], 0));
++        bb = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[2], 0));
++        fa = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->fs[3], 0));
++        ba = (F)__lsx_vshuf_w((__m128i)idx, zero, __lsx_vld(c->bs[3], 0));
+     } else
+ #endif
+     {
+ #if defined(SKRP_CPU_LSX)
+         // This can reduce some vpickve2gr instructions.
+-        int i0 = __lsx_vpickve2gr_w(idx, 0);
+-        int i1 = __lsx_vpickve2gr_w(idx, 1);
+-        int i2 = __lsx_vpickve2gr_w(idx, 2);
+-        int i3 = __lsx_vpickve2gr_w(idx, 3);
++        int i0 = __lsx_vpickve2gr_w((__m128i)idx, 0);
++        int i1 = __lsx_vpickve2gr_w((__m128i)idx, 1);
++        int i2 = __lsx_vpickve2gr_w((__m128i)idx, 2);
++        int i3 = __lsx_vpickve2gr_w((__m128i)idx, 3);
+         fr = gather((int *)c->fs[0], i0, i1, i2, i3);
+         br = gather((int *)c->bs[0], i0, i1, i2, i3);
+         fg = gather((int *)c->fs[1], i0, i1, i2, i3);
+@@ -5942,7 +5940,7 @@ SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
+         split(v, &_02,&_13);
+         __m256i tmp0 = __lasx_xvsat_wu(_02, 15);
+         __m256i tmp1 = __lasx_xvsat_wu(_13, 15);
+-        return __lasx_xvpickev_h(tmp1, tmp0);
++        return (U16)__lasx_xvpickev_h(tmp1, tmp0);
+     };
+ #elif defined(SKRP_CPU_LSX)
+     __m128i _01, _23, rg, ba;
+@@ -5952,10 +5950,10 @@ SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
+
+     __m128i mask_00ff = __lsx_vreplgr2vr_h(0xff);
+
+-    *r = __lsx_vand_v(rg, mask_00ff);
+-    *g = __lsx_vsrli_h(rg, 8);
+-    *b = __lsx_vand_v(ba, mask_00ff);
+-    *a = __lsx_vsrli_h(ba, 8);
++    *r = (U16)__lsx_vand_v(rg, mask_00ff);
++    *g = (U16)__lsx_vsrli_h(rg, 8);
++    *b = (U16)__lsx_vand_v(ba, mask_00ff);
++    *a = (U16)__lsx_vsrli_h(ba, 8);
+ #else
+     auto cast_U16 = [](U32 v) -> U16 {
+         return cast<U16>(v);
+@@ -5983,26 +5981,26 @@ SI void load_8888_(const uint32_t* ptr, U16* r, U16* g, U16* b, U16* a) {
+ SI void store_8888_(uint32_t* ptr, U16 r, U16 g, U16 b, U16 a) {
+ #if defined(SKRP_CPU_LSX)
+     __m128i mask = __lsx_vreplgr2vr_h(255);
+-    r = __lsx_vmin_hu(r, mask);
+-    g = __lsx_vmin_hu(g, mask);
+-    b = __lsx_vmin_hu(b, mask);
+-    a = __lsx_vmin_hu(a, mask);
++    r = (U16)__lsx_vmin_hu((__m128i)r, mask);
++    g = (U16)__lsx_vmin_hu((__m128i)g, mask);
++    b = (U16)__lsx_vmin_hu((__m128i)b, mask);
++    a = (U16)__lsx_vmin_hu((__m128i)a, mask);
+
+-    g = __lsx_vslli_h(g, 8);
++    g = (U16)__lsx_vslli_h(g, 8);
+     r = r | g;
+-    a = __lsx_vslli_h(a, 8);
++    a = (U16)__lsx_vslli_h(a, 8);
+     a = a | b;
+
+     __m128i r_lo = __lsx_vsllwil_wu_hu(r, 0);
+-    __m128i r_hi = __lsx_vexth_wu_hu(r);
++    __m128i r_hi = __lsx_vexth_wu_hu((__m128i)r);
+     __m128i a_lo = __lsx_vsllwil_wu_hu(a, 0);
+-    __m128i a_hi = __lsx_vexth_wu_hu(a);
++    __m128i a_hi = __lsx_vexth_wu_hu((__m128i)a);
+
+     a_lo = __lsx_vslli_w(a_lo, 16);
+     a_hi = __lsx_vslli_w(a_hi, 16);
+
+-    r = r_lo | a_lo;
+-    a = r_hi | a_hi;
++    r = (U16)(r_lo | a_lo);
++    a = (U16)(r_hi | a_hi);
+     store(ptr, join<U32>(r, a));
+ #else
+     r = min(r, 255);
+@@ -6568,8 +6566,8 @@ STAGE_GP(bilerp_clamp_8888, const SkRasterPipeline_GatherCtx* ctx) {
+     qy_lo = __lsx_vxor_v(qy_lo, temp);
+     qy_hi = __lsx_vxor_v(qy_hi, temp);
+
+-    I16 tx = __lsx_vpickev_h(qx_hi, qx_lo);
+-    I16 ty = __lsx_vpickev_h(qy_hi, qy_lo);
++    I16 tx = (I16)__lsx_vpickev_h(qx_hi, qx_lo);
++    I16 ty = (I16)__lsx_vpickev_h(qy_hi, qy_lo);
+ #else
+     I16 tx = cast<I16>(qx ^ 0x8000),
+         ty = cast<I16>(qy ^ 0x8000);
+--
+2.48.1
+
diff --git a/external/skia/UnpackedTarball_skia.mk b/external/skia/UnpackedTarball_skia.mk
index 60481be5de23..cc7f0dd29633 100644
--- a/external/skia/UnpackedTarball_skia.mk
+++ b/external/skia/UnpackedTarball_skia.mk
@@ -41,6 +41,10 @@ skia_patches := \
 	help-msvc-analyzer.patch \
 	always_inline_and_multiversioning_conflict.patch.1 \
 	windows-define-conflict.patch.1 \
+	0001-loong64-fix-some-bugs-in-type-conversion-on-Loongarc.patch \
+	0002-loong64-Honor-existing-LASX-LSX-settings.patch \
+	0003-loong64-Fix-missing-rounding-in-loong64-scaled_mult-.patch \
+	0004-loong64-Fix-the-remaining-implicit-vector-casts.patch \
 
 $(eval $(call gb_UnpackedTarball_set_patchlevel,skia,1))
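A closing note on the pattern these backports share: with GCC's vector extensions, Skia's lane types (F, I32, U16, v4u32, ...) and the LoongArch builtin types (__m128, __m128i, __m256i) are distinct types of the same width, and without lax vector conversions (GCC's default for C++; the patches above were tested with GCC 14) every mixed-type intrinsic call needs an explicit cast. A minimal sketch of the rule, assuming a LoongArch64 GCC with LSX; the file name and the F alias are illustrative, not Skia's actual definitions:

    // lax_cast_demo.cpp -- hypothetical probe; compile with
    //   g++ -mlsx -fno-lax-vector-conversions -c lax_cast_demo.cpp
    #include <lsxintrin.h>

    // A 4 x float vector type: same 128-bit width as __m128, but a distinct type.
    typedef float F __attribute__((vector_size(16)));

    F vmin(F a, F b) {
        // return __lsx_vfmin_s(a, b);  // rejected: F -> __m128 is not implicit here
        return (F)__lsx_vfmin_s((__m128)a, (__m128)b);  // the cast pattern used throughout patch 0004
    }

These casts are pure reinterpretation (GCC vector casts never touch the lanes), so the patched code generates the same instructions as before; only the type checking changes. Patch 0003 is the one behavioural change in the set: the old __lsx_vmuh_h/__lsx_vslli_h pair computed ((a*b) >> 16) << 1, which truncates away the rounding bit, whereas the reference semantics shown in the #else branch round: (a*b + (1 << 14)) >> 15.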