The failure occurs because the function yuv2rgb_1_c_template was modified in commit 095f8038fa9180842cd38d4d61c7c47a02aad9ed, and the corresponding functional test was added in commit c601bb8df5ae896061c42dd0e23cea8fba938ecb. The LoongArch implementation was not updated to match the modified C code, which results in the error.
Signed-off-by: yuanhecai <yuanhe...@loongson.cn> --- libswscale/loongarch/output_lasx.c | 45 ++++++---------------- libswscale/loongarch/output_lsx.c | 61 +++++++++++------------------- 2 files changed, 35 insertions(+), 71 deletions(-) diff --git a/libswscale/loongarch/output_lasx.c b/libswscale/loongarch/output_lasx.c index 21d0a501b0..801cc70bd2 100644 --- a/libswscale/loongarch/output_lasx.c +++ b/libswscale/loongarch/output_lasx.c @@ -637,7 +637,7 @@ yuv2rgb_1_template_lasx(SwsInternal *c, const int16_t *buf0, int len_count = (dstW + 1) >> 1; const void *r, *g, *b; - if (uvalpha < 2048) { + if (uvalpha == 0) { int count = 0; int head = YUVRGB_TABLE_HEADROOM; __m256i headroom = __lasx_xvreplgr2vr_h(head); @@ -706,14 +706,17 @@ yuv2rgb_1_template_lasx(SwsInternal *c, const int16_t *buf0, const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; int count = 0; int HEADROOM = YUVRGB_TABLE_HEADROOM; - __m256i headroom = __lasx_xvreplgr2vr_w(HEADROOM); + int uvalpha1 = 4096 - uvalpha; + __m256i headroom = __lasx_xvreplgr2vr_w(HEADROOM); + __m256i uvalpha_tmp1 = __lasx_xvreplgr2vr_h(uvalpha1); + __m256i uvalpha_tmp = __lasx_xvreplgr2vr_h(uvalpha); for (i = 0; i < len; i += 16) { int Y1, Y2, U, V; int i_dex = i << 1; int c_dex = count << 1; __m256i src_y, src_u0, src_v0, src_u1, src_v1; - __m256i y_l, y_h, u, v; + __m256i y_l, y_h, u, v, u_ev, v_od; DUP4_ARG2(__lasx_xvldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex, ubuf1, c_dex, src_y, src_u0, src_v0, src_u1); @@ -721,12 +724,14 @@ yuv2rgb_1_template_lasx(SwsInternal *c, const int16_t *buf0, src_u0 = __lasx_xvpermi_q(src_u0, src_v0, 0x02); src_u1 = __lasx_xvpermi_q(src_u1, src_v1, 0x02); src_y = __lasx_xvsrari_h(src_y, 7); - u = __lasx_xvaddwev_w_h(src_u0, src_u1); - v = __lasx_xvaddwod_w_h(src_u0, src_u1); + u_ev = __lasx_xvmulwev_w_h(src_u0, uvalpha_tmp1); + v_od = __lasx_xvmulwod_w_h(src_u0, uvalpha_tmp1); + u = __lasx_xvmaddwev_w_h(u_ev, src_u1, uvalpha_tmp); + v = __lasx_xvmaddwod_w_h(v_od, src_u1, uvalpha_tmp); y_l = 
__lasx_xvsllwil_w_h(src_y, 0); y_h = __lasx_xvexth_w_h(src_y); - u = __lasx_xvsrari_w(u, 8); - v = __lasx_xvsrari_w(v, 8); + u = __lasx_xvsrari_w(u, 19); + v = __lasx_xvsrari_w(v, 19); u = __lasx_xvadd_w(u, headroom); v = __lasx_xvadd_w(v, headroom); WRITE_YUV2RGB(y_l, y_l, u, u, 0, 1, 0, 4); @@ -738,32 +743,6 @@ yuv2rgb_1_template_lasx(SwsInternal *c, const int16_t *buf0, WRITE_YUV2RGB(y_h, y_h, u, u, 4, 5, 3, 7); WRITE_YUV2RGB(y_h, y_h, v, v, 6, 7, 3, 7); } - if (dstW - i >= 8) { - int Y1, Y2, U, V; - int i_dex = i << 1; - __m256i src_y, src_u0, src_v0, src_u1, src_v1; - __m256i uv; - - src_y = __lasx_xvldx(buf0, i_dex); - src_u0 = __lasx_xvldrepl_d((ubuf0 + count), 0); - src_v0 = __lasx_xvldrepl_d((vbuf0 + count), 0); - src_u1 = __lasx_xvldrepl_d((ubuf1 + count), 0); - src_v1 = __lasx_xvldrepl_d((vbuf1 + count), 0); - - src_u0 = __lasx_xvilvl_h(src_u1, src_u0); - src_v0 = __lasx_xvilvl_h(src_v1, src_v0); - src_u0 = __lasx_xvpermi_q(src_u0, src_v0, 0x02); - src_y = __lasx_xvsrari_h(src_y, 7); - uv = __lasx_xvhaddw_w_h(src_u0, src_u0); - src_y = __lasx_vext2xv_w_h(src_y); - uv = __lasx_xvsrari_w(uv, 8); - uv = __lasx_xvadd_w(uv, headroom); - WRITE_YUV2RGB(src_y, src_y, uv, uv, 0, 1, 0, 4); - WRITE_YUV2RGB(src_y, src_y, uv, uv, 2, 3, 1, 5); - WRITE_YUV2RGB(src_y, src_y, uv, uv, 4, 5, 2, 6); - WRITE_YUV2RGB(src_y, src_y, uv, uv, 6, 7, 3, 7); - i += 8; - } for (; count < len_count; count++) { int Y1 = (buf0[count * 2 ] + 64) >> 7; int Y2 = (buf0[count * 2 + 1] + 64) >> 7; diff --git a/libswscale/loongarch/output_lsx.c b/libswscale/loongarch/output_lsx.c index 24e6de5535..6c8e0c816c 100644 --- a/libswscale/loongarch/output_lsx.c +++ b/libswscale/loongarch/output_lsx.c @@ -595,7 +595,7 @@ yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0, int len_count = (dstW + 1) >> 1; const void *r, *g, *b; - if (uvalpha < 2048) { + if (uvalpha == 0) { int count = 0; int head = YUVRGB_TABLE_HEADROOM; __m128i headroom = __lsx_vreplgr2vr_h(head); @@ -659,61 +659,46 @@ 
yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0, const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; int count = 0; int HEADROOM = YUVRGB_TABLE_HEADROOM; + int uvalpha1 = 4096 - uvalpha; __m128i headroom = __lsx_vreplgr2vr_w(HEADROOM); + __m128i uvalpha_tmp1 = __lsx_vreplgr2vr_h(uvalpha1); + __m128i uvalpha_tmp = __lsx_vreplgr2vr_h(uvalpha); for (i = 0; i < len; i += 8) { int Y1, Y2, U, V; int i_dex = i << 1; int c_dex = count << 1; __m128i src_y, src_u0, src_v0, src_u1, src_v1; - __m128i y_l, y_h, u1, u2, v1, v2; + __m128i y_l, y_h, u1, u2, v1, v2, u_ev, v_od; DUP4_ARG2(__lsx_vldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex, ubuf1, c_dex, src_y, src_u0, src_v0, src_u1); src_v1 = __lsx_vldx(vbuf1, c_dex); src_y = __lsx_vsrari_h(src_y, 7); - u1 = __lsx_vaddwev_w_h(src_u0, src_u1); - v1 = __lsx_vaddwod_w_h(src_u0, src_u1); - u2 = __lsx_vaddwev_w_h(src_v0, src_v1); - v2 = __lsx_vaddwod_w_h(src_v0, src_v1); + + u_ev = __lsx_vmulwev_w_h(src_u0, uvalpha_tmp1); + v_od = __lsx_vmulwod_w_h(src_u0, uvalpha_tmp1); + u1 = __lsx_vmaddwev_w_h(u_ev, src_u1, uvalpha_tmp); + v1 = __lsx_vmaddwod_w_h(v_od, src_u1, uvalpha_tmp); + u_ev = __lsx_vmulwev_w_h(src_v0, uvalpha_tmp1); + v_od = __lsx_vmulwod_w_h(src_v0, uvalpha_tmp1); + u2 = __lsx_vmaddwev_w_h(u_ev, src_v1, uvalpha_tmp); + v2 = __lsx_vmaddwod_w_h(v_od, src_v1, uvalpha_tmp); + y_l = __lsx_vsllwil_w_h(src_y, 0); y_h = __lsx_vexth_w_h(src_y); - u1 = __lsx_vsrari_w(u1, 8); - v1 = __lsx_vsrari_w(v1, 8); - u2 = __lsx_vsrari_w(u2, 8); - v2 = __lsx_vsrari_w(v2, 8); + u1 = __lsx_vsrari_w(u1, 19); + v1 = __lsx_vsrari_w(v1, 19); + u2 = __lsx_vsrari_w(u2, 19); + v2 = __lsx_vsrari_w(v2, 19); u1 = __lsx_vadd_w(u1, headroom); v1 = __lsx_vadd_w(v1, headroom); u2 = __lsx_vadd_w(u2, headroom); v2 = __lsx_vadd_w(v2, headroom); - WRITE_YUV2RGB_LSX(y_l, y_l, u1, v1, 0, 1, 0, 0); - WRITE_YUV2RGB_LSX(y_l, y_l, u2, v2, 2, 3, 0, 0); - WRITE_YUV2RGB_LSX(y_h, y_h, u1, v1, 0, 1, 1, 1); - WRITE_YUV2RGB_LSX(y_h, y_h, u2, v2, 2, 3, 1, 1); - } - if 
(dstW - i >= 4) { - int Y1, Y2, U, V; - int i_dex = i << 1; - __m128i src_y, src_u0, src_v0, src_u1, src_v1; - __m128i uv; - - src_y = __lsx_vldx(buf0, i_dex); - src_u0 = __lsx_vldrepl_d((ubuf0 + count), 0); - src_v0 = __lsx_vldrepl_d((vbuf0 + count), 0); - src_u1 = __lsx_vldrepl_d((ubuf1 + count), 0); - src_v1 = __lsx_vldrepl_d((vbuf1 + count), 0); - - src_u0 = __lsx_vilvl_h(src_u1, src_u0); - src_v0 = __lsx_vilvl_h(src_v1, src_v0); - src_y = __lsx_vsrari_h(src_y, 7); - src_y = __lsx_vsllwil_w_h(src_y, 0); - uv = __lsx_vilvl_h(src_v0, src_u0); - uv = __lsx_vhaddw_w_h(uv, uv); - uv = __lsx_vsrari_w(uv, 8); - uv = __lsx_vadd_w(uv, headroom); - WRITE_YUV2RGB_LSX(src_y, src_y, uv, uv, 0, 1, 0, 1); - WRITE_YUV2RGB_LSX(src_y, src_y, uv, uv, 2, 3, 2, 3); - i += 4; + WRITE_YUV2RGB_LSX(y_l, y_l, u1, u2, 0, 1, 0, 0); + WRITE_YUV2RGB_LSX(y_l, y_l, v1, v2, 2, 3, 0, 0); + WRITE_YUV2RGB_LSX(y_h, y_h, u1, u2, 0, 1, 1, 1); + WRITE_YUV2RGB_LSX(y_h, y_h, v1, v2, 2, 3, 1, 1); } for (; count < len_count; count++) { int Y1 = (buf0[count * 2 ] + 64) >> 7; -- 2.20.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".