Apr 4, 2024, 04:58 by andreas.rheinha...@outlook.com: > Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> > --- > libswscale/ppc/swscale_ppc_template.c | 107 +++++++++++++------------- > 1 file changed, 53 insertions(+), 54 deletions(-) > > diff --git a/libswscale/ppc/swscale_ppc_template.c > b/libswscale/ppc/swscale_ppc_template.c > index e9abd33cbf..3c2addd4a4 100644 > --- a/libswscale/ppc/swscale_ppc_template.c > +++ b/libswscale/ppc/swscale_ppc_template.c > @@ -101,70 +101,69 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t > *dst, int dstW, > const uint8_t *src, const int16_t *filter, > const int32_t *filterPos, int filterSize) > { > - register int i; > LOCAL_ALIGNED(16, int, tempo, [4]); > > - switch (filterSize) { > - case 4: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > - > - vector unsigned char src_vF = unaligned_load(srcPos, src); > - vector signed short src_v, filter_v; > - vector signed int val_vEven, val_s; > - src_v = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEH((vector unsigned > char)vzero, src_vF)); > - // now put our elements in the even slots > - src_v = vec_mergeh(src_v, (vector signed short)vzero); > - GET_VF4(i, filter_v, filter); > - val_vEven = vec_mule(src_v, filter_v); > - val_s = vec_sums(val_vEven, vzero); > - vec_st(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + switch (filterSize) { > + case 4: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > + > + vector unsigned char src_vF = unaligned_load(srcPos, src); > + vector signed short src_v, filter_v; > + vector signed int val_vEven, val_s; > + src_v = // vec_unpackh sign-extends... 
> + (vector signed short)(VEC_MERGEH((vector unsigned > char)vzero, src_vF)); > + // now put our elements in the even slots > + src_v = vec_mergeh(src_v, (vector signed short)vzero); > + GET_VF4(i, filter_v, filter); > + val_vEven = vec_mule(src_v, filter_v); > + val_s = vec_sums(val_vEven, vzero); > + vec_st(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > - case 8: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > - vector unsigned char src_vF, av_unused src_v0, av_unused > src_v1; > - vector unsigned char av_unused permS; > - vector signed short src_v, filter_v; > - vector signed int val_v, val_s; > - FIRST_LOAD(src_v0, srcPos, src, permS); > - LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); > - src_v = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEH((vector unsigned > char)vzero, src_vF)); > - filter_v = vec_ld(i << 4, filter); > - val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); > - val_s = vec_sums(val_v, vzero); > - vec_st(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + case 8: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > + vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; > + vector unsigned char av_unused permS; > + vector signed short src_v, filter_v; > + vector signed int val_v, val_s; > + FIRST_LOAD(src_v0, srcPos, src, permS); > + LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); > + src_v = // vec_unpackh sign-extends... 
> + (vector signed short)(VEC_MERGEH((vector unsigned > char)vzero, src_vF)); > + filter_v = vec_ld(i << 4, filter); > + val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); > + val_s = vec_sums(val_v, vzero); > + vec_st(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > > - case 16: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > + case 16: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > > - vector unsigned char src_vF = unaligned_load(srcPos, src); > - vector signed short src_vA = // vec_unpackh sign-extends... > - (vector signed > short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > - vector signed short src_vB = // vec_unpackh sign-extends... > - (vector signed > short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); > - vector signed short filter_v0 = vec_ld(i << 5, filter); > - vector signed short filter_v1 = vec_ld((i << 5) + 16, > filter); > + vector unsigned char src_vF = unaligned_load(srcPos, src); > + vector signed short src_vA = // vec_unpackh sign-extends... > + (vector signed > short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > + vector signed short src_vB = // vec_unpackh sign-extends... 
> + (vector signed > short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); > + vector signed short filter_v0 = vec_ld(i << 5, filter); > + vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); > > - vector signed int val_acc = vec_msums(src_vA, filter_v0, > (vector signed int)vzero); > - vector signed int val_v = vec_msums(src_vB, filter_v1, > val_acc); > + vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector > signed int)vzero); > + vector signed int val_v = vec_msums(src_vB, filter_v1, > val_acc); > > - vector signed int val_s = vec_sums(val_v, vzero); > + vector signed int val_s = vec_sums(val_v, vzero); > > - VEC_ST(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + VEC_ST(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > > - default: > + default: > for (register int i = 0; i < dstW; i++) { > register int j; > register int srcPos = filterPos[i]; > @@ -174,5 +173,5 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t > *dst, int dstW, > dst[i] = FFMIN(val >> 7, (1 << 15) - 1); > } > break; > - } > + } > } >
Patchset LGTM. I missed these when I previously removed the broken PPC code. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".