Stone Chen: > To prepare for adding AVX2 functions for different block widths, change > VVCInterDSPContext to contain (*sad[6]) instead of (*sad). This also default > initializes the pointer array with the scalar function and the calling sites > to jump to the correct function based on block width. There's no change in > functionality. > --- > libavcodec/vvc/dsp.h | 2 +- > libavcodec/vvc/inter.c | 4 ++-- > libavcodec/vvc/inter_template.c | 5 ++++- > 3 files changed, 7 insertions(+), 4 deletions(-) > > diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h > index 9810ac314c..b06a3ef10e 100644 > --- a/libavcodec/vvc/dsp.h > +++ b/libavcodec/vvc/dsp.h > @@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext { > > void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t *src0, > int16_t *src1, int block_w, int block_h); > > - int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int > block_w, int block_h); > + int (*sad[6])(const int16_t *src0, const int16_t *src1, int dx, int dy, > int block_w, int block_h); > void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t > src_stride, int height, > intptr_t mx, intptr_t my, int width); > } VVCInterDSPContext; > diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c > index 4a8d1d866a..a68f4f9452 100644 > --- a/libavcodec/vvc/inter.c > +++ b/libavcodec/vvc/inter.c > @@ -742,7 +742,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField > *mvf, MvField *orig_mv, > fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, > mx, my, pred_w); > } > > - min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, > block_h); > + min_sad = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](tmp[L0], tmp[L1], > dx, dy, block_w, block_h); > min_sad -= min_sad >> 2; > sad[dy][dx] = min_sad; > > @@ -752,7 +752,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField > *mvf, MvField *orig_mv, > for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) { > for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) { > if (dx 
!= sr_range || dy != sr_range) { > - sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp, lc->tmp1, > dx, dy, block_w, block_h); > + sad[dy][dx] = fc->vvcdsp.inter.sad[av_log2(block_w) - > 2](lc->tmp, lc->tmp1, dx, dy, block_w, block_h); > if (sad[dy][dx] < min_sad) { > min_sad = sad[dy][dx]; > min_dx = dx; > diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c > index e2fbfd4fc0..545e8dd184 100644 > --- a/libavcodec/vvc/inter_template.c > +++ b/libavcodec/vvc/inter_template.c > @@ -458,7 +458,10 @@ static void > FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter) > inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); > inter->apply_bdof = FUNC(apply_bdof); > inter->prof_grad_filter = FUNC(prof_grad_filter); > - inter->sad = vvc_sad; > + > + for (int i = 0; i < FF_ARRAY_ELEMS(inter->sad); i++) { > + inter->sad[i] = vvc_sad; > + } > } > > #undef FUNCS
Why is the dispatch on block width not performed inside your AVX2 implementation? - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".