On Tue, Feb 17, 2015 at 04:31:07PM +0800, Zhaoxiu Zeng wrote: > From bf2964c07fde48c633ca4d8276282010e7c7f084 Mon Sep 17 00:00:00 2001 > From: "zhaoxiu.zeng" <zhaoxiu.z...@gmail.com> > Date: Tue, 17 Feb 2015 16:03:47 +0800 > Subject: [PATCH 1/1] avcodec: change type of ff_square_tab from uint32_t to > uint16_t > > uint16_t is big enough except the first element, but the first element > is never used. > This also macroize nsse function, and use ff_square_tab when possible. > > Signed-off-by: zhaoxiu.zeng <zhaoxiu.z...@gmail.com> > --- > libavcodec/me_cmp.c | 94 > ++++++++++++++++++---------------------------- > libavcodec/me_cmp.h | 3 +- > libavcodec/mpegvideo_enc.c | 2 +- > libavcodec/snowenc.c | 2 +- > 4 files changed, 41 insertions(+), 60 deletions(-) > [...] > -static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, > - ptrdiff_t stride, int h) > -{ > - int score1 = 0, score2 = 0, x, y; > - > - for (y = 0; y < h; y++) { > - for (x = 0; x < 16; x++) > - score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); > - if (y + 1 < h) { > - for (x = 0; x < 15; x++) > - score2 += FFABS(s1[x] - s1[x + stride] - > - s1[x + 1] + s1[x + stride + 1]) - > - FFABS(s2[x] - s2[x + stride] - > - s2[x + 1] + s2[x + stride + 1]); > - } > - s1 += stride; > - s2 += stride; > - } > - > - if (c) > - return score1 + FFABS(score2) * c->avctx->nsse_weight; > - else > - return score1 + FFABS(score2) * 8; > -} > - > -static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, > - ptrdiff_t stride, int h) > -{ > - int score1 = 0, score2 = 0, x, y; > - > - for (y = 0; y < h; y++) { > - for (x = 0; x < 8; x++) > - score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); > - if (y + 1 < h) { > - for (x = 0; x < 7; x++) > - score2 += FFABS(s1[x] - s1[x + stride] - > - s1[x + 1] + s1[x + stride + 1]) - > - FFABS(s2[x] - s2[x + stride] - > - s2[x + 1] + s2[x + stride + 1]); > - } > - s1 += stride; > - s2 += stride; > - } > - > - if (c) > - return score1 + FFABS(score2) * c->avctx->nsse_weight; > - else > - 
return score1 + FFABS(score2) * 8; > -} > +#define NSSE(size) \ > +static int nsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \ > + ptrdiff_t stride, int h) \ > +{ \ > + int score1 = 0, score2 = 0, x, y; \ > + uint16_t *sq = ff_square_tab + 256; \ > + \ > + for (y = 0; y < h; y++) { \ > + for (x = 0; x < size; x++) \ > + score1 += sq[s1[x] - s2[x]]; \ > + if (y + 1 < h) { \ > + for (x = 0; x < size - 1; x++) \ > + score2 += FFABS(s1[x] - s1[x + stride] - \ > + s1[x + 1] + s1[x + stride + 1]) - \ > + FFABS(s2[x] - s2[x + stride] - \ > + s2[x + 1] + s2[x + stride + 1]); \ > + } \ > + s1 += stride; \ > + s2 += stride; \ > + } \ > + \ > + if (c) \ > + return score1 + FFABS(score2) * c->avctx->nsse_weight; \ > + else \ > + return score1 + FFABS(score2) * 8; \ > +} > +NSSE(8) > +NSSE(16) >
You can do better than this:

static inline int nsse_base(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                            ptrdiff_t stride, int h, const int size)
{
    int score1 = 0, score2 = 0, x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < size; x++)
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        if (y + 1 < h) {
            for (x = 0; x < size - 1; x++)
                score2 += FFABS(s1[x] - s1[x + stride] -
                                s1[x + 1] + s1[x + stride + 1]) -
                          FFABS(s2[x] - s2[x + stride] -
                                s2[x + 1] + s2[x + stride + 1]);
        }
        s1 += stride;
        s2 += stride;
    }

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}

#define DEFINE_NSSE_FUNC(s)                                             \
static int nsse##s##_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,     \
                       ptrdiff_t stride, int h)                         \
{                                                                       \
    return nsse_base(c, s1, s2, stride, h, s);                          \
}

DEFINE_NSSE_FUNC(8)
DEFINE_NSSE_FUNC(16)

(untested)

[...]

Way less macro code that way, so easier to maintain, and with the same result.

-- Clément B.
pgpgdpdqVoZFs.pgp
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel