On Wed, Dec 27, 2017 at 01:11:56AM +0000, Kieran Kunhya wrote: > For MPEG-4 Simple Studio Profile, I need to be able to support int32_t > input coefficients to the IDCT functions. I have attempted to implement this > with the attached patch. > Any comments would be appreciated, I'm pretty sure it is not optimal as-is. > > Regards, > Kieran Kunhya
> bit_depth_template.c | 17 ++++++++++++++++- > idctdsp.c | 18 +++++++++--------- > me_cmp.c | 2 +- > simple_idct.c | 15 ++++++++++++--- > simple_idct.h | 24 ++++++++++++++---------- > simple_idct_template.c | 43 ++++++++++++++++++++++++++++--------------- > vc1.c | 4 ++-- > 7 files changed, 82 insertions(+), 41 deletions(-) > b69afd1419eafb71e999874a220369b08c01d931 > 0001-simple_idct-Template-functions-to-support-an-input-b.patch > From 9675ff0714df15e433dbe78d6e40c2430c21b519 Mon Sep 17 00:00:00 2001 > From: Kieran Kunhya <kie...@kunhya.com> > Date: Wed, 27 Dec 2017 01:08:39 +0000 > Subject: [PATCH] simple_idct: Template functions to support an input bitdepth > parameter > > --- > libavcodec/bit_depth_template.c | 17 +++++++++++++++- > libavcodec/idctdsp.c | 18 ++++++++-------- > libavcodec/me_cmp.c | 2 +- > libavcodec/simple_idct.c | 15 +++++++++++--- > libavcodec/simple_idct.h | 24 +++++++++++++--------- > libavcodec/simple_idct_template.c | 43 > +++++++++++++++++++++++++-------------- > libavcodec/vc1.c | 4 ++-- > 7 files changed, 82 insertions(+), 41 deletions(-) > > diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c > index 8018489..bd7237f 100644 > --- a/libavcodec/bit_depth_template.c > +++ b/libavcodec/bit_depth_template.c > @@ -29,6 +29,7 @@ > # undef pixel2 > # undef pixel4 > # undef dctcoef > +# undef idctin > # undef INIT_CLIP > # undef no_rnd_avg_pixel4 > # undef rnd_avg_pixel4 > @@ -53,6 +54,16 @@ > # define pixel4 uint64_t > # define dctcoef int32_t > > +#ifdef IN_IDCT_DEPTH > +#if IN_IDCT_DEPTH == 32 > +# define idctin int32_t > +#else > +# define idctin int16_t > +#endif > +#else > +# define idctin int16_t > +#endif > + > # define INIT_CLIP > # define no_rnd_avg_pixel4 no_rnd_avg64 > # define rnd_avg_pixel4 rnd_avg64 > @@ -71,6 +82,7 @@ > # define pixel2 uint16_t > # define pixel4 uint32_t > # define dctcoef int16_t > +# define idctin int16_t > > # define INIT_CLIP > # define no_rnd_avg_pixel4 no_rnd_avg32 > @@ -87,7 
+99,10 @@ > # define CLIP(a) av_clip_uint8(a) > #endif > > -#define FUNC3(a, b, c) a ## _ ## b ## c > +#define FUNC3(a, b, c) a ## _ ## b ## c > #define FUNC2(a, b, c) FUNC3(a, b, c) > #define FUNC(a) FUNC2(a, BIT_DEPTH,) > #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c) > +#define FUNC4(a, b, c) a ## _ ## b ## _ ## c > +#define FUNC5(a, b, c) FUNC4(a, b, c) > +#define FUNC6(a) FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH) > \ No newline at end of file > diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c > index 0ff74d8..16703aa 100644 > --- a/libavcodec/idctdsp.c > +++ b/libavcodec/idctdsp.c > @@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, > AVCodecContext *avctx) > c->perm_type = FF_IDCT_PERM_NONE; > } else { > if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample > == 9) { > - c->idct_put = ff_simple_idct_put_10; > - c->idct_add = ff_simple_idct_add_10; > - c->idct = ff_simple_idct_10; > + c->idct_put = ff_simple_idct_put_16_10; > + c->idct_add = ff_simple_idct_add_16_10; > + c->idct = ff_simple_idct_16_10; please call the functions ff_simple_idct_int16_10bit or something that makes it clear what the 2 numbers mean. [...] 
> diff --git a/libavcodec/simple_idct_template.c > b/libavcodec/simple_idct_template.c > index f532313..8d60b50 100644 > --- a/libavcodec/simple_idct_template.c > +++ b/libavcodec/simple_idct_template.c > @@ -77,6 +77,10 @@ > #define ROW_SHIFT 13 > #define COL_SHIFT 18 > #define DC_SHIFT 1 > +# elif IN_IDCT_DEPTH == 32 > +#define ROW_SHIFT 13 > +#define COL_SHIFT 21 > +#define DC_SHIFT 2 > # else > #define ROW_SHIFT 12 > #define COL_SHIFT 19 > @@ -109,11 +113,12 @@ > #ifdef EXTRA_SHIFT > static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int > extra_shift) > #else > -static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) > +static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift) > #endif > { > SUINT a0, a1, a2, a3, b0, b1, b2, b3; > > +#if IN_IDCT_DEPTH == 16 > #if HAVE_FAST_64BIT > #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) > if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) { > @@ -148,6 +153,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int > extra_shift) > return; > } > #endif > +#endif > > a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); > a1 = a0; > @@ -168,7 +174,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int > extra_shift) > b3 = MUL(W7, row[1]); > MAC(b3, -W5, row[3]); > > +#if IN_IDCT_DEPTH == 32 > + if (1) { Is that faster than checking rows 4-7 for 0, given the sparse matrices that occur in video data? [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Answering whether a program halts or runs forever is, on a Turing machine, in general impossible (Turing's halting problem). On any real computer, always possible as a real computer has a finite number of states N, and will either halt in less than N cycles or never halt.
signature.asc
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel