On Thu, Aug 23, 2018 at 08:13:13PM +0200, Martin Vignali wrote: [...] > swscale_internal.h | 2 - > utils.c | 70 > ++++++++++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 68 insertions(+), 4 deletions(-) > 911c6d681b09ab719e2c8abe337887732c28b65e > 0003-swscale-add-bit_exact-lut-creation-for-8bit-to-float.patch > From e7b78d6416189a72695dac0680782a987c84b274 Mon Sep 17 00:00:00 2001 > From: Martin Vignali <martin.vign...@gmail.com> > Date: Thu, 23 Aug 2018 18:40:54 +0200 > Subject: [PATCH 3/4] swscale : add bit_exact lut creation for 8bit to float > > --- > libswscale/swscale_internal.h | 2 +- > libswscale/utils.c | 70 > +++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 68 insertions(+), 4 deletions(-) > > diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h > index 3b6b682d5f..a9cd7bc8d7 100644 > --- a/libswscale/swscale_internal.h > +++ b/libswscale/swscale_internal.h > @@ -336,7 +336,7 @@ typedef struct SwsContext { > uint32_t pal_yuv[256]; > uint32_t pal_rgb[256]; > > - float uint2float_lut[256]; > + float *uint2float_lut; /*! store uint8 to float or uint16 to float */ > > /** > * @name Scaled horizontal lines ring buffer. > diff --git a/libswscale/utils.c b/libswscale/utils.c > index 5e56371180..ce65467c35 100644 > --- a/libswscale/utils.c > +++ b/libswscale/utils.c > @@ -1098,6 +1098,69 @@ static uint16_t * alloc_gamma_tbl(double e) > return tbl; > } > > +static void inline fill_uint_to_float_lut(SwsContext *c, int bitdepth) { > + static const float float_mult8 = 1.0f / 255.0f; > + int i; > + > + if (bitdepth == 8) { /*! fill uint8 to float lut */ > + for (i = 0; i < 256; ++i){ > + c->uint2float_lut[i] = (float)i * float_mult8; > + } > + } else { /*! 
unsupported bitdepth */ > + av_assert0(0); > + } > +} > + > +#define SIGN_EXP_MANT_TO_UINT32(sign, exp, mant) sign << 31 | exp << 23 | > mant > + > +static void inline fill_uint_to_float_lut_bitexact(SwsContext *c, int > bitdepth) { > + int i, j, exp, mant, div; > + uint32_t off_coeff_mant; > + uint32_t coeff_mant; > + uint32_t *lut = (uint32_t *)c->uint2float_lut; > + int min_loop = 1; > + int max_loop = 2; > + > + if (bitdepth == 8) { /*! fill uint8 to float lut */ > + lut[0] = 0; > + lut[1] = SIGN_EXP_MANT_TO_UINT32(0, 119, 32897); > + > + exp = 119; /*! initial exp */ > + off_coeff_mant = coeff_mant = 4210752; > + > + for (j = 0; j < 7; ++j) { > + exp++; > + min_loop *= 2; > + max_loop *= 2; > + div = (max_loop - 1 - min_loop); > + > + for (i = min_loop; i < max_loop; ++i) { > + mant = (i - min_loop) * coeff_mant / div + 32897; > + lut[i] = SIGN_EXP_MANT_TO_UINT32(0, exp, mant); > + } > + > + if (j < 6) { > + off_coeff_mant >>= 1; > + coeff_mant |= off_coeff_mant; > + } > + } > + lut[255] = SIGN_EXP_MANT_TO_UINT32(0, 127, 0); > + } else { /*! unsupported bitdepth */ > + av_assert0(0); > + } > +} > +
> +static void alloc_uint_to_float_lut(SwsContext *c, int bitdepth) { > + c->uint2float_lut = (float*)av_malloc(sizeof(float) * 1 << bitdepth); > + if (!c->uint2float_lut) > + return; > + if (c->flags & SWS_BITEXACT) { > + fill_uint_to_float_lut_bitexact(c, bitdepth); > + } else { > + fill_uint_to_float_lut(c, bitdepth); > + } Is this complexity really needed? Does the LUT generation code produce different results on different platforms? If so, I would suggest trying to use double and adding a small offset if needed. An 8-bit table has 256 entries, a 16-bit table 65536. A difference would occur if a source value computed in 64-bit floats gets rounded differently to 32-bit floats. If this occurs, a small offset could be added so that none of the 65536 cases ends up close to the midpoint between two 32-bit floats. This would avoid the rather complex code, if it works. Thanks. [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Democracy is the form of government in which you can choose your dictator
signature.asc
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel