From e7b78d6416189a72695dac0680782a987c84b274 Mon Sep 17 00:00:00 2001
From: Martin Vignali <martin.vignali@gmail.com>
Date: Thu, 23 Aug 2018 18:40:54 +0200
Subject: [PATCH 3/4] swscale : add bit_exact lut creation for 8bit to float

---
 libswscale/swscale_internal.h |  2 +-
 libswscale/utils.c            | 70 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 3b6b682d5f..a9cd7bc8d7 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -336,7 +336,7 @@ typedef struct SwsContext {
     uint32_t pal_yuv[256];
     uint32_t pal_rgb[256];
 
-    float uint2float_lut[256];
+    float *uint2float_lut; /*!< store uint8 to float or uint16 to float */
 
     /**
      * @name Scaled horizontal lines ring buffer.
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 5e56371180..ce65467c35 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1098,6 +1098,69 @@ static uint16_t * alloc_gamma_tbl(double e)
     return tbl;
 }
 
+/* Fill c->uint2float_lut[i] with i * (1.0f / 255.0f) for i in 0..255; only bitdepth 8 is implemented. */
+static inline void fill_uint_to_float_lut(SwsContext *c, int bitdepth) {
+    static const float float_mult8 = 1.0f / 255.0f;
+    int i;
+
+    if (bitdepth != 8) /* unsupported bitdepth */
+        av_assert0(0);
+
+    /* multiply by the precomputed reciprocal instead of dividing per entry */
+    for (i = 0; i < 256; ++i)
+        c->uint2float_lut[i] = (float)i * float_mult8;
+}
+/* Pack sign (bit 31), exponent (from bit 23) and mantissa (low bits) into one uint32_t; no masking is applied. */
+#define SIGN_EXP_MANT_TO_UINT32(sign, exp, mant) (((uint32_t)(sign) << 31) | ((uint32_t)(exp) << 23) | (uint32_t)(mant))
+
+/**
+ * Build the uint8 -> float table from integer-computed 32-bit patterns
+ * (sign << 31 | exp << 23 | mant); any bitdepth other than 8 trips av_assert0().
+ */
+static inline void fill_uint_to_float_lut_bitexact(SwsContext *c, int bitdepth) {
+    union { uint32_t u; float f; } pun;
+    uint32_t bits[256];
+    uint32_t coeff_mant = 4210752, off_coeff_mant = 4210752;
+    int i, j, mant, div, exp = 119, min_loop = 1, max_loop = 2; /* exp: initial exp */
+
+    if (bitdepth != 8) /* unsupported bitdepth */
+        av_assert0(0);
+
+    bits[0] = 0;
+    bits[1] = SIGN_EXP_MANT_TO_UINT32(0, 119, 32897);
+    for (j = 0; j < 7; ++j) { /* one pass per index range [2^(j+1), 2^(j+2)) */
+        exp++;
+        min_loop *= 2;
+        max_loop *= 2;
+        div = max_loop - 1 - min_loop;
+        for (i = min_loop; i < max_loop; ++i) {
+            mant    = (i - min_loop) * coeff_mant / div + 32897;
+            bits[i] = SIGN_EXP_MANT_TO_UINT32(0, exp, mant);
+        }
+        if (j < 6) { /* OR the next right-shifted copy into the coefficient */
+            off_coeff_mant >>= 1;
+            coeff_mant     |= off_coeff_mant;
+        }
+    }
+    bits[255] = SIGN_EXP_MANT_TO_UINT32(0, 127, 0); /* replaces the loop's value for 255 */
+    /* copy out through the union so the table is only ever written as float */
+    for (i = 0; i < 256; ++i) {
+        pun.u = bits[i];
+        c->uint2float_lut[i] = pun.f;
+    }
+}
+
+/* Allocate a (1 << bitdepth)-entry float LUT and fill it; callers detect failure by checking the pointer. */
+static void alloc_uint_to_float_lut(SwsContext *c, int bitdepth) {
+    c->uint2float_lut = av_malloc(sizeof(*c->uint2float_lut) * ((size_t)1 << bitdepth));
+    if (!c->uint2float_lut)
+        return;
+    if (c->flags & SWS_BITEXACT)
+        fill_uint_to_float_lut_bitexact(c, bitdepth); /* integer-built bit patterns */
+    else
+        fill_uint_to_float_lut(c, bitdepth);          /* plain float multiply */
+}
+
 static enum AVPixelFormat alphaless_fmt(enum AVPixelFormat fmt)
 {
     switch(fmt) {
@@ -1175,7 +1238,6 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     const AVPixFmtDescriptor *desc_dst;
     int ret = 0;
     enum AVPixelFormat tmpFmt;
-    static const float float_mult = 1.0f / 255.0f;
 
     cpu_flags = av_get_cpu_flags();
     flags     = c->flags;
@@ -1541,8 +1603,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     }
 
     if (unscaled && c->srcBpc == 8 && dstFormat == AV_PIX_FMT_GRAYF32){
-        for (i = 0; i < 256; ++i){
-            c->uint2float_lut[i] = (float)i * float_mult;
+        alloc_uint_to_float_lut(c, c->srcBpc);
+        if (!c->uint2float_lut) {
+            return AVERROR(ENOMEM);
         }
     }
 
@@ -2357,6 +2420,7 @@ void sws_freeContext(SwsContext *c)
 
     av_freep(&c->gamma);
     av_freep(&c->inv_gamma);
+    av_freep(&c->uint2float_lut);
 
     ff_free_filters(c);
 
-- 
2.14.3 (Apple Git-98)