Hello FFmpeg Team,

I am submitting the completed work from my Google Summer of Code project,
which focuses on updating the SiTi filter for ITU-T P.910 07/22 compliance
and adding HDR support.

Attached is the patch for your review. I appreciate your feedback and
suggestions.
From e33fa1928164d986b5b62c02f00fab788a1f2f3b Mon Sep 17 00:00:00 2001
From: PoorvaGaikar <2003gaikarpoorva.com>
Date: Wed, 17 Jul 2024 23:36:56 +0530
Subject: [PATCH] libavfilter/siti: Update SITI filter to align with ITU-T
 P.910 07/22 and add HDR support

---
 libavfilter/vf_siti.c | 217 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 208 insertions(+), 9 deletions(-)

diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
index 722e7cecc7..2bbcaa0afd 100644
--- a/libavfilter/vf_siti.c
+++ b/libavfilter/vf_siti.c
@@ -47,6 +47,19 @@ static const int Y_FILTER[9] = {
     -1, -2, -1
 };
 
+static const float PU21_MATRICES[4][7] = {
+    // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
+    // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
+    // https://github.com/gfxdisp/pu21
+    {1.070275272f, 0.4088273932f, 0.153224308f, 0.2520326168f, 1.063512885f, 1.14115047f, 521.4527484f},  // BANDING
+    {0.353487901f, 0.3734658629f, 8.277049286e-05f, 0.9062562627f, 0.09150303166f, 0.9099517204f, 596.3148142f},  // BANDING_GLARE
+    {1.043882782f, 0.6459495343f, 0.3194584211f, 0.374025247f, 1.114783422f, 1.095360363f, 384.9217577f},  // PEAKS
+    {816.885024f, 1479.463946f, 0.001253215609f, 0.9329636822f, 0.06746643971f, 1.573435413f, 419.6006374f}  // PEAKS_GLARE
+};
+
+static const float PU21_MIN_VALUES[4] = {-1.5580e-07f, 5.4705e-10f, 1.3674e-07f, -9.7360e-08f};
+static const float PU21_MAX_VALUES[4] = {520.4673f, 595.3939f, 380.9853f, 407.5066f};
+
 typedef struct SiTiContext {
     const AVClass *class;
     int pixel_depth;
@@ -63,8 +76,45 @@ typedef struct SiTiContext {
     float *motion_matrix;
     int full_range;
     int print_summary;
+    int hdr_mode;
+    int bit_depth;
+    int color_range;
+    int eotf_function;
+    int calculation_domain;
+    float l_max;
+    float l_min;
+    float gamma;
+    int pu21_mode;
 } SiTiContext;
 
+enum HdrMode {
+    SDR,
+    HDR10,
+    HLG
+};
+
+enum ColorRange {
+    LIMITED,
+    FULL
+};
+
+enum EotfFunction {
+    BT1886,
+    INV_SRGB
+};
+
+enum CalculationDomain {
+    PQ,
+    PU21
+};
+
+enum Pu21Mode {
+    BANDING,
+    BANDING_GLARE,
+    PEAKS,
+    PEAKS_GLARE
+};
+
 static const enum AVPixelFormat pix_fmts[] = {
     AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
     AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
@@ -78,6 +128,15 @@ static av_cold int init(AVFilterContext *ctx)
     SiTiContext *s = ctx->priv;
     s->max_si = 0;
     s->max_ti = 0;
+    s->hdr_mode = SDR;
+    s->bit_depth = 8;
+    s->color_range = LIMITED;
+    s->eotf_function = BT1886;
+    s->calculation_domain = PQ;
+    s->l_max = 300.0;
+    s->l_min = 0.1;
+    s->gamma = 2.4;
+    s->pu21_mode = BANDING;
     return 0;
 }
 
@@ -166,6 +225,105 @@ static uint16_t convert_full_range(int factor, uint16_t y)
     return (full_upper * limit_y / limit_upper);
 }
 
+// EOTF for BT.1886
+static float eotf_1886(float x, float gamma, float l_min, float l_max)
+{
+    // Reference: ITU-R BT.1886, Annex 1
+    // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
+    const float l_max_gamma = powf(l_max, 1.0f / gamma);
+    const float l_min_gamma = powf(l_min, 1.0f / gamma);
+    const float l_diff_gamma = l_max_gamma - l_min_gamma;
+    const float a = powf(l_diff_gamma, gamma);
+    const float b = l_min_gamma / l_diff_gamma;
+    const float adjusted_x = fmaxf(x + b, 0.0f);
+
+    return a * powf(adjusted_x, gamma);
+}
+
+static float eotf_inv_srgb(float x)
+{
+    // Inverse sRGB EOTF (Electro-Optical Transfer Function) according to IEC 61966-2-1:1999
+    // Section G.2 (Encoding transformation)
+    // Reference: https://cdn.standards.iteh.ai/samples/10795/ae461684569b40bbbb2d9a22b1047f05/IEC-61966-2-1-1999-AMD1-2003.pdf
+    return (x <= 0.04045f) ? x / 12.92f : powf((x + 0.055f) / 1.055f, 2.4f);
+}
+
+static float apply_display_model(SiTiContext *s, float x)
+{
+    // Apply either BT.1886 or inverse sRGB EOTF based on the context
+    if (s->eotf_function == BT1886) {
+        // BT.1886 EOTF
+        // Reference: ITU-R BT.1886-0, Annex 1
+        // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
+        return eotf_1886(x, s->gamma, 0.0f, 1.0f);
+    } else {
+        // Inverse sRGB EOTF
+        return eotf_inv_srgb(x);
+    }
+}
+
+static float oetf_pq(float x)
+{
+    // PQ (Perceptual Quantizer) OETF according to ITU-R BT.2100-2
+    // Reference: https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf
+    // See page 5, Table 4
+
+    const float m1 = 0.1593017578125f;
+    const float m2 = 78.84375f;
+    const float c1 = 0.8359375f;
+    const float c2 = 18.8515625f;
+    const float c3 = 18.6875f;
+
+    float y = powf(x / 10000.0f, m1);
+    return powf((c1 + c2 * y) / (1.0f + c3 * y), m2);
+}
+
+static float oetf_pu21(float x, int mode);
+
+static float oetf_pu21_wrapper(float x) {
+    int mode = 0; // Set the appropriate mode or pass it as a parameter
+    return oetf_pu21(x, mode);
+}
+
+static float oetf_pu21(float x, int mode)
+{
+    // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
+    // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
+    // https://github.com/gfxdisp/pu21
+    // Validate the mode
+    const float *p;
+    float p_min;
+    float p_max;
+    float numerator;
+    float denominator;
+    float base;
+    float exponentiated;
+    float scaled;
+
+    if (mode < 0 || mode > 3) {
+        return x;  // Invalid mode, return input unchanged
+    }
+
+    // Get the parameters and min/max values for the given mode
+
+
+    p = PU21_MATRICES[mode];
+    p_min = PU21_MIN_VALUES[mode];
+    p_max = PU21_MAX_VALUES[mode];
+
+    // Declare all other variables at the beginning of the block
+    numerator = p[0] + p[1] * powf(x, p[3]);
+    denominator = 1.0f + p[2] * powf(x, p[3]);
+    base = numerator / denominator;
+
+    // Calculate the exponentiated and scaled value
+    exponentiated = powf(base, p[4]);
+    scaled = p[6] * (exponentiated - p[5]);
+
+    // Normalize the result to the range [0, 1]
+    return (scaled - p_min) / (p_max - p_min);
+}
+
 // Applies sobel convolution
 static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
 {
@@ -186,20 +344,20 @@ static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int l
     #define CONVOLVE(bps)                                           \
     {                                                               \
         uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
-        for (int j = 1; j < s->height - 1; j++) {                   \
-            for (int i = 1; i < s->width - 1; i++) {                \
+        for (int img_j = 1; img_j < s->height - 1; img_j++) {                   \
+            for (int img_i = 1; img_i < s->width - 1; img_i++) {                \
                 x_conv_sum = 0.0;                                   \
                 y_conv_sum = 0.0;                                   \
                 for (int k = 0; k < filter_size; k++) {             \
                     ki = k % filter_width - 1;                      \
                     kj = floor(k / filter_width) - 1;               \
-                    index = (j + kj) * stride + (i + ki);           \
+                    index = (img_j + kj) * stride + (img_i + ki);           \
                     data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
                     x_conv_sum += data * X_FILTER[k];               \
                     y_conv_sum += data * Y_FILTER[k];               \
                 }                                                   \
                 gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
-                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+                dst[(img_j - 1) * (s->width - 2) + (img_i - 1)] = gradient; \
             }                                                       \
         }                                                           \
     }
@@ -254,15 +412,16 @@ static float std_deviation(float *img_metrics, int width, int height)
     int size = height * width;
     double mean = 0.0;
     double sqr_diff = 0;
+    int j, i;
 
-    for (int j = 0; j < height; j++)
-        for (int i = 0; i < width; i++)
+    for (j = 0; j < height; j++)
+        for (i = 0; i < width; i++)
             mean += img_metrics[j * width + i];
 
     mean /= size;
 
-    for (int j = 0; j < height; j++) {
-        for (int i = 0; i < width; i++) {
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
             float mean_diff = img_metrics[j * width + i] - mean;
             sqr_diff += (mean_diff * mean_diff);
         }
@@ -285,15 +444,46 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     float si;
     float ti;
 
-    s->full_range = is_full_range(frame);
+    s->full_range = is_full_range(frame); // Determine if full range is needed
     s->nb_frames++;
 
+    // Apply EOTF and OETF transformations
+    for (int idx = 0; idx < s->width * s->height; idx++) {
+        float pixel = ((uint8_t*)frame->data[0])[idx] / 255.0f;
+        float eotf = apply_display_model(s, pixel);
+        float oetf = oetf_pq(eotf * s->l_max);
+        ((uint8_t*)frame->data[0])[idx] = (uint8_t)(oetf * 255.0f);
+    }
+
     // Calculate si and ti
     convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
     calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
     si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
     ti = std_deviation(s->motion_matrix, s->width, s->height);
 
+    // Apply HDR transformations if necessary
+    if (s->hdr_mode != SDR) {
+        float (*oetf_func)(float);
+        if (s->calculation_domain == PQ) {
+            oetf_func = oetf_pq;
+        } else {
+            oetf_func = oetf_pu21_wrapper;
+        }
+
+        for (int i = 0; i < (s->width - 2) * (s->height - 2); i++) {
+            s->gradient_matrix[i] = oetf_func(apply_display_model(s, s->gradient_matrix[i] / 255.0f)) * 255.0f;
+        }
+        for (int i = 0; i < s->width * s->height; i++) {
+            s->motion_matrix[i] = oetf_func(apply_display_model(s, s->motion_matrix[i] / 255.0f)) * 255.0f;
+        }
+
+        si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+        ti = std_deviation(s->motion_matrix, s->width, s->height);
+    }
+
+    // Print SI and TI values for each frame
+    av_log(ctx, AV_LOG_INFO, "Frame %"PRId64" - SI: %.6f, TI: %.6f\n", s->nb_frames, si, ti);
+
     // Calculate statistics
     s->max_si  = fmaxf(si, s->max_si);
     s->max_ti  = fmaxf(ti, s->max_ti);
@@ -314,6 +504,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 
 static const AVOption siti_options[] = {
     { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { "hdr_mode", "HDR mode (0: SDR, 1: HDR10, 2: HLG)", OFFSET(hdr_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS },
+    { "bit_depth", "Bit depth (8, 10, or 12)", OFFSET(bit_depth), AV_OPT_TYPE_INT, {.i64=8}, 8, 12, FLAGS },
+    { "color_range", "Color range (0: limited, 1: full)", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "eotf_function", "EOTF function (0: BT.1886, 1: Inverse sRGB)", OFFSET(eotf_function), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "calculation_domain", "Calculation domain (0: PQ, 1: PU21)", OFFSET(calculation_domain), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "l_max", "Maximum luminance", OFFSET(l_max), AV_OPT_TYPE_FLOAT, {.dbl=300.0}, 0, 10000, FLAGS },
+    { "l_min", "Minimum luminance", OFFSET(l_min), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0, 1, FLAGS },
+    { "gamma", "Gamma value for BT.1886", OFFSET(gamma), AV_OPT_TYPE_FLOAT, {.dbl=2.4}, 1, 3, FLAGS },
+    { "pu21_mode", "PU21 mode (0: BANDING, 1: BANDING_GLARE, 2: PEAKS, 3: PEAKS_GLARE)", OFFSET(pu21_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
     { NULL }
 };
 
-- 
2.43.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to