Hello FFmpeg Team, I am submitting the completed work from my Google Summer of Code project, which focuses on updating the SiTi filter for ITU-T P.910 07/22 compliance and adding HDR support.
Attached is the patch for your review. I appreciate your feedback and suggestions.
From e33fa1928164d986b5b62c02f00fab788a1f2f3b Mon Sep 17 00:00:00 2001 From: PoorvaGaikar <2003gaikarpoorva.com> Date: Wed, 17 Jul 2024 23:36:56 +0530 Subject: [PATCH] libavfilter/siti: Update SITI filter to align with ITU-T P.910 07/22 and add HDR support --- libavfilter/vf_siti.c | 217 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 208 insertions(+), 9 deletions(-) diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c index 722e7cecc7..2bbcaa0afd 100644 --- a/libavfilter/vf_siti.c +++ b/libavfilter/vf_siti.c @@ -47,6 +47,19 @@ static const int Y_FILTER[9] = { -1, -2, -1 }; +static const float PU21_MATRICES[4][7] = { + // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR" + // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021 + // https://github.com/gfxdisp/pu21 + {1.070275272f, 0.4088273932f, 0.153224308f, 0.2520326168f, 1.063512885f, 1.14115047f, 521.4527484f}, // BANDING + {0.353487901f, 0.3734658629f, 8.277049286e-05f, 0.9062562627f, 0.09150303166f, 0.9099517204f, 596.3148142f}, // BANDING_GLARE + {1.043882782f, 0.6459495343f, 0.3194584211f, 0.374025247f, 1.114783422f, 1.095360363f, 384.9217577f}, // PEAKS + {816.885024f, 1479.463946f, 0.001253215609f, 0.9329636822f, 0.06746643971f, 1.573435413f, 419.6006374f} // PEAKS_GLARE +}; + +static const float PU21_MIN_VALUES[4] = {-1.5580e-07f, 5.4705e-10f, 1.3674e-07f, -9.7360e-08f}; +static const float PU21_MAX_VALUES[4] = {520.4673f, 595.3939f, 380.9853f, 407.5066f}; + typedef struct SiTiContext { const AVClass *class; int pixel_depth; @@ -63,8 +76,45 @@ typedef struct SiTiContext { float *motion_matrix; int full_range; int print_summary; + int hdr_mode; + int bit_depth; + int color_range; + int eotf_function; + int calculation_domain; + float l_max; + float l_min; + float gamma; + int pu21_mode; } SiTiContext; +enum HdrMode { + SDR, + HDR10, + HLG +}; + +enum ColorRange { + LIMITED, + FULL +}; + +enum EotfFunction { + BT1886, + INV_SRGB +}; + +enum CalculationDomain { + PQ, + PU21 +}; + +enum Pu21Mode { + BANDING, + BANDING_GLARE, + PEAKS, + PEAKS_GLARE +}; + static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, @@ -78,6 +128,15 @@ static av_cold int init(AVFilterContext *ctx) SiTiContext *s = ctx->priv; s->max_si = 0; s->max_ti = 0; + s->hdr_mode = SDR; + s->bit_depth = 8; + s->color_range = LIMITED; + s->eotf_function = BT1886; + s->calculation_domain = PQ; + s->l_max = 300.0; + s->l_min = 0.1; + s->gamma = 2.4; + s->pu21_mode = BANDING; return 0; } @@ -166,6 +225,105 @@ static uint16_t convert_full_range(int factor, uint16_t y) return (full_upper * limit_y / limit_upper); } +// EOTF for BT.1886 +static float eotf_1886(float x, float gamma, float l_min, float l_max) +{ + // Reference: ITU-R BT.1886, Annex 1 + // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf + const float l_max_gamma = powf(l_max, 1.0f / gamma); + const float l_min_gamma = powf(l_min, 1.0f / gamma); + const float l_diff_gamma = l_max_gamma - l_min_gamma; + const float a = powf(l_diff_gamma, gamma); + const float b = l_min_gamma / l_diff_gamma; + const float adjusted_x = fmaxf(x + b, 0.0f); + + return a * powf(adjusted_x, gamma); +} + +static float eotf_inv_srgb(float x) +{ + // Inverse sRGB EOTF (Electro-Optical Transfer Function) according to IEC 61966-2-1:1999 + // Section G.2 (Encoding transformation) + // Reference: https://cdn.standards.iteh.ai/samples/10795/ae461684569b40bbbb2d9a22b1047f05/IEC-61966-2-1-1999-AMD1-2003.pdf + return (x <= 0.04045f) ? x / 12.92f : powf((x + 0.055f) / 1.055f, 2.4f); +} + +static float apply_display_model(SiTiContext *s, float x) +{ + // Apply either BT.1886 or inverse sRGB EOTF based on the context + if (s->eotf_function == BT1886) { + // BT.1886 EOTF + // Reference: ITU-R BT.1886-0, Annex 1 + // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf + return eotf_1886(x, s->gamma, 0.0f, 1.0f); + } else { + // Inverse sRGB EOTF + return eotf_inv_srgb(x); + } +} + +static float oetf_pq(float x) +{ + // PQ (Perceptual Quantizer) OETF according to ITU-R BT.2100-2 + // Reference: https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf + // See page 5, Table 4 + + const float m1 = 0.1593017578125f; + const float m2 = 78.84375f; + const float c1 = 0.8359375f; + const float c2 = 18.8515625f; + const float c3 = 18.6875f; + + float y = powf(x / 10000.0f, m1); + return powf((c1 + c2 * y) / (1.0f + c3 * y), m2); +} + +static float oetf_pu21(float x, int mode); + +static float oetf_pu21_wrapper(float x) { + int mode = 0; // Set the appropriate mode or pass it as a parameter + return oetf_pu21(x, mode); +} + +static float oetf_pu21(float x, int mode) +{ + // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR" + // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021 + // https://github.com/gfxdisp/pu21 + // Validate the mode + const float *p; + float p_min; + float p_max; + float numerator; + float denominator; + float base; + float exponentiated; + float scaled; + + if (mode < 0 || mode > 3) { + return x; // Invalid mode, return input unchanged + } + + // Get the parameters and min/max values for the given mode + + + p = PU21_MATRICES[mode]; + p_min = PU21_MIN_VALUES[mode]; + p_max = PU21_MAX_VALUES[mode]; + + // Declare all other variables at the beginning of the block + numerator = p[0] + p[1] * powf(x, p[3]); + denominator = 1.0f + p[2] * powf(x, p[3]); + base = numerator / denominator; + + // Calculate the exponentiated and scaled value + exponentiated = powf(base, p[4]); + scaled = p[6] * (exponentiated - p[5]); + + // Normalize the result to the range [0, 1] + return (scaled - p_min) / (p_max - p_min); +} + // Applies sobel convolution static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize) { @@ -186,20 +344,20 @@ static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int l #define CONVOLVE(bps) \ { \ uint##bps##_t *vsrc = (uint##bps##_t*)src; \ - for (int j = 1; j < s->height - 1; j++) { \ - for (int i = 1; i < s->width - 1; i++) { \ + for (int img_j = 1; img_j < s->height - 1; img_j++) { \ + for (int img_i = 1; img_i < s->width - 1; img_i++) { \ x_conv_sum = 0.0; \ y_conv_sum = 0.0; \ for (int k = 0; k < filter_size; k++) { \ ki = k % filter_width - 1; \ kj = floor(k / filter_width) - 1; \ - index = (j + kj) * stride + (i + ki); \ + index = (img_j + kj) * stride + (img_i + ki); \ data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \ x_conv_sum += data * X_FILTER[k]; \ y_conv_sum += data * Y_FILTER[k]; \ } \ gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \ - dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \ + dst[(img_j - 1) * (s->width - 2) + (img_i - 1)] = gradient; \ } \ } \ } @@ -254,15 +412,16 @@ static float std_deviation(float *img_metrics, int width, int height) int size = height * width; double mean = 0.0; double sqr_diff = 0; + int j, i; - for (int j = 0; j < height; j++) - for (int i = 0; i < width; i++) + for (j = 0; j < height; j++) + for (i = 0; i < width; i++) mean += img_metrics[j * width + i]; mean /= size; - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { + for (j = 0; j < height; j++) { + for (i = 0; i < width; i++) { float mean_diff = img_metrics[j * width + i] - mean; sqr_diff += (mean_diff * mean_diff); } @@ -285,15 +444,46 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) float si; float ti; - s->full_range = is_full_range(frame); + s->full_range = is_full_range(frame); // Determine if full range is needed s->nb_frames++; + // Apply EOTF and OETF transformations + for (int idx = 0; idx < s->width * s->height; idx++) { + float pixel = ((uint8_t*)frame->data[0])[idx] / 255.0f; + float eotf = apply_display_model(s, pixel); + float oetf = oetf_pq(eotf * s->l_max); + ((uint8_t*)frame->data[0])[idx] = (uint8_t)(oetf * 255.0f); + } + // Calculate si and ti convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]); calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]); si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2); ti = std_deviation(s->motion_matrix, s->width, s->height); + // Apply HDR transformations if necessary + if (s->hdr_mode != SDR) { + float (*oetf_func)(float); + if (s->calculation_domain == PQ) { + oetf_func = oetf_pq; + } else { + oetf_func = oetf_pu21_wrapper; + } + + for (int i = 0; i < (s->width - 2) * (s->height - 2); i++) { + s->gradient_matrix[i] = oetf_func(apply_display_model(s, s->gradient_matrix[i] / 255.0f)) * 255.0f; + } + for (int i = 0; i < s->width * s->height; i++) { + s->motion_matrix[i] = oetf_func(apply_display_model(s, s->motion_matrix[i] / 255.0f)) * 255.0f; + } + + si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2); + ti = std_deviation(s->motion_matrix, s->width, s->height); + } + + // Print SI and TI values for each frame + av_log(ctx, AV_LOG_INFO, "Frame %"PRId64" - SI: %.6f, TI: %.6f\n", s->nb_frames, si, ti); + // Calculate statistics s->max_si = fmaxf(si, s->max_si); s->max_ti = fmaxf(ti, s->max_ti); @@ -314,6 +504,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) static const AVOption siti_options[] = { { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS }, + { "hdr_mode", "HDR mode (0: SDR, 1: HDR10, 2: HLG)", OFFSET(hdr_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS }, + { "bit_depth", "Bit depth (8, 10, or 12)", OFFSET(bit_depth), AV_OPT_TYPE_INT, {.i64=8}, 8, 12, FLAGS }, + { "color_range", "Color range (0: limited, 1: full)", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS }, + { "eotf_function", "EOTF function (0: BT.1886, 1: Inverse sRGB)", OFFSET(eotf_function), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS }, + { "calculation_domain", "Calculation domain (0: PQ, 1: PU21)", OFFSET(calculation_domain), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS }, + { "l_max", "Maximum luminance", OFFSET(l_max), AV_OPT_TYPE_FLOAT, {.dbl=300.0}, 0, 10000, FLAGS }, + { "l_min", "Minimum luminance", OFFSET(l_min), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0, 1, FLAGS }, + { "gamma", "Gamma value for BT.1886", OFFSET(gamma), AV_OPT_TYPE_FLOAT, {.dbl=2.4}, 1, 3, FLAGS }, + { "pu21_mode", "PU21 mode (0: BANDING, 1: BANDING_GLARE, 2: PEAKS, 3: PEAKS_GLARE)", OFFSET(pu21_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS }, { NULL } }; -- 2.43.0
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".