On Mon, 28 Nov 2016, Kyle Swanson wrote:

On Thu, Nov 17, 2016 at 11:04 AM, Kyle Swanson <k...@ylo.ph> wrote:
Hi,

Here's a couple of patches which update the ebur128 filter to use the
recently added ebur128 API. This updated filter allows fine-tuned
control over which EBU R128 parameters are measured, and provides
modest speed increases over the previous ebur128 filter. Also
noteworthy: this removes the video output option of the ebur128
filter. This is extraneous for an ebur128 measurement filter IMHO, but
if we wanted to keep similar functionality in FFmpeg, we'd be better
served by a new video source filter where custom meters could be
created via exported frame metadata.

The first patch adds true peak functionality to the ebur128 API using
swresample (this was already discussed a little bit:
http://ffmpeg.org/pipermail/ffmpeg-devel/2016-November/202583.html)
The second patch is an update to the ebur128 filter.

Kyle

Does anyone have any problems with the first patch?

From 6912ed3a03cd19f46e96f1f4b9eb3aa69b7ce4df Mon Sep 17 00:00:00 2001
From: Kyle Swanson <k...@ylo.ph>
Date: Thu, 17 Nov 2016 10:32:45 -0600
Subject: [PATCH 1/2] lavfi/ebur128: add ebur128_check_true_peak()

Signed-off-by: Kyle Swanson <k...@ylo.ph>
---
 libavfilter/ebur128.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++-
 libavfilter/ebur128.h |  17 ++++++
 2 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/libavfilter/ebur128.c b/libavfilter/ebur128.c
index a46692e..dc16647 100644
--- a/libavfilter/ebur128.c
+++ b/libavfilter/ebur128.c
@@ -50,6 +50,9 @@
 #include "libavutil/common.h"
 #include "libavutil/mem.h"
 #include "libavutil/thread.h"
+#include "libavutil/channel_layout.h"
+#include "libswresample/swresample.h"

Isn't this include needs an ifdef as well?

+#include "libavutil/opt.h"

 #define CHECK_ERROR(condition, errorcode, goto_point)                          
\
     if ((condition)) {                                                         
\
@@ -91,6 +94,16 @@ struct FFEBUR128StateInternal {
     size_t short_term_frame_counter;
     /** Maximum sample peak, one per channel */
     double *sample_peak;
+    /** Maximum true peak, one per channel */
+    double* true_peak;
+#if CONFIG_SWRESAMPLE
+    SwrContext *resampler;
+    size_t oversample_factor;
+    float* resampler_buffer_input;
+    size_t resampler_buffer_input_frames;
+    float* resampler_buffer_output;
+    size_t resampler_buffer_output_frames;
+#endif
     /** The maximum window duration in ms. */
     unsigned long window;
     /** Data pointer array for interleaved data */
@@ -214,12 +227,78 @@ static inline void init_histogram(void)
     }
 }

+#if CONFIG_SWRESAMPLE
+static int ebur128_init_resampler(FFEBUR128State* st) {
+    int64_t channel_layout;
+    int errcode;
+
+    if (st->samplerate < 96000) {
+        st->d->oversample_factor = 4;
+    } else if (st->samplerate < 192000) {
+        st->d->oversample_factor = 2;
+    } else {
+        st->d->oversample_factor = 1;
+        st->d->resampler_buffer_input = NULL;
+        st->d->resampler_buffer_output = NULL;
+        st->d->resampler = NULL;
+    }
+
+    st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4;
+    st->d->resampler_buffer_input = 
av_malloc(st->d->resampler_buffer_input_frames *
+                                              st->channels *
+                                              sizeof(float));

av_malloc_array

+    CHECK_ERROR(!st->d->resampler_buffer_input, 0, exit)
+
+    st->d->resampler_buffer_output_frames =
+    st->d->resampler_buffer_input_frames *
+    st->d->oversample_factor;
+    st->d->resampler_buffer_output = 
av_malloc(st->d->resampler_buffer_output_frames *
+                                               st->channels *
+                                               sizeof(float));

av_malloc_array

+    CHECK_ERROR(!st->d->resampler_buffer_output, 0, free_input)
+
+    st->d->resampler = swr_alloc();
+    CHECK_ERROR(!st->d->resampler, 0, free_output)
+
+    channel_layout = av_get_default_channel_layout(st->channels);
+
+    av_opt_set_int(st->d->resampler, "in_channel_layout", channel_layout, 0);
+    av_opt_set_int(st->d->resampler, "in_sample_rate", st->samplerate, 0);
+    av_opt_set_sample_fmt(st->d->resampler, "in_sample_fmt", 
AV_SAMPLE_FMT_FLT, 0);
+    av_opt_set_int(st->d->resampler, "out_channel_layout", channel_layout, 0);
+    av_opt_set_int(st->d->resampler, "out_sample_rate", st->samplerate * 
st->d->oversample_factor, 0);
+    av_opt_set_sample_fmt(st->d->resampler, "out_sample_fmt", 
AV_SAMPLE_FMT_FLT, 0);
+
+    swr_init(st->d->resampler);
+    return 0;
+
+free_output:
+    av_free(st->d->resampler_buffer_output);
+    st->d->resampler_buffer_output = NULL;

av_freep

+free_input:
+    av_free(st->d->resampler_buffer_input);
+    st->d->resampler_buffer_input = NULL;

av_freep

+exit:
+    return AVERROR(ENOMEM);
+}
+
+static void ebur128_destroy_resampler(FFEBUR128State* st) {
+    av_free(st->d->resampler_buffer_input);
+    st->d->resampler_buffer_input = NULL;

av_freep

+    av_free(st->d->resampler_buffer_output);
+    st->d->resampler_buffer_output = NULL;

av_freep

+    swr_free(&st->d->resampler);
+    st->d->resampler = NULL;

swr_free already sets resampler to NULL.

+}
+#endif
+
 FFEBUR128State *ff_ebur128_init(unsigned int channels,
                                 unsigned long samplerate,
                                 unsigned long window, int mode)
 {
     int errcode;
     FFEBUR128State *st;
+    unsigned int i;

     st = (FFEBUR128State *) av_malloc(sizeof(FFEBUR128State));
     CHECK_ERROR(!st, 0, exit)
@@ -233,6 +312,14 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
     st->d->sample_peak =
         (double *) av_mallocz_array(channels, sizeof(double));
     CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map)
+    st->d->true_peak =
+        (double*) malloc(channels * sizeof(double));

av_mallocz_array

+    CHECK_ERROR(!st->d->true_peak, 0, free_sample_peak)
+
+    for (i = 0; i < channels; ++i) {
+        st->d->sample_peak[i] = 0.0;
+        st->d->true_peak[i] = 0.0;
+    }

Technically not portable (AFAIK), but we assume in a lot of places that a mallocz-ed double is 0.0, so this initialization is unneeded.


     st->samplerate = samplerate;
     st->d->samples_in_100ms = (st->samplerate + 5) / 10;
@@ -242,7 +329,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
     } else if ((mode & FF_EBUR128_MODE_M) == FF_EBUR128_MODE_M) {
         st->d->window = FFMAX(window, 400);
     } else {
-        goto free_sample_peak;
+        goto free_true_peak;
     }
     st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
     if (st->d->audio_data_frames % st->d->samples_in_100ms) {
@@ -254,7 +341,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
     st->d->audio_data =
         (double *) av_mallocz_array(st->d->audio_data_frames,
                                     st->channels * sizeof(double));
-    CHECK_ERROR(!st->d->audio_data, 0, free_sample_peak)
+    CHECK_ERROR(!st->d->audio_data, 0, free_true_peak)

     ebur128_init_filter(st);

@@ -267,6 +354,11 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
                 free_block_energy_histogram)
     st->d->short_term_frame_counter = 0;

+#if CONFIG_SWRESAMPLE
+    unsigned int result = ebur128_init_resampler(st);

Why unsigned?

+    CHECK_ERROR(result, 0, free_short_term_block_energy_histogram)
+#endif
+
     /* the first block needs 400ms of audio data */
     st->d->needed_frames = st->d->samples_in_100ms * 4;
     /* start at the beginning of the buffer */
@@ -287,6 +379,8 @@ free_block_energy_histogram:
     av_free(st->d->block_energy_histogram);
 free_audio_data:
     av_free(st->d->audio_data);
+free_true_peak:
+    av_free(st->d->true_peak);
 free_sample_peak:
     av_free(st->d->sample_peak);
 free_channel_map:
@@ -306,12 +400,53 @@ void ff_ebur128_destroy(FFEBUR128State ** st)
     av_free((*st)->d->audio_data);
     av_free((*st)->d->channel_map);
     av_free((*st)->d->sample_peak);
+    av_free((*st)->d->true_peak);
     av_free((*st)->d->data_ptrs);
+#if CONFIG_SWRESAMPLE
+  ebur128_destroy_resampler(*st);
+#endif
     av_free((*st)->d);
     av_free(*st);
     *st = NULL;
 }

+static int ebur128_use_swresample(FFEBUR128State* st) {
+#if CONFIG_SWRESAMPLE
+    return ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) == 
FF_EBUR128_MODE_TRUE_PEAK);
+#else
+    (void) st;
+    return 0;
+#endif
+}
+
+static void ebur128_check_true_peak(FFEBUR128State* st, size_t frames) {
+#if CONFIG_SWRESAMPLE
+    size_t c, i;
+
+    const int in_len  = frames;
+    const int out_len = st->d->resampler_buffer_output_frames;
+    swr_convert(st->d->resampler, (uint8_t **)&st->d->resampler_buffer_output, 
out_len,
+                (const uint8_t **)&st->d->resampler_buffer_input, in_len);
+
+    for (c = 0; c < st->channels; ++c) {
+        for (i = 0; i < out_len; ++i) {
+            if (st->d->resampler_buffer_output[i * st->channels + c] >
+                                                               
st->d->true_peak[c]) {
+              st->d->true_peak[c] =
+                  st->d->resampler_buffer_output[i * st->channels + c];
+            } else if (-st->d->resampler_buffer_output[i * st->channels + c] >
+                                                               
st->d->true_peak[c]) {
+              st->d->true_peak[c] =
+                 -st->d->resampler_buffer_output[i * st->channels + c];
+            }
+        }
+    }
+#else
+    (void) st; (void) frames;
+#endif
+}
+
+
 #define EBUR128_FILTER(type, scaling_factor)                                   
    \
 static void ebur128_filter_##type(FFEBUR128State* st, const type** srcs,       
    \
                                   size_t src_index, size_t frames,             
    \
@@ -334,6 +469,15 @@ static void ebur128_filter_##type(FFEBUR128State* st, 
const type** srcs,
             if (max > st->d->sample_peak[c]) st->d->sample_peak[c] = max;      
    \
         }                                                                      
    \
     }                                                                          
    \
+    if (ebur128_use_swresample(st)) {                                          
    \
+        for (c = 0; c < st->channels; ++c) {                                   
    \
+            for (i = 0; i < frames; ++i) {                                     
    \
+                st->d->resampler_buffer_input[i * st->channels + c] =          
    \
+                    (float) (srcs[c][src_index + i * stride] / 
scaling_factor);    \
+            }                                                                  
    \
+        }                                                                      
    \
+        ebur128_check_true_peak(st, frames);                                   
    \
+    }                                                                          
    \
     for (c = 0; c < st->channels; ++c) {                                       
    \
         int ci = st->d->channel_map[c] - 1;                                    
    \
         if (ci < 0) continue;                                                  
    \
@@ -781,3 +925,17 @@ int ff_ebur128_sample_peak(FFEBUR128State * st,
     *out = st->d->sample_peak[channel_number];
     return 0;

Hmm, okay, I got a bit of a problem with this performance-wise. The way I see it first we convert everything to float, then we resample, then we find the maximum in an interleaved output. I'd say performance-wise it would be alot better if we could resample directly the input data to a planar float, and then measure the maximum there, so no intermediate conversions.

This can be a second step, if you are not interested in this now.

}
+
+int ff_ebur128_true_peak(FFEBUR128State * st,
+                         unsigned int channel_number,
+                         double* out) {
+  if ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) != FF_EBUR128_MODE_TRUE_PEAK) {
+      return AVERROR(EINVAL);
+  } else if (channel_number >= st->channels) {
+      return AVERROR(EINVAL);
+  }
+  *out = st->d->true_peak[channel_number] > st->d->sample_peak[channel_number]
+       ? st->d->true_peak[channel_number]
+       : st->d->sample_peak[channel_number];
+  return 0;
+}
diff --git a/libavfilter/ebur128.h b/libavfilter/ebur128.h
index b94cd24..ca6dd62 100644
--- a/libavfilter/ebur128.h
+++ b/libavfilter/ebur128.h
@@ -91,6 +91,9 @@ enum mode {
     FF_EBUR128_MODE_LRA = (1 << 3) | FF_EBUR128_MODE_S,
   /** can call ff_ebur128_sample_peak */
     FF_EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | FF_EBUR128_MODE_M,
+  /** can call ff_ebur128_true_peak */
+    FF_EBUR128_MODE_TRUE_PEAK   = (1 << 5) | FF_EBUR128_MODE_M
+                                           | FF_EBUR128_MODE_SAMPLE_PEAK

I'd rather not set implicitly MODE_M, because I don't want to give loudness measurement to the user, who only wants true peak.

 };

 /** forward declaration of FFEBUR128StateInternal */
@@ -283,6 +286,20 @@ int ff_ebur128_loudness_range_multiple(FFEBUR128State ** 
sts,
 int ff_ebur128_sample_peak(FFEBUR128State * st,
                            unsigned int channel_number, double *out);

+/** \brief Get maximum true peak from all frames that have been processed.
+ *
+ *  @param st library state
+ *  @param channel_number channel to analyse
+ *  @param out maximum true peak in float format (1.0 is 0 dBTP)
+ *  @return
+ *    - 0 on success.
+ *    - AVERROR(EINVAL) if mode "FF_EBUR128_MODE_TRUE_PEAK" has not
+ *      been set.
+ *    - AVERROR(EINVAL) if invalid channel index.
+ */
+int ff_ebur128_true_peak(FFEBUR128State* st,
+                      unsigned int channel_number, double* out);
+
 /** \brief Get relative threshold in LUFS.
  *
  *  @param st library state
--
2.10.1


Regards,
Marton
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to