date:20220810


I'd like to push this series within a week or so.
Some additional review, especially of the POSIX shell script, would be
highly appreciated.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avcodec/pthread_frame: Fix compiling with thread-unsafe-callbacks disabled

Forgotten in 02220b88fc38ef9dd4f2d519f5d3e4151258b60c.

Signed-off-by: Andreas Rheinhardt 
---
Sorry. Will apply this soon.

 libavcodec/pthread_frame.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index a54d16fee4..726bd1bcc7 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -970,7 +970,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
 pthread_mutex_lock(&p->parent->buffer_mutex);
 #if !FF_API_THREAD_SAFE_CALLBACKS
-err = ff_get_buffer(avctx, f->f, flags);
+err = ff_get_buffer(avctx, f, flags);
 #else
 FF_DISABLE_DEPRECATION_WARNINGS
 if (THREAD_SAFE_CALLBACKS(avctx)) {
@@ -1091,7 +1091,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic 
%p\n", f);
 
 #if !FF_API_THREAD_SAFE_CALLBACKS
-av_frame_unref(f->f);
+av_frame_unref(f);
 #else
 // when the frame buffers are not allocated, just reset it to clean state
 if (can_direct_free || !f->buf[0]) {
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [FFmpeg-cvslog] avfilter/framesync: add a new option to set how to sync streams based on secondary input timestamps

2022-08-10 Thread Gyan Doshi





On 2022-08-10 05:46 pm, James Almer wrote:

ffmpeg | branch: master | James Almer  | Tue Aug  2 08:28:48 
2022 -0300| [0c3e3fd1b478e14692f3b02bb8bf42262ee18af0] | committer: James Almer

avfilter/framesync: add a new option to set how to sync streams based on 
secondary input timestamps


Needs doc entry in filters.texi

http://www.ffmpeg.org/ffmpeg-filters.html#Options-for-filters-with-several-inputs-_0028framesync_0029

Regards,
Gyan




Include two values for it: a default one that sets/keeps the current behavior,
where the frame event generated by the primary input will have a timestamp
equal to or higher than that of the frames in the secondary input, plus a new
one where the secondary input frame will be the one with the absolute closest
timestamp to that of the frame event.

Addresses ticket #9689, where the new optional behavior produces better frame
synchronization.

Reviewed-by: Nicolas George 
Signed-off-by: James Almer 


http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0c3e3fd1b478e14692f3b02bb8bf42262ee18af0

---

  libavfilter/framesync.c | 19 +++
  libavfilter/framesync.h | 23 +++
  2 files changed, 42 insertions(+)

diff --git a/libavfilter/framesync.c b/libavfilter/framesync.c
index 7510550d8e..ee91e4cf68 100644
--- a/libavfilter/framesync.c
+++ b/libavfilter/framesync.c
@@ -42,6 +42,13 @@ static const AVOption framesync_options[] = {
  { "pass",   "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = 
EOF_ACTION_PASS },   .flags = FLAGS, "eof_action" },
  { "shortest", "force termination when the shortest input terminates", 
OFFSET(opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
  { "repeatlast", "extend last frame of secondary streams beyond EOF", 
OFFSET(opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+{ "ts_sync_mode", "How strictly to sync streams based on secondary input 
timestamps",
+OFFSET(opt_ts_sync_mode), AV_OPT_TYPE_INT, { .i64 = TS_DEFAULT },
+TS_DEFAULT, TS_NEAREST, .flags = FLAGS, "ts_sync_mode" },
+{ "default", "Frame from secondary input with the nearest lower or equal 
timestamp to the primary input frame",
+0, AV_OPT_TYPE_CONST, { .i64 = TS_DEFAULT }, .flags = FLAGS, 
"ts_sync_mode" },
+{ "nearest", "Frame from secondary input with the absolute nearest 
timestamp to the primary input frame",
+0, AV_OPT_TYPE_CONST, { .i64 = TS_NEAREST }, .flags = FLAGS, 
"ts_sync_mode" },
  { NULL }
  };
  static const AVClass framesync_class = {
@@ -110,6 +117,14 @@ static void framesync_sync_level_update(FFFrameSync *fs)
  av_assert0(level <= fs->sync_level);
  if (level < fs->sync_level)
  av_log(fs, AV_LOG_VERBOSE, "Sync level %u\n", level);
+if (fs->opt_ts_sync_mode > TS_DEFAULT) {
+for (i = 0; i < fs->nb_in; i++) {
+if (fs->in[i].sync < level)
+fs->in[i].ts_mode = fs->opt_ts_sync_mode;
+else
+fs->in[i].ts_mode = TS_DEFAULT;
+}
+}
  if (level)
  fs->sync_level = level;
  else
@@ -187,6 +202,10 @@ static int framesync_advance(FFFrameSync *fs)
  }
  for (i = 0; i < fs->nb_in; i++) {
  if (fs->in[i].pts_next == pts ||
+(fs->in[i].ts_mode == TS_NEAREST &&
+ fs->in[i].have_next &&
+ fs->in[i].pts_next != INT64_MAX && fs->in[i].pts != AV_NOPTS_VALUE 
&&
+ fs->in[i].pts_next - pts < pts - fs->in[i].pts) ||
  (fs->in[i].before == EXT_INFINITY &&
   fs->in[i].state == STATE_BOF)) {
  av_frame_free(&fs->in[i].frame);
diff --git a/libavfilter/framesync.h b/libavfilter/framesync.h
index a246d2d1e5..233f50a0eb 100644
--- a/libavfilter/framesync.h
+++ b/libavfilter/framesync.h
@@ -75,6 +75,27 @@ enum FFFrameSyncExtMode {
  EXT_INFINITY,
  };
  
+/**

+ * Timestamp syncronization mode
+ *
+ * Describe how the frames of a stream are syncronized based on timestamp
+ * distance.
+ */
+enum FFFrameTSSyncMode {
+
+/**
+ * Sync to frames from secondary input with the nearest, lower or equal
+ * timestamp to the frame event one.
+ */
+TS_DEFAULT,
+
+/**
+ * Sync to frames from secondary input with the absolute nearest timestamp
+ * to the frame event one.
+ */
+TS_NEAREST,
+};
+
  /**
   * Input stream structure
   */
@@ -138,6 +159,7 @@ typedef struct FFFrameSyncIn {
   */
  unsigned sync;
  
+enum FFFrameTSSyncMode ts_mode;

  } FFFrameSyncIn;
  
  /**

@@ -205,6 +227,7 @@ typedef struct FFFrameSync {
  int opt_repeatlast;
  int opt_shortest;
  int opt_eof_action;
+int opt_ts_sync_mode;
  
  } FFFrameSync;
  


___
ffmpeg-cvslog mailing list
ffmpeg-cvs...@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avcodec/vp9: Remove redundant reporting of progress on error

Redundant since 5e03eea673a9da2253ed15152e46b1422b35d145.

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/vp9.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index db06acd748..621627ddc5 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1293,17 +1293,13 @@ static int decode_tiles(AVCodecContext *avctx,
 data += 4;
 size -= 4;
 }
-if (tile_size > size) {
-ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 
0);
+if (tile_size > size)
 return AVERROR_INVALIDDATA;
-}
 ret = ff_vpx_init_range_decoder(&td->c_b[tile_col], data, 
tile_size);
 if (ret < 0)
 return ret;
-if (vpx_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker 
bit
-ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 
0);
+if (vpx_rac_get_prob_branchy(&td->c_b[tile_col], 128)) // marker 
bit
 return AVERROR_INVALIDDATA;
-}
 data += tile_size;
 size -= tile_size;
 }
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avfilter/vf_showinfo: add wallclock option

2022-08-10 Thread Michael Riedl

Signed-off-by: Michael Riedl 
---
 libavfilter/vf_showinfo.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index 2c8514fc80..1953f777c7 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c
@@ -43,6 +43,7 @@
 #include "libavutil/video_enc_params.h"
 #include "libavutil/detection_bbox.h"
 #include "libavutil/uuid.h"
+#include "libavutil/time.h"
 
 #include "avfilter.h"
 #include "internal.h"
@@ -51,6 +52,7 @@
 typedef struct ShowInfoContext {
 const AVClass *class;
 int calculate_checksums;
+int print_wallclock;
 } ShowInfoContext;
 
 #define OFFSET(x) offsetof(ShowInfoContext, x)
@@ -58,6 +60,7 @@ typedef struct ShowInfoContext {
 
 static const AVOption showinfo_options[] = {
 { "checksum", "calculate checksums", OFFSET(calculate_checksums), 
AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, VF },
+{ "wallclock", "print wallclock", OFFSET(print_wallclock), 
AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, VF },
 { NULL }
 };
 
@@ -740,6 +743,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame 
*frame)
sqrt((sum2[plane] - 
sum[plane]*(double)sum[plane]/pixelcount[plane])/pixelcount[plane]));
 av_log(ctx, AV_LOG_INFO, "\b]");
 }
+
+if (s->print_wallclock) {
+av_log(ctx, AV_LOG_INFO,
+   " wallclock:%"PRId64" ",
+   av_gettime()
+);
+}
+
 av_log(ctx, AV_LOG_INFO, "\n");
 
 for (i = 0; i < frame->nb_side_data; i++) {
-- 
2.37.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 6/9] fftools/ffmpeg: move inter-stream ts discontinuity handling to ts_discontinuity_process()

---
 fftools/ffmpeg.c | 37 +++--
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 0332528d57..cacbde27db 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -3743,7 +3743,22 @@ static void ts_discontinuity_process(InputFile *ifile, 
InputStream *ist,
 }
 }
 }
+} else if (ist->next_dts == AV_NOPTS_VALUE && !copy_ts &&
+   fmt_is_discont && ifile->last_ts != AV_NOPTS_VALUE) {
+int64_t delta = pkt_dts - ifile->last_ts;
+if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
+delta >  1LL*dts_delta_threshold*AV_TIME_BASE) {
+ifile->ts_offset -= delta;
+av_log(NULL, AV_LOG_DEBUG,
+   "Inter stream timestamp discontinuity %"PRId64", new 
offset= %"PRId64"\n",
+   delta, ifile->ts_offset);
+pkt->dts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
+if (pkt->pts != AV_NOPTS_VALUE)
+pkt->pts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
+}
 }
+
+ifile->last_ts = av_rescale_q(pkt->dts, ist->st->time_base, 
AV_TIME_BASE_Q);
 }
 
 /*
@@ -3761,7 +3776,6 @@ static int process_input(int file_index)
 AVPacket *pkt;
 int ret, i, j;
 int64_t duration;
-int64_t pkt_dts;
 
 is  = ifile->ctx;
 ret = ifile_get_packet(ifile, &pkt);
@@ -3877,24 +3891,6 @@ static int process_input(int file_index)
 if (pkt->dts != AV_NOPTS_VALUE)
 pkt->dts *= ist->ts_scale;
 
-pkt_dts = av_rescale_q_rnd(pkt->dts, ist->st->time_base, AV_TIME_BASE_Q, 
AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
-if ((ist->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
- ist->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) &&
-pkt_dts != AV_NOPTS_VALUE && ist->next_dts == AV_NOPTS_VALUE && 
!copy_ts
-&& (is->iformat->flags & AVFMT_TS_DISCONT) && ifile->last_ts != 
AV_NOPTS_VALUE) {
-int64_t delta   = pkt_dts - ifile->last_ts;
-if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
-delta >  1LL*dts_delta_threshold*AV_TIME_BASE){
-ifile->ts_offset -= delta;
-av_log(NULL, AV_LOG_DEBUG,
-   "Inter stream timestamp discontinuity %"PRId64", new 
offset= %"PRId64"\n",
-   delta, ifile->ts_offset);
-pkt->dts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
-if (pkt->pts != AV_NOPTS_VALUE)
-pkt->pts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
-}
-}
-
 duration = av_rescale_q(ifile->duration, ifile->time_base, 
ist->st->time_base);
 if (pkt->pts != AV_NOPTS_VALUE) {
 pkt->pts += duration;
@@ -3911,9 +3907,6 @@ static int process_input(int file_index)
 pkt->dts != AV_NOPTS_VALUE)
 ts_discontinuity_process(ifile, ist, pkt);
 
-if (pkt->dts != AV_NOPTS_VALUE)
-ifile->last_ts = av_rescale_q(pkt->dts, ist->st->time_base, 
AV_TIME_BASE_Q);
-
 if (debug_ts) {
 av_log(NULL, AV_LOG_INFO, "demuxer+ffmpeg -> ist_index:%d type:%s 
pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s duration:%s 
duration_time:%s off:%s off_time:%s\n",
ifile->ist_index + pkt->stream_index,
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 2/9] fftools/ffmpeg_mux: avoid leaking pkt on errors

---
 fftools/ffmpeg_mux.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/fftools/ffmpeg_mux.c b/fftools/ffmpeg_mux.c
index 08a76f0066..b424ef0021 100644
--- a/fftools/ffmpeg_mux.c
+++ b/fftools/ffmpeg_mux.c
@@ -97,8 +97,10 @@ static int write_packet(OutputFile *of, OutputStream *ost, 
AVPacket *pkt)
 
 fs = filesize(s->pb);
 atomic_store(&of->mux->last_filesize, fs);
-if (fs >= of->mux->limit_filesize)
-return AVERROR_EOF;
+if (fs >= of->mux->limit_filesize) {
+ret = AVERROR_EOF;
+goto fail;
+}
 
 if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && ost->vsync_method 
== VSYNC_DROP) ||
 (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && audio_sync_method < 
0))
@@ -138,8 +140,11 @@ static int write_packet(OutputFile *of, OutputStream *ost, 
AVPacket *pkt)
 av_log(s, loglevel, "Non-monotonous DTS in output stream "
"%d:%d; previous: %"PRId64", current: %"PRId64"; ",
ost->file_index, ost->st->index, ms->last_mux_dts, 
pkt->dts);
-if (exit_on_error)
-return AVERROR(EINVAL);
+if (exit_on_error) {
+ret = AVERROR(EINVAL);
+goto fail;
+}
+
 av_log(s, loglevel, "changing to %"PRId64". This may result "
"in incorrect timestamps in the output file.\n",
max);
@@ -170,10 +175,13 @@ static int write_packet(OutputFile *of, OutputStream 
*ost, AVPacket *pkt)
 ret = av_interleaved_write_frame(s, pkt);
 if (ret < 0) {
 print_error("av_interleaved_write_frame()", ret);
-return ret;
+goto fail;
 }
 
 return 0;
+fail:
+av_packet_unref(pkt);
+return ret;
 }
 
 static int sync_queue_process(OutputFile *of, OutputStream *ost, AVPacket *pkt)
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 3/9] fftools/ffmpeg: move stream-dependent starttime correction to transcode_init()

Currently this code is located in the discontinuity handling block,
where it does not belong.
---
 fftools/ffmpeg.c | 39 +--
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 16b1ba8af7..6f822de97d 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -3313,6 +3313,27 @@ static int transcode_init(void)
 input_streams[j + ifile->ist_index]->start = 
av_gettime_relative();
 }
 
+// Correct starttime based on the enabled streams
+for (i = 0; i < nb_input_files; i++) {
+InputFile   *ifile = input_files[i];
+AVFormatContext*is = ifile->ctx;
+int64_t new_start_time = INT64_MAX;
+
+if (!(is->iformat->flags & AVFMT_TS_DISCONT))
+continue;
+
+for (int j = 0; j < is->nb_streams; j++) {
+AVStream *st = is->streams[j];
+if(st->discard == AVDISCARD_ALL || st->start_time == 
AV_NOPTS_VALUE)
+continue;
+new_start_time = FFMIN(new_start_time, 
av_rescale_q(st->start_time, st->time_base, AV_TIME_BASE_Q));
+}
+if (new_start_time > is->start_time) {
+av_log(is, AV_LOG_VERBOSE, "Correcting start time by %"PRId64"\n", 
new_start_time - is->start_time);
+ifile->ts_offset = -new_start_time;
+}
+}
+
 /* init input streams */
 for (i = 0; i < nb_input_streams; i++)
 if ((ret = init_input_stream(i, error, sizeof(error))) < 0)
@@ -3752,24 +3773,6 @@ static int process_input(int file_index)
 
 if(!ist->wrap_correction_done && is->start_time != AV_NOPTS_VALUE && 
ist->st->pts_wrap_bits < 64){
 int64_t stime, stime2;
-// Correcting starttime based on the enabled streams
-// FIXME this ideally should be done before the first use of starttime 
but we do not know which are the enabled streams at that point.
-//   so we instead do it here as part of discontinuity handling
-if (   ist->next_dts == AV_NOPTS_VALUE
-&& ifile->ts_offset == -is->start_time
-&& (is->iformat->flags & AVFMT_TS_DISCONT)) {
-int64_t new_start_time = INT64_MAX;
-for (i=0; i<is->nb_streams; i++) {
-AVStream *st = is->streams[i];
-if(st->discard == AVDISCARD_ALL || st->start_time == 
AV_NOPTS_VALUE)
-continue;
-new_start_time = FFMIN(new_start_time, 
av_rescale_q(st->start_time, st->time_base, AV_TIME_BASE_Q));
-}
-if (new_start_time > is->start_time) {
-av_log(is, AV_LOG_VERBOSE, "Correcting start time by 
%"PRId64"\n", new_start_time - is->start_time);
-ifile->ts_offset = -new_start_time;
-}
-}
 
 stime = av_rescale_q(is->start_time, AV_TIME_BASE_Q, 
ist->st->time_base);
 stime2= stime + (1ULL<<ist->st->pts_wrap_bits);
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/9] fftools/ffmpeg: mark all encode sync queues as done before flushing encoders

---
 fftools/ffmpeg.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 0682a6fcc5..16b1ba8af7 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1780,6 +1780,13 @@ static void flush_encoders(void)
 {
 int i, ret;
 
+for (i = 0; i < nb_output_streams; i++) {
+OutputStream   *ost = output_streams[i];
+OutputFile  *of = output_files[ost->file_index];
+if (ost->sq_idx_encode >= 0)
+sq_send(of->sq_encode, ost->sq_idx_encode, SQFRAME(NULL));
+}
+
 for (i = 0; i < nb_output_streams; i++) {
 OutputStream   *ost = output_streams[i];
 AVCodecContext *enc = ost->enc_ctx;
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 4/9] fftools/ffmpeg: pre-compute the streamcopy start pts before transcoding starts

InputFile.ts_offset can change during transcoding, due to discontinuity
correction. This should not affect the streamcopy starting timestamp.

Cf. bf2590aed3e64d44a5e2430fdbe89f91f5e55bfe
---
 fftools/ffmpeg.c | 17 -
 fftools/ffmpeg.h |  6 ++
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 6f822de97d..b895f85e75 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1879,12 +1879,9 @@ static void do_streamcopy(InputStream *ist, OutputStream 
*ost, const AVPacket *p
 return;
 
 if (!ost->streamcopy_started && !ost->copy_prior_start) {
-int64_t comp_start = start_time;
-if (copy_ts && f->start_time != AV_NOPTS_VALUE)
-comp_start = FFMAX(start_time, f->start_time + f->ts_offset);
 if (pkt->pts == AV_NOPTS_VALUE ?
-ist->pts < comp_start :
-pkt->pts < av_rescale_q(comp_start, AV_TIME_BASE_Q, 
ist->st->time_base))
+ist->pts < ost->ts_copy_start :
+pkt->pts < av_rescale_q(ost->ts_copy_start, AV_TIME_BASE_Q, 
ist->st->time_base))
 return;
 }
 
@@ -2741,6 +2738,7 @@ static int init_output_stream_streamcopy(OutputStream 
*ost)
 {
 OutputFile *of = output_files[ost->file_index];
 InputStream *ist = get_input_stream(ost);
+InputFile *ifile = input_files[ist->file_index];
 AVCodecParameters *par = ost->st->codecpar;
 AVCodecContext *codec_ctx;
 AVRational sar;
@@ -2805,6 +2803,15 @@ static int init_output_stream_streamcopy(OutputStream 
*ost)
 if (ost->st->duration <= 0 && ist->st->duration > 0)
 ost->st->duration = av_rescale_q(ist->st->duration, 
ist->st->time_base, ost->st->time_base);
 
+if (!ost->copy_prior_start) {
+ost->ts_copy_start = (of->start_time == AV_NOPTS_VALUE) ?
+ 0 : of->start_time;
+if (copy_ts && ifile->start_time != AV_NOPTS_VALUE) {
+ost->ts_copy_start = FFMAX(ost->ts_copy_start,
+   ifile->start_time + ifile->ts_offset);
+}
+}
+
 if (ist->st->nb_side_data) {
 for (i = 0; i < ist->st->nb_side_data; i++) {
 const AVPacketSideData *sd_src = &ist->st->side_data[i];
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 2ac7cbe522..8b2e73d642 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -484,6 +484,12 @@ typedef struct OutputStream {
 int64_t last_mux_dts;
 /* pts of the last frame received from the filters, in AV_TIME_BASE_Q */
 int64_t last_filter_pts;
+
+// timestamp from which the streamcopied streams should start,
+// in AV_TIME_BASE_Q;
+// everything before it should be discarded
+int64_t ts_copy_start;
+
 // the timebase of the packets sent to the muxer
 AVRational mux_timebase;
 AVRational enc_timebase;
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 8/9] fftools/ffmpeg: use a separate variable for discontinuity offset

This will allow moving normal offset handling to the demuxer thread, since
discontinuities currently have to be processed in the main thread, as
the code uses some decoder-produced values.
---
 fftools/ffmpeg.c | 39 ---
 fftools/ffmpeg.h |  4 
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index dd45cb4516..8d85af1c97 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -3693,8 +3693,8 @@ static void decode_flush(InputFile *ifile)
 }
 }
 
-static void ts_discontinuity_process(InputFile *ifile, InputStream *ist,
- AVPacket *pkt)
+static void ts_discontinuity_detect(InputFile *ifile, InputStream *ist,
+AVPacket *pkt)
 {
 const int fmt_is_discont = ifile->ctx->iformat->flags & AVFMT_TS_DISCONT;
 int disable_discontinuity_correction = copy_ts;
@@ -3715,13 +3715,13 @@ static void ts_discontinuity_process(InputFile *ifile, 
InputStream *ist,
 if (fmt_is_discont) {
 if (FFABS(delta) > 1LL * dts_delta_threshold * AV_TIME_BASE ||
 pkt_dts + AV_TIME_BASE/10 < FFMAX(ist->pts, ist->dts)) {
-ifile->ts_offset -= delta;
+ifile->ts_offset_discont -= delta;
 av_log(NULL, AV_LOG_DEBUG,
"timestamp discontinuity for stream #%d:%d "
"(id=%d, type=%s): %"PRId64", new offset= %"PRId64"\n",
ist->file_index, ist->st->index, ist->st->id,
av_get_media_type_string(ist->st->codecpar->codec_type),
-   delta, ifile->ts_offset);
+   delta, ifile->ts_offset_discont);
 pkt->dts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
 if (pkt->pts != AV_NOPTS_VALUE)
 pkt->pts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
@@ -3744,10 +3744,10 @@ static void ts_discontinuity_process(InputFile *ifile, 
InputStream *ist,
fmt_is_discont && ifile->last_ts != AV_NOPTS_VALUE) {
 int64_t delta = pkt_dts - ifile->last_ts;
 if (FFABS(delta) > 1LL * dts_delta_threshold * AV_TIME_BASE) {
-ifile->ts_offset -= delta;
+ifile->ts_offset_discont -= delta;
 av_log(NULL, AV_LOG_DEBUG,
"Inter stream timestamp discontinuity %"PRId64", new 
offset= %"PRId64"\n",
-   delta, ifile->ts_offset);
+   delta, ifile->ts_offset_discont);
 pkt->dts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
 if (pkt->pts != AV_NOPTS_VALUE)
 pkt->pts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
@@ -3757,6 +3757,26 @@ static void ts_discontinuity_process(InputFile *ifile, 
InputStream *ist,
 ifile->last_ts = av_rescale_q(pkt->dts, ist->st->time_base, 
AV_TIME_BASE_Q);
 }
 
+static void ts_discontinuity_process(InputFile *ifile, InputStream *ist,
+ AVPacket *pkt)
+{
+int64_t offset = av_rescale_q(ifile->ts_offset_discont, AV_TIME_BASE_Q,
+  ist->st->time_base);
+
+// apply previously-detected timestamp-discontinuity offset
+// (to all streams, not just audio/video)
+if (pkt->dts != AV_NOPTS_VALUE)
+pkt->dts += offset;
+if (pkt->pts != AV_NOPTS_VALUE)
+pkt->pts += offset;
+
+// detect timestamp discontinuities for audio/video
+if ((ist->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
+ ist->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) &&
+pkt->dts != AV_NOPTS_VALUE)
+ts_discontinuity_detect(ifile, ist, pkt);
+}
+
 /*
  * Return
  * - 0 -- one packet was read and processed
@@ -3897,11 +3917,8 @@ static int process_input(int file_index)
 if (pkt->dts != AV_NOPTS_VALUE)
 pkt->dts += duration;
 
-// detect and correct timestamp discontinuities for audio/video
-if ((ist->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
- ist->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) &&
-pkt->dts != AV_NOPTS_VALUE)
-ts_discontinuity_process(ifile, ist, pkt);
+// detect and try to correct for timestamp discontinuities
+ts_discontinuity_process(ifile, ist, pkt);
 
 if (debug_ts) {
 av_log(NULL, AV_LOG_INFO, "demuxer+ffmpeg -> ist_index:%d type:%s 
pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s duration:%s 
duration_time:%s off:%s off_time:%s\n",
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 8b2e73d642..6991ba7632 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -427,6 +427,10 @@ typedef struct InputFile {
 int input_sync_ref;
 
 int64_t ts_offset;
+/**
+ * Extra timestamp offset added by discontinuity handling.
+ */
+int64_t ts_offset_discont;
 int64_t last_ts;
 int64_t start_time;   /* user-specifi

[FFmpeg-devel] [PATCH 5/9] fftools/ffmpeg: move timestamp discontinuity correction out of process_input()

---
 fftools/ffmpeg.c | 103 ++-
 1 file changed, 56 insertions(+), 47 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index b895f85e75..0332528d57 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -3693,6 +3693,59 @@ static void decode_flush(InputFile *ifile)
 }
 }
 
+static void ts_discontinuity_process(InputFile *ifile, InputStream *ist,
+ AVPacket *pkt)
+{
+const int fmt_is_discont = ifile->ctx->iformat->flags & AVFMT_TS_DISCONT;
+int disable_discontinuity_correction = copy_ts;
+int64_t pkt_dts = av_rescale_q_rnd(pkt->dts, ist->st->time_base, 
AV_TIME_BASE_Q,
+   AV_ROUND_NEAR_INF | 
AV_ROUND_PASS_MINMAX);
+
+if (copy_ts && ist->next_dts != AV_NOPTS_VALUE &&
+fmt_is_discont && ist->st->pts_wrap_bits < 60) {
+int64_t wrap_dts = av_rescale_q_rnd(pkt->dts + 
(1LL<<ist->st->pts_wrap_bits),
+ist->st->time_base, AV_TIME_BASE_Q,
+
AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
+if (FFABS(wrap_dts - ist->next_dts) < FFABS(pkt_dts - 
ist->next_dts)/10)
+disable_discontinuity_correction = 0;
+}
+
+if (ist->next_dts != AV_NOPTS_VALUE && !disable_discontinuity_correction) {
+int64_t delta = pkt_dts - ist->next_dts;
+if (fmt_is_discont) {
+if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
+delta >  1LL*dts_delta_threshold*AV_TIME_BASE ||
+pkt_dts + AV_TIME_BASE/10 < FFMAX(ist->pts, ist->dts)) {
+ifile->ts_offset -= delta;
+av_log(NULL, AV_LOG_DEBUG,
+   "timestamp discontinuity for stream #%d:%d "
+   "(id=%d, type=%s): %"PRId64", new offset= %"PRId64"\n",
+   ist->file_index, ist->st->index, ist->st->id,
+   av_get_media_type_string(ist->st->codecpar->codec_type),
+   delta, ifile->ts_offset);
+pkt->dts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
+if (pkt->pts != AV_NOPTS_VALUE)
+pkt->pts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
+}
+} else {
+if (delta < -1LL*dts_error_threshold*AV_TIME_BASE ||
+delta >  1LL*dts_error_threshold*AV_TIME_BASE) {
+av_log(NULL, AV_LOG_WARNING, "DTS %"PRId64", next:%"PRId64" 
st:%d invalid dropping\n", pkt->dts, ist->next_dts, pkt->stream_index);
+pkt->dts = AV_NOPTS_VALUE;
+}
+if (pkt->pts != AV_NOPTS_VALUE){
+int64_t pkt_pts = av_rescale_q(pkt->pts, ist->st->time_base, 
AV_TIME_BASE_Q);
+delta = pkt_pts - ist->next_dts;
+if (delta < -1LL*dts_error_threshold*AV_TIME_BASE ||
+delta >  1LL*dts_error_threshold*AV_TIME_BASE) {
+av_log(NULL, AV_LOG_WARNING, "PTS %"PRId64", 
next:%"PRId64" invalid dropping st:%d\n", pkt->pts, ist->next_dts, 
pkt->stream_index);
+pkt->pts = AV_NOPTS_VALUE;
+}
+}
+}
+}
+}
+
 /*
  * Return
  * - 0 -- one packet was read and processed
@@ -3709,7 +3762,6 @@ static int process_input(int file_index)
 int ret, i, j;
 int64_t duration;
 int64_t pkt_dts;
-int disable_discontinuity_correction = copy_ts;
 
 is  = ifile->ctx;
 ret = ifile_get_packet(ifile, &pkt);
@@ -3853,54 +3905,11 @@ static int process_input(int file_index)
 if (pkt->dts != AV_NOPTS_VALUE)
 pkt->dts += duration;
 
-pkt_dts = av_rescale_q_rnd(pkt->dts, ist->st->time_base, AV_TIME_BASE_Q, 
AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
-
-if (copy_ts && pkt_dts != AV_NOPTS_VALUE && ist->next_dts != 
AV_NOPTS_VALUE &&
-(is->iformat->flags & AVFMT_TS_DISCONT) && ist->st->pts_wrap_bits < 
60) {
-int64_t wrap_dts = av_rescale_q_rnd(pkt->dts + 
(1LL<<ist->st->pts_wrap_bits),
-ist->st->time_base, AV_TIME_BASE_Q,
-
AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
-if (FFABS(wrap_dts - ist->next_dts) < FFABS(pkt_dts - 
ist->next_dts)/10)
-disable_discontinuity_correction = 0;
-}
-
+// detect and correct timestamp discontinuities for audio/video
 if ((ist->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
  ist->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) &&
- pkt_dts != AV_NOPTS_VALUE && ist->next_dts != AV_NOPTS_VALUE &&
-!disable_discontinuity_correction) {
-int64_t delta   = pkt_dts - ist->next_dts;
-if (is->iformat->flags & AVFMT_TS_DISCONT) {
-if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
-delta >  1LL*dts_delta_threshold*AV_TIME_BASE ||
-pkt_dts + AV_TIME_BA

[FFmpeg-devel] [PATCH 7/9] fftools/ffmpeg: simplify conditions in ts_discontinuity_process

---
 fftools/ffmpeg.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index cacbde27db..dd45cb4516 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -3713,8 +3713,7 @@ static void ts_discontinuity_process(InputFile *ifile, 
InputStream *ist,
 if (ist->next_dts != AV_NOPTS_VALUE && !disable_discontinuity_correction) {
 int64_t delta = pkt_dts - ist->next_dts;
 if (fmt_is_discont) {
-if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
-delta >  1LL*dts_delta_threshold*AV_TIME_BASE ||
+if (FFABS(delta) > 1LL * dts_delta_threshold * AV_TIME_BASE ||
 pkt_dts + AV_TIME_BASE/10 < FFMAX(ist->pts, ist->dts)) {
 ifile->ts_offset -= delta;
 av_log(NULL, AV_LOG_DEBUG,
@@ -3728,16 +3727,14 @@ static void ts_discontinuity_process(InputFile *ifile, 
InputStream *ist,
 pkt->pts -= av_rescale_q(delta, AV_TIME_BASE_Q, 
ist->st->time_base);
 }
 } else {
-if (delta < -1LL*dts_error_threshold*AV_TIME_BASE ||
-delta >  1LL*dts_error_threshold*AV_TIME_BASE) {
+if (FFABS(delta) > 1LL * dts_error_threshold * AV_TIME_BASE) {
 av_log(NULL, AV_LOG_WARNING, "DTS %"PRId64", next:%"PRId64" 
st:%d invalid dropping\n", pkt->dts, ist->next_dts, pkt->stream_index);
 pkt->dts = AV_NOPTS_VALUE;
 }
 if (pkt->pts != AV_NOPTS_VALUE){
 int64_t pkt_pts = av_rescale_q(pkt->pts, ist->st->time_base, 
AV_TIME_BASE_Q);
 delta = pkt_pts - ist->next_dts;
-if (delta < -1LL*dts_error_threshold*AV_TIME_BASE ||
-delta >  1LL*dts_error_threshold*AV_TIME_BASE) {
+if (FFABS(delta) > 1LL * dts_error_threshold * AV_TIME_BASE) {
 av_log(NULL, AV_LOG_WARNING, "PTS %"PRId64", 
next:%"PRId64" invalid dropping st:%d\n", pkt->pts, ist->next_dts, 
pkt->stream_index);
 pkt->pts = AV_NOPTS_VALUE;
 }
@@ -3746,8 +3743,7 @@ static void ts_discontinuity_process(InputFile *ifile, 
InputStream *ist,
 } else if (ist->next_dts == AV_NOPTS_VALUE && !copy_ts &&
fmt_is_discont && ifile->last_ts != AV_NOPTS_VALUE) {
 int64_t delta = pkt_dts - ifile->last_ts;
-if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
-delta >  1LL*dts_delta_threshold*AV_TIME_BASE) {
+if (FFABS(delta) > 1LL * dts_delta_threshold * AV_TIME_BASE) {
 ifile->ts_offset -= delta;
 av_log(NULL, AV_LOG_DEBUG,
"Inter stream timestamp discontinuity %"PRId64", new 
offset= %"PRId64"\n",
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 9/9] fftools/ffmpeg: move packet timestamp processing to demuxer thread

Discontinuity detection/correction is left in the main thread, as it is
entangled with InputStream.next_dts and related variables, which may be
set by decoding code.

Fixes races e.g. in fate-ffmpeg-streamloop after
aae9de0cb2887e6e0bbfda6ffdf85ab77d3390f0.
---
 fftools/ffmpeg.c   | 52 --
 fftools/ffmpeg_demux.c | 57 ++
 2 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 8d85af1c97..71a7e0b837 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -3791,7 +3791,6 @@ static int process_input(int file_index)
 InputStream *ist;
 AVPacket *pkt;
 int ret, i, j;
-int64_t duration;
 
 is  = ifile->ctx;
 ret = ifile_get_packet(ifile, &pkt);
@@ -3846,37 +3845,6 @@ static int process_input(int file_index)
 if (ist->discard)
 goto discard_packet;
 
-if (debug_ts) {
-av_log(NULL, AV_LOG_INFO, "demuxer -> ist_index:%d type:%s "
-   "next_dts:%s next_dts_time:%s next_pts:%s next_pts_time:%s 
pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s duration:%s 
duration_time:%s off:%s off_time:%s\n",
-   ifile->ist_index + pkt->stream_index,
-   av_get_media_type_string(ist->st->codecpar->codec_type),
-   av_ts2str(ist->next_dts), av_ts2timestr(ist->next_dts, 
&AV_TIME_BASE_Q),
-   av_ts2str(ist->next_pts), av_ts2timestr(ist->next_pts, 
&AV_TIME_BASE_Q),
-   av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, 
&ist->st->time_base),
-   av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, 
&ist->st->time_base),
-   av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, 
&ist->st->time_base),
-   av_ts2str(input_files[ist->file_index]->ts_offset),
-   av_ts2timestr(input_files[ist->file_index]->ts_offset, 
&AV_TIME_BASE_Q));
-}
-
-if(!ist->wrap_correction_done && is->start_time != AV_NOPTS_VALUE && 
ist->st->pts_wrap_bits < 64){
-int64_t stime, stime2;
-
-stime = av_rescale_q(is->start_time, AV_TIME_BASE_Q, 
ist->st->time_base);
-stime2= stime + (1ULL<<ist->st->pts_wrap_bits);
-ist->wrap_correction_done = 1;
-
-if(stime2 > stime && pkt->dts != AV_NOPTS_VALUE && pkt->dts > stime + 
(1LL<<(ist->st->pts_wrap_bits-1))) {
-pkt->dts -= 1ULL<<ist->st->pts_wrap_bits;
-ist->wrap_correction_done = 0;
-}
-if(stime2 > stime && pkt->pts != AV_NOPTS_VALUE && pkt->pts > stime + 
(1LL<<(ist->st->pts_wrap_bits-1))) {
-pkt->pts -= 1ULL<<ist->st->pts_wrap_bits;
-ist->wrap_correction_done = 0;
-}
-}
-
 /* add the stream-global side data to the first packet */
 if (ist->nb_packets == 1) {
 for (i = 0; i < ist->st->nb_side_data; i++) {
@@ -3897,26 +3865,6 @@ static int process_input(int file_index)
 }
 }
 
-if (pkt->dts != AV_NOPTS_VALUE)
-pkt->dts += av_rescale_q(ifile->ts_offset, AV_TIME_BASE_Q, 
ist->st->time_base);
-if (pkt->pts != AV_NOPTS_VALUE)
-pkt->pts += av_rescale_q(ifile->ts_offset, AV_TIME_BASE_Q, 
ist->st->time_base);
-
-if (pkt->pts != AV_NOPTS_VALUE)
-pkt->pts *= ist->ts_scale;
-if (pkt->dts != AV_NOPTS_VALUE)
-pkt->dts *= ist->ts_scale;
-
-duration = av_rescale_q(ifile->duration, ifile->time_base, 
ist->st->time_base);
-if (pkt->pts != AV_NOPTS_VALUE) {
-pkt->pts += duration;
-ist->max_pts = FFMAX(pkt->pts, ist->max_pts);
-ist->min_pts = FFMIN(pkt->pts, ist->min_pts);
-}
-
-if (pkt->dts != AV_NOPTS_VALUE)
-pkt->dts += duration;
-
 // detect and try to correct for timestamp discontinuities
 ts_discontinuity_process(ifile, ist, pkt);
 
diff --git a/fftools/ffmpeg_demux.c b/fftools/ffmpeg_demux.c
index d15cee614d..6dfb5bb35b 100644
--- a/fftools/ffmpeg_demux.c
+++ b/fftools/ffmpeg_demux.c
@@ -114,6 +114,61 @@ static int seek_to_start(InputFile *ifile)
 return ret;
 }
 
+static void ts_fixup(InputFile *ifile, AVPacket *pkt)
+{
+InputStream *ist = input_streams[ifile->ist_index + pkt->stream_index];
+const int64_t start_time = ifile->ctx->start_time;
+int64_t duration;
+
+if (debug_ts) {
+av_log(NULL, AV_LOG_INFO, "demuxer -> ist_index:%d type:%s "
+   "pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s 
duration:%s duration_time:%s\n",
+   ifile->ist_index + pkt->stream_index,
+   av_get_media_type_string(ist->st->codecpar->codec_type),
+   av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, 
&ist->st->time_base),
+   av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, 
&ist->st->time_base),
+   av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, 
&ist->st->time_base));
+}
+
+if (!ist->wrap_correction_done && start_time != AV_NOPTS_VALUE &&
+ist->st->pts_wrap_bits < 64) {
+int64_t stime, stime2;
+
+

[FFmpeg-devel] [PATCH 02/11] avutil/hwcontext_d3d11va: add support for rgbaf16 pixel format

---
 libavutil/hwcontext_d3d11va.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 27c0c80413..363ec6a47d 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -88,6 +88,7 @@ static const struct {
 { DXGI_FORMAT_P010, AV_PIX_FMT_P010 },
 { DXGI_FORMAT_B8G8R8A8_UNORM,AV_PIX_FMT_BGRA },
 { DXGI_FORMAT_R10G10B10A2_UNORM, AV_PIX_FMT_X2BGR10 },
+{ DXGI_FORMAT_R16G16B16A16_FLOAT, AV_PIX_FMT_RGBAF16 },
 // Special opaque formats. The pix_fmt is merely a place holder, as the
 // opaque format cannot be accessed directly.
 { DXGI_FORMAT_420_OPAQUE,   AV_PIX_FMT_YUV420P },
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 01/11] lavu/pixfmt: add packed RGBA float16 format

This is the default format of the Windows compositor and what DXGI
Desktop Duplication will give you for any kind of HDR output.
---
 libavutil/pixdesc.c  | 28 
 libavutil/pixfmt.h   |  5 +
 libavutil/version.h  |  4 ++--
 tests/ref/fate/imgutils  |  2 ++
 tests/ref/fate/sws-pixdesc-query | 13 +
 5 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index e078fd5320..f7558ff8b9 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -2504,6 +2504,34 @@ static const AVPixFmtDescriptor 
av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
 },
 .flags = AV_PIX_FMT_FLAG_ALPHA,
 },
+[AV_PIX_FMT_RGBAF16BE] = {
+.name = "rgbaf16be",
+.nb_components = 4,
+.log2_chroma_w = 0,
+.log2_chroma_h = 0,
+.comp = {
+{ 0, 8, 0, 0, 16 },   /* R */
+{ 0, 8, 2, 0, 16 },   /* G */
+{ 0, 8, 4, 0, 16 },   /* B */
+{ 0, 8, 6, 0, 16 },   /* A */
+},
+.flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB |
+ AV_PIX_FMT_FLAG_ALPHA | AV_PIX_FMT_FLAG_FLOAT,
+},
+[AV_PIX_FMT_RGBAF16LE] = {
+.name = "rgbaf16le",
+.nb_components = 4,
+.log2_chroma_w = 0,
+.log2_chroma_h = 0,
+.comp = {
+{ 0, 8, 0, 0, 16 },   /* R */
+{ 0, 8, 2, 0, 16 },   /* G */
+{ 0, 8, 4, 0, 16 },   /* B */
+{ 0, 8, 6, 0, 16 },   /* A */
+},
+.flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA |
+ AV_PIX_FMT_FLAG_FLOAT,
+},
 };
 
 static const char * const color_range_names[] = {
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 9d1fdaf82d..86c9bdefeb 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -369,6 +369,9 @@ enum AVPixelFormat {
 
 AV_PIX_FMT_VUYA,///< packed VUYA 4:4:4, 32bpp, VUYAVUYA...
 
+AV_PIX_FMT_RGBAF16BE,   ///< IEEE-754 half precision packed RGBA 
16:16:16:16, 64bpp, RGBARGBA..., big-endian
+AV_PIX_FMT_RGBAF16LE,   ///< IEEE-754 half precision packed RGBA 
16:16:16:16, 64bpp, RGBARGBA..., little-endian
+
 AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you 
want to link with shared libav* because the number of formats might differ 
between versions
 };
 
@@ -466,6 +469,8 @@ enum AVPixelFormat {
 #define AV_PIX_FMT_P216   AV_PIX_FMT_NE(P216BE, P216LE)
 #define AV_PIX_FMT_P416   AV_PIX_FMT_NE(P416BE, P416LE)
 
+#define AV_PIX_FMT_RGBAF16    AV_PIX_FMT_NE(RGBAF16BE, RGBAF16LE)
+
 /**
   * Chromaticity coordinates of the source primaries.
   * These values match the ones defined by ISO/IEC 23091-2_2019 subclause 8.1 
and ITU-T H.273.
diff --git a/libavutil/version.h b/libavutil/version.h
index ee43526dc6..f0a8b5c098 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,8 +79,8 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  57
-#define LIBAVUTIL_VERSION_MINOR  32
-#define LIBAVUTIL_VERSION_MICRO 101
+#define LIBAVUTIL_VERSION_MINOR  33
+#define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils
index 4ec66febb8..01c9877de5 100644
--- a/tests/ref/fate/imgutils
+++ b/tests/ref/fate/imgutils
@@ -247,3 +247,5 @@ p216le  planes: 2, linesizes: 128 128   0   0, 
plane_sizes:  6144  6144
 p416be  planes: 2, linesizes: 128 256   0   0, plane_sizes:  6144 
12288 0 0, plane_offsets:  6144 0 0, total_size: 18432
 p416le  planes: 2, linesizes: 128 256   0   0, plane_sizes:  6144 
12288 0 0, plane_offsets:  6144 0 0, total_size: 18432
 vuya    planes: 1, linesizes: 256   0   0   0, plane_sizes: 12288 
0 0 0, plane_offsets: 0 0 0, total_size: 12288
+rgbaf16be   planes: 1, linesizes: 512   0   0   0, plane_sizes: 24576 
0 0 0, plane_offsets: 0 0 0, total_size: 24576
+rgbaf16le   planes: 1, linesizes: 512   0   0   0, plane_sizes: 24576 
0 0 0, plane_offsets: 0 0 0, total_size: 24576
diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query
index bd0f1fcb82..f79d99e513 100644
--- a/tests/ref/fate/sws-pixdesc-query
+++ b/tests/ref/fate/sws-pixdesc-query
@@ -21,6 +21,8 @@ is16BPS:
   rgb48le
   rgba64be
   rgba64le
+  rgbaf16be
+  rgbaf16le
   ya16be
   ya16le
   yuv420p16be
@@ -157,6 +159,7 @@ isBE:
   rgb555be
   rgb565be
   rgba64be
+  rgbaf16be
   x2bgr10be
   x2rgb10be
   xyz12be
@@ -479,6 +482,8 @@ isRGB:
   rgb8
   rgba64be
   rgba64le
+  rgbaf16be
+  rgbaf16le
   x2bgr10be
   x2bgr10le
   x2rgb10be
@@ -629,6 +634,8 @@ AnyRGB:
   rgb8
   rgba64be
   rgba64le
+  rgbaf16be
+  rgbaf16le
   x2bgr10be
   x2

[FFmpeg-devel] [PATCH 03/11] avfilter/vsrc_ddagrab: add rgbaf16 output support

---
 libavfilter/version.h  |  2 +-
 libavfilter/vsrc_ddagrab.c | 13 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/libavfilter/version.h b/libavfilter/version.h
index 19a009c110..fa67606495 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -32,7 +32,7 @@
 #include "version_major.h"
 
 #define LIBAVFILTER_VERSION_MINOR  46
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MICRO 102
 
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vsrc_ddagrab.c b/libavfilter/vsrc_ddagrab.c
index ce36716281..252505b96d 100644
--- a/libavfilter/vsrc_ddagrab.c
+++ b/libavfilter/vsrc_ddagrab.c
@@ -115,6 +115,8 @@ static const AVOption ddagrab_options[] = {
 { "bgra",   "only output 8 Bit BGRA",0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_B8G8R8A8_UNORM },0, INT_MAX, 
FLAGS, "output_fmt" },
 { "10bit",  "only output default 10 Bit format", 0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_R10G10B10A2_UNORM }, 0, INT_MAX, 
FLAGS, "output_fmt" },
 { "x2bgr10","only output 10 Bit X2BGR10",0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_R10G10B10A2_UNORM }, 0, INT_MAX, 
FLAGS, "output_fmt" },
+{ "16bit",  "only output default 16 Bit format", 0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_R16G16B16A16_FLOAT },0, INT_MAX, 
FLAGS, "output_fmt" },
+{ "rgbaf16","only output 16 Bit RGBAF16",0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_R16G16B16A16_FLOAT },0, INT_MAX, 
FLAGS, "output_fmt" },
 { NULL }
 };
 
@@ -212,6 +214,7 @@ static av_cold int init_dxgi_dda(AVFilterContext *avctx)
 if (set_thread_dpi && SUCCEEDED(hr)) {
 DPI_AWARENESS_CONTEXT prev_dpi_ctx;
 DXGI_FORMAT formats[] = {
+DXGI_FORMAT_R16G16B16A16_FLOAT,
 DXGI_FORMAT_R10G10B10A2_UNORM,
 DXGI_FORMAT_B8G8R8A8_UNORM
 };
@@ -665,6 +668,10 @@ static av_cold int init_hwframes_ctx(AVFilterContext 
*avctx)
 av_log(avctx, AV_LOG_VERBOSE, "Probed 10 bit RGB frame format\n");
 dda->frames_ctx->sw_format = AV_PIX_FMT_X2BGR10;
 break;
+case DXGI_FORMAT_R16G16B16A16_FLOAT:
+av_log(avctx, AV_LOG_VERBOSE, "Probed 16 bit float RGB frame 
format\n");
+dda->frames_ctx->sw_format = AV_PIX_FMT_RGBAF16;
+break;
 default:
 av_log(avctx, AV_LOG_ERROR, "Unexpected texture output format!\n");
 return AVERROR_BUG;
@@ -990,6 +997,12 @@ static int ddagrab_request_frame(AVFilterLink *outlink)
 frame->color_primaries = AVCOL_PRI_BT709;
 frame->color_trc   = AVCOL_TRC_IEC61966_2_1;
 frame->colorspace  = AVCOL_SPC_RGB;
+} else if(desc.Format == DXGI_FORMAT_R16G16B16A16_FLOAT) {
+// According to MSDN, all floating point formats contain sRGB image 
data with linear 1.0 gamma.
+frame->color_range = AVCOL_RANGE_JPEG;
+frame->color_primaries = AVCOL_PRI_BT709;
+frame->color_trc   = AVCOL_TRC_LINEAR;
+frame->colorspace  = AVCOL_SPC_RGB;
 } else {
 ret = AVERROR_BUG;
 goto fail;
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 05/11] avutil: move half-precision float helper to avutil

---
 libavcodec/exr.c   | 2 +-
 libavcodec/exrenc.c| 2 +-
 libavcodec/pnmdec.c| 3 ++-
 libavcodec/pnmenc.c| 2 +-
 {libavcodec => libavutil}/float2half.h | 6 +++---
 {libavcodec => libavutil}/half2float.h | 6 +++---
 6 files changed, 11 insertions(+), 10 deletions(-)
 rename {libavcodec => libavutil}/float2half.h (96%)
 rename {libavcodec => libavutil}/half2float.h (96%)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 3a6b9c3014..5c6ca9adbf 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -41,6 +41,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
 #include "libavutil/color_utils.h"
+#include "libavutil/half2float.h"
 
 #include "avcodec.h"
 #include "bytestream.h"
@@ -53,7 +54,6 @@
 #include "exrdsp.h"
 #include "get_bits.h"
 #include "internal.h"
-#include "half2float.h"
 #include "mathops.h"
 #include "thread.h"
 
diff --git a/libavcodec/exrenc.c b/libavcodec/exrenc.c
index 8cf7827bb6..56c084d483 100644
--- a/libavcodec/exrenc.c
+++ b/libavcodec/exrenc.c
@@ -31,11 +31,11 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/float2half.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "codec_internal.h"
 #include "encode.h"
-#include "float2half.h"
 
 enum ExrCompr {
 EXR_RAW,
diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c
index 130407df25..9383dc8e60 100644
--- a/libavcodec/pnmdec.c
+++ b/libavcodec/pnmdec.c
@@ -21,12 +21,13 @@
 
 #include "config_components.h"
 
+#include "libavutil/half2float.h"
+
 #include "avcodec.h"
 #include "codec_internal.h"
 #include "internal.h"
 #include "put_bits.h"
 #include "pnm.h"
-#include "half2float.h"
 
 static void samplecpy(uint8_t *dst, const uint8_t *src, int n, int maxval)
 {
diff --git a/libavcodec/pnmenc.c b/libavcodec/pnmenc.c
index b16c93c88f..7ce534d06e 100644
--- a/libavcodec/pnmenc.c
+++ b/libavcodec/pnmenc.c
@@ -24,10 +24,10 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/float2half.h"
 #include "avcodec.h"
 #include "codec_internal.h"
 #include "encode.h"
-#include "float2half.h"
 
 typedef struct PHMEncContext {
 uint16_t basetable[512];
diff --git a/libavcodec/float2half.h b/libavutil/float2half.h
similarity index 96%
rename from libavcodec/float2half.h
rename to libavutil/float2half.h
index e05125088c..d6aaab8278 100644
--- a/libavcodec/float2half.h
+++ b/libavutil/float2half.h
@@ -16,8 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_FLOAT2HALF_H
-#define AVCODEC_FLOAT2HALF_H
+#ifndef AVUTIL_FLOAT2HALF_H
+#define AVUTIL_FLOAT2HALF_H
 
 #include <stdint.h>
 
@@ -64,4 +64,4 @@ static uint16_t float2half(uint32_t f, uint16_t *basetable, 
uint8_t *shifttable)
 return h;
 }
 
-#endif /* AVCODEC_FLOAT2HALF_H */
+#endif /* AVUTIL_FLOAT2HALF_H */
diff --git a/libavcodec/half2float.h b/libavutil/half2float.h
similarity index 96%
rename from libavcodec/half2float.h
rename to libavutil/half2float.h
index 7df6747e50..1f6deade07 100644
--- a/libavcodec/half2float.h
+++ b/libavutil/half2float.h
@@ -16,8 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_HALF2FLOAT_H
-#define AVCODEC_HALF2FLOAT_H
+#ifndef AVUTIL_HALF2FLOAT_H
+#define AVUTIL_HALF2FLOAT_H
 
 #include <stdint.h>
 
@@ -71,4 +71,4 @@ static uint32_t half2float(uint16_t h, const uint32_t 
*mantissatable, const uint
 return f;
 }
 
-#endif /* AVCODEC_HALF2FLOAT_H */
+#endif /* AVUTIL_HALF2FLOAT_H */
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 04/11] avfilter/vsrc_ddagrab: add options for more control over output format fallback

---
 libavfilter/vsrc_ddagrab.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vsrc_ddagrab.c b/libavfilter/vsrc_ddagrab.c
index 252505b96d..00c72187ea 100644
--- a/libavfilter/vsrc_ddagrab.c
+++ b/libavfilter/vsrc_ddagrab.c
@@ -98,6 +98,8 @@ typedef struct DdagrabContext {
 int offset_x;
 int offset_y;
 int out_fmt;
+int allow_fallback;
+int force_fmt;
 } DdagrabContext;
 
 #define OFFSET(x) offsetof(DdagrabContext, x)
@@ -117,6 +119,10 @@ static const AVOption ddagrab_options[] = {
 { "x2bgr10","only output 10 Bit X2BGR10",0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_R10G10B10A2_UNORM }, 0, INT_MAX, 
FLAGS, "output_fmt" },
 { "16bit",  "only output default 16 Bit format", 0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_R16G16B16A16_FLOAT },0, INT_MAX, 
FLAGS, "output_fmt" },
 { "rgbaf16","only output 16 Bit RGBAF16",0,
AV_OPT_TYPE_CONST,  { .i64 = DXGI_FORMAT_R16G16B16A16_FLOAT },0, INT_MAX, 
FLAGS, "output_fmt" },
+{ "allow_fallback", "don't error on fallback to default 8 Bit format",
+   OFFSET(allow_fallback), 
AV_OPT_TYPE_BOOL,   { .i64 = 0},   0,   1, FLAGS },
+{ "force_fmt",  "exclude BGRA from format list (experimental, discouraged 
by Microsoft)",
+   OFFSET(force_fmt),  
AV_OPT_TYPE_BOOL,   { .i64 = 0},   0,   1, FLAGS },
 { NULL }
 };
 
@@ -226,7 +232,7 @@ static av_cold int init_dxgi_dda(AVFilterContext *avctx)
 } else if (dda->out_fmt) {
 formats[0] = dda->out_fmt;
 formats[1] = DXGI_FORMAT_B8G8R8A8_UNORM;
-nb_formats = 2;
+nb_formats = dda->force_fmt ? 1 : 2;
 }
 
 IDXGIOutput_Release(dxgi_output);
@@ -262,7 +268,7 @@ static av_cold int init_dxgi_dda(AVFilterContext *avctx)
 #else
 {
 #endif
-if (dda->out_fmt && dda->out_fmt != DXGI_FORMAT_B8G8R8A8_UNORM) {
+if (dda->out_fmt && dda->out_fmt != DXGI_FORMAT_B8G8R8A8_UNORM && 
(!dda->allow_fallback || dda->force_fmt)) {
 av_log(avctx, AV_LOG_ERROR, "Only 8 bit output supported with 
legacy API\n");
 return AVERROR(ENOTSUP);
 }
@@ -733,7 +739,7 @@ static int ddagrab_config_props(AVFilterLink *outlink)
 if (ret < 0)
 return ret;
 
-if (dda->out_fmt && dda->raw_format != dda->out_fmt) {
+if (dda->out_fmt && dda->raw_format != dda->out_fmt && 
(!dda->allow_fallback || dda->force_fmt)) {
 av_log(avctx, AV_LOG_ERROR, "Requested output format unavailable.\n");
 return AVERROR(ENOTSUP);
 }
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available

_Float16 support has been available on arm/aarch64 for a while, and with gcc
12 it was enabled on x86 as long as SSE2 is supported.

If the target arch supports f16c, gcc emits fairly efficient assembly,
taking advantage of it. This is the case on x86-64-v3 or higher.
Without f16c, it emulates it in software using sse2 instructions.
---
 configure  |  4 
 libavutil/float2half.c |  2 ++
 libavutil/float2half.h | 16 
 libavutil/half2float.c |  4 
 libavutil/half2float.h | 16 
 5 files changed, 42 insertions(+)

diff --git a/configure b/configure
index 6761d0cb32..2536ae012d 100755
--- a/configure
+++ b/configure
@@ -2143,6 +2143,7 @@ ARCH_FEATURES="
 fast_64bit
 fast_clz
 fast_cmov
+float16
 local_aligned
 simd_align_16
 simd_align_32
@@ -5125,6 +5126,8 @@ elif enabled arm; then
 ;;
 esac
 
+test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee
+
 elif enabled avr32; then
 
 case $cpu in
@@ -6228,6 +6231,7 @@ check_builtin MemoryBarrier windows.h "MemoryBarrier()"
 check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; 
__sync_val_compare_and_swap(ptr, oldval, newval)"
 check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)"
 check_builtin localtime_r time.h "time_t *time; struct tm *tm; 
localtime_r(time, tm)"
+check_builtin float16 "" "_Float16 f16var"
 
 case "$custom_allocator" in
 jemalloc)
diff --git a/libavutil/float2half.c b/libavutil/float2half.c
index dba14cef5d..1390d3acc0 100644
--- a/libavutil/float2half.c
+++ b/libavutil/float2half.c
@@ -20,6 +20,7 @@
 
 void ff_init_float2half_tables(float2half_tables *t)
 {
+#if !HAVE_FLOAT16
 for (int i = 0; i < 256; i++) {
 int e = i - 127;
 
@@ -50,4 +51,5 @@ void ff_init_float2half_tables(float2half_tables *t)
 t->shifttable[i|0x100] = 13;
 }
 }
+#endif
 }
diff --git a/libavutil/float2half.h b/libavutil/float2half.h
index b8c9cdfc4f..8c1fb804b7 100644
--- a/libavutil/float2half.h
+++ b/libavutil/float2half.h
@@ -20,21 +20,37 @@
 #define AVUTIL_FLOAT2HALF_H
 
 #include <stdint.h>
+#include "intfloat.h"
+
+#include "config.h"
 
 typedef struct float2half_tables {
+#if HAVE_FLOAT16
+uint8_t dummy;
+#else
 uint16_t basetable[512];
 uint8_t shifttable[512];
+#endif
 } float2half_tables;
 
 void ff_init_float2half_tables(float2half_tables *t);
 
 static inline uint16_t float2half(uint32_t f, const float2half_tables *t)
 {
+#if HAVE_FLOAT16
+union {
+_Float16 f;
+uint16_t i;
+} u;
+u.f = av_int2float(f);
+return u.i;
+#else
 uint16_t h;
 
 h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> 
t->shifttable[(f >> 23) & 0x1ff]);
 
 return h;
+#endif
 }
 
 #endif /* AVUTIL_FLOAT2HALF_H */
diff --git a/libavutil/half2float.c b/libavutil/half2float.c
index baac8e4093..873226d3a0 100644
--- a/libavutil/half2float.c
+++ b/libavutil/half2float.c
@@ -18,6 +18,7 @@
 
 #include "libavutil/half2float.h"
 
+#if !HAVE_FLOAT16
 static uint32_t convertmantissa(uint32_t i)
 {
 int32_t m = i << 13; // Zero pad mantissa bits
@@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i)
 
 return m | e; // Return combined number
 }
+#endif
 
 void ff_init_half2float_tables(half2float_tables *t)
 {
+#if !HAVE_FLOAT16
 t->mantissatable[0] = 0;
 for (int i = 1; i < 1024; i++)
 t->mantissatable[i] = convertmantissa(i);
@@ -60,4 +63,5 @@ void ff_init_half2float_tables(half2float_tables *t)
 t->offsettable[31] = 2048;
 t->offsettable[32] = 0;
 t->offsettable[63] = 2048;
+#endif
 }
diff --git a/libavutil/half2float.h b/libavutil/half2float.h
index cb58e44a1c..b2a7c934a6 100644
--- a/libavutil/half2float.h
+++ b/libavutil/half2float.h
@@ -20,22 +20,38 @@
 #define AVUTIL_HALF2FLOAT_H
 
 #include <stdint.h>
+#include "intfloat.h"
+
+#include "config.h"
 
 typedef struct half2float_tables {
+#if HAVE_FLOAT16
+uint8_t dummy;
+#else
 uint32_t mantissatable[3072];
 uint32_t exponenttable[64];
 uint16_t offsettable[64];
+#endif
 } half2float_tables;
 
 void ff_init_half2float_tables(half2float_tables *t);
 
 static inline uint32_t half2float(uint16_t h, const half2float_tables *t)
 {
+#if HAVE_FLOAT16
+union {
+_Float16 f;
+uint16_t i;
+} u;
+u.i = h;
+return av_float2int(u.f);
+#else
 uint32_t f;
 
 f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + 
t->exponenttable[h >> 10];
 
 return f;
+#endif
 }
 
 #endif /* AVUTIL_HALF2FLOAT_H */
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 11/11] swscale/input: add rgbaf16 input support

This is by no means perfect, since at least ddagrab will return scRGB
data with values outside of 0.0f to 1.0f for HDR values.
Its primary purpose is to be able to work with the format at all.
---
 libavutil/Makefile|   1 +
 libswscale/half2float.c   |  19 +
 libswscale/input.c| 130 ++
 libswscale/slice.c|   9 ++-
 libswscale/swscale_internal.h |  10 +++
 libswscale/utils.c|   2 +
 libswscale/version.h  |   2 +-
 7 files changed, 171 insertions(+), 2 deletions(-)
 create mode 100644 libswscale/half2float.c

diff --git a/libavutil/Makefile b/libavutil/Makefile
index 3d9c07aea8..1aac1a4cc0 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -131,6 +131,7 @@ OBJS = adler32.o
\
float_dsp.o  \
fixed_dsp.o  \
frame.o  \
+   half2float.o \
hash.o   \
hdr_dynamic_metadata.o   \
hdr_dynamic_vivid_metadata.o \
diff --git a/libswscale/half2float.c b/libswscale/half2float.c
new file mode 100644
index 00..1b023f96a5
--- /dev/null
+++ b/libswscale/half2float.c
@@ -0,0 +1,19 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/half2float.c"
diff --git a/libswscale/input.c b/libswscale/input.c
index 36ef1e43ac..818b57d2c3 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1124,6 +1124,112 @@ static void grayf32##endian_name##ToY16_c(uint8_t *dst, 
const uint8_t *src,
 rgbf32_planar_funcs_endian(le, 0)
 rgbf32_planar_funcs_endian(be, 1)
 
+#define rdpx(src) av_int2float(half2float(is_be ? AV_RB16(&src) : 
AV_RL16(&src), h2f_tbl))
+
+static av_always_inline void rgbaf16ToUV_half_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+ const uint16_t *src, int 
width,
+ int32_t *rgb2yuv, 
half2float_tables *h2f_tbl)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+0]), 0.0f, 65535.0f)) 
+
+ lrintf(av_clipf(65535.0f * rdpx(src[i*8+4]), 0.0f, 
65535.0f))) >> 1;
+int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+1]), 0.0f, 65535.0f)) 
+
+ lrintf(av_clipf(65535.0f * rdpx(src[i*8+5]), 0.0f, 
65535.0f))) >> 1;
+int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+2]), 0.0f, 65535.0f)) 
+
+ lrintf(av_clipf(65535.0f * rdpx(src[i*8+6]), 0.0f, 
65535.0f))) >> 1;
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf16ToUV_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+const uint16_t *src, int width,
+int32_t *rgb2yuv, 
half2float_tables *h2f_tbl)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*4+0]), 0.0f, 65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*4+1]), 0.0f, 65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*4+2]), 0.0f, 65535.0f));
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf16ToY_endian(uint16_t *dst, const uint16_t 
*src, int is_be,
+

[FFmpeg-devel] [PATCH 08/11] avutil/half2float: move non-inline init code out of header

---
 libavcodec/Makefile |  8 +++---
 libavcodec/exr.c|  2 +-
 libavcodec/exrenc.c |  2 +-
 libavcodec/float2half.c | 19 +
 libavcodec/half2float.c | 19 +
 libavcodec/pnmdec.c |  2 +-
 libavcodec/pnmenc.c |  2 +-
 libavutil/float2half.c  | 53 ++
 libavutil/float2half.h  | 36 ++-
 libavutil/half2float.c  | 63 +
 libavutil/half2float.h  | 46 ++
 11 files changed, 166 insertions(+), 86 deletions(-)
 create mode 100644 libavcodec/float2half.c
 create mode 100644 libavcodec/half2float.c
 create mode 100644 libavutil/float2half.c
 create mode 100644 libavutil/half2float.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 029f1bad3d..cb80f73d99 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -337,8 +337,8 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER)+= 8svx.o
 OBJS-$(CONFIG_ESCAPE124_DECODER)   += escape124.o
 OBJS-$(CONFIG_ESCAPE130_DECODER)   += escape130.o
 OBJS-$(CONFIG_EVRC_DECODER)+= evrcdec.o acelp_vectors.o lsp.o
-OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o
-OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o
+OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o half2float.o
+OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o
 OBJS-$(CONFIG_FASTAUDIO_DECODER)   += fastaudio.o
 OBJS-$(CONFIG_FFV1_DECODER)+= ffv1dec.o ffv1.o
 OBJS-$(CONFIG_FFV1_ENCODER)+= ffv1enc.o ffv1.o
@@ -570,8 +570,8 @@ OBJS-$(CONFIG_PGMYUV_DECODER)  += pnmdec.o pnm.o
 OBJS-$(CONFIG_PGMYUV_ENCODER)  += pnmenc.o
 OBJS-$(CONFIG_PGSSUB_DECODER)  += pgssubdec.o
 OBJS-$(CONFIG_PGX_DECODER) += pgxdec.o
-OBJS-$(CONFIG_PHM_DECODER) += pnmdec.o pnm.o
-OBJS-$(CONFIG_PHM_ENCODER) += pnmenc.o
+OBJS-$(CONFIG_PHM_DECODER) += pnmdec.o pnm.o half2float.o
+OBJS-$(CONFIG_PHM_ENCODER) += pnmenc.o float2half.o
 OBJS-$(CONFIG_PHOTOCD_DECODER) += photocd.o
 OBJS-$(CONFIG_PICTOR_DECODER)  += pictordec.o cga_data.o
 OBJS-$(CONFIG_PIXLET_DECODER)  += pixlet.o
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 825354873d..a3582bfdd6 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -2208,7 +2208,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 float one_gamma = 1.0f / s->gamma;
 avpriv_trc_function trc_func = NULL;
 
-init_half2float_tables(&s->h2f_tables);
+ff_init_half2float_tables(&s->h2f_tables);
 
 s->avctx  = avctx;
 
diff --git a/libavcodec/exrenc.c b/libavcodec/exrenc.c
index 6ab9400b7c..77b1ce052b 100644
--- a/libavcodec/exrenc.c
+++ b/libavcodec/exrenc.c
@@ -94,7 +94,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
 {
 EXRContext *s = avctx->priv_data;
 
-init_float2half_tables(&s->f2h_tables);
+ff_init_float2half_tables(&s->f2h_tables);
 
 switch (avctx->pix_fmt) {
 case AV_PIX_FMT_GBRPF32:
diff --git a/libavcodec/float2half.c b/libavcodec/float2half.c
new file mode 100644
index 00..90a6f63fac
--- /dev/null
+++ b/libavcodec/float2half.c
@@ -0,0 +1,19 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/float2half.c"
diff --git a/libavcodec/half2float.c b/libavcodec/half2float.c
new file mode 100644
index 00..1b023f96a5
--- /dev/null
+++ b/libavcodec/half2float.c
@@ -0,0 +1,19 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+

[FFmpeg-devel] [PATCH 10/11] swscale: add SwsContext parameter to input functions

---
 libswscale/hscale.c   |  12 +--
 libswscale/input.c| 149 ++
 libswscale/swscale_internal.h |  17 ++--
 libswscale/x86/swscale.c  |  13 +--
 4 files changed, 106 insertions(+), 85 deletions(-)

diff --git a/libswscale/hscale.c b/libswscale/hscale.c
index eca0635338..6789ce7540 100644
--- a/libswscale/hscale.c
+++ b/libswscale/hscale.c
@@ -105,18 +105,18 @@ static int lum_convert(SwsContext *c, SwsFilterDescriptor 
*desc, int sliceY, int
 uint8_t * dst = desc->dst->plane[0].line[i];
 
 if (c->lumToYV12) {
-c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal);
+c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal, 
c->input_opaque);
 } else if (c->readLumPlanar) {
-c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table);
+c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table, 
c->input_opaque);
 }
 
 
 if (desc->alpha) {
 dst = desc->dst->plane[3].line[i];
 if (c->alpToYV12) {
-c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal);
+c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal, 
c->input_opaque);
 } else if (c->readAlpPlanar) {
-c->readAlpPlanar(dst, src, srcW, NULL);
+c->readAlpPlanar(dst, src, srcW, NULL, c->input_opaque);
 }
 }
 }
@@ -224,9 +224,9 @@ static int chr_convert(SwsContext *c, SwsFilterDescriptor 
*desc, int sliceY, int
 uint8_t * dst1 = desc->dst->plane[1].line[i];
 uint8_t * dst2 = desc->dst->plane[2].line[i];
 if (c->chrToYV12) {
-c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal);
+c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal, 
c->input_opaque);
 } else if (c->readChrPlanar) {
-c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table);
+c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table, 
c->input_opaque);
 }
 }
 return sliceH;
diff --git a/libswscale/input.c b/libswscale/input.c
index 68abc4d62c..36ef1e43ac 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -88,7 +88,7 @@ rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
 
 #define rgb64funcs(pattern, BE_LE, origin) \
 static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused0, const uint8_t *unused1,\
-int width, uint32_t *rgb2yuv) \
+int width, uint32_t *rgb2yuv, void *opq) \
 { \
 const uint16_t *src = (const uint16_t *) _src; \
 uint16_t *dst = (uint16_t *) _dst; \
@@ -97,7 +97,7 @@ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, 
const uint8_t *_src,
  \
 static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
 const uint8_t *unused0, const uint8_t 
*_src1, const uint8_t *_src2, \
-int width, uint32_t *rgb2yuv) \
+int width, uint32_t *rgb2yuv, void *opq) \
 { \
 const uint16_t *src1 = (const uint16_t *) _src1, \
*src2 = (const uint16_t *) _src2; \
@@ -107,7 +107,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t 
*_dstU, uint8_t *_dstV, \
  \
 static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t 
*_dstV, \
 const uint8_t *unused0, const uint8_t 
*_src1, const uint8_t *_src2, \
-int width, uint32_t *rgb2yuv) \
+int width, uint32_t *rgb2yuv, void *opq) \
 { \
 const uint16_t *src1 = (const uint16_t *) _src1, \
*src2 = (const uint16_t *) _src2; \
@@ -192,7 +192,8 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst,  
\
 const uint8_t *_src,\
 const uint8_t *unused0, const 
uint8_t *unused1,\
 int width,  \
-uint32_t *rgb2yuv)  \
+uint32_t *rgb2yuv,  \
+void *opq)  \
 {   \
 const uint16_t *src = (const uint16_t *)_src;   \
 uint16_t *dst   = (uint16_t *)_dst; \
@@ -205,7 +206,8 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t 
*_dstU,\
  const uint8_t *_src1,  \
  const uint8_t *_src2,  \
  int width, \
-

[FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN

IEEE-754 distinguishes two different kinds of NaNs:
quiet and signaling ones. They are differentiated by the MSB of the
mantissa.

For whatever reason, actual hardware conversion of half to single always
sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
So our code has to follow suit or fate-testing hardware float16 will be
impossible.
---
 libavcodec/exr.c| 2 +-
 libavcodec/pnm.h| 2 +-
 libavutil/half2float.h  | 5 +
 tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x | 2 +-
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 5c6ca9adbf..47f4786491 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -191,7 +191,7 @@ typedef struct EXRContext {
 float gamma;
 union av_intfloat32 gamma_table[65536];
 
-uint32_t mantissatable[2048];
+uint32_t mantissatable[3072];
 uint32_t exponenttable[64];
 uint16_t offsettable[64];
 } EXRContext;
diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h
index 5bf2eaa4d9..7e5445f529 100644
--- a/libavcodec/pnm.h
+++ b/libavcodec/pnm.h
@@ -34,7 +34,7 @@ typedef struct PNMContext {
 int half;
 float scale;
 
-uint32_t mantissatable[2048];
+uint32_t mantissatable[3072];
 uint32_t exponenttable[64];
 uint16_t offsettable[64];
 } PNMContext;
diff --git a/libavutil/half2float.h b/libavutil/half2float.h
index 1f6deade07..5af4690cfe 100644
--- a/libavutil/half2float.h
+++ b/libavutil/half2float.h
@@ -45,6 +45,9 @@ static void half2float_table(uint32_t *mantissatable, 
uint32_t *exponenttable,
 mantissatable[i] = convertmantissa(i);
 for (int i = 1024; i < 2048; i++)
 mantissatable[i] = 0x3800UL + ((i - 1024) << 13UL);
+for (int i = 2048; i < 3072; i++)
+mantissatable[i] = mantissatable[i - 1024] | 0x40UL;
+mantissatable[2048] = mantissatable[1024];
 
 exponenttable[0] = 0;
 for (int i = 1; i < 31; i++)
@@ -58,7 +61,9 @@ static void half2float_table(uint32_t *mantissatable, 
uint32_t *exponenttable,
 offsettable[0] = 0;
 for (int i = 1; i < 64; i++)
 offsettable[i] = 1024;
+offsettable[31] = 2048;
 offsettable[32] = 0;
+offsettable[63] = 2048;
 }
 
 static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const 
uint32_t *exponenttable,
diff --git a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x 
b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
index b6201116fe..e45a40b498 100644
--- a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
+++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 256x256
 #sar 0: 1/1
-0,  0,  0,1,   786432, 0x1445e411
+0,  0,  0,1,   786432, 0xce9be2be
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 07/11] avutil/half2float: move tables to header-internal structs

Having to put the knowledge of the size of those arrays into a multitude
of places is rather smelly.
---
 libavcodec/exr.c   | 27 --
 libavcodec/exrenc.c| 11 +
 libavcodec/pnm.h   |  5 ++---
 libavcodec/pnmdec.c| 42 --
 libavcodec/pnmenc.c| 13 +--
 libavutil/float2half.h | 51 +++---
 libavutil/half2float.h | 46 -
 7 files changed, 84 insertions(+), 111 deletions(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 47f4786491..825354873d 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -191,9 +191,7 @@ typedef struct EXRContext {
 float gamma;
 union av_intfloat32 gamma_table[65536];
 
-uint32_t mantissatable[3072];
-uint32_t exponenttable[64];
-uint16_t offsettable[64];
+half2float_tables h2f_tables;
 } EXRContext;
 
 static int zip_uncompress(const EXRContext *s, const uint8_t *src, int 
compressed_size,
@@ -899,10 +897,7 @@ static int ac_uncompress(const EXRContext *s, 
GetByteContext *gb, float *block)
 n += val & 0xff;
 } else {
 ret = n;
-block[ff_zigzag_direct[n]] = av_int2float(half2float(val,
-  s->mantissatable,
-  s->exponenttable,
-  s->offsettable));
+block[ff_zigzag_direct[n]] = av_int2float(half2float(val, 
&s->h2f_tables));
 n++;
 }
 }
@@ -1120,8 +1115,7 @@ static int dwa_uncompress(const EXRContext *s, const 
uint8_t *src, int compresse
 uint16_t *dc = (uint16_t *)td->dc_data;
 union av_intfloat32 dc_val;
 
-dc_val.i = half2float(dc[idx], s->mantissatable,
-  s->exponenttable, s->offsettable);
+dc_val.i = half2float(dc[idx], &s->h2f_tables);
 
 block[0] = dc_val.f;
 ac_uncompress(s, &agb, block);
@@ -1171,7 +1165,7 @@ static int dwa_uncompress(const EXRContext *s, const 
uint8_t *src, int compresse
 for (int x = 0; x < td->xsize; x++) {
 uint16_t ha = ai0[x] | (ai1[x] << 8);
 
-ao[x] = half2float(ha, s->mantissatable, s->exponenttable, 
s->offsettable);
+ao[x] = half2float(ha, &s->h2f_tables);
 }
 }
 
@@ -1427,10 +1421,7 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 }
 } else {
 for (x = 0; x < xsize; x++) {
-ptr_x[0].i = half2float(bytestream_get_le16(&src),
-s->mantissatable,
-s->exponenttable,
-s->offsettable);
+ptr_x[0].i = half2float(bytestream_get_le16(&src), 
&s->h2f_tables);
 ptr_x++;
 }
 }
@@ -2217,7 +2208,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 float one_gamma = 1.0f / s->gamma;
 avpriv_trc_function trc_func = NULL;
 
-half2float_table(s->mantissatable, s->exponenttable, s->offsettable);
+init_half2float_tables(&s->h2f_tables);
 
 s->avctx  = avctx;
 
@@ -2230,18 +2221,18 @@ static av_cold int decode_init(AVCodecContext *avctx)
 trc_func = avpriv_get_trc_function_from_trc(s->apply_trc_type);
 if (trc_func) {
 for (i = 0; i < 65536; ++i) {
-t.i = half2float(i, s->mantissatable, s->exponenttable, 
s->offsettable);
+t.i = half2float(i, &s->h2f_tables);
 t.f = trc_func(t.f);
 s->gamma_table[i] = t;
 }
 } else {
 if (one_gamma > 0.f && one_gamma < 1.0001f) {
 for (i = 0; i < 65536; ++i) {
-s->gamma_table[i].i = half2float(i, s->mantissatable, 
s->exponenttable, s->offsettable);
+s->gamma_table[i].i = half2float(i, &s->h2f_tables);
 }
 } else {
 for (i = 0; i < 65536; ++i) {
-t.i = half2float(i, s->mantissatable, s->exponenttable, 
s->offsettable);
+t.i = half2float(i, &s->h2f_tables);
 /* If negative value we reuse half value */
 if (t.f <= 0.0f) {
 s->gamma_table[i] = t;
diff --git a/libavcodec/exrenc.c b/libavcodec/exrenc.c
index 56c084d483..6ab9400b7c 100644
--- a/libavcodec/exrenc.c
+++ b/libavcodec/exrenc.c
@@ -87,15 +87,14 @@ typedef struct EXRContext {
 
 EXRScanlineData *scanline;
 
-uint16_t basetable[512];
-uint8_t shifttable[512];
+float2half_tables f2h_tables;
 } EXRContext;
 
 static av_cold int encode_init(AVCodecContext *avctx)
 {
 EXRContext *s = avctx->priv_data;
 
-float2half_t

Re: [FFmpeg-devel] [PATCH 10/11] swscale: add SwsContext parameter to input functions


Forgot to update the commit message.
It no longer adds the SwsContext, but an opaque pointer which is easier 
to deal with from assembly, should any future code have a use for it.


Fixed locally
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available

Timo Rothenpieler:
> _Float16 support was available on arm/aarch64 for a while, and with gcc
> 12 was enabled on x86 as long as SSE2 is supported.
> 
> If the target arch supports f16c, gcc emits fairly efficient assembly,
> taking advantage of it. This is the case on x86-64-v3 or higher.
> Without f16c, it emulates it in software using sse2 instructions.

How is the performance of this emulation compared to our current code?
And how is the native _Float16 performance compared to the current code?

> ---
>  configure  |  4 
>  libavutil/float2half.c |  2 ++
>  libavutil/float2half.h | 16 
>  libavutil/half2float.c |  4 
>  libavutil/half2float.h | 16 
>  5 files changed, 42 insertions(+)
> 
> diff --git a/configure b/configure
> index 6761d0cb32..2536ae012d 100755
> --- a/configure
> +++ b/configure
> @@ -2143,6 +2143,7 @@ ARCH_FEATURES="
>  fast_64bit
>  fast_clz
>  fast_cmov
> +float16
>  local_aligned
>  simd_align_16
>  simd_align_32
> @@ -5125,6 +5126,8 @@ elif enabled arm; then
>  ;;
>  esac
>  
> +test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee
> +
>  elif enabled avr32; then
>  
>  case $cpu in
> @@ -6228,6 +6231,7 @@ check_builtin MemoryBarrier windows.h "MemoryBarrier()"
>  check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; 
> __sync_val_compare_and_swap(ptr, oldval, newval)"
>  check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, 
> tm)"
>  check_builtin localtime_r time.h "time_t *time; struct tm *tm; 
> localtime_r(time, tm)"
> +check_builtin float16 "" "_Float16 f16var"
>  
>  case "$custom_allocator" in
>  jemalloc)
> diff --git a/libavutil/float2half.c b/libavutil/float2half.c
> index dba14cef5d..1390d3acc0 100644
> --- a/libavutil/float2half.c
> +++ b/libavutil/float2half.c
> @@ -20,6 +20,7 @@
>  
>  void ff_init_float2half_tables(float2half_tables *t)
>  {
> +#if !HAVE_FLOAT16
>  for (int i = 0; i < 256; i++) {
>  int e = i - 127;
>  
> @@ -50,4 +51,5 @@ void ff_init_float2half_tables(float2half_tables *t)
>  t->shifttable[i|0x100] = 13;
>  }
>  }
> +#endif
>  }
> diff --git a/libavutil/float2half.h b/libavutil/float2half.h
> index b8c9cdfc4f..8c1fb804b7 100644
> --- a/libavutil/float2half.h
> +++ b/libavutil/float2half.h
> @@ -20,21 +20,37 @@
>  #define AVUTIL_FLOAT2HALF_H
>  
>  #include 
> +#include "intfloat.h"
> +
> +#include "config.h"
>  
>  typedef struct float2half_tables {
> +#if HAVE_FLOAT16
> +uint8_t dummy;
> +#else
>  uint16_t basetable[512];
>  uint8_t shifttable[512];
> +#endif
>  } float2half_tables;
>  
>  void ff_init_float2half_tables(float2half_tables *t);
>  
>  static inline uint16_t float2half(uint32_t f, const float2half_tables *t)
>  {
> +#if HAVE_FLOAT16
> +union {
> +_Float16 f;
> +uint16_t i;
> +} u;
> +u.f = av_int2float(f);
> +return u.i;
> +#else
>  uint16_t h;
>  
>  h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007f) >> 
> t->shifttable[(f >> 23) & 0x1ff]);
>  
>  return h;
> +#endif
>  }
>  
>  #endif /* AVUTIL_FLOAT2HALF_H */
> diff --git a/libavutil/half2float.c b/libavutil/half2float.c
> index baac8e4093..873226d3a0 100644
> --- a/libavutil/half2float.c
> +++ b/libavutil/half2float.c
> @@ -18,6 +18,7 @@
>  
>  #include "libavutil/half2float.h"
>  
> +#if !HAVE_FLOAT16
>  static uint32_t convertmantissa(uint32_t i)
>  {
>  int32_t m = i << 13; // Zero pad mantissa bits
> @@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i)
>  
>  return m | e; // Return combined number
>  }
> +#endif
>  
>  void ff_init_half2float_tables(half2float_tables *t)
>  {
> +#if !HAVE_FLOAT16
>  t->mantissatable[0] = 0;
>  for (int i = 1; i < 1024; i++)
>  t->mantissatable[i] = convertmantissa(i);
> @@ -60,4 +63,5 @@ void ff_init_half2float_tables(half2float_tables *t)
>  t->offsettable[31] = 2048;
>  t->offsettable[32] = 0;
>  t->offsettable[63] = 2048;
> +#endif
>  }
> diff --git a/libavutil/half2float.h b/libavutil/half2float.h
> index cb58e44a1c..b2a7c934a6 100644
> --- a/libavutil/half2float.h
> +++ b/libavutil/half2float.h
> @@ -20,22 +20,38 @@
>  #define AVUTIL_HALF2FLOAT_H
>  
>  #include 
> +#include "intfloat.h"
> +
> +#include "config.h"
>  
>  typedef struct half2float_tables {
> +#if HAVE_FLOAT16
> +uint8_t dummy;
> +#else
>  uint32_t mantissatable[3072];
>  uint32_t exponenttable[64];
>  uint16_t offsettable[64];
> +#endif
>  } half2float_tables;
>  
>  void ff_init_half2float_tables(half2float_tables *t);
>  
>  static inline uint32_t half2float(uint16_t h, const half2float_tables *t)
>  {
> +#if HAVE_FLOAT16
> +union {
> +_Float16 f;
> +uint16_t i;
> +} u;
> +u.i = h;
> +return av_float2int(u.f);
> +#else
>  uint32_t f;
>  
>  f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + 
> t->exponen

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN

Timo Rothenpieler:
> IEEE-754 differentiates two different kind of NaNs.
> Quiet and Signaling ones. They are differentiated by the MSB of the
> mantissa.
> 
> For whatever reason, actual hardware conversion of half to single always
> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
> So our code has to follow suite or fate-testing hardware float16 will be
> impossible.

What does the exr spec say about quiet and signaling nans?

> ---
>  libavcodec/exr.c| 2 +-
>  libavcodec/pnm.h| 2 +-
>  libavutil/half2float.h  | 5 +
>  tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x | 2 +-
>  4 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/libavcodec/exr.c b/libavcodec/exr.c
> index 5c6ca9adbf..47f4786491 100644
> --- a/libavcodec/exr.c
> +++ b/libavcodec/exr.c
> @@ -191,7 +191,7 @@ typedef struct EXRContext {
>  float gamma;
>  union av_intfloat32 gamma_table[65536];
>  
> -uint32_t mantissatable[2048];
> +uint32_t mantissatable[3072];
>  uint32_t exponenttable[64];
>  uint16_t offsettable[64];
>  } EXRContext;
> diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h
> index 5bf2eaa4d9..7e5445f529 100644
> --- a/libavcodec/pnm.h
> +++ b/libavcodec/pnm.h
> @@ -34,7 +34,7 @@ typedef struct PNMContext {
>  int half;
>  float scale;
>  
> -uint32_t mantissatable[2048];
> +uint32_t mantissatable[3072];
>  uint32_t exponenttable[64];
>  uint16_t offsettable[64];
>  } PNMContext;
> diff --git a/libavutil/half2float.h b/libavutil/half2float.h
> index 1f6deade07..5af4690cfe 100644
> --- a/libavutil/half2float.h
> +++ b/libavutil/half2float.h
> @@ -45,6 +45,9 @@ static void half2float_table(uint32_t *mantissatable, 
> uint32_t *exponenttable,
>  mantissatable[i] = convertmantissa(i);
>  for (int i = 1024; i < 2048; i++)
>  mantissatable[i] = 0x3800UL + ((i - 1024) << 13UL);
> +for (int i = 2048; i < 3072; i++)
> +mantissatable[i] = mantissatable[i - 1024] | 0x40UL;
> +mantissatable[2048] = mantissatable[1024];
>  
>  exponenttable[0] = 0;
>  for (int i = 1; i < 31; i++)
> @@ -58,7 +61,9 @@ static void half2float_table(uint32_t *mantissatable, 
> uint32_t *exponenttable,
>  offsettable[0] = 0;
>  for (int i = 1; i < 64; i++)
>  offsettable[i] = 1024;
> +offsettable[31] = 2048;
>  offsettable[32] = 0;
> +offsettable[63] = 2048;
>  }
>  
>  static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const 
> uint32_t *exponenttable,
> diff --git a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x 
> b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
> index b6201116fe..e45a40b498 100644
> --- a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
> +++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
> @@ -3,4 +3,4 @@
>  #codec_id 0: rawvideo
>  #dimensions 0: 256x256
>  #sar 0: 1/1
> -0,  0,  0,1,   786432, 0x1445e411
> +0,  0,  0,1,   786432, 0xce9be2be

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN


On 10.08.2022 23:24, Andreas Rheinhardt wrote:

Timo Rothenpieler:

IEEE-754 differentiates two different kind of NaNs.
Quiet and Signaling ones. They are differentiated by the MSB of the
mantissa.

For whatever reason, actual hardware conversion of half to single always
sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
So our code has to follow suite or fate-testing hardware float16 will be
impossible.


What does the exr spec say about quiet and signaling nans?


Not sure how exr would be involved here.
But I tested this on both aarch64, x86 with sse2 emulation and x86 f16c 
on alderlake and zen2.
They all perfectly agree and match 100% what this changed code produces 
for the entire range of 65k possible values.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 11/11] swscale/input: add rgbaf16 input support


On 10.08.2022 22:47, Timo Rothenpieler wrote:
...

+#define rgbaf16_funcs_endian(endian_name, endian)  
   \
+static void rgbaf16##endian_name##ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, 
const uint8_t *unused,  \
+  const uint8_t *src1, const 
uint8_t *src2,   \
+  int width, uint32_t *_rgb2yuv, 
void *opq)   \
+{  
   \
+const uint16_t *src = (const uint16_t*)src1;   
   \
+uint16_t *dstU = (uint16_t*)_dstU; 
   \
+uint16_t *dstV = (uint16_t*)_dstV; 
   \
+int32_t *rgb2yuv = (int32_t*)_rgb2yuv; 
   \
+av_assert1(src1==src2);
   \
+rgbaf16ToUV_half_endian(dstU, dstV, endian, src, width, rgb2yuv, opq); 
   \
+}  
   \
+static void rgbaf16##endian_name##ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const 
uint8_t *unused,   \
+ const uint8_t *src1, const uint8_t 
*src2,\
+ int width, uint32_t *_rgb2yuv, void 
*opq)\
+{  
   \
+const uint16_t *src = (const uint16_t*)src1;   
   \
+uint16_t *dstU = (uint16_t*)_dstU; 
   \
+uint16_t *dstV = (uint16_t*)_dstV; 
   \
+int32_t *rgb2yuv = (int32_t*)_rgb2yuv; 
   \
+av_assert1(src1==src2);
   \
+rgbaf16ToUV_half_endian(dstU, dstV, endian, src, width, rgb2yuv, opq); 
   \
+}  
   \


copy/paste error here: This should be the non-half version. Fixed locally.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN

Timo Rothenpieler:
> On 10.08.2022 23:24, Andreas Rheinhardt wrote:
>> Timo Rothenpieler:
>>> IEEE-754 differentiates two different kind of NaNs.
>>> Quiet and Signaling ones. They are differentiated by the MSB of the
>>> mantissa.
>>>
>>> For whatever reason, actual hardware conversion of half to single always
>>> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
>>> So our code has to follow suite or fate-testing hardware float16 will be
>>> impossible.
>>
>> What does the exr spec say about quiet and signaling nans?
> 
> Not sure how exr would be involved here.

Your patch changes the output of an exr-test. The output of the exr
decoder is presumably determined by the exr spec. There is after all the
possibility that what hardware does in hardware and what this patch does
in software is incompatible with what exr specifies.

> But I tested this on both aarch64, x86 with sse2 emulation and x86 f16c
> on alderlake and zen2.
> They all perfectly agree and match 100% what this changed code produces
> for the entire range of 65k possible values.
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN

On 10.08.2022 23:43, Andreas Rheinhardt wrote:

Timo Rothenpieler:

On 10.08.2022 23:24, Andreas Rheinhardt wrote:

Timo Rothenpieler:

IEEE-754 differentiates two different kind of NaNs.
Quiet and Signaling ones. They are differentiated by the MSB of the
mantissa.

For whatever reason, actual hardware conversion of half to single always
sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
So our code has to follow suite or fate-testing hardware float16 will be
impossible.

What does the exr spec say about quiet and signaling nans?

Not sure how exr would be involved here.

Your patch changes the output of an exr-test. The output of the exr
decoder is presumably determined by the exr spec. There is after all the
possibility that what hardware does in hardware and what this patch does
in software is incompatible with what exr specifies.

The exr spec just says something along the lines of "analogous to
IEEE-754 floats":
https://openexr.readthedocs.io/en/latest/TechnicalIntroduction.html?highlight=ieee#the-half-data-type
It barely ever mentions NaNs, other than that they exist. Which makes
sense, given they don't typically appear in images.

The only output changed is that for how NaNs are converted.
And given the cross-validation with multiple hardware implementations,
I'm confident that it's correct.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 10/11] swscale: add SwsContext parameter to input functions

Timo Rothenpieler:
> ---
>  libswscale/hscale.c   |  12 +--
>  libswscale/input.c| 149 ++
>  libswscale/swscale_internal.h |  17 ++--
>  libswscale/x86/swscale.c  |  13 +--
>  4 files changed, 106 insertions(+), 85 deletions(-)
> 
> diff --git a/libswscale/hscale.c b/libswscale/hscale.c
> index eca0635338..6789ce7540 100644
> --- a/libswscale/hscale.c
> +++ b/libswscale/hscale.c
> @@ -105,18 +105,18 @@ static int lum_convert(SwsContext *c, 
> SwsFilterDescriptor *desc, int sliceY, int
>  uint8_t * dst = desc->dst->plane[0].line[i];
>  
>  if (c->lumToYV12) {
> -c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal);
> +c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal, 
> c->input_opaque);
>  } else if (c->readLumPlanar) {
> -c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table);
> +c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table, 
> c->input_opaque);
>  }
>  
>  
>  if (desc->alpha) {
>  dst = desc->dst->plane[3].line[i];
>  if (c->alpToYV12) {
> -c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal);
> +c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal, 
> c->input_opaque);
>  } else if (c->readAlpPlanar) {
> -c->readAlpPlanar(dst, src, srcW, NULL);
> +c->readAlpPlanar(dst, src, srcW, NULL, c->input_opaque);
>  }
>  }
>  }
> @@ -224,9 +224,9 @@ static int chr_convert(SwsContext *c, SwsFilterDescriptor 
> *desc, int sliceY, int
>  uint8_t * dst1 = desc->dst->plane[1].line[i];
>  uint8_t * dst2 = desc->dst->plane[2].line[i];
>  if (c->chrToYV12) {
> -c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal);
> +c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal, 
> c->input_opaque);
>  } else if (c->readChrPlanar) {
> -c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table);
> +c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table, 
> c->input_opaque);
>  }
>  }
>  return sliceH;
> diff --git a/libswscale/input.c b/libswscale/input.c
> index 68abc4d62c..36ef1e43ac 100644
> --- a/libswscale/input.c
> +++ b/libswscale/input.c
> @@ -88,7 +88,7 @@ rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
>  
>  #define rgb64funcs(pattern, BE_LE, origin) \
>  static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t 
> *_src, const uint8_t *unused0, const uint8_t *unused1,\
> -int width, uint32_t *rgb2yuv) \
> +int width, uint32_t *rgb2yuv, void *opq) 
> \
>  { \
>  const uint16_t *src = (const uint16_t *) _src; \
>  uint16_t *dst = (uint16_t *) _dst; \
> @@ -97,7 +97,7 @@ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, 
> const uint8_t *_src,
>   \
>  static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, 
> \
>  const uint8_t *unused0, const uint8_t 
> *_src1, const uint8_t *_src2, \
> -int width, uint32_t *rgb2yuv) \
> +int width, uint32_t *rgb2yuv, void *opq) 
> \
>  { \
>  const uint16_t *src1 = (const uint16_t *) _src1, \
> *src2 = (const uint16_t *) _src2; \
> @@ -107,7 +107,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t 
> *_dstU, uint8_t *_dstV, \
>   \
>  static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t 
> *_dstV, \
>  const uint8_t *unused0, const uint8_t 
> *_src1, const uint8_t *_src2, \
> -int width, uint32_t *rgb2yuv) \
> +int width, uint32_t *rgb2yuv, void *opq) 
> \
>  { \
>  const uint16_t *src1 = (const uint16_t *) _src1, \
> *src2 = (const uint16_t *) _src2; \
> @@ -192,7 +192,8 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t 
> *_dst,  \
>  const uint8_t *_src,\
>  const uint8_t *unused0, const 
> uint8_t *unused1,\
>  int width,  \
> -uint32_t *rgb2yuv)  \
> +uint32_t *rgb2yuv,  \
> +void *opq)  \
>  {   \
>  const uint16_t *src = (const uint16_t *)_src;   \
>  uint16_t *dst   = (uint16_t *)_dst; \
> @@ -205,7 +206,8 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t 
> *_dstU,\
>

Re: [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available


On 10.08.2022 23:03, Andreas Rheinhardt wrote:

Timo Rothenpieler:

_Float16 support was available on arm/aarch64 for a while, and with gcc
12 was enabled on x86 as long as SSE2 is supported.

If the target arch supports f16c, gcc emits fairly efficient assembly,
taking advantage of it. This is the case on x86-64-v3 or higher.
Without f16c, it emulates it in software using sse2 instructions.


How is the performance of this emulation compared to our current code?
And how is the native _Float16 performance compared to the current code?


The performance of the sse2 emulation is actually surprisingly poor, in 
a quick test:


./ffmpeg -s 512x512 -f rawvideo -pix_fmt rgbaf16 -i /dev/zero -vf 
format=yuv444p -f null -


_Float16 full SSE2 emulation:
frame=50074 fps=848 q=-0.0 size=N/A time=00:33:22.96 bitrate=N/A speed=33.9x

_Float16 f16c accelerated (Zen2, --cpu=znver2):
frame=50636 fps=1965 q=-0.0 Lsize=N/A time=00:33:45.40 bitrate=N/A 
speed=78.6x


classic half2float full software implementation:
frame=49926 fps=1605 q=-0.0 Lsize=N/A time=00:33:17.00 bitrate=N/A 
speed=64.2x


Unfortunately I don't see a good way to runtime-detect the presence of 
f16c without going full self-written assembly, which would diminish the 
compiler's ability to take advantage of f16c only ever operating on 4 or 
8 values at a time.
But the HAVE_FLOAT16 checks could be paired with a check for __F16C__, 
which seems to universally be the established define for "the code is 
being built with f16c optimizations".


That at least avoids the case of the apparently quite slow sse2 emulation.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 10/11] swscale: add SwsContext parameter to input functions


On 10.08.2022 23:55, Andreas Rheinhardt wrote:

Don't you need to update the assembly, too? (Do we support anything x86
where the callee has to clean up the stack?)


We concluded on IRC that that's not necessary.
The assembly is pretty much hard-coded to be cdecl, in which the caller 
cleans up the stack.


I tried adding the parameter there, and broke it in the process.
But fate still passes and the assembly isn't suddenly gonna change 
calling convention.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available

2022-08-10 Thread James Almer





On 8/10/2022 6:58 PM, Timo Rothenpieler wrote:

On 10.08.2022 23:03, Andreas Rheinhardt wrote:

Timo Rothenpieler:

_Float16 support was available on arm/aarch64 for a while, and with gcc
12 was enabled on x86 as long as SSE2 is supported.

If the target arch supports f16c, gcc emits fairly efficient assembly,
taking advantage of it. This is the case on x86-64-v3 or higher.
Without f16c, it emulates it in software using sse2 instructions.


How is the performance of this emulation compared to our current code?
And how is the native _Float16 performance compared to the current code?


The performance of the sse2 emulation is actually surprisingly poor, in 
a quick test:


./ffmpeg -s 512x512 -f rawvideo -pix_fmt rgbaf16 -i /dev/zero -vf 
format=yuv444p -f null -


_Float16 full SSE2 emulation:
frame=50074 fps=848 q=-0.0 size=N/A time=00:33:22.96 bitrate=N/A 
speed=33.9x


_Float16 f16c accelerated (Zen2, --cpu=znver2):
frame=50636 fps=1965 q=-0.0 Lsize=N/A time=00:33:45.40 bitrate=N/A 
speed=78.6x


classic half2float full software implementation:
frame=49926 fps=1605 q=-0.0 Lsize=N/A time=00:33:17.00 bitrate=N/A 
speed=64.2x


Unfortunately I don't see a good way to runtime-detect the presence of 
f16c without going full self-written assembly, which would diminish the 
compiler's ability to take advantage of f16c only ever operating on 4 or 
8 values at a time.
But the HAVE_FLOAT16 checks could be paired with a check for __F16C__, 
which seems to universally be the established define for "the code is 
being built with f16c optimizations".


That should do it, yes. We do check for __SSE__ and similar for some 
other lavu functions after all.




That at least avoids the case of the apparently quite slow sse2 emulation.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN

2022-08-10 Thread Mark Reid

On Wed, Aug 10, 2022 at 2:53 PM Timo Rothenpieler 
wrote:

> On 10.08.2022 23:43, Andreas Rheinhardt wrote:
> > Timo Rothenpieler:
> >> On 10.08.2022 23:24, Andreas Rheinhardt wrote:
> >>> Timo Rothenpieler:
>  IEEE-754 differentiates two different kinds of NaNs.
>  Quiet and Signaling ones. They are differentiated by the MSB of the
>  mantissa.
> 
>  For whatever reason, actual hardware conversion of half to single
> always
>  sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's
> 0.
>  So our code has to follow suit or fate-testing hardware float16 will
> be
>  impossible.
> >>>
> >>> What does the exr spec say about quiet and signaling nans?
> >>
> >> Not sure how exr would be involved here.
> >
> > Your patch changes the output of an exr-test. The output of the exr
> > decoder is presumably determined by the exr spec. There is after all the
> > possibility that what hardware does in hardware and what this patch does
> > in software is incompatible with what exr specifies.
>
> The exr spec just says something along the lines of analogous to
> ieee-754 floats:
>
> https://openexr.readthedocs.io/en/latest/TechnicalIntroduction.html?highlight=ieee#the-half-data-type
> It barely ever mentions NaNs, other than that they exist. Which makes
> sense, given they don't typically appear in images.
>
> The only output changed is that for how NaNs are converted.
> And given the cross-validation with multiple hardware implementations,
> I'm confident that it's correct.
>

here is openexr implementation
https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/toFloat.cpp#L78
It has been a while since I checked, but I believe the current implementation
matches this.

The fate sample: rgb_scanline_zip_half_float_0x0_to_0xFFFF.exr was created
to test this.
it contains every possible float16 value


> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN

2022-08-10 Thread James Almer

On 8/10/2022 7:14 PM, Mark Reid wrote:

On Wed, Aug 10, 2022 at 2:53 PM Timo Rothenpieler
wrote:

On 10.08.2022 23:43, Andreas Rheinhardt wrote:

Timo Rothenpieler:

On 10.08.2022 23:24, Andreas Rheinhardt wrote:

Timo Rothenpieler:

IEEE-754 differentiates two different kinds of NaNs.
Quiet and Signaling ones. They are differentiated by the MSB of the
mantissa.

For whatever reason, actual hardware conversion of half to single always
sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
So our code has to follow suit or fate-testing hardware float16 will be
impossible.

What does the exr spec say about quiet and signaling nans?

Not sure how exr would be involved here.

The exr spec just says something along the lines of analogous to
ieee-754 floats:

https://openexr.readthedocs.io/en/latest/TechnicalIntroduction.html?highlight=ieee#the-half-data-type
It barely ever mentions NaNs, other than that they exist. Which makes
sense, given they don't typically appear in images.

The only output changed is that for how NaNs are converted.
And given the cross-validation with multiple hardware implementations,
I'm confident that it's correct.

here is openexr implementation
https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/toFloat.cpp#L78
It has been a while since I checked, but I believe the current implementation
matches this.

The fate sample: rgb_scanline_zip_half_float_0x0_to_0xFFFF.exr was created
to test this.
it contains every possible float16 value

Then maybe the current implementation should be moved back to exr (it
used to be internal to exr until Paul made it standalone), so this lavu
module can match the existing hardware implementations of IEEE-754 half
floats for the purpose of relevant pixel format support.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] ipfsgateway: Remove default gateway

2022-08-10 Thread Derek Buitenhuis

A gateway can see everything, and we should not be shipping a hardcoded
default from a third party company; it's a security risk.

Signed-off-by: Derek Buitenhuis 
---
 libavformat/ipfsgateway.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
index 5a5178c563..907b61b017 100644
--- a/libavformat/ipfsgateway.c
+++ b/libavformat/ipfsgateway.c
@@ -240,13 +240,8 @@ static int translate_ipfs_to_http(URLContext *h, const 
char *uri, int flags, AVD
 ret = populate_ipfs_gateway(h);
 
 if (ret < 1) {
-// We fallback on dweb.link (managed by Protocol Labs).
-snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), 
"https://dweb.link");
-
-av_log(h, AV_LOG_WARNING,
-   "IPFS does not appear to be running. "
-   "You’re now using the public gateway at dweb.link.\n");
-av_log(h, AV_LOG_INFO,
+av_log(h, AV_LOG_ERROR,
+   "IPFS does not appear to be running.\n\n"
"Installing IPFS locally is recommended to "
"improve performance and reliability, "
"and not share all your activity with a single IPFS 
gateway.\n"
@@ -259,6 +254,8 @@ static int translate_ipfs_to_http(URLContext *h, const char 
*uri, int flags, AVD
"3. Define an $IPFS_PATH environment variable "
"and point it to the IPFS data path "
"- this is typically ~/.ipfs\n");
+ret = AVERROR(EINVAL);
+goto err;
 }
 }
 
-- 
2.36.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN


On 11.08.2022 00:18, James Almer wrote:
Then maybe the current implementation should be moved back to exr (it 
used to be internal to exr until Paul made it standalone), so this lavu 
module can match the existing hardware implementations of IEEE-754 half 
floats for the purpose of relevant pixel format support.


That doesn't seem necessary to me.
The values produced before and now are both correct, just different.
But there is no functional difference in the values it produces.

Duplicating the entirety of that code just for that seems extremely 
unnecessary.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 3/9] fftools/ffmpeg: move stream-dependent starttime correction to transcode_init()

2022-08-10 Thread Michael Niedermayer

On Wed, Aug 10, 2022 at 06:25:39PM +0200, Anton Khirnov wrote:
> Currently this code is located in the discontinuity handling block,
> where it does not belong.
> ---
>  fftools/ffmpeg.c | 39 +--
>  1 file changed, 21 insertions(+), 18 deletions(-)

this seems to break this:
./ffmpeg -y -vsync cfr -i fate-suite/lena.pnm -pix_fmt yuv422p -vcodec 
mpeg2video -qscale 2 -bitexact /tmp/422.mpg && ./ffmpeg -y -i /tmp/422.mpg -vf 
format=yuv420p -vcodec mpeg2video -bitexact /tmp/file1080.mpg

the /tmp/file1080.mpg is 0 bytes here after this commit

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Dictatorship: All citizens are under surveillance, all their steps and
actions recorded, for the politicians to enforce control.
Democracy: All politicians are under surveillance, all their steps and
actions recorded, for the citizens to enforce control.

signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN

2022-08-10 Thread Mark Reid

On Wed, Aug 10, 2022 at 3:28 PM Timo Rothenpieler 
wrote:

> On 11.08.2022 00:18, James Almer wrote:
> > Then maybe the current implementation should be moved back to exr (it
> > used to be internal to exr until Paul made it standalone), so this lavu
> > module can match the existing hardware implementations of IEEE-754 half
> > floats for the purpose of relevant pixel format support.
>
> That doesn't seem necessary to me.
> The values produced before and now are both correct, just different.
> But there is no functional difference in the values it produces.
>
> Duplicating the entirety of that code just for that seems extremely
> unnecessary.
>

openexr does note the Intel implementation's difference here
https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h#L288


> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 09/11] avutil/half2float: use native _Float16 if available

_Float16 support was available on arm/aarch64 for a while, and with gcc
12 was enabled on x86 as long as SSE2 is supported.

If the target arch supports f16c, gcc emits fairly efficient assembly,
taking advantage of it. This is the case on x86-64-v3 or higher.
Same goes on arm, which has native float16 support.
On x86, without f16c, it emulates it in software using sse2 instructions.

This has been shown to perform rather poorly:

_Float16 full SSE2 emulation:
frame=50074 fps=848 q=-0.0 size=N/A time=00:33:22.96 bitrate=N/A speed=33.9x

_Float16 f16c accelerated (Zen2, --cpu=znver2):
frame=50636 fps=1965 q=-0.0 Lsize=N/A time=00:33:45.40 bitrate=N/A speed=78.6x

classic half2float full software implementation:
frame=49926 fps=1605 q=-0.0 Lsize=N/A time=00:33:17.00 bitrate=N/A speed=64.2x

Hence an additional check was introduced, that only enables use of
_Float16 on x86 if f16c is being utilized.

On aarch64, a similar uplift in performance is seen:

RPi4 half2float full software implementation:
frame= 6088 fps=126 q=-0.0 Lsize=N/A time=00:04:03.48 bitrate=N/A speed=5.06x

RPi4 _Float16:
frame= 6103 fps=158 q=-0.0 Lsize=N/A time=00:04:04.08 bitrate=N/A speed=6.32x

Since arm/aarch64 always natively support 16 bit floats, it can always
be considered fast there.

I'm not aware of any additional platforms that currently support
_Float16. And if there are, they should be considered non-fast until
proven fast.
---
 configure  | 13 +
 libavutil/float2half.c |  2 ++
 libavutil/float2half.h | 16 
 libavutil/half2float.c |  4 
 libavutil/half2float.h | 16 
 5 files changed, 51 insertions(+)

diff --git a/configure b/configure
index 6761d0cb32..6ede9a5a8f 100755
--- a/configure
+++ b/configure
@@ -2143,6 +2143,8 @@ ARCH_FEATURES="
 fast_64bit
 fast_clz
 fast_cmov
+fast_float16
+float16
 local_aligned
 simd_align_16
 simd_align_32
@@ -5125,6 +5127,8 @@ elif enabled arm; then
 ;;
 esac
 
+test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee
+
 elif enabled avr32; then
 
 case $cpu in
@@ -6229,6 +6233,15 @@ check_builtin sync_val_compare_and_swap "" "int *ptr; 
int oldval, newval; __sync
 check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)"
 check_builtin localtime_r time.h "time_t *time; struct tm *tm; 
localtime_r(time, tm)"
 
+check_builtin float16 "" "_Float16 f16var"
+if enabled float16; then
+if enabled x86; then
+test_cpp_condition stddef.h "defined(__F16C__)" && enable fast_float16
+elif enabled arm || enabled aarch64; then
+enable fast_float16
+fi
+fi
+
 case "$custom_allocator" in
 jemalloc)
 # jemalloc by default does not use a prefix
diff --git a/libavutil/float2half.c b/libavutil/float2half.c
index dba14cef5d..7002612194 100644
--- a/libavutil/float2half.c
+++ b/libavutil/float2half.c
@@ -20,6 +20,7 @@
 
 void ff_init_float2half_tables(float2half_tables *t)
 {
+#if !HAVE_FAST_FLOAT16
 for (int i = 0; i < 256; i++) {
 int e = i - 127;
 
@@ -50,4 +51,5 @@ void ff_init_float2half_tables(float2half_tables *t)
 t->shifttable[i|0x100] = 13;
 }
 }
+#endif
 }
diff --git a/libavutil/float2half.h b/libavutil/float2half.h
index b8c9cdfc4f..437666966b 100644
--- a/libavutil/float2half.h
+++ b/libavutil/float2half.h
@@ -20,21 +20,37 @@
 #define AVUTIL_FLOAT2HALF_H
 
 #include <stdint.h>
+#include "intfloat.h"
+
+#include "config.h"
 
 typedef struct float2half_tables {
+#if HAVE_FAST_FLOAT16
+uint8_t dummy;
+#else
 uint16_t basetable[512];
 uint8_t shifttable[512];
+#endif
 } float2half_tables;
 
 void ff_init_float2half_tables(float2half_tables *t);
 
 static inline uint16_t float2half(uint32_t f, const float2half_tables *t)
 {
+#if HAVE_FAST_FLOAT16
+union {
+_Float16 f;
+uint16_t i;
+} u;
+u.f = av_int2float(f);
+return u.i;
+#else
 uint16_t h;
 
 h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> 
t->shifttable[(f >> 23) & 0x1ff]);
 
 return h;
+#endif
 }
 
 #endif /* AVUTIL_FLOAT2HALF_H */
diff --git a/libavutil/half2float.c b/libavutil/half2float.c
index baac8e4093..ff198a8187 100644
--- a/libavutil/half2float.c
+++ b/libavutil/half2float.c
@@ -18,6 +18,7 @@
 
 #include "libavutil/half2float.h"
 
+#if !HAVE_FAST_FLOAT16
 static uint32_t convertmantissa(uint32_t i)
 {
 int32_t m = i << 13; // Zero pad mantissa bits
@@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i)
 
 return m | e; // Return combined number
 }
+#endif
 
 void ff_init_half2float_tables(half2float_tables *t)
 {
+#if !HAVE_FAST_FLOAT16
 t->mantissatable[0] = 0;
 for (int i = 1; i < 1024; i++)
 t->mantissatable[i] = convertmantissa(i);
@@ -60,4 +63,5 @@ void ff_init_half2float_tables(half2float_tables *t)
 t->offsettable[31] = 2048;
 t->offsettable[32] = 0;
 t->offsettable[63] = 2048;
+#endif
 }
diff --git a/libavutil/half2float.h

Re: [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN