[FFmpeg-devel] [PATCH 02/13] lavc/jpeg2000dec: Reindent
From 86c30e327e1eb8ba913d74d5394ea90a87b55a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Fri, 10 Jun 2022 14:12:11 +0200 Subject: [PATCH 02/13] lavc/jpeg2000dec: Reindent --- libavcodec/jpeg2000dec.c | 36 ++-- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index d9754fc50e..9d3d406870 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1969,26 +1969,26 @@ static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td, t1.stride = (1coded = 0; -ret = decode_cblk(s, codsty, &t1, cblk, -cblk->coord[0][1] - cblk->coord[0][0], -cblk->coord[1][1] - cblk->coord[1][0], -bandpos, comp->roi_shift); -if (ret) -cb->coded = 1; -else -return 0; +ret = decode_cblk(s, codsty, &t1, cblk, +cblk->coord[0][1] - cblk->coord[0][0], +cblk->coord[1][1] - cblk->coord[1][0], +bandpos, comp->roi_shift); +if (ret) +cb->coded = 1; +else +return 0; -x = cblk->coord[0][0] - band->coord[0][0]; -y = cblk->coord[1][0] - band->coord[1][0]; +x = cblk->coord[0][0] - band->coord[0][0]; +y = cblk->coord[1][0] - band->coord[1][0]; -if (comp->roi_shift) -roi_scale_cblk(cblk, comp, &t1); -if (codsty->transform == FF_DWT97) -dequantization_float(x, y, cblk, comp, &t1, band); -else if (codsty->transform == FF_DWT97_INT) -dequantization_int_97(x, y, cblk, comp, &t1, band); -else -dequantization_int(x, y, cblk, comp, &t1, band); +if (comp->roi_shift) +roi_scale_cblk(cblk, comp, &t1); +if (codsty->transform == FF_DWT97) +dequantization_float(x, y, cblk, comp, &t1, band); +else if (codsty->transform == FF_DWT97_INT) +dequantization_int_97(x, y, cblk, comp, &t1, band); +else +dequantization_int(x, y, cblk, comp, &t1, band); return 0; } -- 2.30.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 03/13] lavc/jpeg2000dwt: Implement sliced transforms
From 6ab67531c946ca320e49bc93f4f086835ffd2c1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Fri, 10 Jun 2022 17:18:14 +0200 Subject: [PATCH 03/13] lavc/jpeg2000dwt: Implement sliced transforms lavc/tests/jpeg2000dwt tests this. --- libavcodec/j2kenc.c| 3 +- libavcodec/jpeg2000.c | 5 +- libavcodec/jpeg2000.h | 2 +- libavcodec/jpeg2000dec.c | 2 +- libavcodec/jpeg2000dwt.c | 131 +++-- libavcodec/jpeg2000dwt.h | 5 +- libavcodec/tests/jpeg2000dwt.c | 15 ++-- 7 files changed, 94 insertions(+), 69 deletions(-) diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c index 0b761d0b00..4de596ffa9 100644 --- a/libavcodec/j2kenc.c +++ b/libavcodec/j2kenc.c @@ -496,7 +496,8 @@ static int init_tiles(Jpeg2000EncoderContext *s) s->cbps[compno], compno?1avctx +s->avctx, +1 )) < 0) return ret; } diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index 0aa984bc53..945b787565 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -467,7 +467,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty, Jpeg2000QuantStyle *qntsty, int cbps, int dx, int dy, - AVCodecContext *avctx) + AVCodecContext *avctx, int max_slices) { int reslevelno, bandno, gbandno = 0, ret, i, j; uint32_t csize; @@ -479,7 +479,8 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, if (ret = ff_jpeg2000_dwt_init(&comp->dwt, comp->coord, codsty->nreslevels2decode - 1, - codsty->transform)) + codsty->transform, + max_slices)) return ret; if (av_image_check_size(comp->coord[0][1] - comp->coord[0][0], diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h index d06313425e..cbb8e0d951 100644 --- a/libavcodec/jpeg2000.h +++ b/libavcodec/jpeg2000.h @@ -278,7 +278,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty, Jpeg2000QuantStyle *qntsty, int cbps, int dx, int dy, - AVCodecContext *ctx); + AVCodecContext *ctx, int max_slices); void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle 
*codsty); diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 9d3d406870..874a56 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1052,7 +1052,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno) return AVERROR_INVALIDDATA; if (ret = ff_jpeg2000_init_component(comp, codsty, qntsty, s->cbps[compno], s->cdx[compno], - s->cdy[compno], s->avctx)) + s->cdy[compno], s->avctx, 1)) return ret; } return 0; diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c index f2da7307c4..42a92b6c64 100644 --- a/libavcodec/jpeg2000dwt.c +++ b/libavcodec/jpeg2000dwt.c @@ -322,24 +322,24 @@ static void sr_1d53(unsigned *p, int i0, int i1) p[2 * i + 1] += (int)(p[2 * i] + p[2 * i + 2]) >> 1; } -static void dwt_decode53(DWTContext *s, int *t) +static void dwt_decode53(DWTContext *s, int *t, int lev, int dir, int slice, int slices) { -int lev; int w = s->linelen[s->ndeclevels - 1][0]; -int32_t *line = s->i_linebuf; -line += 3; +int32_t *line = s->i_linebuf + slice * s->linesize + 3; -for (lev = 0; lev < s->ndeclevels; lev++) { int lh = s->linelen[lev][0], lv = s->linelen[lev][1], mh = s->mod[lev][0], mv = s->mod[lev][1], +sh = (lh + slices - 1)/slices, +sv = (lv + slices - 1)/slices, lp; int *l; +if (dir == 0) { // HOR_SD l = line + mh; -for (lp = 0; lp < lv; lp++) { +for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) { int i, j = 0; // copy with interleaving for (i = mh; i < lh; i += 2, j++) @@ -352,10 +352,10 @@ static void dwt_decode53(DWTContext *s, int *t) for (i = 0; i < lh; i++) t[w * l
[FFmpeg-devel] [PATCH 04/13] lavc/jpeg2000dec: Implement IDWT slicing
From d0ec602b0f61dd7f8d53efccc2c4859058a5d55d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Mon, 13 Jun 2022 14:45:07 +0200 Subject: [PATCH 04/13] lavc/jpeg2000dec: Implement IDWT slicing --- libavcodec/jpeg2000dec.c | 99 +++- libavcodec/jpeg2000dwt.c | 1 - libavcodec/jpeg2000dwt.h | 1 + 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 874a56..9344630c6f 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -150,6 +150,10 @@ typedef struct Jpeg2000DecoderContext { unsigned int idwt_size; Jpeg2000CodeblockThread *cb; unsigned int cb_size; + +// used for idwt slicing +int reslevel, dir, slices; +int have_dwt97_int; // 1 if any coding style is FF_DWT97_INT } Jpeg2000DecoderContext; /* get_bits functions for JPEG2000 packet bitstream @@ -541,9 +545,10 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c) } c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type /* set integer 9/7 DWT in case of BITEXACT flag */ -if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97)) +if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97)) { c->transform = FF_DWT97_INT; -else if (c->transform == FF_DWT53) { +s->have_dwt97_int = 1; +} else if (c->transform == FF_DWT53) { s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS; } @@ -1052,7 +1057,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno) return AVERROR_INVALIDDATA; if (ret = ff_jpeg2000_init_component(comp, codsty, qntsty, s->cbps[compno], s->cdx[compno], - s->cdy[compno], s->avctx, 1)) + s->cdy[compno], s->avctx, s->slices)) return ret; } return 0; @@ -1993,19 +1998,74 @@ static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td, return 0; } +static int jpeg2000_dwt97_int_preshift(AVCodecContext *avctx, void *td, + int jobnr, int threadnr) +{ +Jpeg2000DecoderContext *s = avctx->priv_data; +Jpeg2000IdwtThread *idwt= s->idwt + 
jobnr / s->slices; +Jpeg2000Tile *tile = s->tile + jobnr / s->slices / s->ncomponents; +int compno = (jobnr / s->slices) % s->ncomponents; +int slice = jobnr % s->slices; +Jpeg2000Component *comp = tile->comp + compno; +Jpeg2000CodingStyle *codsty = tile->codsty + compno; +int a = comp->dwt.linelen[comp->dwt.ndeclevels - 1][0] * +comp->dwt.linelen[comp->dwt.ndeclevels - 1][1]; +int as = (a + s->slices - 1)/s->slices; + +for (int i = idwt->cb_start; i < idwt->cb_end; i++) { +if (s->cb[i].coded) { +if (codsty->transform == FF_DWT97_INT) { +for (int i = as*slice; i - as < as*slice; i++) +comp->i_data[i] *= 1LL << I_PRESHIFT; +} +break; +} +} + +return 0; +} + static int jpeg2000_idwt(AVCodecContext *avctx, void *td, int jobnr, int threadnr) { Jpeg2000DecoderContext *s = avctx->priv_data; -Jpeg2000IdwtThread *idwt= s->idwt + jobnr; -Jpeg2000Tile *tile = s->tile + jobnr / s->ncomponents; -int compno = jobnr % s->ncomponents; +Jpeg2000IdwtThread *idwt= s->idwt + jobnr / s->slices; +Jpeg2000Tile *tile = s->tile + jobnr / s->slices / s->ncomponents; +int compno = (jobnr / s->slices) % s->ncomponents; +int slice = jobnr % s->slices; Jpeg2000Component *comp = tile->comp + compno; Jpeg2000CodingStyle *codsty = tile->codsty + compno; for (int i = idwt->cb_start; i < idwt->cb_end; i++) { if (s->cb[i].coded) { -ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data); +ff_dwt_decode_thread(&comp->dwt, codsty->transform == FF_DWT97 ? 
(void*)comp->f_data : (void*)comp->i_data, s->reslevel, s->dir, slice, s->slices); +break; +} +} + +return 0; +} + +static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td, +int jobnr, int threadnr) +{ +Jpeg2000DecoderContext *s = avctx->priv_data; +Jpeg2000IdwtThread *idwt= s->idwt + jobnr / s->slices; +Jpeg2000Tile *tile = s->tile + jobnr / s->slices / s->ncomponents; +int compno = (jobnr / s->slices) % s->ncomponents; +int slice = jobnr % s->slices; +Jpeg2000Component *comp = tile->comp + compno; +Jpeg2000CodingStyle *codsty = tile->codsty + compno; +int a = comp->dw
[FFmpeg-devel] [PATCH 05/13] lavc/jpeg2000dec: Thread init_tile()
From 080ebdc9bad130098bff575f9ce690b8a522c9f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Mon, 13 Jun 2022 15:09:17 +0200 Subject: [PATCH 05/13] lavc/jpeg2000dec: Thread init_tile() --- libavcodec/jpeg2000dec.c | 30 +++--- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 9344630c6f..ef5167c29e 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1015,12 +1015,19 @@ static int get_ppt(Jpeg2000DecoderContext *s, int n) return 0; } -static int init_tile(Jpeg2000DecoderContext *s, int tileno) +static int init_tile(AVCodecContext *avctx, void *td, + int jobnr, int threadnr) { -int compno; -int tilex = tileno % s->numXtiles; -int tiley = tileno / s->numXtiles; -Jpeg2000Tile *tile = s->tile + tileno; +Jpeg2000DecoderContext *s = avctx->priv_data; +int tileno = jobnr / s->ncomponents; +int tilex = tileno % s->numXtiles; +int tiley = tileno / s->numXtiles; +int compno = jobnr % s->ncomponents; +Jpeg2000Tile *tile = s->tile + tileno; +Jpeg2000Component *comp = tile->comp + compno; +Jpeg2000CodingStyle *codsty = tile->codsty + compno; +Jpeg2000QuantStyle *qntsty = tile->qntsty + compno; +int ret; // global bandno if (!tile->comp) return AVERROR(ENOMEM); @@ -1030,12 +1037,6 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno) tile->coord[1][0] = av_clip(tiley * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height); tile->coord[1][1] = av_clip((tiley + 1) * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height); -for (compno = 0; compno < s->ncomponents; compno++) { -Jpeg2000Component *comp = tile->comp + compno; -Jpeg2000CodingStyle *codsty = tile->codsty + compno; -Jpeg2000QuantStyle *qntsty = tile->qntsty + compno; -int ret; // global bandno - comp->coord_o[0][0] = tile->coord[0][0]; comp->coord_o[0][1] = tile->coord[0][1]; comp->coord_o[1][0] = tile->coord[1][0]; @@ -1059,7 +1060,7 @@ static int 
init_tile(Jpeg2000DecoderContext *s, int tileno) s->cbps[compno], s->cdx[compno], s->cdy[compno], s->avctx, s->slices)) return ret; -} + return 0; } @@ -2367,9 +2368,6 @@ static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s) for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) { Jpeg2000Tile *tile = s->tile + tileno; -if ((ret = init_tile(s, tileno)) < 0) -return ret; - if ((ret = jpeg2000_decode_packets(s, tile)) < 0) return ret; } @@ -2656,6 +2654,8 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture, picture->key_frame = 1; s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1; +avctx->execute2(avctx, init_tile, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents); + if (ret = jpeg2000_read_bitstream_packets(s)) goto end; -- 2.30.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 06/13] lavu/mem: Add ff_fast_recalloc()
Left this as an ff_ function for now since it's only used by the j2k code /Tomas From 5d36d431ffe4c8ba0f698d0c288ebc16b83f0bbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Tue, 14 Jun 2022 13:35:18 +0200 Subject: [PATCH 06/13] lavu/mem: Add ff_fast_recalloc() --- libavutil/mem.c | 24 + libavutil/mem.h | 55 + 2 files changed, 79 insertions(+) diff --git a/libavutil/mem.c b/libavutil/mem.c index a0c9a42849..7781b715a0 100644 --- a/libavutil/mem.c +++ b/libavutil/mem.c @@ -530,6 +530,30 @@ void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size) return ptr; } +int ff_fast_recalloc(void *ptr, unsigned int *size, size_t nelem, size_t elsize) +{ +void *val; +void *new_ptr; +unsigned int new_size = *size; +size_t product; +int ret; +memcpy(&val, ptr, sizeof(val)); + +if ((ret = av_size_mult(nelem, elsize, &product)) < 0) +return ret; + +if (!(new_ptr = av_fast_realloc(val, &new_size, product))) +return AVERROR(ENOMEM); + +if (new_size > *size) { +memset((uint8_t*)new_ptr + *size, 0, new_size - *size); +*size = new_size; +memcpy(ptr, &new_ptr, sizeof(new_ptr)); +} + +return 0; +} + static inline void fast_malloc(void *ptr, unsigned int *size, size_t min_size, int zero_realloc) { size_t max_size; diff --git a/libavutil/mem.h b/libavutil/mem.h index d91174196c..74abf3dce2 100644 --- a/libavutil/mem.h +++ b/libavutil/mem.h @@ -380,6 +380,61 @@ int av_reallocp_array(void *ptr, size_t nmemb, size_t size); */ void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size); +/** + * Reallocate the pointed-to buffer if it is not large enough, otherwise do + * nothing. Old data is memcpy()'d to the start of the new buffer. The newly + * allocated space at the end of the buffer is zero-initialized. In other + * words the buffer is expanded with zeroes when necessary. + * + * If the pointed-to buffer is `NULL`, then a new zero-initialized buffer is + * allocated. 
+ * + * If the pointed-to buffer is not large enough, and reallocation fails, + * `AVERROR(ENOMEM)` is returned. + * + * If nelem*elsize is too large then `AVERROR(EINVAL)` is returned. + * + * Contrary to av_fast_malloc(), *ptr and *size are not touched in case of + * error, to allow for proper cleanup. + * + * *size is not guaranteed to be an exact multiple of elsize bytes. + * + * This function is intended for use with arrays of structures that contain + * pointers that are allowed to grow and typically don't shrink. + * + * A typical use pattern follows: + * + * @code{.c} + * int foo_work(SomeContext *s) { + * if (ff_fast_recalloc(&s->foo, &s->foo_size, s->nfoo, sizeof(Foo))) + * return AVERROR(ENOMEM); + * for (x = 0; x < s->nfoo; x++) + * do stuff with s->foo[x] + * return 0; + * } + * + * void foo_close(SomeContext *s) { + * // note the use of s->foo_size, not s->nfoo + * for (x = 0; x < s->foo_size/sizeof(Foo); x++) + * av_freep(&s->foo[x].bar); + * av_freep(&s->foo); + * } + * @endcode + * + * @param[in,out] ptr Pointer to pointer to an already allocated buffer. + * `*ptr` will be overwritten with pointer to new + * buffer on success and will be left alone on failure + * @param[in,out] size Pointer to the size of buffer `*ptr`. `*size` is + * updated to the new allocated size and will be left + * alone on failure. + * @param[in] nelem Number of desired elements in *ptr + * @param[in] elsize Size of each element in *ptr + * @return Zero on success, <0 on error. + * @see av_fast_realloc() + * @see av_fast_malloc() + */ +int ff_fast_recalloc(void *ptr, unsigned int *size, size_t nelem, size_t elsize); + /** * Allocate a buffer, reusing the given one if large enough. * -- 2.30.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 07/13] lavc/jpeg2000*: Use ff_fast_recalloc() to eliminate lots of allocations
From 72a5f47503338a4fff816440ad64bc62cc23a738 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Mon, 13 Jun 2022 17:04:10 +0200 Subject: [PATCH 07/13] lavc/jpeg2000*: Use ff_fast_recalloc() to eliminate lots of allocations --- libavcodec/jpeg2000.c| 72 +--- libavcodec/jpeg2000.h| 9 + libavcodec/jpeg2000dec.c | 28 libavcodec/jpeg2000dwt.c | 9 +++-- libavcodec/jpeg2000dwt.h | 2 ++ 5 files changed, 70 insertions(+), 50 deletions(-) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index 945b787565..7ec5986875 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -52,17 +52,23 @@ static int32_t tag_tree_size(int w, int h) } /* allocate the memory for tag tree */ -static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h) +static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h) { int pw = w, ph = h; -Jpeg2000TgtNode *res, *t, *t2; +Jpeg2000TgtNode *t, *t2; int32_t tt_size; +size_t prod; tt_size = tag_tree_size(w, h); -t = res = av_calloc(tt_size, sizeof(*t)); -if (!res) -return NULL; +if (av_size_mult(tt_size, sizeof(*t), &prod)) +return AVERROR(ENOMEM); + +av_fast_malloc(old, size, prod); +if (!*old) +return AVERROR(ENOMEM); +t = *old; +memset(*old, 0, prod); while (w > 1 || h > 1) { int i, j; @@ -80,7 +86,7 @@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h) t = t2; } t[0].parent = NULL; -return res; +return 0; } void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val) @@ -316,16 +322,14 @@ static int init_prec(AVCodecContext *avctx, /* Tag trees initialization */ -prec->cblkincl = -ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width, - prec->nb_codeblocks_height); -if (!prec->cblkincl) -return AVERROR(ENOMEM); - -prec->zerobits = -ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width, - prec->nb_codeblocks_height); -if (!prec->zerobits) +if (ff_jpeg2000_tag_tree_init(&prec->cblkincl, + &prec->cblkincl_size, + prec->nb_codeblocks_width, + prec->nb_codeblocks_height) 
|| +ff_jpeg2000_tag_tree_init(&prec->zerobits, + &prec->zerobits_size, + prec->nb_codeblocks_width, + prec->nb_codeblocks_height)) return AVERROR(ENOMEM); if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) { @@ -333,8 +337,7 @@ static int init_prec(AVCodecContext *avctx, return AVERROR(ENOMEM); } nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height; -prec->cblk = av_calloc(nb_codeblocks, sizeof(*prec->cblk)); -if (!prec->cblk) +if (ff_fast_recalloc(&prec->cblk, &prec->cblk_size, nb_codeblocks, sizeof(*prec->cblk))) return AVERROR(ENOMEM); for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) { Jpeg2000Cblk *cblk = prec->cblk + cblkno; @@ -376,6 +379,7 @@ static int init_prec(AVCodecContext *avctx, cblk->length= 0; cblk->npasses = 0; if (av_codec_is_encoder(avctx->codec)) { +av_freep(&cblk->layers); cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers)); if (!cblk->layers) return AVERROR(ENOMEM); @@ -448,8 +452,7 @@ static int init_band(AVCodecContext *avctx, return AVERROR(ENOMEM); } nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y; -band->prec = av_calloc(nb_precincts, sizeof(*band->prec)); -if (!band->prec) +if (ff_fast_recalloc(&band->prec, &band->prec_size, nb_precincts, sizeof(*band->prec))) return AVERROR(ENOMEM); for (precno = 0; precno < nb_precincts; precno++) { @@ -471,6 +474,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, { int reslevelno, bandno, gbandno = 0, ret, i, j; uint32_t csize; +size_t prod; if (codsty->nreslevels2decode <= 0) { av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode); @@ -496,19 +500,22 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, if (codsty->transform == FF_DWT97) { csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data); -comp->i_data = NULL; -comp->f_data = av_calloc(csize, sizeof(*comp->f_data)); +if (av_size_mult(csize, sizeof(*comp->f_data), &prod)) +return 
AVERROR(ENOMEM); +av_fast_malloc(&comp->f_data, &comp->f_data_size, prod); if (!comp->f_data) return AVERROR(ENOMEM); +memset(comp->f_data, 0, pro
[FFmpeg-devel] [PATCH 08/13] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent
From c0e00cf03f5a1fcffc90395d4b26607e1681690c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Tue, 7 Jun 2022 16:43:40 +0200 Subject: [PATCH 08/13] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent --- libavcodec/j2kenc.c | 44 libavcodec/jpeg2000.c| 20 +- libavcodec/jpeg2000.h| 2 +- libavcodec/jpeg2000dec.c | 18 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c index 4de596ffa9..a3d8144acc 100644 --- a/libavcodec/j2kenc.c +++ b/libavcodec/j2kenc.c @@ -249,36 +249,36 @@ static void j2k_flush(Jpeg2000EncoderContext *s) /* tag tree routines */ /** code the value stored in node */ -static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *node, int threshold) +static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *nodes, int32_t node, int threshold) { -Jpeg2000TgtNode *stack[30]; +int32_t stack[30]; int sp = -1, curval = 0; -while(node->parent){ +while(nodes[node].parent >= 0){ stack[++sp] = node; -node = node->parent; +node = nodes[node].parent; } while (1) { -if (curval > node->temp_val) -node->temp_val = curval; +if (curval > nodes[node].temp_val) +nodes[node].temp_val = curval; else { -curval = node->temp_val; +curval = nodes[node].temp_val; } -if (node->val >= threshold) { +if (nodes[node].val >= threshold) { put_bits(s, 0, threshold - curval); curval = threshold; } else { -put_bits(s, 0, node->val - curval); -curval = node->val; -if (!node->vis) { +put_bits(s, 0, nodes[node].val - curval); +curval = nodes[node].val; +if (!nodes[node].vis) { put_bits(s, 1, 1); -node->vis = 1; +nodes[node].vis = 1; } } -node->temp_val = curval; +nodes[node].temp_val = curval; if (sp < 0) break; node = stack[sp--]; @@ -286,14 +286,14 @@ static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *node, int } /** update the value in node */ -static void tag_tree_update(Jpeg2000TgtNode *node) +static void tag_tree_update(Jpeg2000TgtNode *nodes, int node) { int lev = 
0; -while (node->parent){ -if (node->parent->val <= node->val) +while (nodes[node].parent >= 0){ +if (nodes[nodes[node].parent].val <= nodes[node].val) break; -node->parent->val = node->val; -node = node->parent; +nodes[nodes[node].parent].val = nodes[node].val; +node = nodes[node].parent; lev++; } } @@ -814,7 +814,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in prec->zerobits[pos].val = expn[bandno] + numgbits - 1 - cblk->nonzerobits; cblk->incl = 0; cblk->lblock = 3; -tag_tree_update(prec->zerobits + pos); +tag_tree_update(prec->zerobits, pos); for (i = 0; i < nlayers; i++) { if (cblk->layers[i].npasses > 0) { prec->cblkincl[pos].val = i; @@ -823,7 +823,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in } if (i == nlayers) prec->cblkincl[pos].val = i; -tag_tree_update(prec->cblkincl + pos); +tag_tree_update(prec->cblkincl, pos); } } } @@ -877,7 +877,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in // inclusion information if (!cblk->incl) -tag_tree_code(s, prec->cblkincl + pos, layno + 1); +tag_tree_code(s, prec->cblkincl, pos, layno + 1); else { put_bits(s, cblk->layers[layno].npasses > 0, 1); } @@ -887,7 +887,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in // zerobits information if (!cblk->incl) { -tag_tree_code(s, prec->zerobits + pos, 100); +tag_tree_code(s, prec->zerobits, pos, 100); cblk->incl = 1; } diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index 7ec5986875..0bec2e187d 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -55,8 +55,8 @@ static int32_t tag_tree_size(int w, int h) static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h) { int pw = w, ph = h; -Jpeg2000TgtNode *t, *t2; -
[FFmpeg-devel] [PATCH 09/13] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4
From 03b806f89453571310dcb14edbd9f51e059b7476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Wed, 8 Jun 2022 10:08:15 +0200 Subject: [PATCH 09/13] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 --- libavcodec/jpeg2000.c | 35 +++ 1 file changed, 35 insertions(+) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index 0bec2e187d..b80e68bcba 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -51,6 +51,31 @@ static int32_t tag_tree_size(int w, int h) return (int32_t)(res + 1); } +#define T(x) (x*sizeof(Jpeg2000TgtNode)) + +static const size_t tt_sizes[16] = { +T(1),T(3),T(6),T(7),T(3),T(5),T(9),T(11),T(6),T(9),T(14),T(17),T(7),T(11),T(17),T(21), +}; + +static const Jpeg2000TgtNode tt_stereotypes[16][21] = { +{{-1},}, +{{2},{2},{-1},}, +{{3},{3},{4},{5},{5},{-1},}, +{{4},{4},{5},{5},{6},{6},{-1},}, +{{2},{2},{-1},}, +{{4},{4},{4},{4},{-1},}, +{{6},{6},{7},{6},{6},{7},{8},{8},{-1},}, +{{8},{8},{9},{9},{8},{8},{9},{9},{10},{10},{-1},}, +{{3},{3},{4},{5},{5},{-1},}, +{{6},{6},{6},{6},{7},{7},{8},{8},{-1},}, +{{9},{9},{10},{9},{9},{10},{11},{11},{12},{13},{13},{13},{13},{-1},}, +{{12},{12},{13},{13},{12},{12},{13},{13},{14},{14},{15},{15},{16},{16},{16},{16},{-1},}, +{{4},{4},{5},{5},{6},{6},{-1},}, +{{8},{8},{8},{8},{9},{9},{9},{9},{10},{10},{-1},}, +{{12},{12},{13},{12},{12},{13},{14},{14},{15},{14},{14},{15},{16},{16},{16},{16},{-1},}, +{{16},{16},{17},{17},{16},{16},{17},{17},{18},{18},{19},{19},{18},{18},{19},{19},{20},{20},{20},{20},{-1},}, +}; + /* allocate the memory for tag tree */ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h) { @@ -59,6 +84,15 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int32_t tt_size, ofs = 0; size_t prod; +if (w <= 4 && h <= 4) { +int idx = w-1 + (h-1)*4; +size_t sz = tt_sizes[idx]; +av_fast_malloc(old, size, sz); +if (*old) { +memcpy(*old, tt_stereotypes[idx], sz); +} 
+return 0; +} else { tt_size = tag_tree_size(w, h); if (av_size_mult(tt_size, sizeof(*t), &prod)) @@ -87,6 +121,7 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, } t[0].parent = -1; return 0; +} } void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val) -- 2.30.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 10/13] lavc/jpeg2000: Reindent
From d3aaf24ca4778e6ba280f99f9ce90cb15738699b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Tue, 14 Jun 2022 11:23:08 +0200 Subject: [PATCH 10/13] lavc/jpeg2000: Reindent --- libavcodec/jpeg2000.c | 44 +-- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index b80e68bcba..8ee50b77c5 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -93,34 +93,34 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, } return 0; } else { -tt_size = tag_tree_size(w, h); +tt_size = tag_tree_size(w, h); -if (av_size_mult(tt_size, sizeof(*t), &prod)) -return AVERROR(ENOMEM); +if (av_size_mult(tt_size, sizeof(*t), &prod)) +return AVERROR(ENOMEM); -av_fast_malloc(old, size, prod); -if (!*old) -return AVERROR(ENOMEM); -t = *old; -memset(*old, 0, prod); +av_fast_malloc(old, size, prod); +if (!*old) +return AVERROR(ENOMEM); +t = *old; +memset(*old, 0, prod); -while (w > 1 || h > 1) { -int i, j; -pw = w; -ph = h; +while (w > 1 || h > 1) { +int i, j; +pw = w; +ph = h; -w = (w + 1) >> 1; -h = (h + 1) >> 1; -ofs += pw * ph; +w = (w + 1) >> 1; +h = (h + 1) >> 1; +ofs += pw * ph; -for (i = 0; i < ph; i++) -for (j = 0; j < pw; j++) -t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs; +for (i = 0; i < ph; i++) +for (j = 0; j < pw; j++) +t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs; -t += pw * ph; -} -t[0].parent = -1; -return 0; +t += pw * ph; +} +t[0].parent = -1; +return 0; } } -- 2.30.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 11/13] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()
From 5b492d4e92a11946fd7425497205b1842fa1912c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Tue, 14 Jun 2022 10:57:45 +0200 Subject: [PATCH 11/13] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() --- libavcodec/jpeg2000.c | 19 +++ 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index 8ee50b77c5..2e3c33303b 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -247,7 +247,7 @@ static void init_band_stepsize(AVCodecContext *avctx, Jpeg2000CodingStyle *codsty, Jpeg2000QuantStyle *qntsty, int bandno, int gbandno, int reslevelno, - int cbps) + int cbps, int is_enc) { /* TODO: Implementation of quantization step not finished, * see ISO/IEC 15444-1:2002 E.1 and A.6.4. */ @@ -305,7 +305,7 @@ static void init_band_stepsize(AVCodecContext *avctx, /* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why? * If not set output of entropic decoder is not correct. */ -if (!av_codec_is_encoder(avctx->codec)) +if (!is_enc) band->f_stepsize *= 0.5; } @@ -316,7 +316,8 @@ static int init_prec(AVCodecContext *avctx, Jpeg2000CodingStyle *codsty, int precno, int bandno, int reslevelno, int log2_band_prec_width, - int log2_band_prec_height) + int log2_band_prec_height, + int is_enc) { Jpeg2000Prec *prec = band->prec + precno; int nb_codeblocks, cblkno; @@ -413,7 +414,7 @@ static int init_prec(AVCodecContext *avctx, cblk->lblock= 3; cblk->length= 0; cblk->npasses = 0; -if (av_codec_is_encoder(avctx->codec)) { +if (is_enc) { av_freep(&cblk->layers); cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers)); if (!cblk->layers) @@ -430,7 +431,7 @@ static int init_band(AVCodecContext *avctx, Jpeg2000CodingStyle *codsty, Jpeg2000QuantStyle *qntsty, int bandno, int gbandno, int reslevelno, - int cbps, int dx, int dy) + int cbps, int dx, int dy, int is_enc) { Jpeg2000Band *band = reslevel->band + bandno; uint8_t log2_band_prec_width, log2_band_prec_height; @@ -439,7 +440,7 @@ static int 
init_band(AVCodecContext *avctx, int nb_precincts; int i, j, ret; -init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps); +init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps, is_enc); /* computation of tbx_0, tbx_1, tby_0, tby_1 * see ISO/IEC 15444-1:2002 B.5 eq. B-15 and tbl B.1 @@ -493,7 +494,8 @@ static int init_band(AVCodecContext *avctx, for (precno = 0; precno < nb_precincts; precno++) { ret = init_prec(avctx, band, reslevel, comp, codsty, precno, bandno, reslevelno, -log2_band_prec_width, log2_band_prec_height); +log2_band_prec_width, log2_band_prec_height, +is_enc); if (ret < 0) return ret; } @@ -510,6 +512,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, int reslevelno, bandno, gbandno = 0, ret, i, j; uint32_t csize; size_t prod; +int is_enc = av_codec_is_encoder(avctx->codec); if (codsty->nreslevels2decode <= 0) { av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode); @@ -607,7 +610,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, ret = init_band(avctx, reslevel, comp, codsty, qntsty, bandno, gbandno, reslevelno, -cbps, dx, dy); +cbps, dx, dy, is_enc); if (ret < 0) return ret; } -- 2.30.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 12/13] lavc/jpeg2000dec: Use coarser slicing for initial reslevels
From 15761070d1cdc622ffbc5d6aeb0a50e063361012 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Tue, 14 Jun 2022 11:19:06 +0200 Subject: [PATCH 12/13] lavc/jpeg2000dec: Use coarser slicing for initial reslevels This brings -lowres 2 lossless 4K J2K on an AMD EPYC 7R32 to 52 fps (2080% CPU). --- libavcodec/jpeg2000dec.c | 9 + 1 file changed, 9 insertions(+) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index a2b9f0166b..18ebe5219d 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -2679,7 +2679,16 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture, for (s->reslevel = 0; s->reslevel < maxreslevels; s->reslevel++) { for (s->dir = 0; s->dir < 2; s->dir++) { +int before = s->slices; +int div = s->slices >= 96 ? 7 : 5; + +if (s->reslevel < div) { +int halve = 1<<(div - s->reslevel + (s->slices >= 96 ? 0 : 1 - s->dir)); +s->slices = (s->slices + halve-1)/halve; +} + avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents * s->slices); +s->slices = before; } } -- 2.30.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 13/13] lavc/jpeg2000dec: Component-level threading of write_frame()
Don't have access to the full machine to test this with 96 threads. On 2/3rds of an AMD EPYC 7R32 (-threads 64) it runs at 50 fps. Specifically the decoder uses 59.2 seconds to decode a 60.0 second clip. /Tomas From 19fc2413dc2bafff577c68830cde48e08138771e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Tue, 14 Jun 2022 15:45:32 +0200 Subject: [PATCH 13/13] lavc/jpeg2000dec: Component-level threading of write_frame() Split off MCT and don't bother with it unless the picture actually uses MCT. --- libavcodec/jpeg2000dec.c | 35 --- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 18ebe5219d..8eaeda1c66 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -156,6 +156,7 @@ typedef struct Jpeg2000DecoderContext { // used for idwt slicing int reslevel, dir, slices; int have_dwt97_int; // 1 if any coding style is FF_DWT97_INT +int have_mct; } Jpeg2000DecoderContext; /* get_bits functions for JPEG2000 packet bitstream @@ -600,6 +601,9 @@ static int get_cod(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c, return AVERROR_INVALIDDATA; } +if (tmp.mct) +s->have_mct = 1; + if ((ret = get_cox(s, &tmp)) < 0) return ret; tmp.init = 1; @@ -2073,16 +2077,14 @@ static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td, #define WRITE_FRAME(D, PIXEL) \ static inline void write_frame_ ## D(Jpeg2000DecoderContext * s, Jpeg2000Tile * tile, \ - AVFrame * picture, int precision)\ + AVFrame * picture, int precision, int compno)\ { \ const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->avctx->pix_fmt); \ int planar= !!(pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR); \ int pixelsize = planar ? 
1 : pixdesc->nb_components; \ \ -int compno; \ int x, y; \ \ -for (compno = 0; compno < s->ncomponents; compno++) { \ Jpeg2000Component *comp = tile->comp + compno;\ Jpeg2000CodingStyle *codsty = tile->codsty + compno; \ PIXEL *line; \ @@ -2129,8 +2131,6 @@ static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td, } \ line += picture->linesize[plane] / sizeof(PIXEL); \ } \ -} \ - \ } WRITE_FRAME(8, uint8_t) @@ -2138,26 +2138,36 @@ WRITE_FRAME(16, uint16_t) #undef WRITE_FRAME -static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td, -int jobnr, int threadnr) +static int jpeg2000_mct(AVCodecContext *avctx, void *td, +int jobnr, int threadnr) { Jpeg2000DecoderContext *s = avctx->priv_data; -AVFrame *picture = td; Jpeg2000Tile *tile = s->tile + jobnr; /* inverse MCT transformation */ if (tile->codsty[0].mct) mct_decode(s, tile); +return 0; +} + +static int jpeg2000_write_frame(AVCodecContext *avctx, void *td, +int jobnr, int threadnr) +{ +Jpeg2000DecoderContext *s = avctx->priv_data; +AVFrame *picture = td; +Jpeg2000Tile *tile = s->tile + jobnr / s->ncomponents; +int compno = jobnr % s->ncomponents; + if (s->precision <= 8) { -write_frame_8(s, tile, picture, 8); +write_frame_8(s, tile, picture, 8, compno); } else { int precision = picture->format == AV_PIX_FMT_XYZ12 || picture->format == AV_PIX_FMT_RGB48 || picture->format == AV_PIX_FMT_RGBA64 || picture->format == AV_PIX_FMT_GRAY16 ? 16 : s->precision;
Re: [FFmpeg-devel] [PATCH 11/13] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()
Tomas Härdin: > > Why call it at all? Why not just add a new parameter to ff_jpeg2000_init_component that is always set to 1 when called from the encoder and 0 when called from the decoder? (And is this really a bottleneck?) - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 07/13] lavc/jpeg2000*: Use ff_fast_recalloc() to eliminate lots of allocations
Tomas Härdin: > > > @@ -2166,12 +2163,13 @@ static int jpeg2000_mct_write_frame(AVCodecContext > *avctx, void *td, > return 0; > } > > -static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s) > +static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s, int close) > { > int tileno, compno; > -for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) { > +if (close) { > +for (tileno = 0; tileno < s->tile_size/sizeof(*s->tile); tileno++) { > if (s->tile[tileno].comp) { > -for (compno = 0; compno < s->ncomponents; compno++) { > +for (compno = 0; compno < > s->tile[tileno].comp_size/sizeof(*s->tile[tileno].comp); compno++) { > Jpeg2000Component *comp = s->tile[tileno].comp + > compno; > Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + > compno; > > @@ -2182,10 +2180,11 @@ static void > jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s) > s->tile[tileno].packed_headers_size = 0; > } > } > +av_freep(&s->tile); > +} > av_freep(&s->packed_headers); > s->packed_headers_size = 0; > memset(&s->packed_headers_stream, 0, sizeof(s->packed_headers_stream)); > -av_freep(&s->tile); > memset(s->codsty, 0, sizeof(s->codsty)); > memset(s->qntsty, 0, sizeof(s->qntsty)); > memset(s->properties, 0, sizeof(s->properties)); > @@ -2689,7 +2688,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, > AVFrame *picture, > > avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, > s->numXtiles * s->numYtiles); > > -jpeg2000_dec_cleanup(s); > +jpeg2000_dec_cleanup(s, 0); > > *got_frame = 1; > > @@ -2702,7 +2701,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, > AVFrame *picture, > return bytestream2_tell(&s->g); > > end: > -jpeg2000_dec_cleanup(s); > +jpeg2000_dec_cleanup(s, 0); > return ret; > } > > @@ -2712,6 +2711,7 @@ static av_cold int jpeg2000_decode_close(AVCodecContext > *avctx) > > av_freep(&s->idwt); > av_freep(&s->cb); > +jpeg2000_dec_cleanup(s, 1); > > return 0; > } Why don't you just move the part of jpeg2000_dec_cleanup() that you intend to 
be only executed in jpeg2000_decode_close() to jpeg2000_decode_close()? - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: set start_time_realtime when demuxing
On Tue, 14 Jun 2022, Gyan Doshi wrote: On 2022-06-14 01:51 pm, Marton Balint wrote: On Tue, 14 Jun 2022, Gyan Doshi wrote: Add new flag AVFMT_START_REALTIME for the couple of demuxers that set it internally. I don't think this is the same semantics. start_time_realtime is the time when the stream was originally captured, and not when the stream was received. So you can't simply use av_gettime() in the demuxers for that field. I saw that in the rtsp read_packet. I thought of adding a new field but think the semantics are close enough to serve the purpose of a rough sync so I added the flag for the demuxers that receive origin timestamp. I can update the doxy to record this difference. Or should I add a new field? I prefer a new field. Thanks, Marton ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 06/13] lavu/mem: Add ff_fast_recalloc()
On Tue, Jun 14, 2022 at 04:42:06PM +0200, Tomas Härdin wrote: > Left this as an ff_ function for now since it's only used by the j2k > code > > /Tomas > mem.c | 24 > mem.h | 55 +++ > 2 files changed, 79 insertions(+) > 21be65bd06e3260f9f36598d5d574ee32e7131a6 > 0006-lavu-mem-Add-ff_fast_recalloc.patch > From 5d36d431ffe4c8ba0f698d0c288ebc16b83f0bbc Mon Sep 17 00:00:00 2001 > From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= > Date: Tue, 14 Jun 2022 13:35:18 +0200 > Subject: [PATCH 06/13] lavu/mem: Add ff_fast_recalloc() You cannot call an ff_* function that's in libavutil from outside libavutil; this will fail with shared libs, as the ff_* stuff is not exported. thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Observe your enemies, for they first find out your faults. -- Antisthenes signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 05/13] lavc/jpeg2000dec: Thread init_tile()
On Tue, Jun 14, 2022 at 04:41:14PM +0200, Tomas Härdin wrote: > > jpeg2000dec.c | 30 +++--- > 1 file changed, 15 insertions(+), 15 deletions(-) > 6fa2fbf99afee36ee73459863df0527a72663f43 > 0005-lavc-jpeg2000dec-Thread-init_tile.patch > From 080ebdc9bad130098bff575f9ce690b8a522c9f7 Mon Sep 17 00:00:00 2001 > From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= > Date: Mon, 13 Jun 2022 15:09:17 +0200 > Subject: [PATCH 05/13] lavc/jpeg2000dec: Thread init_tile() Causes segfaults [jpeg2000 @ 0x2cf53380] End mismatch 149 [jpeg2000 @ 0x2cf53380] ==1439== Thread 6: ==1439== Invalid read of size 4 ==1439==at 0x9771F0: jpeg2000_mct_write_frame (in ffmpeg_g) ==1439==by 0x78BA6F: avcodec_default_execute2 (in ffmpeg_g) ==1439==by 0x97C0BB: jpeg2000_decode_frame (in ffmpeg_g) ==1439==by 0xA90F72: frame_worker_thread (in ffmpeg_g) ==1439==by 0x54046DA: start_thread (pthread_create.c:463) ==1439==by 0xF8F261E: clone (clone.S:95) I will send you the sample privately thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB "Nothing to hide" only works if the folks in power share the values of you and everyone you know entirely and always will -- Tom Scott signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: set start_time_realtime when demuxing
Gyan Doshi: > Add new flag AVFMT_START_REALTIME for the couple of demuxers that set it > internally. > --- > doc/APIchanges | 3 +++ > libavformat/avformat.h | 1 + > libavformat/demux.c| 3 +++ > libavformat/rtsp.c | 3 ++- > libavformat/version.h | 2 +- > 5 files changed, 10 insertions(+), 2 deletions(-) > > diff --git a/doc/APIchanges b/doc/APIchanges > index 5857e67ae6..4aa9e36777 100644 > --- a/doc/APIchanges > +++ b/doc/APIchanges > @@ -14,6 +14,9 @@ libavutil: 2021-04-27 > > API changes, most recent first: > > +2022-06-xx - xx - lavf 59.26.100 - avformat.h > + Add AVFMT_START_REALTIME flag. > + > 2022-06-12 - xx - lavf 59.25.100 - avio.h >Add avio_vprintf(), similar to avio_printf() but allow to use it >from within a function taking a variable argument list as input. > diff --git a/libavformat/avformat.h b/libavformat/avformat.h > index f12fa7d904..4e247d68fc 100644 > --- a/libavformat/avformat.h > +++ b/libavformat/avformat.h > @@ -499,6 +499,7 @@ typedef struct AVProbeData { > The user or muxer can override this > through > AVFormatContext.avoid_negative_ts > */ > +#define AVFMT_START_REALTIME 0x8 /**< Demuxer sets start_time_realtime */ This is incorrect: If this were merged, start_time_realtime would always be set by libavformat; whether it is done by the generic code or by demuxer specific code is an implementation detail and irrelevant for the user. Given that it is also set for demuxers without this flag actually means that libavformat lies when using a demuxer without this flag. 
> > #define AVFMT_SEEK_TO_PTS 0x400 /**< Seeking is based on PTS */ > > diff --git a/libavformat/demux.c b/libavformat/demux.c > index 1620716716..28c6966e71 100644 > --- a/libavformat/demux.c > +++ b/libavformat/demux.c > @@ -628,6 +628,9 @@ FF_ENABLE_DEPRECATION_WARNINGS > > force_codec_ids(s, st); > > +if (!(s->iformat->flags & AVFMT_START_REALTIME) && > s->start_time_realtime == AV_NOPTS_VALUE) > +s->start_time_realtime = av_gettime(); > + > /* TODO: audio: time filter; video: frame reordering (pts != dts) */ > if (s->use_wallclock_as_timestamps) > pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, > st->time_base); > diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c > index 88e9ef5226..2cafe33887 100644 > --- a/libavformat/rtsp.c > +++ b/libavformat/rtsp.c > @@ -2489,6 +2489,7 @@ const AVInputFormat ff_sdp_demuxer = { > .read_header= sdp_read_header, > .read_packet= ff_rtsp_fetch_packet, > .read_close = sdp_read_close, > +.flags = AVFMT_START_REALTIME, > .priv_class = &sdp_demuxer_class, > }; > #endif /* CONFIG_SDP_DEMUXER */ > @@ -2648,7 +2649,7 @@ const AVInputFormat ff_rtp_demuxer = { > .read_header= rtp_read_header, > .read_packet= ff_rtsp_fetch_packet, > .read_close = sdp_read_close, > -.flags = AVFMT_NOFILE, > +.flags = AVFMT_NOFILE | AVFMT_START_REALTIME, > .priv_class = &rtp_demuxer_class, > }; > #endif /* CONFIG_RTP_DEMUXER */ > diff --git a/libavformat/version.h b/libavformat/version.h > index 966ebb7ed3..0708d619c0 100644 > --- a/libavformat/version.h > +++ b/libavformat/version.h > @@ -31,7 +31,7 @@ > > #include "version_major.h" > > -#define LIBAVFORMAT_VERSION_MINOR 25 > +#define LIBAVFORMAT_VERSION_MINOR 26 > #define LIBAVFORMAT_VERSION_MICRO 100 > > #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV encoder encode VAAPI and D3D11 frames directly
> > -Original Message- > > From: ffmpeg-devel On Behalf Of > > Xiang, Haihao > > Sent: Thursday, June 9, 2022 8:48 AM > > To: ffmpeg-devel@ffmpeg.org > > Cc: Wu, Tong1 ; Chen, Wenbin > > > > Subject: Re: [FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV encoder > > encode VAAPI and D3D11 frames directly > > > > On Wed, 2022-06-08 at 11:13 +, Soft Works wrote: > > > > -Original Message- > > > > From: ffmpeg-devel On Behalf Of > > Xiang, > > > > Haihao > > > > Sent: Wednesday, June 8, 2022 10:42 AM > > > > To: ffmpeg-devel@ffmpeg.org > > > > Cc: Wu, Tong1 ; Chen, Wenbin > > > > > > Subject: Re: [FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV > > encoder encode > > > > VAAPI and D3D11 frames directly > > > > > > > > On Wed, 2022-06-08 at 05:08 +, Soft Works wrote: > > > > > > -Original Message- > > > > > > From: ffmpeg-devel On > > Behalf Of Tong > > > > > > Wu > > > > > > Sent: Tuesday, June 7, 2022 11:22 AM > > > > > > To: ffmpeg-devel@ffmpeg.org > > > > > > Cc: Tong Wu ; Wenbin Chen > > > > > > > > Subject: [FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV > > encoder encode > > > > > > VAAPI > > > > > > and D3D11 frames directly > > > > > > [..] > > > > > > > > > 2.35.1.windows.2 > > > > > > > > > > Hi, > > > > > > > > > > thanks for submitting this patch. Though, I'm afraid, but this > > > > > > > > > > - fundamentally contradicts the logic of ffmpeg's handling of > > hw > > > > > > > > acceleration, > > > > > hw device and hw frames contexts > > > > > - adds code to an encoder, doing things an encoder is not > > supposed to do- > > > > > > > > qsv > > > > > encoders and decoders have their own context => QSV > > > > > > > > nvdec and nvenc have CUDA but nvenc can also support D3D11va, it > > sounds make > > > > sense for me to support D3D11va/vaapi in qsvenc too as > > d3d11va/vaapi are > > > > used > > > > internally in MediaSDK. 
> > > > > > Can you please post a command line showing nvenc working with input > > > from a D3D11VA decoder and without using any > > hwmap/hwupload/hwdownload > > > filters? > > > > > > > According to the code below, nvenc may accept d3d11 frames directly, > > > > https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/nvenc.c#L46- > > L72 > > > > so the command below should work > > > > $> ffmpeg -y -hwaccel_output_format d3d11 -hwaccel d3d11va -i > > input.mp4 -c:v > > hevc_nvenc out.mp4 > > Right, it does work. Thanks for the command, I had tried like that before, but > in a "wrong" branch. > > > Now I took a bit of a deeper look into it and the ability of NVENC to encode > from plain D3D11 frames. There are quite a few differences between NVENC > and QSVENC. > > > HW Frames Contexts > -- > > QSVENC > > MSDK cannot work with VAAPI frames, D3D9 frames or D3D11 frames > directly. > An application is always required to wrap such frames via mfxSurface and > manage a collection of mfxSurface descriptions. > It's an abstraction that allows coding against the MSDK API independent from > the underlying technology. > The technical representation of this in ffmpeg is the QSVFramesContext. > When there's an input of plain VAAPI or D3D11 frames (hw frames context), > then it is required to derive a new QSVFramesContext from the input hw > frames context (e.g. via hwmap filter) where the procedure of deriving > means to set up a new QSVFramesContext which does the required wrapping > (or "mapping" as ffmpeg calls it). > > I think that the way how this logic is reflected in ffmpeg is thought out very > well and provides a high degree of flexibility. > > > NVENC > > The situation is very different here. Nvidia provides platform independence > not by wrapping platform-specific GPU frame types, but instead uses its own > custom type - CUDA memory/frames. This is what decoders are outputting, > filters are using for input/output and encoders take as input.
> > What I do not know, is whether it would be possible to map D3D11 frames to > CUDA frames and vice versa. In case, that would be the preferable way IMO > to deal with different hw frame types. > At least this isn't implemented at this time. The only possible frames > derivation/mapping is from and to Vulkan. > > Hence I can't say whether the NVENC implementation to take D3D11 frames > directly has been done out of laziness or whether it was the only possible > way. In case when it wouldn't be possible to map D3D11 frames to CUDA > frames, and only NVENC encoders would be able to process D3D11 frames, > then it would have been the only option of course. > > But in any way, it's not the same as with QSVENC, because NVENC can take > D3D11 frames as input directly without wrapping/mapping first. > > > > There are more differences, but I don't want to drive it too far. > > What stands at the bottom line is: > > - NVENC can take D3D11 frames context directly > - QSVENC can't - it needs to map it to a QSVFramesContext first > > > Concluding opinion: > >
Re: [FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV encoder encode VAAPI and D3D11 frames directly
> Quoting Wu, Tong1 (2022-06-08 06:47:27) > > > > > > Quoting Tong Wu (2022-06-07 11:22:16) > > > > QSV encoder is able to encode frames with VAAPI or D3D11 pixel > > > > format directly. This patch adds support for qsv encoder to accept > > > > VAAPI and > > > > D3D11 pixel formats as input. > > > > > > This looks like an ad-hoc hack to me. Encoders should not do these > > > kinds of tricks. > > > > > > -- > > > Anton Khirnov > > > > Thanks for the comments. The MFXSurface is based on VaSurface on Linux > > and D3D texture on Windows. Since the QSV encoder can accept > > AV_PIX_FMT_QSV as input, it seems kind of reasonable to accept VAAPI > > and D3D as its input. And it just may not look like a 'real' trick, > > let's say, for example, make QSV encoder accept VULKAN format > > directly. By adding this patch, we just want QSV encoder have more > > input format supports like what nvenc does. > > The difference with nvenc is that the nvenc API actually supports d3d > textures directly, our encoder wrapper merely passes them through. > > Your patch, on the other hand, derives a new device inside the decoder. > The intent behind the hwcontext interface is that such operations should be > left to the library caller, and are actually quite easy to do. So I don't see > why > is this patch really needed. > > > Plus, this patch can really help the users who have hybrid transcode needs. > > Could you elaborate? How would this patch be useful in this specific case. > Why can't the callers derive the device themselves? > > -- > Anton Khirnov It looks easier and more convenient for the users because they don't derive them manually. But yes, I'm convinced that it may be not the work that an encoder should do. Thanks for the comments. Regards, Tong ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: set start_time_realtime when demuxing
On 2022-06-15 03:18 am, Andreas Rheinhardt wrote: Gyan Doshi: Add new flag AVFMT_START_REALTIME for the couple of demuxers that set it internally. --- doc/APIchanges | 3 +++ libavformat/avformat.h | 1 + libavformat/demux.c| 3 +++ libavformat/rtsp.c | 3 ++- libavformat/version.h | 2 +- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 5857e67ae6..4aa9e36777 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2022-06-xx - xx - lavf 59.26.100 - avformat.h + Add AVFMT_START_REALTIME flag. + 2022-06-12 - xx - lavf 59.25.100 - avio.h Add avio_vprintf(), similar to avio_printf() but allow to use it from within a function taking a variable argument list as input. diff --git a/libavformat/avformat.h b/libavformat/avformat.h index f12fa7d904..4e247d68fc 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h @@ -499,6 +499,7 @@ typedef struct AVProbeData { The user or muxer can override this through AVFormatContext.avoid_negative_ts */ +#define AVFMT_START_REALTIME 0x8 /**< Demuxer sets start_time_realtime */ This is incorrect: If this were merged, start_time_realtime would always be set by libavformat; whether it is done by the generic code or by demuxer specific code is an implementation detail and irrelevant for the user. Given that it is also set for demuxers without this flag actually means that libavformat lies when using a demuxer without this flag. This is moot since I'm switching to a new field but would either of these satisfy you? s/Demuxer/Format/ or /**< start_time_realtime is populated with conveyed origin wallclock timestamp */ Regards, Gyan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: set start_time_realtime when demuxing
Gyan Doshi: > > > On 2022-06-15 03:18 am, Andreas Rheinhardt wrote: >> Gyan Doshi: >>> Add new flag AVFMT_START_REALTIME for the couple of demuxers that set it >>> internally. >>> --- >>> doc/APIchanges | 3 +++ >>> libavformat/avformat.h | 1 + >>> libavformat/demux.c | 3 +++ >>> libavformat/rtsp.c | 3 ++- >>> libavformat/version.h | 2 +- >>> 5 files changed, 10 insertions(+), 2 deletions(-) >>> >>> diff --git a/doc/APIchanges b/doc/APIchanges >>> index 5857e67ae6..4aa9e36777 100644 >>> --- a/doc/APIchanges >>> +++ b/doc/APIchanges >>> @@ -14,6 +14,9 @@ libavutil: 2021-04-27 >>> API changes, most recent first: >>> +2022-06-xx - xx - lavf 59.26.100 - avformat.h >>> + Add AVFMT_START_REALTIME flag. >>> + >>> 2022-06-12 - xx - lavf 59.25.100 - avio.h >>> Add avio_vprintf(), similar to avio_printf() but allow to use it >>> from within a function taking a variable argument list as input. >>> diff --git a/libavformat/avformat.h b/libavformat/avformat.h >>> index f12fa7d904..4e247d68fc 100644 >>> --- a/libavformat/avformat.h >>> +++ b/libavformat/avformat.h >>> @@ -499,6 +499,7 @@ typedef struct AVProbeData { >>> The user or muxer can >>> override this through >>> >>> AVFormatContext.avoid_negative_ts >>> */ >>> +#define AVFMT_START_REALTIME 0x8 /**< Demuxer sets >>> start_time_realtime */ >> This is incorrect: If this were merged, start_time_realtime would always >> be set by libavformat; whether it is done by the generic code or by >> demuxer specific code is an implementation detail and irrelevant for the >> user. Given that it is also set for demuxers without this flag actually >> means that libavformat lies when using a demuxer without this flag. > > This is moot since I'm switching to a new field but would either of > these satisfy you? > > s/Demuxer/Format/ > > or > > /**< start_time_realtime is populated with conveyed origin wallclock > timestamp */ > Of course not. 
My point was not about the semantics of the field being populated, but about the flag and the fact that the field was populated for demuxers that don't have this flag set. To remedy this, you should stop setting start_time_realtime for demuxers that don't have this flag set. - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] avutil/cpu_internal: Fix check for SSE2SLOW
For SSE2 and SSE3, there are four states that the two flags involved (AV_CPU_FLAG_SSE[23] and AV_CPU_FLAG_SSE[23]SLOW) can convey. When ordered from worst to best they are: 1. both flags unset (SSE[23] unavailable) 2. the slow flag set, the ordinary flag unset (this is designed for cases where SSE2 is available, but so slow that MMX(EXT)/SSE code is usually faster) 3. both flags set (SSE2 is available, but there might be scenarios where MMX(EXT)/SSE code is faster) 4. the ordinary flag set, the slow flag unset (this is the normal case) The ordinary macros for checking cpuflags return true in the latter two cases; the fast macros only return true for the latter case. Yet the macros to check for slow currently only return true in case three. This seems unintended. In fact, the only uses of the slow macros are all of the form if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) where the check for EXTERNAL_SSE2_SLOW is completely redundant. Even more importantly, it is not what was intended. Before 6369ba3c9cc74becfaad2a8882dff3dd3e7ae3c0, the checks passed in cases 2 to 4. Said commit changed this to something that only passes for the third case. Commits 7fb758cd8ed08e4a37f10e25003953d13c68b8cd and c1913064e38cb338039f29c280a0dacc3fd1e451 restored the old behaviour, yet merging 4efab89332ea39a77145e8b15562b981d9dbde68 (in commit ac774cfa571734c49c26e2d3387adccff8957ff8) broke this again by changing it to what it is now.* This commit changes the macros to make the slow macros check whether a specific instruction is supported, even if slow. This restores the intended meaning to all uses of the SLOW macros and is generally more natural. *: Libav only checks for EXTERNAL_SSE2_SLOW, i.e. for the third case only. 
Signed-off-by: Andreas Rheinhardt --- libavutil/cpu_internal.h | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h index e207b2d480..650d47fc96 100644 --- a/libavutil/cpu_internal.h +++ b/libavutil/cpu_internal.h @@ -30,12 +30,15 @@ (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext) && \ !((flags) & AV_CPU_FLAG_ ## slow_cpuext ## SLOW)) +#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext) \ +(HAVE_ ## cpuext ## suffix && \ + ((flags) & (AV_CPU_FLAG_ ## cpuext | AV_CPU_FLAG_ ## cpuext ## SLOW))) + #define CPUEXT_SUFFIX_SLOW2(flags, suffix, cpuext, slow_cpuext) \ (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext) && \ - ((flags) & AV_CPU_FLAG_ ## slow_cpuext ## SLOW)) + ((flags) & (AV_CPU_FLAG_ ## slow_cpuext | AV_CPU_FLAG_ ## slow_cpuext ## SLOW))) #define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext) CPUEXT_SUFFIX_FAST2(flags, suffix, cpuext, cpuext) -#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext) CPUEXT_SUFFIX_SLOW2(flags, suffix, cpuext, cpuext) #define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext) #define CPUEXT_FAST(flags, cpuext) CPUEXT_SUFFIX_FAST(flags, , cpuext) -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] avcodec/x86/lpc, vp8dsp_init: Remove redundant checks
EXTERNAL_SSE2_SLOW is now more encompassing than EXTERNAL_SSE2. Signed-off-by: Andreas Rheinhardt --- libavcodec/x86/lpc.c | 2 +- libavcodec/x86/vp8dsp_init.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c index 544083bd37..40fc29fc0f 100644 --- a/libavcodec/x86/lpc.c +++ b/libavcodec/x86/lpc.c @@ -154,7 +154,7 @@ av_cold void ff_lpc_init_x86(LPCContext *c) #if HAVE_SSE2_INLINE int cpu_flags = av_get_cpu_flags(); -if (INLINE_SSE2(cpu_flags) || INLINE_SSE2_SLOW(cpu_flags)) { +if (INLINE_SSE2_SLOW(cpu_flags)) { c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; } diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c index 289f1d5115..0238898e58 100644 --- a/libavcodec/x86/vp8dsp_init.c +++ b/libavcodec/x86/vp8dsp_init.c @@ -348,7 +348,7 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c) c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; } -if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) { +if (EXTERNAL_SSE2_SLOW(cpu_flags)) { VP8_LUMA_MC_FUNC(0, 16, sse2); VP8_MC_FUNC(1, 8, sse2); VP8_BILINEAR_MC_FUNC(0, 16, sse2); @@ -418,7 +418,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c) c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; } -if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) { +if (EXTERNAL_SSE2_SLOW(cpu_flags)) { c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avformat/mpegts: remove obsolete hacks for detecting streams with bad PMTs
Ffmpeg/ffprobe/ffplay sets scan_all_pmts to 1 when finding the streams, that should be enough to handle files for which some early PMTs miss some streams. Fixes ticket #9782. Signed-off-by: Marton Balint --- libavformat/mpegts.c | 12 ++-- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index 6e761c07f1..8a3436f2be 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -2870,16 +2870,8 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet, int64_t pos) break; } if (i == ts->nb_prg && ts->nb_prg > 0) { -int types = 0; -for (i = 0; i < ts->stream->nb_streams; i++) { -AVStream *st = ts->stream->streams[i]; -if (st->codecpar->codec_type >= 0) -types |= 1codec_type; -} -if ((types & (1< 10) { -av_log(ts->stream, AV_LOG_DEBUG, "All programs have pmt, headers found\n"); -ts->stream->ctx_flags &= ~AVFMTCTX_NOHEADER; -} +av_log(ts->stream, AV_LOG_DEBUG, "All programs have pmt, headers found\n"); +ts->stream->ctx_flags &= ~AVFMTCTX_NOHEADER; } } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avformat/matroskaenc: Convert chapter metadata
It is no longer converted since mkv_write_chapters() is called before mkv_write_tags() which happens since commit 4ebfc13c338423cf48f1a1266c890422367f7775. Given the fact that chapters can also be written late, mkv_write_chapters() has to convert the metadata itself. Fixes ticket #9812. Signed-off-by: Andreas Rheinhardt --- Will apply this tomorrow unless there are objections. libavformat/matroskaenc.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 482b5812e5..2211d99ae8 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -2094,7 +2094,7 @@ static int mkv_write_chapters(AVFormatContext *s) create_new_ids = mkv_new_chapter_ids_needed(s); for (unsigned i = 0; i < s->nb_chapters; i++) { -const AVChapter *c = s->chapters[i]; +AVChapter *const c = s->chapters[i]; int64_t chapterstart = av_rescale_q(c->start, c->time_base, scale); int64_t chapterend = av_rescale_q(c->end, c->time_base, scale); const AVDictionaryEntry *t; @@ -2122,11 +2122,15 @@ static int mkv_write_chapters(AVFormatContext *s) if (ret < 0) goto fail; -if (tags && mkv_check_tag(c->metadata, MATROSKA_ID_TAGTARGETS_CHAPTERUID)) { -ret = mkv_write_tag(mkv, c->metadata, tags, NULL, -MATROSKA_ID_TAGTARGETS_CHAPTERUID, uid); -if (ret < 0) -goto fail; +if (tags) { +ff_metadata_conv(&c->metadata, ff_mkv_metadata_conv, NULL); + +if (mkv_check_tag(c->metadata, MATROSKA_ID_TAGTARGETS_CHAPTERUID)) { +ret = mkv_write_tag(mkv, c->metadata, tags, NULL, +MATROSKA_ID_TAGTARGETS_CHAPTERUID, uid); +if (ret < 0) +goto fail; +} } } end_ebml_master(dyn_cp, editionentry); -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avformat: set start_time_realtime when demuxing
Add new flag AVFMT_START_REALTIME for the couple of demuxers that set it internally. --- doc/APIchanges | 3 +++ libavformat/avformat.h | 1 + libavformat/demux.c| 3 +++ libavformat/rtsp.c | 3 ++- libavformat/version.h | 2 +- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 5857e67ae6..4aa9e36777 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2022-06-xx - xx - lavf 59.26.100 - avformat.h + Add AVFMT_START_REALTIME flag. + 2022-06-12 - xx - lavf 59.25.100 - avio.h Add avio_vprintf(), similar to avio_printf() but allow to use it from within a function taking a variable argument list as input. diff --git a/libavformat/avformat.h b/libavformat/avformat.h index f12fa7d904..4e247d68fc 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h @@ -499,6 +499,7 @@ typedef struct AVProbeData { The user or muxer can override this through AVFormatContext.avoid_negative_ts */ +#define AVFMT_START_REALTIME 0x8 /**< Demuxer sets start_time_realtime */ #define AVFMT_SEEK_TO_PTS 0x400 /**< Seeking is based on PTS */ diff --git a/libavformat/demux.c b/libavformat/demux.c index 1620716716..28c6966e71 100644 --- a/libavformat/demux.c +++ b/libavformat/demux.c @@ -628,6 +628,9 @@ FF_ENABLE_DEPRECATION_WARNINGS force_codec_ids(s, st); +if (!(s->iformat->flags & AVFMT_START_REALTIME) && s->start_time_realtime == AV_NOPTS_VALUE) +s->start_time_realtime = av_gettime(); + /* TODO: audio: time filter; video: frame reordering (pts != dts) */ if (s->use_wallclock_as_timestamps) pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, st->time_base); diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c index 88e9ef5226..2cafe33887 100644 --- a/libavformat/rtsp.c +++ b/libavformat/rtsp.c @@ -2489,6 +2489,7 @@ const AVInputFormat ff_sdp_demuxer = { .read_header= sdp_read_header, .read_packet= ff_rtsp_fetch_packet, .read_close = sdp_read_close, +.flags = 
AVFMT_START_REALTIME, .priv_class = &sdp_demuxer_class, }; #endif /* CONFIG_SDP_DEMUXER */ @@ -2648,7 +2649,7 @@ const AVInputFormat ff_rtp_demuxer = { .read_header= rtp_read_header, .read_packet= ff_rtsp_fetch_packet, .read_close = sdp_read_close, -.flags = AVFMT_NOFILE, +.flags = AVFMT_NOFILE | AVFMT_START_REALTIME, .priv_class = &rtp_demuxer_class, }; #endif /* CONFIG_RTP_DEMUXER */ diff --git a/libavformat/version.h b/libavformat/version.h index 966ebb7ed3..0708d619c0 100644 --- a/libavformat/version.h +++ b/libavformat/version.h @@ -31,7 +31,7 @@ #include "version_major.h" -#define LIBAVFORMAT_VERSION_MINOR 25 +#define LIBAVFORMAT_VERSION_MINOR 26 #define LIBAVFORMAT_VERSION_MICRO 100 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \ -- 2.36.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: set start_time_realtime when demuxing
On Tue, 14 Jun 2022, Gyan Doshi wrote: Add new flag AVFMT_START_REALTIME for the couple of demuxers that set it internally. I don't think this is the same semantics. start_time_realtime is the time when the stream was originally captured, and not when the stream was received. So you can't simply use av_gettime() in the demuxers for that field. Regards. Marton --- doc/APIchanges | 3 +++ libavformat/avformat.h | 1 + libavformat/demux.c| 3 +++ libavformat/rtsp.c | 3 ++- libavformat/version.h | 2 +- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 5857e67ae6..4aa9e36777 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2022-06-xx - xx - lavf 59.26.100 - avformat.h + Add AVFMT_START_REALTIME flag. + 2022-06-12 - xx - lavf 59.25.100 - avio.h Add avio_vprintf(), similar to avio_printf() but allow to use it from within a function taking a variable argument list as input. diff --git a/libavformat/avformat.h b/libavformat/avformat.h index f12fa7d904..4e247d68fc 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h @@ -499,6 +499,7 @@ typedef struct AVProbeData { The user or muxer can override this through AVFormatContext.avoid_negative_ts */ +#define AVFMT_START_REALTIME 0x8 /**< Demuxer sets start_time_realtime */ #define AVFMT_SEEK_TO_PTS 0x400 /**< Seeking is based on PTS */ diff --git a/libavformat/demux.c b/libavformat/demux.c index 1620716716..28c6966e71 100644 --- a/libavformat/demux.c +++ b/libavformat/demux.c @@ -628,6 +628,9 @@ FF_ENABLE_DEPRECATION_WARNINGS force_codec_ids(s, st); +if (!(s->iformat->flags & AVFMT_START_REALTIME) && s->start_time_realtime == AV_NOPTS_VALUE) +s->start_time_realtime = av_gettime(); + /* TODO: audio: time filter; video: frame reordering (pts != dts) */ if (s->use_wallclock_as_timestamps) pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, st->time_base); diff --git a/libavformat/rtsp.c 
b/libavformat/rtsp.c index 88e9ef5226..2cafe33887 100644 --- a/libavformat/rtsp.c +++ b/libavformat/rtsp.c @@ -2489,6 +2489,7 @@ const AVInputFormat ff_sdp_demuxer = { .read_header= sdp_read_header, .read_packet= ff_rtsp_fetch_packet, .read_close = sdp_read_close, +.flags = AVFMT_START_REALTIME, .priv_class = &sdp_demuxer_class, }; #endif /* CONFIG_SDP_DEMUXER */ @@ -2648,7 +2649,7 @@ const AVInputFormat ff_rtp_demuxer = { .read_header= rtp_read_header, .read_packet= ff_rtsp_fetch_packet, .read_close = sdp_read_close, -.flags = AVFMT_NOFILE, +.flags = AVFMT_NOFILE | AVFMT_START_REALTIME, .priv_class = &rtp_demuxer_class, }; #endif /* CONFIG_RTP_DEMUXER */ diff --git a/libavformat/version.h b/libavformat/version.h index 966ebb7ed3..0708d619c0 100644 --- a/libavformat/version.h +++ b/libavformat/version.h @@ -31,7 +31,7 @@ #include "version_major.h" -#define LIBAVFORMAT_VERSION_MINOR 25 +#define LIBAVFORMAT_VERSION_MINOR 26 #define LIBAVFORMAT_VERSION_MICRO 100 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \ -- 2.36.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: set start_time_realtime when demuxing
On 2022-06-14 01:51 pm, Marton Balint wrote: On Tue, 14 Jun 2022, Gyan Doshi wrote: Add new flag AVFMT_START_REALTIME for the couple of demuxers that set it internally. I don't think this is the same semantics. start_time_realtime is the time when the stream was originally captured, and not when the stream was received. So you can't simply use av_gettime() in the demuxers for that field. I saw that in the rtsp read_packet. I thought of adding a new field but think the semantics are close enough to serve the purpose of a rough sync so I added the flag for the demuxers that receive origin timestamp. I can update the doxy to record this difference. Or should I add a new field? Regards, Gyan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] doc/APIchanges: add missing marker for release 5.0
--- doc/APIchanges | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/APIchanges b/doc/APIchanges index 5857e67ae6..20b944933a 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -96,6 +96,8 @@ API changes, most recent first: 2022-01-26 - af94ab7c7c0 - lavu 57.19.100 - tx.h Add AV_TX_FLOAT_RDFT, AV_TX_DOUBLE_RDFT and AV_TX_INT32_RDFT. + 8< - FFmpeg 5.0 was cut here 8< - + 2022-01-04 - 78dc21b123e - lavu 57.16.100 - frame.h Add AV_FRAME_DATA_DOVI_METADATA. -- 2.36.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] all: Replace if (ARCH_FOO) checks by #if ARCH_FOO
Andreas Rheinhardt: > This is more spec-compliant because it does not rely > on dead-code elimination by the compiler. Especially > MSVC has problems with this, as can be seen in > https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/296373.html > or > https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/297022.html > > This commit does not eliminate every instance where we rely > on the dead code elimination: It only tackles branching to > the initialization of arch-specific dsp code, not e.g. all > uses of CONFIG_ and HAVE_ checks. But maybe it is already > enough to compile FFmpeg with MSVC with whole-program optimizations > enabled (if one does not disable too many components). > > Signed-off-by: Andreas Rheinhardt > --- > libavcodec/aacdec_template.c | 5 ++-- > libavcodec/aacenc.c | 5 ++-- > libavcodec/aacpsdsp_template.c| 17 ++- > libavcodec/aacsbr_template.c | 5 ++-- > libavcodec/ac3dsp.c | 18 ++- > libavcodec/alacdsp.c | 5 ++-- > libavcodec/audiodsp.c | 13 > libavcodec/blockdsp.c | 21 ++--- > libavcodec/bswapdsp.c | 5 ++-- > libavcodec/cavsdsp.c | 5 ++-- > libavcodec/cfhddsp.c | 5 ++-- > libavcodec/cfhdencdsp.c | 5 ++-- > libavcodec/dcadsp.c | 5 ++-- > libavcodec/dct.c | 5 ++-- > libavcodec/dirac_dwt.c| 4 ++- > libavcodec/diracdsp.c | 5 ++-- > libavcodec/dnxhdenc.c | 5 ++-- > libavcodec/exrdsp.c | 5 ++-- > libavcodec/fdctdsp.c | 9 +++--- > libavcodec/fft_template.c | 13 +--- > libavcodec/flacdsp.c | 9 +++--- > libavcodec/fmtconvert.c | 17 ++- > libavcodec/g722dsp.c | 9 +++--- > libavcodec/h263dsp.c | 9 +++--- > libavcodec/h264chroma.c | 25 > libavcodec/h264dsp.c | 19 > libavcodec/h264pred.c | 21 ++--- > libavcodec/h264qpel.c | 25 > libavcodec/hevcdsp.c | 25 > libavcodec/hevcpred.c | 5 ++-- > libavcodec/hpeldsp.c | 29 +- > libavcodec/huffyuvdsp.c | 5 ++-- > libavcodec/huffyuvencdsp.c| 5 ++-- > libavcodec/idctdsp.c | 38 --- > libavcodec/jpeg2000dsp.c | 5 ++-- > libavcodec/lossless_audiodsp.c| 13 > libavcodec/lossless_videodsp.c| 9 +++--- > 
libavcodec/lossless_videoencdsp.c | 5 ++-- > libavcodec/lpc.c | 5 ++-- > libavcodec/mdct15.c | 5 ++-- > libavcodec/me_cmp.c | 21 ++--- > libavcodec/mlpdsp.c | 9 +++--- > libavcodec/mpegaudiodsp.c | 18 +++ > libavcodec/mpegvideo.c| 21 ++--- > libavcodec/mpegvideo_enc.c| 5 ++-- > libavcodec/mpegvideodsp.c | 9 +++--- > libavcodec/mpegvideoencdsp.c | 17 ++- > libavcodec/opus_pvq.c | 5 ++-- > libavcodec/opusdsp.c | 10 +++ > libavcodec/pixblockdsp.c | 25 > libavcodec/pngdsp.c | 5 ++-- > libavcodec/proresdsp.c| 5 ++-- > libavcodec/qpeldsp.c | 9 +++--- > libavcodec/rdft.c | 4 ++- > libavcodec/rv34dsp.c | 9 +++--- > libavcodec/rv40dsp.c | 13 > libavcodec/sbcdsp.c | 9 +++--- > libavcodec/sbrdsp_template.c | 17 ++- > libavcodec/svq1enc.c | 9 +++--- > libavcodec/synth_filter.c | 13 > libavcodec/takdsp.c | 5 ++-- > libavcodec/ttadsp.c | 5 ++-- > libavcodec/ttaencdsp.c| 5 ++-- > libavcodec/utvideodsp.c | 5 ++-- > libavcodec/v210dec_init.h | 5 ++-- > libavcodec/v210enc_init.h | 5 ++-- > libavcodec/vc1dsp.c | 25 > libavcodec/videodsp.c | 25 > libavcodec/vorbisdsp.c| 17 ++- > libavcodec/vp3dsp.c | 17 ++- > libavcodec/vp56dsp.c | 9 +++--- > libavcodec/vp8dsp.c | 38 --- > libavcodec/vp9dsp.c | 16 ++ > libavcodec/wmv2dsp.c | 5 ++-- > libavcodec/x86/mdct15_init.c | 6 ++-- > libavcodec/xvididct.c | 9 +++--- > libavfilter/af_afirdsp.h | 5 ++-- > libavfilter/af_anlmdn.c | 5 ++-- > libavfilter/af_volume.c | 5 ++-- > libavfilter/avf_showcqt.c | 5 ++-- > libavfilter/colorspacedsp.c | 5 ++-- > libavfilter/scene_sad.c | 5 ++-- > libavfilter/vf_atadenoise.c | 5 ++-- > libavfilter/vf_blend_init.h | 5 ++-- > libavfilter/vf_bwdif.c|
[FFmpeg-devel] [PATCH 01/13] lavc/jpeg2000dec: Finer granularity threading
Patch 12 in this series is optional since it's just me getting the speed up on a specific machine /Tomas From 115aa26c343419e81c1b5ba0bfdb1615cbec27e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= Date: Fri, 10 Jun 2022 14:10:02 +0200 Subject: [PATCH 01/13] lavc/jpeg2000dec: Finer granularity threading Decoding and dequant is now threaded on codeblock level. IDWT is threaded on component level. MCT and write_frame() remain threaded on tile level. This brings lossless 4K J2K with -lowres 2 -thread_type slice -threads 96 on an AMD EPYC 7R32 from 4.8 fps (177% CPU) to 31 fps (1284% CPU). --- libavcodec/jpeg2000dec.c | 196 --- 1 file changed, 142 insertions(+), 54 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 92966b11f5..d9754fc50e 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -92,6 +92,15 @@ typedef struct Jpeg2000Tile { int coord[2][2];// border coordinates {{x0, x1}, {y0, y1}} } Jpeg2000Tile; +typedef struct Jpeg2000IdwtThread { +int cb_start, cb_end; +} Jpeg2000IdwtThread; + +typedef struct Jpeg2000CodeblockThread { +int tileno, compno, reslevelno, bandno, precno, cblkno; +int coded; +} Jpeg2000CodeblockThread; + typedef struct Jpeg2000DecoderContext { AVClass *class; AVCodecContext *avctx; @@ -136,6 +145,11 @@ typedef struct Jpeg2000DecoderContext { /*options parameters*/ int reduction_factor; + +Jpeg2000IdwtThread *idwt; +unsigned int idwt_size; +Jpeg2000CodeblockThread *cb; +unsigned int cb_size; } Jpeg2000DecoderContext; /* get_bits functions for JPEG2000 packet bitstream @@ -1937,54 +1951,33 @@ static inline void roi_scale_cblk(Jpeg2000Cblk *cblk, } } -static inline void tile_codeblocks(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile) +static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td, + int jobnr, int threadnr) { Jpeg2000T1Context t1; - -int compno, reslevelno, bandno; - -/* Loop on tile components */ -for (compno = 0; compno < s->ncomponents; compno++) { 
-Jpeg2000Component *comp = tile->comp + compno; -Jpeg2000CodingStyle *codsty = tile->codsty + compno; -int coded = 0; - -t1.stride = (1nreslevels2decode; reslevelno++) { -Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno; -/* Loop on bands */ -for (bandno = 0; bandno < rlevel->nbands; bandno++) { -int nb_precincts, precno; -Jpeg2000Band *band = rlevel->band + bandno; -int cblkno = 0, bandpos; - -bandpos = bandno + (reslevelno > 0); - -if (band->coord[0][0] == band->coord[0][1] || -band->coord[1][0] == band->coord[1][1]) -continue; - -nb_precincts = rlevel->num_precincts_x * rlevel->num_precincts_y; -/* Loop on precincts */ -for (precno = 0; precno < nb_precincts; precno++) { -Jpeg2000Prec *prec = band->prec + precno; - -/* Loop on codeblocks */ -for (cblkno = 0; - cblkno < prec->nb_codeblocks_width * prec->nb_codeblocks_height; - cblkno++) { -int x, y; -Jpeg2000Cblk *cblk = prec->cblk + cblkno; -int ret = decode_cblk(s, codsty, &t1, cblk, +Jpeg2000DecoderContext *s = avctx->priv_data; +Jpeg2000CodeblockThread *cb = s->cb + jobnr; +Jpeg2000Tile *tile = s->tile + cb->tileno; +Jpeg2000Component *comp = tile->comp + cb->compno; +Jpeg2000CodingStyle *codsty = tile->codsty + cb->compno; +Jpeg2000ResLevel *rlevel= comp->reslevel + cb->reslevelno; +Jpeg2000Band *band = rlevel->band + cb->bandno; +Jpeg2000Prec *prec = band->prec + cb->precno; +Jpeg2000Cblk *cblk = prec->cblk + cb->cblkno; +int ret, x, y, bandpos = cb->bandno + (cb->reslevelno > 0); + +t1.stride = (1 coded = 0; + +ret = decode_cblk(s, codsty, &t1, cblk, cblk->coord[0][1] - cblk->coord[0][0], cblk->coord[1][1] - cblk->coord[1][0], bandpos, comp->roi_shift); if (ret) -coded = 1; +cb->coded = 1; else -continue; +return 0; + x = cblk->coord[0][0] - band->coord[0][0]; y = cblk->coord[1][0] - band->coord[1][0]; @@