No, this does not work yet, just posting for anyone curious

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Michael Niedermayer <mich...@niedermayer.cc>
---
 libavcodec/ffv1.h             |   5 +-
 libavcodec/ffv1_parse.c       |  10 ++
 libavcodec/ffv1dec.c          |  36 ++++--
 libavcodec/ffv1dec_template.c |  30 +++--
 libavcodec/ffv1enc.c          | 211 +++++++++++++++++++++++++++++++++-
 5 files changed, 272 insertions(+), 20 deletions(-)
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index dd8a236efad..4db9a303bf1 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -109,7 +109,10 @@ typedef struct FFV1SliceContext { uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2]; }; }; - uint16_t fltmap[4][65536]; + union { + uint16_t fltmap [4][65536]; //halffloat encode & decode + uint32_t fltmap32[4][65536]; //float decode + }; } FFV1SliceContext; typedef struct FFV1Context { diff --git a/libavcodec/ffv1_parse.c b/libavcodec/ffv1_parse.c index 9745f9de694..10f3652ff51 100644 --- a/libavcodec/ffv1_parse.c +++ b/libavcodec/ffv1_parse.c @@ -419,6 +419,16 @@ int ff_ffv1_parse_header(FFV1Context *f, RangeCoder *c, uint8_t *state) } else f->pix_fmt = AV_PIX_FMT_GBRAP16; f->use32bit = 1; + } else if (f->avctx->bits_per_raw_sample == 32 && !f->transparency) { + if (f->flt) { + f->pix_fmt = AV_PIX_FMT_GBRPF32; + } + f->use32bit = 1; + } else if (f->avctx->bits_per_raw_sample == 32 && f->transparency) { + if (f->flt) { + f->pix_fmt = AV_PIX_FMT_GBRAPF32; + } + f->use32bit = 1; } } else { av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n"); diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 75fb5ae2f69..d45aabbbde8 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -250,6 +250,16 @@ static int decode_slice_header(const FFV1Context *f, sc->rawlsb = ff_ffv1_get_symbol(c, state, 0); } } + if (f->avctx->bits_per_raw_sample == 32) { + if (!sc->remap) { + av_log(f->avctx, AV_LOG_ERROR, "unsupported remap\n"); + return AVERROR_INVALIDDATA; + } + if (sc->slice_width * sc->slice_height > 65536) { + av_log(f->avctx, AV_LOG_ERROR, "32bit needs remap\n"); + return AVERROR_INVALIDDATA; + } + } return 0; } @@ -266,28 +276,38 @@ static void slice_set_damaged(FFV1Context *f, FFV1SliceContext *sc) static int decode_remap(FFV1Context *f, FFV1SliceContext *sc) { - int flip = sc->remap == 2 ? 0x7FFF : 0; + unsigned int end = f->avctx->bits_per_raw_sample == 32 ? 
0xFFFFFFFF : 0xFFFF; + int flip = sc->remap == 2 ? (end>>1) : 0; + int sign = (end>>1)+1; for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) { int j = 0; int lu = 0; uint8_t state[2][32]; + int64_t i; memset(state, 128, sizeof(state)); - - for (int i= 0; i<65536; i++) { - int run = get_symbol_inline(&sc->c, state[lu], 0); - if (run > 65536U - i) + for (i=0; i <= end ; i++) { + unsigned run = get_symbol_inline(&sc->c, state[lu], 0); + if (run > end - i + 1) return AVERROR_INVALIDDATA; if (lu) { lu ^= !run; while (run--) { - sc->fltmap[p][j++] = i ^ ((i&0x8000) ? 0 : flip); + if (end == 0xFFFF) { + sc->fltmap [p][j++] = i ^ ((i& 0x8000) ? 0 : flip); + } else + sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip); i++; } } else { i += run; - if (i != 65536) - sc->fltmap[p][j++] = i ^ ((i&0x8000) ? 0 : flip); + if (i <= end) { + if (end == 0xFFFF) { + sc->fltmap [p][j++] = i ^ ((i& 0x8000) ? 0 : flip); + } else { + sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip); + } + } lu ^= !run; } } diff --git a/libavcodec/ffv1dec_template.c b/libavcodec/ffv1dec_template.c index f9499931b1d..4e500ab5212 100644 --- a/libavcodec/ffv1dec_template.c +++ b/libavcodec/ffv1dec_template.c @@ -150,7 +150,7 @@ static int RENAME(decode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, int x, y, p; TYPE *sample[4][2]; int lbd = f->avctx->bits_per_raw_sample <= 8; - int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8; + int bits = f->avctx->bits_per_raw_sample > 0 ? 
FFMIN(f->avctx->bits_per_raw_sample, 16) : 8; int offset = 1 << bits; int transparency = f->transparency; int ac = f->ac; @@ -198,16 +198,30 @@ static int RENAME(decode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, r += g; } if (sc->remap) { - r = sc->fltmap[0][r & 0xFFFF]; - g = sc->fltmap[1][g & 0xFFFF]; - b = sc->fltmap[2][b & 0xFFFF]; - if (transparency) - a = sc->fltmap[3][a & 0xFFFF]; + if (f->avctx->bits_per_raw_sample == 32) { + r = sc->fltmap32[0][r & 0xFFFF]; + g = sc->fltmap32[1][g & 0xFFFF]; + b = sc->fltmap32[2][b & 0xFFFF]; + if (transparency) + a = sc->fltmap32[3][a & 0xFFFF]; + } else { + r = sc->fltmap[0][r & 0xFFFF]; + g = sc->fltmap[1][g & 0xFFFF]; + b = sc->fltmap[2][b & 0xFFFF]; + if (transparency) + a = sc->fltmap[3][a & 0xFFFF]; + } } - if (lbd) + if (lbd) { *((uint32_t*)(src[0] + x*4 + stride[0]*y)) = b + ((unsigned)g<<8) + ((unsigned)r<<16) + ((unsigned)a<<24); - else if (sizeof(TYPE) == 4 || transparency) { + } else if (f->avctx->bits_per_raw_sample == 32) { + *((uint32_t*)(src[0] + x*4 + stride[0]*y)) = g; + *((uint32_t*)(src[1] + x*4 + stride[1]*y)) = b; + *((uint32_t*)(src[2] + x*4 + stride[2]*y)) = r; + if (transparency) + *((uint32_t*)(src[3] + x*4 + stride[3]*y)) = a; + } else if (sizeof(TYPE) == 4 || transparency) { *((uint16_t*)(src[0] + x*2 + stride[0]*y)) = g; *((uint16_t*)(src[1] + x*2 + stride[1]*y)) = b; *((uint16_t*)(src[2] + x*2 + stride[2]*y)) = r; diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c index 64add25b407..58e1227bd6c 100644 --- a/libavcodec/ffv1enc.c +++ b/libavcodec/ffv1enc.c @@ -31,6 +31,7 @@ #include "libavutil/mem.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" +#include "libavutil/qsort.h" #include "avcodec.h" #include "encode.h" @@ -576,6 +577,9 @@ int ff_ffv1_encode_determine_slices(AVCodecContext *avctx) continue; if (maxw * maxh * (int64_t)(s->bits_per_raw_sample+1) * plane_count > 8<<24) continue; + if (s->bits_per_raw_sample == 32) + if (maxw * maxh > 65536) + continue; if 
(s->version < 4) if ( ff_need_new_slices(avctx->width , s->num_h_slices, s->chroma_h_shift) ||ff_need_new_slices(avctx->height, s->num_v_slices, s->chroma_v_shift)) @@ -920,6 +924,10 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext *avctx, case AV_PIX_FMT_GBRAPF16: if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample) s->bits_per_raw_sample = 16; + case AV_PIX_FMT_GBRPF32: + case AV_PIX_FMT_GBRAPF32: + if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample) + s->bits_per_raw_sample = 32; else if (!s->bits_per_raw_sample) s->bits_per_raw_sample = avctx->bits_per_raw_sample; s->transparency = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA); @@ -942,6 +950,10 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext *avctx, if (s->remap_mode < 0) s->remap_mode = s->flt ? 2 : 0; + if (s->remap_mode == 0 && s->bits_per_raw_sample == 32) { + av_log(avctx, AV_LOG_ERROR, "32bit requires remap\n"); + return AVERROR(EINVAL); + } return av_pix_fmt_get_chroma_sub_sample(pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); } @@ -1158,7 +1170,7 @@ static void choose_rct_params(const FFV1Context *f, FFV1SliceContext *sc, sc->slice_rct_ry_coef = rct_y_coeff[best][0]; } -static void encode_remap(FFV1Context *f, FFV1SliceContext *sc) +static void encode_histogram_remap(FFV1Context *f, FFV1SliceContext *sc) { int flip = sc->remap == 2 ? 
0x7FFF : 0; @@ -1188,6 +1200,180 @@ static void encode_remap(FFV1Context *f, FFV1SliceContext *sc) } } +typedef struct Unit { + uint32_t val; //this is unneeded if you accept a dereference on each access + int ndx; //unsigned 16 bit would suffice but make code more complex/slow +} Unit; + +static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc, + const uint8_t *src[4], + int w, int h, const int stride[4], + Unit unit[4][65536]) +{ + int x, y; + int transparency = f->transparency; + int i = 0; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + int b, g, r, av_uninit(a); + int gi = x*4 + stride[0]*y; + int bi = x*4 + stride[1]*y; + int ri = x*4 + stride[2]*y; + + g = *((const uint32_t *)(src[0] + gi)); + b = *((const uint32_t *)(src[1] + bi)); + r = *((const uint32_t *)(src[2] + ri)); + if (transparency) + a = *((const uint32_t *)(src[3] + x*4 + stride[3]*y)); + + // We cannot build a histogram as we do for 16bit, we need a bit of magic here + // Its possible to reduce the memory needed at the cost of more dereferencing + unit[0][i].val = r; + unit[0][i].ndx = ri; + + unit[1][i].val = g; + unit[1][i].ndx = gi; + + unit[2][i].val = b; + unit[2][i].ndx = bi; + + if (transparency) { + unit[3][i].val = a; + unit[3][i].ndx = x*4 + stride[3]*y; + } + i++; + } + } + + //TODO switch to radix sort +#define CMP(A,B) ((A)->val - (int64_t)(B)->val) + AV_QSORT(unit[0], i, Unit, CMP); + AV_QSORT(unit[1], i, Unit, CMP); + AV_QSORT(unit[2], i, Unit, CMP); + if (transparency) + AV_QSORT(unit[3], i, Unit, CMP); +} + +static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc, + uint8_t *src[4], Unit unit[4][65536]) +{ + int flip = sc->remap == 2 ? 
0x7FFF : 0; + int pixel_num = sc->slice_width * sc->slice_height; + + av_assert0 (pixel_num <= 65536); + + for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) { + int lu = 0; + uint8_t state[2][32]; + int run = 0; + int64_t last_val = -1; + int compact_index = 0; + + memset(state, 128, sizeof(state)); + for (int i= 0; i<pixel_num+1; i++) { + int64_t val; + if (i == pixel_num) { + if (last_val == 0xFFFFFFFF) { + break; //i think + } else { + val = 1LL<<32; + } + } else + val = unit[p][i].val; +// if (flip) TODO +// val ^= (XX&0x80000000) ? 0 : 0x7FFFFFFF; + + *((uint32_t *)(src[p] + unit[p][i].ndx)) = compact_index; + + if (last_val != val) { + av_assert2(last_val < val); + if (lu) { + if (val - last_val == 1) { + run ++; + last_val = val; + } else { + av_log(0,0, "R%d %6d\n", lu, (int)(run)); + put_symbol_inline(&sc->c, state[lu], run, 0, NULL, NULL); + if (run == 0) + lu ^= 1; + run = 0; + i--; // we did not encode val so we need to backstep + last_val ++; + continue; + } + } else { + av_assert2(run == 0); + av_log(0,0, "R%d %6d\n", lu, (int)(val - last_val - 1)); + put_symbol_inline(&sc->c, state[lu], val - last_val - 1, 0, NULL, NULL); + if (val - last_val == 1) + lu ^= 1; + last_val = val; + } + compact_index ++; + } + } + } +} + +//TODO once this is working consider factorizing with the 16bit integer version and see how it looks if its too messy or better +static int encode_float32_rgb_frame(FFV1Context *f, FFV1SliceContext *sc, + const uint8_t *src[4], + int w, int h, const int stride[4], int ac) +{ + int x, y, p, i; + const int ring_size = f->context_model ? 
3 : 2; + int32_t *sample[4][3]; + const int pass1 = !!(f->avctx->flags & AV_CODEC_FLAG_PASS1); + int bits = 16; //TODO explain this in the specifciation, we have 32bits in but really encode max 16 + int offset = 1 << bits; + int transparency = f->transparency; + + sc->run_index = 0; + + memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES * + (w + 6) * sizeof(*RENAME(sc->sample_buffer))); + + for (y = 0; y < h; y++) { + for (i = 0; i < ring_size; i++) + for (p = 0; p < MAX_PLANES; p++) + sample[p][i]= RENAME(sc->sample_buffer) + p*ring_size*(w+6) + ((h+i-y)%ring_size)*(w+6) + 3; + + for (x = 0; x < w; x++) { + int b, g, r, av_uninit(a); + g = *((const uint32_t *)(src[0] + x*4 + stride[0]*y)); + b = *((const uint32_t *)(src[1] + x*4 + stride[1]*y)); + r = *((const uint32_t *)(src[2] + x*4 + stride[2]*y)); + if (transparency) + a = *((const uint32_t *)(src[3] + x*4 + stride[3]*y)); + + if (sc->slice_coding_mode != 1) { + b -= g; + r -= g; + g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; + b += offset; + r += offset; + } + + sample[0][0][x] = g; + sample[1][0][x] = b; + sample[2][0][x] = r; + sample[3][0][x] = a; + } + for (p = 0; p < 3 + transparency; p++) { + int ret; + sample[p][0][-1] = sample[p][1][0 ]; + sample[p][1][ w] = sample[p][1][w-1]; + ret = encode_line32(f, sc, f->avctx, w, sample[p], (p + 1) / 2, + bits + (sc->slice_coding_mode != 1), ac, pass1); + if (ret < 0) + return ret; + } + } + return 0; +} + + static int encode_slice(AVCodecContext *c, void *arg) { FFV1SliceContext *sc = arg; @@ -1226,6 +1412,10 @@ retry: } if (sc->remap) { + //Both the 16bit and 32bit remap do exactly the same thing but with 16bits we can + //Implement this using a "histogram" while for 32bit that would be gb sized, thus a more + //complex implementation sorting pairs is used. 
+ if (f->bits_per_raw_sample != 32) { if (f->colorspace == 0 && c->pix_fmt != AV_PIX_FMT_YA8 && c->pix_fmt != AV_PIX_FMT_YAF16) { const int cx = x >> f->chroma_h_shift; const int cy = y >> f->chroma_v_shift; @@ -1249,7 +1439,12 @@ retry: } else load_rgb_frame (f, sc, planes, width, height, p->linesize); - encode_remap(f, sc); + encode_histogram_remap(f, sc); + } else { + Unit pairs[4][65536]; + load_rgb_float32_frame(f, sc, planes, width, height, p->linesize, pairs); + encode_float32_remap(f, sc, planes, pairs); + } } if (ac == AC_GOLOMB_RICE) { @@ -1281,6 +1476,8 @@ retry: } else if (c->pix_fmt == AV_PIX_FMT_YA8 || c->pix_fmt == AV_PIX_FMT_YAF16) { ret = encode_plane(f, sc, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 0, 2, ac); ret |= encode_plane(f, sc, p->data[0] + (ps>>1) + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 1, 2, ac); + } else if (f->bits_per_raw_sample == 32) { + ret = encode_float32_rgb_frame(f, sc, planes, width, height, p->linesize, ac); } else if (f->use32bit) { ret = encode_rgb_frame32(f, sc, planes, width, height, p->linesize, ac); } else { @@ -1388,6 +1585,14 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, return 0; } + if (f->bits_per_raw_sample == 32 && f->remap_mode) { + //To reduce the needed memory we use the input frame (which is generally freely available) + //TODO use fltmap instead of the frame so as not to have to make it writable + int ret = av_frame_make_writable(pict); + if (ret < 0) + return ret; + } + /* Maximum packet size */ maxsize = ff_ffv1_encode_buffer_size(avctx); @@ -1567,7 +1772,7 @@ const FFCodec ff_ffv1_encoder = { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YAF16, AV_PIX_FMT_GRAYF16, - AV_PIX_FMT_GBRPF16), + AV_PIX_FMT_GBRPF16, AV_PIX_FMT_GBRPF32), .color_ranges = AVCOL_RANGE_MPEG, .p.priv_class = &ffv1_class, .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH, -- 2.48.1 _______________________________________________ ffmpeg-devel 
mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".