On 16/10/2024 15:26, Michael Niedermayer wrote:
This makes a 16-bit RGB raw sample 25% faster, at a 2% loss of compression,
with rawlsb=4.

Please test and comment

This stores the LSB through non-binary range coding; this is simpler than
using a separate coder.
For cases where range coding is not wanted, it's probably best to use
Golomb-Rice for everything.

We also pass the LSB through the decorrelation and context stages (which is
basically free); this leads to slightly better compression than separating
them earlier.

Signed-off-by: Michael Niedermayer <mich...@niedermayer.cc>
---
  libavcodec/ffv1.h             |  2 ++
  libavcodec/ffv1_template.c    | 19 ++++++++++---------
  libavcodec/ffv1dec.c          |  2 ++
  libavcodec/ffv1dec_template.c | 16 +++++++++++++---
  libavcodec/ffv1enc.c          | 15 ++++++++++++++-
  libavcodec/ffv1enc_template.c | 17 +++++++++++++++--
  6 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 4f5a8ab2be7..02bfc33f680 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -83,6 +83,7 @@ typedef struct FFV1SliceContext {
      int slice_coding_mode;
      int slice_rct_by_coef;
      int slice_rct_ry_coef;
+    int rawlsb;
// RefStruct reference, array of MAX_PLANES elements
      PlaneContext *plane;
@@ -139,6 +140,7 @@ typedef struct FFV1Context {
      int key_frame_ok;
      int context_model;
      int qtable;
+    int rawlsb;
int bits_per_raw_sample;
      int packed_at_lsb;
diff --git a/libavcodec/ffv1_template.c b/libavcodec/ffv1_template.c
index abb90a12e49..10206702ee8 100644
--- a/libavcodec/ffv1_template.c
+++ b/libavcodec/ffv1_template.c
@@ -30,24 +30,25 @@ static inline int RENAME(predict)(TYPE *src, TYPE *last)
  }
static inline int RENAME(get_context)(const int16_t quant_table[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE],
-                                      TYPE *src, TYPE *last, TYPE *last2)
+                                      TYPE *src, TYPE *last, TYPE *last2, int 
rawlsb)
  {
      const int LT = last[-1];
      const int T  = last[0];
      const int RT = last[1];
      const int L  = src[-1];
+    const int rawoff = (1<<rawlsb) >> 1;
if (quant_table[3][127] || quant_table[4][127]) {
          const int TT = last2[0];
          const int LL = src[-2];
-        return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
-               quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
-               quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK] +
-               quant_table[3][(LL - L) & MAX_QUANT_TABLE_MASK] +
-               quant_table[4][(TT - T) & MAX_QUANT_TABLE_MASK];
+        return quant_table[0][(L - LT + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK] +
+               quant_table[1][(LT - T + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK] +
+               quant_table[2][(T - RT + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK] +
+               quant_table[3][(LL - L + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK] +
+               quant_table[4][(TT - T + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK];
      } else
-        return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
-               quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
-               quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK];
+        return quant_table[0][(L - LT + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK] +
+               quant_table[1][(LT - T + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK] +
+               quant_table[2][(T - RT + rawoff >> rawlsb) & 
MAX_QUANT_TABLE_MASK];
  }
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 5c099e49ad4..fc96bfb4cea 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -249,6 +249,8 @@ static int decode_slice_header(const FFV1Context *f,
                  return AVERROR_INVALIDDATA;
              }
          }
+        if (f->micro_version > 2)
+            sc->rawlsb = get_symbol(c, state, 0);
      }
return 0;
diff --git a/libavcodec/ffv1dec_template.c b/libavcodec/ffv1dec_template.c
index 2da6bd935dc..dbdcad7768e 100644
--- a/libavcodec/ffv1dec_template.c
+++ b/libavcodec/ffv1dec_template.c
@@ -60,8 +60,13 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
                  return AVERROR_INVALIDDATA;
          }
- context = RENAME(get_context)(quant_table,
-                                      sample[1] + x, sample[0] + x, sample[1] 
+ x);
+        if (sc->rawlsb) {
+            context = RENAME(get_context)(quant_table,
+                                          sample[1] + x, sample[0] + x, sample[1] 
+ x, sc->rawlsb);
+        } else {
+            context = RENAME(get_context)(quant_table,
+                                          sample[1] + x, sample[0] + x, 
sample[1] + x, 0);
+        }
          if (context < 0) {
              context = -context;
              sign    = 1;
@@ -71,7 +76,12 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
          av_assert2(context < p->context_count);
if (ac != AC_GOLOMB_RICE) {
-            diff = get_symbol_inline(c, p->state[context], 1);
+            if (sc->rawlsb) {
+                const int rawoff = (1<<sc->rawlsb) >> 1;
+                diff = get_rac_raw(c, sc->rawlsb);
+                diff += (get_symbol_inline(c, p->state[context], 1) << 
sc->rawlsb) - rawoff;
+            } else
+                diff = get_symbol_inline(c, p->state[context], 1);
          } else {
              if (context == 0 && run_mode == 0)
                  run_mode = 1;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 0dbfebc1a1a..0548daf8c47 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -416,7 +416,7 @@ static int write_extradata(FFV1Context *f)
          if (f->version == 3) {
              f->micro_version = 4;
          } else if (f->version == 4)
-            f->micro_version = 2;
+            f->micro_version = 3;
          put_symbol(&c, state, f->micro_version, 0);
      }
@@ -564,6 +564,9 @@ static av_cold int encode_init(AVCodecContext *avctx)
      if (s->ec == 2)
          s->version = FFMAX(s->version, 4);
+ if (s->rawlsb)
+        s->version = FFMAX(s->version, 4);
+
      if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > 
FF_COMPLIANCE_EXPERIMENTAL) {
          av_log(avctx, AV_LOG_ERROR, "Version 2 or 4 needed for requested features 
but version 2 or 4 is experimental and not enabled\n");
          return AVERROR_INVALIDDATA;
@@ -716,6 +719,11 @@ static av_cold int encode_init(AVCodecContext *avctx)
          }
      }
+ if (s->rawlsb > s->bits_per_raw_sample) {
+        av_log(avctx, AV_LOG_ERROR, "too many raw lsb\n");
+        return AVERROR(EINVAL);
+    }
+
      if (s->ac == AC_RANGE_CUSTOM_TAB) {
          for (i = 1; i < 256; i++)
              s->state_transition[i] = ver2_state[i];
@@ -958,6 +966,7 @@ static void encode_slice_header(FFV1Context *f, 
FFV1SliceContext *sc)
              put_symbol(c, state, sc->slice_rct_by_coef, 0);
              put_symbol(c, state, sc->slice_rct_ry_coef, 0);
          }
+        put_symbol(c, state, sc->rawlsb, 0);
      }
  }
@@ -1077,6 +1086,8 @@ static int encode_slice(AVCodecContext *c, void *arg)
          sc->slice_rct_ry_coef = 1;
      }
+ sc->rawlsb = f->rawlsb; // we do not optimize this per slice, but other encoders could
+
  retry:
      if (f->key_frame)
          ff_ffv1_clear_slice_state(f, sc);
@@ -1291,6 +1302,8 @@ static const AVOption options[] = {
              { .i64 = 0 }, 0, 1, VE },
      { "qtable", "Quantization table", OFFSET(qtable), AV_OPT_TYPE_INT,
              { .i64 = -1 }, -1, 2, VE },
+    { "rawlsb", "number of LSBs stored RAW", OFFSET(rawlsb), AV_OPT_TYPE_INT,
+            { .i64 = 0 }, 0, 8, VE },
{ NULL }
  };
diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
index bc14926ab95..848328c70af 100644
--- a/libavcodec/ffv1enc_template.c
+++ b/libavcodec/ffv1enc_template.c
@@ -62,8 +62,14 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
      for (x = 0; x < w; x++) {
          int diff, context;
- context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
-                                      sample[0] + x, sample[1] + x, sample[2] 
+ x);
+        if (f->rawlsb) {
+            context = 
RENAME(get_context)(f->quant_tables[p->quant_table_index],
+                                        sample[0] + x, sample[1] + x, sample[2] + 
x, f->rawlsb);
+        } else {
+            //try to force a version with rawlsb optimized out
+            context = 
RENAME(get_context)(f->quant_tables[p->quant_table_index],
+                                        sample[0] + x, sample[1] + x, 
sample[2] + x, 0);
+        }
          diff    = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + 
x);
if (context < 0) {
@@ -74,6 +80,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
          diff = fold(diff, bits);
if (ac != AC_GOLOMB_RICE) {
+            if (f->rawlsb) {
+                const int rawoff = (1<<f->rawlsb) >> 1;
+                const unsigned mask = (1<<f->rawlsb) - 1;
+                diff += rawoff;
+                put_rac_raw(c, (diff & mask), f->rawlsb);
+                diff = diff >> f->rawlsb; // Note, this will be biased on 
small rawlsb
+            }
              if (pass1) {
                  put_symbol_inline(c, p->state[context], diff, 1, sc->rc_stat,
                                    
sc->rc_stat2[p->quant_table_index][context]);

TBH I'm still not a fan of this and I don't think it should be part of version 4. It seems like it'll be a very niche coding feature that's better served by Golomb-Rice coding, which is many times faster as you can *parallelize* it. It could potentially help improve compression of noisy images, as the noise wouldn't affect EC state, but without a search mechanism, leaving it in the hands of users is bound to lead to misuse.

Attachment: OpenPGP_0xA2FEA5F03F034464.asc
Description: OpenPGP public key

Attachment: OpenPGP_signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to