Hi,

I have added support for MPEG-4 SStP (Simple Studio Profile) using the available samples on trac.
Yes, it doesn't pass FATE; yes, it's not in format-patch form; yes, it uses printfs.
https://trac.ffmpeg.org/ticket/4447

Being MPEG-4, it depends on mpegvideo.c so has tons of yuv420p assumptions
baked in which are of course undocumented.
Here are my questions (line number refers to attached patch):

Line 35: How do I signal to idctdsp that I want an IDCT with
32-bit coefficients AND 32-bit intermediates? A lot of that code assumes
that intermediates are the next size up, i.e. 8-bit coeffs with 16-bit
intermediates, or 16-bit coeffs with 32-bit intermediates.

Line 906: Why do RGB samples not decode unless the GBRP format is moved to
the top of the PIX_FMT list? Otherwise I get "[mpeg4 @ 0x7f945c029600] format
change not supported".

Line 932: What's going on with this branch? Normal MPEG-4 video does
dequant during unpack, so why is it not part of this condition?

Line 987: Are there more assumptions baked into mpegvideo.c about "square"
macroblocks, i.e. ones where (width == height)?

Line 997: What is all this stuff going on with -1U? Unless I remove it I
get a segfault. I do get a stripe on the left, though.

Line 1055: How can I make the existing code use 32-bit coefficients
cleanly? I can't reuse block[a][b] because it's allocated in a single
malloc.

Regards,
Kieran Kunhya
-- 

Sent from my mobile device
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index d0da1d3..f09df29 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -197,6 +197,10 @@ static int decode_slice(MpegEncContext *s)
 
     ff_set_qscale(s, s->qscale);
 
+    if (s->studio_profile &&
+        (ret = ff_mpeg4_decode_studio_slice_header(s->avctx->priv_data)) < 0)
+        return ret; /* a bad studio slice header must abort the slice, not be ignored */
+
     if (s->avctx->hwaccel) {
         const uint8_t *start = s->gb.buffer + get_bits_count(&s->gb) / 8;
         ret = s->avctx->hwaccel->decode_slice(s->avctx, start, s->gb.buffer_end - start);
@@ -252,7 +256,7 @@ static int decode_slice(MpegEncContext *s)
             ff_dlog(s, "%d %06X\n",
                     get_bits_count(&s->gb), show_bits(&s->gb, 24));
 
-            ff_tlog(NULL, "Decoding MB at %dx%d\n", s->mb_x, s->mb_y);
+            //printf("Decoding MB at %dx%d\n", s->mb_x, s->mb_y);
             ret = s->decode_mb(s, s->block);
 
             if (s->pict_type != AV_PICTURE_TYPE_B)
diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
index 63e9b52..396fd53 100644
--- a/libavcodec/idctdsp.c
+++ b/libavcodec/idctdsp.c
@@ -259,7 +259,8 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
         c->perm_type = FF_IDCT_PERM_NONE;
     } else {
         if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
-            c->idct_put              = ff_simple_idct_put_10;
+            //c->idct_put              = ff_simple_idct_put_10;
+            c->idct_put              = ff_idct_float;
             c->idct_add              = ff_simple_idct_add_10;
             c->idct                  = ff_simple_idct_10;
             c->perm_type             = FF_IDCT_PERM_NONE;
@@ -303,8 +304,8 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
         ff_idctdsp_init_arm(c, avctx, high_bit_depth);
     if (ARCH_PPC)
         ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
-    if (ARCH_X86)
-        ff_idctdsp_init_x86(c, avctx, high_bit_depth);
+    //if (ARCH_X86)
+    //    ff_idctdsp_init_x86(c, avctx, high_bit_depth);
     if (ARCH_MIPS)
         ff_idctdsp_init_mips(c, avctx, high_bit_depth);
 
diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c
index 5e3c0ea..85c8f76 100644
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -207,12 +207,21 @@ static int h263_decode_gob_header(MpegEncContext *s)
 }
 
 /**
- * Decode the group of blocks / video packet header.
+ * Decode the group of blocks / video packet header / slice header (MPEG-4 Studio).
  * @return bit position of the resync_marker, or <0 if none was found
  */
 int ff_h263_resync(MpegEncContext *s){
     int left, pos, ret;
 
+    //printf ("__PRETTY_FUNCTION__ = %s\n", __PRETTY_FUNCTION__);
+
+    /* MPEG-4 studio mode: no resync search is implemented yet, so report the
+     * current bit position instead of returning the uninitialized 'pos'. */
+    if(s->codec_id==AV_CODEC_ID_MPEG4 && s->studio_profile) {
+        // FIXME search for new slice startcode if not there already
+        return get_bits_count(&s->gb);
+    }
+
     if(s->codec_id==AV_CODEC_ID_MPEG4){
         skip_bits1(&s->gb);
         align_get_bits(&s->gb);
diff --git a/libavcodec/mpeg4data.h b/libavcodec/mpeg4data.h
index b7c3fab..4756e9e 100644
--- a/libavcodec/mpeg4data.h
+++ b/libavcodec/mpeg4data.h
@@ -373,4 +373,120 @@ const uint8_t ff_mpeg4_dc_threshold[8]={
     99, 13, 15, 17, 19, 21, 23, 0
 };
 
+/* Studio-mode DC size VLCs, one {code, length} pair per entry. Note these are different in studio mode */
+const uint16_t ff_mpeg4_studio_dc_luma[19][2]={
+    {0x0e,  6}, {0x06,  5}, {0x00,  4}, {0x02,  4},
+    {0x07,  3}, {0x05,  3}, {0x03,  3}, {0x02,  3},
+    {0x04,  3}, {0x06,  3}, {0x01,  4}, {0x1e,  7},
+    {0x3e,  8}, {0x7e,  9}, {0xfe, 10}, {0x1fe, 11},
+    {0x3fe, 12}, {0x7fe, 13}, {0x7ff, 13}
+};
+
+const uint16_t ff_mpeg4_studio_dc_chroma[19][2]={ /* {code, length} pairs, same layout as the luma table */
+    {0x00,  4}, {0x02,  4}, {0x07,  3}, {0x05,  3},
+    {0x03,  3}, {0x02,  3}, {0x04,  3}, {0x06,  3},
+    {0x01,  4}, {0x06,  5}, {0x0e,  6}, {0x1e,  7},
+    {0x3e,  8}, {0x7e,  9}, {0xfe, 10}, {0x1fe, 11},
+    {0x3fe, 12}, {0x7fe, 13}, {0x7ff, 13}
+};
+
+const uint16_t ff_mpeg4_studio_intra[12][22][2]={ /* [VLC table][group] = {code, length} */
+    {
+        {0x05,  4}, {0x04,  4}, {0x05,  7}, {0x09,  9},
+        {0x21, 11}, {0x41, 12}, {0x81, 13}, {0x03,  4},
+        {0x03,  5}, {0x05,  6}, {0x04,  7}, {0x03,  7},
+        {0x05,  8}, {0x03,  2}, {0x05,  3}, {0x04,  3},
+        {0x03,  3}, {0x02,  4}, {0x04,  6}, {0x03,  6},
+        {0x11, 10}, {0x80, 13}
+    },
+    {
+        {0x00,  0}, {0x00,  0}, {0x00,  0}, {0x00,  0},
+        {0x00,  0}, {0x00,  0}, {0x00,  0}, {0x00,  0},
+        {0x00,  0}, {0x00,  0}, {0x00,  0}, {0x00,  0},
+        {0x00,  0}, {0x00,  0}, {0x01,  1}, {0x01,  2},
+        {0x01,  3}, {0x01,  4}, {0x01,  5}, {0x03,  7},
+        {0x05,  8}, {0x04,  8}
+    },
+    {
+        {0x05,  3},  {0x03,  5},  {0x02,  5},  {0x03,  7},
+        {0x09,  9},  {0x103, 14}, {0x102, 14}, {0x04,  3},
+        {0x03,  3},  {0x03,  4},  {0x02,  4},  {0x03,  6},
+        {0x11, 10},  {0x03,  2},  {0x02,  3},  {0x02,  6},
+        {0x05,  8},  {0x21, 11},  {0x83, 13},  {0x101, 14},
+        {0x201, 15}, {0x82, 13}
+    },
+    {
+        {0x05,  5}, {0x05,  4}, {0x04,  5}, {0x03,  6},
+        {0x09,  9}, {0x83, 13}, {0x82, 13}, {0x03,  3},
+        {0x04,  4}, {0x03,  4}, {0x03,  5}, {0x05,  8},
+        {0x81, 13}, {0x03,  2}, {0x02,  2}, {0x02,  5},
+        {0x02,  6}, {0x03,  7}, {0x11, 10}, {0x43, 12},
+        {0x80, 13}, {0x42, 12}
+    },
+    {
+        {0x05,  7},  {0x03,  4}, {0x03,  5},  {0x04,  7},
+        {0x09,  9},  {0x83, 13}, {0x101, 14}, {0x03,  3},
+        {0x02,  4},  {0x05,  6}, {0x03,  7},  {0x11, 10},
+        {0x201, 15}, {0x03,  2}, {0x02,  2},  {0x02,  3},
+        {0x04,  6},  {0x03,  6}, {0x05,  8},  {0x21, 11},
+        {0x82, 13},  {0x81, 13}
+    },
+    {
+        {0x13, 10},  {0x03,  5}, {0x05,  7}, {0x12, 10},
+        {0x43, 12},  {0x83, 13}, {0x82, 13}, {0x02,  5},
+        {0x04,  7},  {0x05,  8}, {0x23, 11}, {0x81, 13},
+        {0x101, 14}, {0x03,  2}, {0x02,  2}, {0x01,  2},
+        {0x01,  3},  {0x03,  6}, {0x03,  7}, {0x22, 11},
+        {0x201, 15}, {0x42, 12}
+    },
+    {
+        {0x23, 11},  {0x01,  4},  {0x07,  8},  {0x13, 10},
+        {0x22, 11},  {0x103, 14}, {0x102, 14}, {0x03,  6},
+        {0x06,  8},  {0x12, 10},  {0x43, 12},  {0x101, 14},
+        {0x201, 15}, {0x03,  3},  {0x02,  3},  {0x03,  2},
+        {0x02,  2},  {0x01,  3},  {0x02,  6},  {0x05,  8},
+        {0x42, 12},  {0x41, 12}
+    },
+    {
+        {0x0b,  9}, {0x03,  5}, {0x07,  8}, {0x07,  7},
+        {0x06,  7}, {0x23, 11}, {0x41, 12}, {0x05,  7},
+        {0x06,  8}, {0x0a,  9}, {0x13, 10}, {0x22, 11},
+        {0x40, 12}, {0x03,  4}, {0x02,  4}, {0x03,  2},
+        {0x02,  2}, {0x01,  2}, {0x02,  5}, {0x04,  7},
+        {0x12, 10}, {0x21, 11}
+    },
+    {
+        {0x15, 10}, {0x03,  6}, {0x14, 10}, {0x23, 11},
+        {0x07,  8}, {0x43, 12}, {0x81, 13}, {0x06,  8},
+        {0x0b,  9}, {0x13, 10}, {0x12, 10}, {0x42, 12},
+        {0x80, 13}, {0x01,  4}, {0x03,  3}, {0x02,  3},
+        {0x03,  2}, {0x02,  2}, {0x01,  3}, {0x02,  6},
+        {0x22, 11}, {0x41, 12}
+    },
+    {
+        {0x43, 12}, {0x05,  6}, {0x07,  8}, {0x04,  6},
+        {0x03,  6}, {0x13, 10}, {0x42, 12}, {0x05,  7},
+        {0x04,  7}, {0x06,  8}, {0x12, 10}, {0x41, 12},
+        {0x40, 12}, {0x03,  5}, {0x03,  4}, {0x03,  3},
+        {0x02,  3}, {0x03,  2}, {0x02,  2}, {0x02,  4},
+        {0x05,  8}, {0x11, 10}
+    },
+    {
+        {0x83, 13}, {0x05,  7}, {0x07,  8}, {0x03,  4},
+        {0x21, 11}, {0x82, 13}, {0x81, 13}, {0x04,  7},
+        {0x06,  8}, {0x0b,  9}, {0x0a,  9}, {0x11, 10},
+        {0x80, 13}, {0x03,  5}, {0x02,  5}, {0x02,  4},
+        {0x03,  3}, {0x02,  3}, {0x03,  2}, {0x02,  2},
+        {0x03,  6}, {0x09,  9}
+    },
+    {
+        {0x13, 10}, {0x03,  5}, {0x03,  6}, {0x0d,  9},
+        {0x0c,  9}, {0x21, 11}, {0x20, 11}, {0x02,  5},
+        {0x02,  6}, {0x07,  8}, {0x0b,  9}, {0x12, 10},
+        {0x11, 10}, {0x05,  3}, {0x04,  3}, {0x05,  4},
+        {0x04,  4}, {0x03,  4}, {0x02,  4}, {0x03,  3},
+        {0x03,  2}, {0x0a,  9}
+    }
+};
+
 #endif /* AVCODEC_MPEG4DATA_H */
diff --git a/libavcodec/mpeg4video.h b/libavcodec/mpeg4video.h
index 515b008..5565cdb 100644
--- a/libavcodec/mpeg4video.h
+++ b/libavcodec/mpeg4video.h
@@ -58,6 +58,13 @@
 #define GOP_STARTCODE        0x1B3
 #define VISUAL_OBJ_STARTCODE 0x1B5
 #define VOP_STARTCODE        0x1B6
+#define SLICE_STARTCODE      0x1B7
+#define EXT_STARTCODE        0x1B8
+
+#define VOT_VIDEO_ID 0x1
+
+
+#define QUANT_MATRIX_EXT_ID  0x3
 
 /* smaller packets likely don't contain a real frame */
 #define MAX_NVOP_SIZE 19
@@ -105,8 +112,16 @@ typedef struct Mpeg4DecContext {
     int cplx_estimation_trash_i;
     int cplx_estimation_trash_p;
     int cplx_estimation_trash_b;
+
+    VLC studio_intra_tab[12];
+    VLC studio_luma_dc;
+    VLC studio_chroma_dc;
+
+    int rgb;
 } Mpeg4DecContext;
 
+static const uint8_t mpeg4_block_count[4] = {0, 6, 8, 12}; /* blocks decoded per macroblock, indexed by chroma_format */
+
 /* dc encoding for MPEG-4 */
 extern const uint8_t ff_mpeg4_DCtab_lum[13][2];
 extern const uint8_t ff_mpeg4_DCtab_chrom[13][2];
@@ -134,6 +149,10 @@ extern const uint16_t ff_mpeg4_resync_prefix[8];
 
 extern const uint8_t ff_mpeg4_dc_threshold[8];
 
+extern const uint16_t ff_mpeg4_studio_dc_luma[19][2];
+extern const uint16_t ff_mpeg4_studio_dc_chroma[19][2];
+extern const uint16_t ff_mpeg4_studio_intra[12][22][2];
+
 void ff_mpeg4_encode_mb(MpegEncContext *s,
                         int16_t block[6][64],
                         int motion_x, int motion_y);
@@ -152,6 +171,7 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s);
 int ff_mpeg4_decode_partitions(Mpeg4DecContext *ctx);
 int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
 int ff_mpeg4_decode_video_packet_header(Mpeg4DecContext *ctx);
+int ff_mpeg4_decode_studio_slice_header(Mpeg4DecContext *ctx);
 void ff_mpeg4_init_direct_mv(MpegEncContext *s);
 void ff_mpeg4videodec_static_init(void);
 int ff_mpeg4_workaround_bugs(AVCodecContext *avctx);
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 0e0cf27..e772281 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -43,6 +43,7 @@
 #define SPRITE_TRAJ_VLC_BITS 6
 #define DC_VLC_BITS 9
 #define MB_TYPE_B_VLC_BITS 4
+#define STUDIO_INTRA_BITS 9
 
 static VLC dc_lum, dc_chrom;
 static VLC sprite_trajectory;
@@ -400,6 +401,8 @@ int ff_mpeg4_decode_video_packet_header(Mpeg4DecContext *ctx)
 {
     MpegEncContext *s = &ctx->m;
 
+    //printf ("__PRETTY_FUNCTION__ = %s\n", __PRETTY_FUNCTION__);
+
     int mb_num_bits      = av_log2(s->mb_num - 1) + 1;
     int header_extension = 0, mb_num, len;
 
@@ -485,6 +488,69 @@ int ff_mpeg4_decode_video_packet_header(Mpeg4DecContext *ctx)
     return 0;
 }
 
+/* Read a 5-bit quantiser scale code and convert it to a quantiser scale. */
+static inline int get_qscale(MpegEncContext *s)
+{
+    const int code = get_bits(&s->gb, 5);
+
+    /* q_scale_type selects the non-linear table; otherwise the code is doubled */
+    return s->q_scale_type ? ff_mpeg2_non_linear_qscale[code] : code << 1;
+}
+
+/* Set all three studio DC predictors to the same power-of-two start value. */
+static void reset_studio_dc_predictors(MpegEncContext *s)
+{
+    const int shift = s->avctx->bits_per_raw_sample + s->dct_precision +
+                      s->intra_dc_precision - 1;
+    s->studio_dc_val[0] = s->studio_dc_val[1] = s->studio_dc_val[2] = 1 << shift;
+}
+
+/**
+ * Decode an MPEG-4 studio profile slice header at the current bit position.
+ * Sets mb_x/mb_y (and qscale) and resets the studio DC predictors.
+ * @return 0 on success, <0 if the start code is missing or mb_num is invalid
+ */
+int ff_mpeg4_decode_studio_slice_header(Mpeg4DecContext *ctx)
+{
+    MpegEncContext *s = &ctx->m;
+    GetBitContext *gb = &s->gb;
+    unsigned mb_num;
+
+    /* This is library code: report bad input to the caller, never exit(). */
+    if (get_bits_left(gb) < 32 || get_bits_long(gb, 32) != SLICE_START_CODE) {
+        av_log(s->avctx, AV_LOG_ERROR, "Missing studio slice start code\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    mb_num = get_bits(gb, 13); // FIXME, this is a VLC
+    if (mb_num >= s->mb_num) {
+        /* would place mb_x/mb_y outside the picture */
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid studio slice mb_num %u\n", mb_num);
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->mb_x = mb_num % s->mb_width;
+    s->mb_y = mb_num / s->mb_width;
+
+    if (ctx->shape != BIN_ONLY_SHAPE)
+        s->qscale = get_qscale(s);
+
+    if (show_bits1(gb)) {
+        skip_bits1(gb);   /* slice_extension_flag */
+        skip_bits1(gb);   /* intra_slice */
+        skip_bits1(gb);   /* slice_VOP_id_enable */
+        skip_bits(gb, 6); /* slice_VOP_id */
+        while (show_bits1(gb)) {
+            skip_bits1(gb);   /* extra_bit_slice */
+            skip_bits(gb, 8); /* extra_information_slice */
+        }
+    }
+    skip_bits1(gb); /* the 0 bit: final extra_bit_slice, or slice_extension_flag when clear */
+
+    reset_studio_dc_predictors(s);
+    return 0;
+}
+
 /**
  * Get the average motion vector for a GMC MB.
  * @param n either 0 for the x component or 1 for y
@@ -1675,6 +1741,227 @@ end:
     return SLICE_OK;
 }
 
+/* Byte-align, then drop bytes until the next 24 bits equal 0x000001 or fewer than 24 bits remain. */
+static void next_start_code_studio(GetBitContext *gb)
+{
+    align_get_bits(gb);
+
+    /* negated stop condition: stop on short input or on the prefix */
+    while (!(get_bits_left(gb) < 24 || show_bits_long(gb, 24) == 0x1))
+        get_bits(gb, 8);
+}
+
+/* Per AC group: {additional_code length in bits, index of the next VLC table}; read-only */
+static const uint8_t ac_state_tab[22][2] =
+{
+    {0, 0},
+    {0, 1},
+    {1, 1},
+    {2, 1},
+    {3, 1},
+    {4, 1},
+    {5, 1},
+    {1, 2},
+    {2, 2},
+    {3, 2},
+    {4, 2},
+    {5, 2},
+    {6, 2},
+    {1, 3},
+    {2, 4},
+    {3, 5},
+    {4, 6},
+    {5, 7},
+    {6, 8},
+    {7, 9},
+    {8, 10},
+    {0, 11}
+};
+
+/* Dequantise one AC level with its matrix entry and qscale, then clip it. */
+static inline int studio_dequant_ac(const MpegEncContext *s, int level,
+                                    int quant, int min, int max)
+{
+    level = ((8 * 2 * level * quant * s->qscale) >> s->dct_precision) / 32;
+    return av_clip(level, min, max);
+}
+
+/**
+ * Decode and dequantise one intra block of a studio profile macroblock.
+ *
+ * The DC value is coded as a difference against s->studio_dc_val[cc]; the AC
+ * part is a sequence of VLC "groups", and ac_state_tab picks both the extra
+ * bits of each group and the VLC table used for the next one.
+ *
+ * @param s     decoder context; it is cast to Mpeg4DecContext (assumed to embed it first)
+ * @param block output: 64 dequantised coefficients, cleared on entry
+ * @param n     block index in the macroblock; 0-3 use the luma DC table
+ * @return 0 on success, AVERROR_INVALIDDATA on a damaged bitstream
+ */
+static int mpeg4_decode_studio_block(MpegEncContext *s, int32_t block[64], int n)
+{
+    Mpeg4DecContext *ctx = (Mpeg4DecContext *)s;
+    VLC *cur_vlc = &ctx->studio_intra_tab[0];
+    uint8_t *const scantable = s->intra_scantable.permutated;
+    const uint16_t *quant_matrix;
+    const int min = -1 *  (1 << (s->avctx->bits_per_raw_sample + 6));
+    const int max =      ((1 << (s->avctx->bits_per_raw_sample + 6)) - 1);
+    int cc, dct_dc_size, dct_diff, code, j, level;
+    int group, run, additional_code_len, sign;
+    int idx = 1;      /* next scan position; position 0 is the DC coefficient */
+    int mismatch = 1;
+    uint32_t flc;
+
+    memset(block, 0, 64 * sizeof(int32_t));
+
+    if (n < 4) {
+        cc = 0;
+        dct_dc_size = get_vlc2(&s->gb, ctx->studio_luma_dc.table, STUDIO_INTRA_BITS, 2);
+        quant_matrix = s->intra_matrix;
+    } else {
+        cc = ((n - 4) % 2) + 1; /* Table 7-30 */
+        if (ctx->rgb)
+            dct_dc_size = get_vlc2(&s->gb, ctx->studio_luma_dc.table, STUDIO_INTRA_BITS, 2);
+        else
+            dct_dc_size = get_vlc2(&s->gb, ctx->studio_chroma_dc.table, STUDIO_INTRA_BITS, 2);
+        quant_matrix = s->chroma_intra_matrix;
+    }
+
+    /* get_vlc2() yields a negative value for a code that is in no table */
+    if (dct_dc_size < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "Illegal studio dct_dc_size VLC\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (dct_dc_size == 0) {
+        dct_diff = 0;
+    } else {
+        dct_diff = get_xbits(&s->gb, dct_dc_size);
+
+        /* sizes above 8 bits carry a marker bit that must be set */
+        if (dct_dc_size > 8 && !get_bits1(&s->gb)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Missing marker after studio DC\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    s->studio_dc_val[cc] += dct_diff;
+
+    if (s->mpeg_quant)
+        block[0] = s->studio_dc_val[cc] * (8 >> s->intra_dc_precision);
+    else
+        block[0] = s->studio_dc_val[cc] * (8 >> s->intra_dc_precision) * (8 >> s->dct_precision);
+    // FIXME support mpeg_quant for AC coefficients
+
+    block[0] = av_clip(block[0], min, max);
+    mismatch ^= block[0];
+
+    /* AC Coefficients */
+    while (1) {
+        group = get_vlc2(&s->gb, cur_vlc->table, STUDIO_INTRA_BITS, 2);
+
+        /* validate before using group as an index into ac_state_tab */
+        if (group < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "Illegal studio AC coefficient VLC\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        additional_code_len = ac_state_tab[group][0];
+        cur_vlc = &ctx->studio_intra_tab[ac_state_tab[group][1]];
+
+        if (group == 0) {
+            /* End of Block */
+            break;
+        } else if (group >= 1 && group <= 6) {
+            /* Zero run length (Table B.47) */
+            run = 1 << additional_code_len;
+            if (additional_code_len)
+                run += get_bits(&s->gb, additional_code_len);
+            idx += run;
+            continue;
+        } else if (group >= 7 && group <= 12) {
+            /* Zero run length and +/-1 level (Table B.48) */
+            code = get_bits(&s->gb, additional_code_len);
+            sign = code & 1;
+            code >>= 1;
+            run = (1 << (additional_code_len - 1)) + code;
+            idx += run;
+            level = sign ? 1 : -1;
+        } else if (group >= 13 && group <= 20) {
+            /* Level value (Table B.49) */
+            level = get_xbits(&s->gb, additional_code_len);
+        } else {
+            /* Escape (group 21): fixed-length two's complement level */
+            additional_code_len = s->avctx->bits_per_raw_sample + s->dct_precision + 4;
+            flc = get_bits(&s->gb, additional_code_len);
+            if (flc >> (additional_code_len - 1))
+                level = (int)flc - (1 << additional_code_len);
+            else
+                level = flc;
+        }
+
+        /* Bounds-check BEFORE the write: a corrupt run must not index
+         * scantable[] or block[] past their 64 entries. */
+        if (idx > 63) {
+            av_log(s->avctx, AV_LOG_ERROR, "Studio AC coefficient index out of range\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        j = scantable[idx++];
+        block[j] = studio_dequant_ac(s, level, quant_matrix[j], min, max);
+        mismatch ^= block[j];
+    }
+
+    /* a trailing zero run may still have moved past the end of the block */
+    if (idx > 64) {
+        av_log(s->avctx, AV_LOG_ERROR, "Studio block has more than 64 coefficients\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    block[63] ^= mismatch & 1;
+    return 0;
+}
+
+/**
+ * Decode one studio profile macroblock (I-VOP only) into s->block2.
+ * @param block_ unused here; the coefficients are written to s->block2
+ * @return SLICE_OK, SLICE_END at the end of the slice, or a negative AVERROR
+ */
+static int mpeg4_decode_studio_mb(MpegEncContext *s, int16_t block_[12][64])
+{
+    int i, ret;
+
+    /* StudioMacroblock, assumes I-VOP */
+    s->mb_intra = 1;
+    if (!get_bits1(&s->gb)) { /* compression_mode: 0 selects DPCM */
+        /* Unsupported: fail this macroblock instead of exit()ing the process. */
+        av_log(s->avctx, AV_LOG_ERROR, "Studio DPCM macroblocks are not supported\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    /* DCT */
+    /* macroblock_type, 1 or 2-bit VLC */
+    if (!get_bits1(&s->gb)) {
+        skip_bits1(&s->gb);
+        s->qscale = get_qscale(s);
+    }
+
+    for (i = 0; i < mpeg4_block_count[s->chroma_format]; i++) {
+        /* a damaged block invalidates the rest of the macroblock */
+        ret = mpeg4_decode_studio_block(s, s->block2[i], i);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (show_bits(&s->gb, 23) == 0) {
+        /* 23 zero bits ahead are treated as the end of the slice */
+        next_start_code_studio(&s->gb);
+        return SLICE_END;
+    }
+
+    return SLICE_OK;
+}
+
 static int mpeg4_decode_gop_header(MpegEncContext *s, GetBitContext *gb)
 {
     int hours, minutes, seconds;
@@ -2516,6 +2803,246 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
     return 0;
 }
 
+/* Parse a quantiser matrix extension: four optional matrices of 64 8-bit
+ * entries each. Only the intra matrices are stored; the non-intra ones are
+ * read and discarded. */
+static void read_quant_matrix_ext(MpegEncContext *s, GetBitContext *gb)
+{
+    int n, perm, val;
+
+    /* intra_quantiser_matrix (also written to the chroma intra matrix) */
+    if (get_bits1(gb)) {
+        for (n = 0; n < 64; n++) {
+            val  = get_bits(gb, 8);
+            perm = s->idsp.idct_permutation[ff_zigzag_direct[n]];
+            s->intra_matrix[perm]        = val;
+            s->chroma_intra_matrix[perm] = val;
+        }
+    }
+
+    /* non_intra_quantiser_matrix */
+    if (get_bits1(gb)) {
+        n = 64;
+        while (n--)
+            get_bits(gb, 8);
+    }
+
+    /* chroma_intra_quantiser_matrix */
+    if (get_bits1(gb)) {
+        for (n = 0; n < 64; n++) {
+            val  = get_bits(gb, 8);
+            perm = s->idsp.idct_permutation[ff_zigzag_direct[n]];
+            s->chroma_intra_matrix[perm] = val;
+        }
+    }
+
+    /* chroma_non_intra_quantiser_matrix */
+    if (get_bits1(gb)) {
+        n = 64;
+        while (n--)
+            get_bits(gb, 8);
+    }
+
+    /* leave the reader at the next start code prefix */
+    next_start_code_studio(gb);
+}
+
+/**
+ * Handle an optional extension start code at the current bit position.
+ * Only a quant matrix extension is parsed, and only when id is 2 or 4;
+ * user data and all other cases are left unread for the caller.
+ */
+static void extension_and_user_data(MpegEncContext *s, GetBitContext *gb, int id)
+{
+    const uint32_t startcode = show_bits_long(gb, 32);
+    int type;
+
+    if (startcode != EXT_STARTCODE || (id != 2 && id != 4))
+        return;
+
+    skip_bits_long(gb, 32);
+    type = get_bits(gb, 4);
+    if (type == QUANT_MATRIX_EXT_ID) {
+        read_quant_matrix_ext(s, gb);
+    } else { /* library code must not exit() on an unknown extension */
+        av_log(s->avctx, AV_LOG_WARNING, "Ignoring unknown studio extension type %d\n", type);
+    }
+}
+
+/* Skip the SMPTE time code: four 16-bit words, each followed by a marker, then 4 reserved bits. */
+static int decode_smpte_tc(Mpeg4DecContext *ctx, GetBitContext *gb)
+{
+    static const char *const marker_msg[4] = {
+        "after Time_code[63..48]", "after Time_code[47..32]",
+        "after Time_code[31..16]", "after Time_code[15..0]"
+    };
+    AVCodecContext *avctx = ctx->m.avctx;
+    int word;
+    for (word = 0; word < 4; word++) {
+        skip_bits(gb, 16); /* one 16-bit part of Time_code, high part first */
+        check_marker(avctx, gb, marker_msg[word]);
+    }
+    skip_bits(gb, 4); /* reserved_bits */
+    return 0;
+}
+
+/**
+ * Decode a studio VOP header: time code, picture type, scan tables, matrices.
+ * @return 0 on success or when at most 32 bits are left, -1 on a bad header
+ */
+static int decode_studio_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
+{
+    MpegEncContext *s = &ctx->m;
+    int i, v;
+
+    /* With 32 bits or fewer left nothing is parsed and 0 is returned. */
+
+    if (get_bits_left(gb) <= 32)
+        return 0;
+
+    if (get_bits_long(gb, 32) != VOP_STARTCODE)
+        return -1;
+
+    s->decode_mb = mpeg4_decode_studio_mb; /* route macroblock decoding to the studio parser */
+
+    if (decode_smpte_tc(ctx, gb) < 0)
+        return -1;
+
+    skip_bits(gb, 10); /* temporal_reference */
+    skip_bits(gb, 2); /* vop_structure */
+    s->pict_type = get_bits(gb, 2) + AV_PICTURE_TYPE_I; /* vop_coding_type */
+    if (get_bits1(gb)) { /* vop_coded */
+        skip_bits1(gb); /* top_field_first */
+        skip_bits1(gb); /* repeat_first_field */
+        skip_bits1(gb); /* progressive_frame FIXME */
+    }
+
+    if (s->pict_type == AV_PICTURE_TYPE_I) {
+        if (get_bits1(gb)) /* flag read for I-VOPs only; when set, reset the DC predictors */
+            reset_studio_dc_predictors(s);
+    }
+
+    if (ctx->shape != BIN_ONLY_SHAPE) {
+        s->alternate_scan = get_bits1(gb);
+        s->frame_pred_frame_dct = get_bits1(gb);
+        s->dct_precision = get_bits(gb, 2);
+        s->intra_dc_precision = get_bits(gb, 2);
+        s->q_scale_type = get_bits1(gb);
+    }
+
+    if (s->alternate_scan) { /* every scan table uses the alternate vertical scan */
+        ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable,   ff_alternate_vertical_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable,   ff_alternate_vertical_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+    } else {
+        ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable,   ff_zigzag_direct);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable,   ff_zigzag_direct);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+    }
+
+    /* load default matrices; a quant matrix extension parsed below may overwrite the intra ones */
+    for (i = 0; i < 64; i++) {
+        int j = s->idsp.idct_permutation[i];
+        v = ff_mpeg4_default_intra_matrix[i];
+        s->intra_matrix[j]        = v;
+        s->chroma_intra_matrix[j] = v;
+
+        v = ff_mpeg4_default_non_intra_matrix[i];
+        s->inter_matrix[j]        = v;
+        s->chroma_inter_matrix[j] = v;
+    }
+
+    next_start_code_studio(gb);
+    extension_and_user_data(s, gb, 4);
+
+    return 0;
+}
+
+/* Parse StudioVisualObject() and, for video objects, the StudioVideoObjectLayer()
+ * header: shape, rgb, chroma format, bit depth, size, aspect ratio, quant mode. */
+static void decode_studiovisualobject(Mpeg4DecContext *ctx, GetBitContext *gb)
+{
+    MpegEncContext *s = &ctx->m;
+    int width, height, vot;
+
+    /* StudioVisualObject() */
+    if (get_bits_long(gb, 32) != VISUAL_OBJ_STARTCODE)
+        return;
+
+    skip_bits(gb, 4);      /* visual_object_verid */
+    vot = get_bits(gb, 4); /* visual_object_type */
+
+    next_start_code_studio(gb);
+    extension_and_user_data(s, gb, 1);
+
+    if (vot != VOT_VIDEO_ID)
+        return;
+
+    /* StudioVideoObjectLayer */
+    skip_bits_long(gb, 32); /* video_object_start_code */
+    skip_bits_long(gb, 32); /* video_object_layer_start_code */
+    skip_bits1(gb); /* random_accessible_vol */
+    skip_bits(gb, 8); /* video_object_type_indication */
+    skip_bits(gb, 4); /* video_object_layer_verid */
+    ctx->shape = get_bits(gb, 2); /* video_object_layer_shape */
+    skip_bits(gb, 4); /* video_object_layer_shape_extension */
+    skip_bits1(gb); /* progressive_sequence */
+    if (ctx->shape != BIN_ONLY_SHAPE) {
+        ctx->rgb = get_bits1(gb); /* rgb_components */
+        s->chroma_format = get_bits(gb, 2); /* chroma_format */
+        s->avctx->bits_per_raw_sample = get_bits(gb, 4); /* bit_depth */
+        if (s->avctx->bits_per_raw_sample == 10) {
+            if (ctx->rgb)
+                s->avctx->pix_fmt = AV_PIX_FMT_GBRP10;
+            else
+                s->avctx->pix_fmt = s->chroma_format == CHROMA_422 ? AV_PIX_FMT_YUV422P10 : AV_PIX_FMT_YUV444P10;
+        }
+    }
+    if (ctx->shape == RECT_SHAPE) {
+        check_marker(s->avctx, gb, "before video_object_layer_width");
+        width = get_bits(gb, 14); /* video_object_layer_width */
+        check_marker(s->avctx, gb, "before video_object_layer_height");
+        height = get_bits(gb, 14); /* video_object_layer_height */
+        check_marker(s->avctx, gb, "after video_object_layer_height");
+
+        /* Do the same check as non-studio profile */
+        if (width && height) {
+            if (s->width && s->height &&
+                (s->width != width || s->height != height))
+                s->context_reinit = 1;
+            /* use the coded size; it was hard-coded to 1920x1088 before */
+            s->width  = width;
+            s->height = height;
+        }
+    }
+    s->aspect_ratio_info = get_bits(gb, 4);
+    if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) {
+        s->avctx->sample_aspect_ratio.num = get_bits(gb, 8);  // par_width
+        s->avctx->sample_aspect_ratio.den = get_bits(gb, 8);  // par_height
+    } else {
+        s->avctx->sample_aspect_ratio = ff_h263_pixel_aspect[s->aspect_ratio_info];
+    }
+    skip_bits(gb, 4); /* frame_rate_code */
+    skip_bits(gb, 15); /* first_half_bit_rate */
+    check_marker(s->avctx, gb, "after first_half_bit_rate");
+    skip_bits(gb, 15); /* latter_half_bit_rate */
+    check_marker(s->avctx, gb, "after latter_half_bit_rate");
+    skip_bits(gb, 15); /* first_half_vbv_buffer_size */
+    check_marker(s->avctx, gb, "after first_half_vbv_buffer_size");
+    skip_bits(gb, 3); /* latter_half_vbv_buffer_size */
+    skip_bits(gb, 11); /* first_half_vbv_occupancy */
+    check_marker(s->avctx, gb, "after first_half_vbv_occupancy");
+    skip_bits(gb, 15); /* latter_half_vbv_occupancy */
+    check_marker(s->avctx, gb, "after latter_half_vbv_occupancy");
+    s->low_delay = get_bits1(gb);
+    s->mpeg_quant = get_bits1(gb); /* mpeg2_stream */
+
+    next_start_code_studio(gb);
+    extension_and_user_data(s, gb, 2);
+}
+
 /**
  * Decode MPEG-4 headers.
  * @return <0 if no VOP found (or a damaged one)
@@ -2623,6 +3150,15 @@ int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb)
             mpeg4_decode_gop_header(s, gb);
         } else if (startcode == VOS_STARTCODE) {
             mpeg4_decode_profile_level(s, gb);
+            if (s->avctx->profile == FF_PROFILE_MPEG4_SIMPLE_STUDIO &&
+                (s->avctx->level > 0 && s->avctx->level < 9)) {
+                s->studio_profile = 1;
+                next_start_code_studio(gb);
+                extension_and_user_data(s, gb, 0);
+
+                decode_studiovisualobject(ctx, gb);
+                break;
+            }
         } else if (startcode == VOP_STARTCODE) {
             break;
         }
@@ -2636,7 +3172,10 @@ end:
         s->low_delay = 1;
     s->avctx->has_b_frames = !s->low_delay;
 
-    return decode_vop_header(ctx, gb);
+    if (s->studio_profile)
+        return decode_studio_vop_header(ctx, gb);
+    else
+        return decode_vop_header(ctx, gb);
 }
 
 av_cold void ff_mpeg4videodec_static_init(void) {
@@ -2736,6 +3275,37 @@ static int mpeg4_update_thread_context(AVCodecContext *dst,
 }
 #endif
 
+/* Build the VLC readers used by the studio profile: twelve AC group tables
+ * plus the luma and chroma DC size tables. Returns 0 or the init_vlc() error. */
+static av_cold int init_studio_vlcs(Mpeg4DecContext *ctx)
+{
+    int tab, err;
+
+    /* every table row is a {code, length} pair of uint16_t: stride 4, size 2 */
+    for (tab = 0; tab < 12; tab++) {
+        err = init_vlc(&ctx->studio_intra_tab[tab], STUDIO_INTRA_BITS, 22,
+                       &ff_mpeg4_studio_intra[tab][0][1], 4, 2,
+                       &ff_mpeg4_studio_intra[tab][0][0], 4, 2,
+                       0);
+        if (err < 0)
+            return err;
+    }
+
+    err = init_vlc(&ctx->studio_luma_dc, STUDIO_INTRA_BITS, 19,
+                   &ff_mpeg4_studio_dc_luma[0][1], 4, 2,
+                   &ff_mpeg4_studio_dc_luma[0][0], 4, 2,
+                   0);
+    if (err < 0)
+        return err;
+
+    err = init_vlc(&ctx->studio_chroma_dc, STUDIO_INTRA_BITS, 19,
+                   &ff_mpeg4_studio_dc_chroma[0][1], 4, 2,
+                   &ff_mpeg4_studio_dc_chroma[0][0], 4, 2,
+                   0);
+    /* success is reported as exactly 0, whatever non-negative value came back */
+    return err < 0 ? err : 0;
+}
+
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     Mpeg4DecContext *ctx = avctx->priv_data;
@@ -2751,10 +3321,13 @@ static av_cold int decode_init(AVCodecContext *avctx)
         return ret;
 
     ff_mpeg4videodec_static_init();
+    if ((ret = init_studio_vlcs(ctx)) < 0)
+        return ret;
 
     s->h263_pred = 1;
     s->low_delay = 0; /* default, might be overridden in the vol header during header parsing */
     s->decode_mb = mpeg4_decode_mb;
+    s->avctx->bits_per_raw_sample = 10;
     ctx->time_increment_bits = 4; /* default value for broken headers */
 
     avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
@@ -2776,6 +3349,12 @@ static const AVClass mpeg4_class = {
     LIBAVUTIL_VERSION_INT,
 };
 
+static const enum AVPixelFormat mpeg4_pix_fmts[] = { /* file-local list, AV_PIX_FMT_NONE-terminated */
+    AV_PIX_FMT_YUV422P10, /* NOTE(review): no 4:2:0 entry here - confirm non-studio streams still work */
+    AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_NONE
+};
+
 AVCodec ff_mpeg4_decoder = {
     .name                  = "mpeg4",
     .long_name             = NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
@@ -2790,7 +3369,7 @@ AVCodec ff_mpeg4_decoder = {
                              AV_CODEC_CAP_FRAME_THREADS,
     .flush                 = ff_mpeg_flush,
     .max_lowres            = 3,
-    .pix_fmts              = ff_h263_hwaccel_pixfmt_list_420,
+    .pix_fmts              = mpeg4_pix_fmts,
     .profiles              = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg4_update_thread_context),
     .priv_class = &mpeg4_class,
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index e5424cb..b21e44f 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -2660,7 +2660,7 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
             }
         } else {
             /* dct only in intra block */
-            if(s->encoding || !(s->codec_id==AV_CODEC_ID_MPEG1VIDEO || s->codec_id==AV_CODEC_ID_MPEG2VIDEO)){
+            if(0){
                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
@@ -2680,29 +2680,31 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
                     }
                 }
             }else{
-                s->idsp.idct_put(dest_y,                           dct_linesize, block[0]);
-                s->idsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
-                s->idsp.idct_put(dest_y + dct_offset,              dct_linesize, block[2]);
-                s->idsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
+                // KK
+                const int act_block_size = block_size * 2;
+                s->idsp.idct_put(dest_y,                           dct_linesize, s->block2[0]);
+                s->idsp.idct_put(dest_y              + act_block_size, dct_linesize, s->block2[1]);
+                s->idsp.idct_put(dest_y + dct_offset,              dct_linesize, s->block2[2]);
+                s->idsp.idct_put(dest_y + dct_offset + act_block_size, dct_linesize, s->block2[3]);
 
                 if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                     if(s->chroma_y_shift){
-                        s->idsp.idct_put(dest_cb, uvlinesize, block[4]);
-                        s->idsp.idct_put(dest_cr, uvlinesize, block[5]);
+                        s->idsp.idct_put(dest_cb, uvlinesize, s->block2[4]);
+                        s->idsp.idct_put(dest_cr, uvlinesize, s->block2[5]);
                     }else{
 
                         dct_linesize = uvlinesize << s->interlaced_dct;
                         dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize*block_size;
 
-                        s->idsp.idct_put(dest_cb,              dct_linesize, block[4]);
-                        s->idsp.idct_put(dest_cr,              dct_linesize, block[5]);
-                        s->idsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
-                        s->idsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
+                        s->idsp.idct_put(dest_cb,              dct_linesize, s->block2[4]);
+                        s->idsp.idct_put(dest_cr,              dct_linesize, s->block2[5]);
+                        s->idsp.idct_put(dest_cb + dct_offset, dct_linesize, s->block2[6]);
+                        s->idsp.idct_put(dest_cr + dct_offset, dct_linesize, s->block2[7]);
                         if(!s->chroma_x_shift){//Chroma444
-                            s->idsp.idct_put(dest_cb + block_size,              dct_linesize, block[8]);
-                            s->idsp.idct_put(dest_cr + block_size,              dct_linesize, block[9]);
-                            s->idsp.idct_put(dest_cb + block_size + dct_offset, dct_linesize, block[10]);
-                            s->idsp.idct_put(dest_cr + block_size + dct_offset, dct_linesize, block[11]);
+                            s->idsp.idct_put(dest_cb + act_block_size,              dct_linesize, s->block2[8]);
+                            s->idsp.idct_put(dest_cr + act_block_size,              dct_linesize, s->block2[9]);
+                            s->idsp.idct_put(dest_cb + act_block_size + dct_offset, dct_linesize, s->block2[10]);
+                            s->idsp.idct_put(dest_cr + act_block_size + dct_offset, dct_linesize, s->block2[11]);
                         }
                     }
                 }//gray
@@ -2741,7 +2743,8 @@ void ff_mpeg_draw_horiz_band(MpegEncContext *s, int y, int h)
 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     const int linesize   = s->current_picture.f->linesize[0]; //not s->linesize as this would be wrong for field pics
     const int uvlinesize = s->current_picture.f->linesize[1];
-    const int mb_size= 4 - s->avctx->lowres;
+    const int mb_width= 5 - s->avctx->lowres;
+    const int mb_height= 4 - s->avctx->lowres;
 
     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
@@ -2751,20 +2754,20 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
     //block_index is not used by mpeg2, so it is not affected by chroma_format
 
-    s->dest[0] = s->current_picture.f->data[0] + (int)((s->mb_x - 1U) <<  mb_size);
-    s->dest[1] = s->current_picture.f->data[1] + (int)((s->mb_x - 1U) << (mb_size - s->chroma_x_shift));
-    s->dest[2] = s->current_picture.f->data[2] + (int)((s->mb_x - 1U) << (mb_size - s->chroma_x_shift));
+    s->dest[0] = s->current_picture.f->data[0] + (int)((s->mb_x) <<  mb_width);
+    s->dest[1] = s->current_picture.f->data[1] + (int)((s->mb_x) << (mb_width - s->chroma_x_shift));
+    s->dest[2] = s->current_picture.f->data[2] + (int)((s->mb_x) << (mb_width - s->chroma_x_shift));
 
     if(!(s->pict_type==AV_PICTURE_TYPE_B && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
     {
         if(s->picture_structure==PICT_FRAME){
-        s->dest[0] += s->mb_y *   linesize << mb_size;
-        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
-        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
+        s->dest[0] += s->mb_y *   linesize << mb_height;
+        s->dest[1] += s->mb_y * uvlinesize << (mb_height - s->chroma_y_shift);
+        s->dest[2] += s->mb_y * uvlinesize << (mb_height - s->chroma_y_shift);
         }else{
-            s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
-            s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
-            s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
+            s->dest[0] += (s->mb_y>>1) *   linesize << mb_height;
+            s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_height - s->chroma_y_shift);
+            s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_height - s->chroma_y_shift);
             av_assert1((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
         }
     }
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index c82fa3e..0bef733 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -69,8 +69,10 @@
 #define PICTURE_START_CODE      0x00000100
 #define SLICE_MIN_START_CODE    0x00000101
 #define SLICE_MAX_START_CODE    0x000001af
-#define EXT_START_CODE          0x000001b5
 #define USER_START_CODE         0x000001b2
+#define EXT_START_CODE          0x000001b5
+#define SLICE_START_CODE        0x000001b7
+
 
 /**
  * MpegEncContext.
@@ -381,6 +383,9 @@ typedef struct MpegEncContext {
     int custom_pcf;
 
     /* MPEG-4 specific */
+    int studio_profile;
+    int32_t studio_dc_val[3];
+    int dct_precision;
     ///< number of bits to represent the fractional part of time (encoder only)
     int time_increment_bits;
     int last_time_base;
@@ -496,7 +501,10 @@ typedef struct MpegEncContext {
 
     int16_t (*block)[64]; ///< points to one of the following blocks
     int16_t (*blocks)[12][64]; // for HQ mode we need to keep the best block
-    int (*decode_mb)(struct MpegEncContext *s, int16_t block[6][64]); // used by some codecs to avoid a switch()
+
+    int32_t block2[12][64];
+
+    int (*decode_mb)(struct MpegEncContext *s, int16_t block[12][64]); // used by some codecs to avoid a switch()
 #define SLICE_OK         0
 #define SLICE_ERROR     -1
 #define SLICE_END       -2 ///<end marker found
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index 0711e16..36e9576 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -58,6 +58,157 @@
    and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
 #define C_SHIFT (4+1+12)
 
+// STEINAR IDCT
+#define DCTSIZE 8
+#define DCTSIZE2 64
+
+// Scale factors by frequency index k: 1.0 / (sqrt(2.0) * cos(k * M_PI / 16.0)) for k = 1..7; entry 0 is 1.
+static const double scalefac[] = {
+    1.0, 0.7209598220069479, 0.765366864730180, 0.8504300947672564,
+    1.0, 1.2727585805728336, 1.847759065022573, 3.6245097854115502
+};
+
+// 1D 8-point inverse DCT pass used by ff_idct_float(): reads y0..y7, writes x[0..7].
+static inline void idct1d_float(double y0, double y1, double y2, double y3, double y4, double y5, double y6, double y7, double *x)
+{
+    // constants
+    static const double a1 = 0.7071067811865474;   // sqrt(2) / 2
+    static const double a2 = 0.5411961001461971;   // cos(3/8 pi) * sqrt(2)
+    // a3 has the same value as a1
+    static const double a3 = 0.7071067811865474;
+    static const double a4 = 1.3065629648763766;   // cos(pi/8) * sqrt(2)
+    // a5 equals 0.5 * (a4 - a2)
+    static const double a5 = 0.3826834323650897;
+
+    // phase 1: reorder, even-indexed inputs go to slots 0-3, odd-indexed to slots 4-7
+    const double p1_0 = y0;
+    const double p1_1 = y4;
+    const double p1_2 = y2;
+    const double p1_3 = y6;
+    const double p1_4 = y5;
+    const double p1_5 = y1;
+    const double p1_6 = y7;
+    const double p1_7 = y3;
+
+    // phase 2: butterflies on the odd half (slots 4-7)
+    const double p2_0 = p1_0;
+    const double p2_1 = p1_1;
+    const double p2_2 = p1_2;
+    const double p2_3 = p1_3;
+    const double p2_4 = p1_4 - p1_7;
+    const double p2_5 = p1_5 + p1_6;
+    const double p2_6 = p1_5 - p1_6;
+    const double p2_7 = p1_4 + p1_7;
+
+    // phase 3: butterflies on slots 2/3 and 5/7
+    const double p3_0 = p2_0;
+    const double p3_1 = p2_1;
+    const double p3_2 = p2_2 - p2_3;
+    const double p3_3 = p2_2 + p2_3;
+    const double p3_4 = p2_4;
+    const double p3_5 = p2_5 - p2_7;
+    const double p3_6 = p2_6;
+    const double p3_7 = p2_5 + p2_7;
+
+    // phase 4: the only multiplications, by the constants a1..a5
+    const double p4_0 = p3_0;
+    const double p4_1 = p3_1;
+    const double p4_2 = a1 * p3_2;
+    const double p4_3 = p3_3;
+    const double p4_4 = p3_4 * -a2 + (p3_4 + p3_6) * -a5;
+    const double p4_5 = a3 * p3_5;
+    const double p4_6 = p3_6 * a4 + (p3_4 + p3_6) * -a5;
+    const double p4_7 = p3_7;
+
+    // phase 5: butterfly on slots 0/1; slot 3 accumulates slot 2
+    const double p5_0 = p4_0 + p4_1;
+    const double p5_1 = p4_0 - p4_1;
+    const double p5_2 = p4_2;
+    const double p5_3 = p4_2 + p4_3;
+    const double p5_4 = p4_4;
+    const double p5_5 = p4_5;
+    const double p5_6 = p4_6;
+    const double p5_7 = p4_7;
+
+    // phase 6: combine terms within each half (slots 0-3 and slots 4-7)
+    const double p6_0 = p5_0 + p5_3;
+    const double p6_1 = p5_1 + p5_2;
+    const double p6_2 = p5_1 - p5_2;
+    const double p6_3 = p5_0 - p5_3;
+    const double p6_4 = -p5_4;
+    const double p6_5 = p5_5 - p5_4;
+    const double p6_6 = p5_5 + p5_6;
+    const double p6_7 = p5_6 + p5_7;
+
+    // phase 7: final butterflies, x[k] and x[7 - k] are the sum and difference of one pair
+    x[0] = p6_0 + p6_7;
+    x[1] = p6_1 + p6_6;
+    x[2] = p6_2 + p6_5;
+    x[3] = p6_3 + p6_4;
+    x[4] = p6_3 - p6_4;
+    x[5] = p6_2 - p6_5;
+    x[6] = p6_1 - p6_6;
+    x[7] = p6_0 - p6_7;
+}
+
+/* Floating-point 8x8 inverse DCT that stores clamped (0..1023) 16-bit samples.
+ * input_ is read as 64 int32_t coefficients despite its int16_t type. */
+void ff_idct_float(uint8_t *dest_, int line_size, int16_t *input_)
+{
+    double temp[DCTSIZE2];
+    double quant_table[DCTSIZE2];
+    uint16_t *dest = (uint16_t *)dest_;
+    int32_t *input = (int32_t *)input_;
+
+    // Per-coefficient prescale; NOTE(review): constant, yet rebuilt on every call.
+    for (unsigned y = 0; y < DCTSIZE; ++y) {
+        for (unsigned x = 0; x < DCTSIZE; ++x) {
+            quant_table[y * DCTSIZE + x] = (1.0/DCTSIZE) * scalefac[x] * scalefac[y];
+        }
+    }
+
+    // IDCT columns: the result for column x is stored at temp[x * DCTSIZE ..].
+    for (unsigned x = 0; x < DCTSIZE; ++x) {
+        idct1d_float(input[DCTSIZE * 0 + x] * quant_table[DCTSIZE * 0 + x],
+                     input[DCTSIZE * 1 + x] * quant_table[DCTSIZE * 1 + x],
+                     input[DCTSIZE * 2 + x] * quant_table[DCTSIZE * 2 + x],
+                     input[DCTSIZE * 3 + x] * quant_table[DCTSIZE * 3 + x],
+                     input[DCTSIZE * 4 + x] * quant_table[DCTSIZE * 4 + x],
+                     input[DCTSIZE * 5 + x] * quant_table[DCTSIZE * 5 + x],
+                     input[DCTSIZE * 6 + x] * quant_table[DCTSIZE * 6 + x],
+                     input[DCTSIZE * 7 + x] * quant_table[DCTSIZE * 7 + x],
+                     temp + x * DCTSIZE);
+    }
+
+    // IDCT rows: element y of every column result forms output row y.
+    for (unsigned y = 0; y < DCTSIZE; ++y) {
+        double temp2[DCTSIZE];
+        idct1d_float(temp[DCTSIZE * 0 + y],
+                     temp[DCTSIZE * 1 + y],
+                     temp[DCTSIZE * 2 + y],
+                     temp[DCTSIZE * 3 + y],
+                     temp[DCTSIZE * 4 + y],
+                     temp[DCTSIZE * 5 + y],
+                     temp[DCTSIZE * 6 + y],
+                     temp[DCTSIZE * 7 + y],
+                     temp2);
+
+        // Clamp, then round to nearest: a plain double -> integer conversion
+        // truncates and would bias every sample downwards by up to one LSB.
+        for (unsigned x = 0; x < DCTSIZE; ++x) {
+            const double val = temp2[x] / 8;
+            if (val > 1023)
+                dest[x] = 1023;
+            else if (val < 0)
+                dest[x] = 0;
+            else
+                dest[x] = (uint16_t)(val + 0.5);
+        }
+        dest += line_size / 2; // dest is uint16_t *, so the stride is halved
+    }
+}
+// END OF STEINAR CODE
+
 static inline void idct4col_put(uint8_t *dest, int line_size, const int16_t *col)
 {
     int c0, c1, c2, c3, a0, a1, a2, a3;
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index 154e297..c10fe83 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -42,6 +42,8 @@ void ff_simple_idct_put_12(uint8_t *dest, int line_size, int16_t *block);
 void ff_simple_idct_add_12(uint8_t *dest, int line_size, int16_t *block);
 void ff_simple_idct_12(int16_t *block);
 
+void ff_idct_float(uint8_t *dest_, int line_size, int16_t *input_);
+
 /**
  * Special version of ff_simple_idct_10() which does dequantization
  * and scales by a factor of 2 more between the two IDCTs to account
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to