From e9fe8038e2efe12fde304bbb7de514bcfe223c8a Mon Sep 17 00:00:00 2001
From: Stanislav Dolganov <dolganov@qst.hk>
Date: Thu, 18 Aug 2016 14:36:58 +0300
Subject: [PATCH 2/4] FFV1 p frames

---
 libavcodec/Makefile          |   4 +-
 libavcodec/ffv1.c            |  33 +++-
 libavcodec/ffv1.h            |  14 +-
 libavcodec/ffv1dec.c         | 354 +++++++++++++++++++++++++++++++++++++++-
 libavcodec/ffv1enc.c         | 372 ++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/x86/me_cmp_init.c |   4 +-
 6 files changed, 768 insertions(+), 13 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f24cd81..60f83cd 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -278,8 +278,8 @@ OBJS-$(CONFIG_ESCAPE124_DECODER)       += escape124.o
 OBJS-$(CONFIG_ESCAPE130_DECODER)       += escape130.o
 OBJS-$(CONFIG_EVRC_DECODER)            += evrcdec.o acelp_vectors.o lsp.o
 OBJS-$(CONFIG_EXR_DECODER)             += exr.o
-OBJS-$(CONFIG_FFV1_DECODER)            += ffv1dec.o ffv1.o
-OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1enc.o ffv1.o
+OBJS-$(CONFIG_FFV1_DECODER)            += ffv1dec.o ffv1.o obmemc.o obmc.o
+OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1enc.o ffv1.o obmemc.o obme.o
 OBJS-$(CONFIG_FFWAVESYNTH_DECODER)     += ffwavesynth.o
 OBJS-$(CONFIG_FIC_DECODER)             += fic.o
 OBJS-$(CONFIG_FLAC_DECODER)            += flacdec.o flacdata.o flac.o
diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index a14dd2a..f070a61 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -36,8 +36,11 @@
 #include "avcodec.h"
 #include "internal.h"
 #include "rangecoder.h"
+#include "golomb.h"
 #include "mathops.h"
 #include "ffv1.h"
+#include "me_cmp.h"
+#include "h263.h"
 
 av_cold int ff_ffv1_common_init(AVCodecContext *avctx)
 {
@@ -49,19 +52,36 @@ av_cold int ff_ffv1_common_init(AVCodecContext *avctx)
     s->avctx = avctx;
     s->flags = avctx->flags;
 
+    int width, height, ret;
+
+    width = avctx->width;
+    height = avctx->height;
+
+    /* new end */
     s->picture.f = av_frame_alloc();
     s->last_picture.f = av_frame_alloc();
-    if (!s->picture.f || !s->last_picture.f)
-        return AVERROR(ENOMEM);
+    s->residual.f = av_frame_alloc();
+    if (!s->picture.f || !s->last_picture.f || !s->residual.f)
+        goto fail;
 
     s->width  = avctx->width;
     s->height = avctx->height;
 
+    s->c_image_line_buf = av_mallocz_array(sizeof(*s->c_image_line_buf), 2 * s->width);
+    s->p_image_line_buf = av_mallocz_array(sizeof(*s->p_image_line_buf), 2 * s->width);
+    if (!s->c_image_line_buf || !s->p_image_line_buf)
+        goto fail;
+
     // defaults
     s->num_h_slices = 1;
     s->num_v_slices = 1;
 
+    if ((ret = ff_obmc_common_init(&s->obmc, avctx)) < 0)
+        return ret;
+
     return 0;
+fail:
+    return AVERROR(ENOMEM);
 }
 
 av_cold int ff_ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
@@ -220,6 +240,10 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)
         ff_thread_release_buffer(avctx, &s->last_picture);
     av_frame_free(&s->last_picture.f);
 
+    if (s->residual.f)
+        ff_thread_release_buffer(avctx, &s->residual);
+    av_frame_free(&s->residual.f);
+
     for (j = 0; j < s->max_slice_count; j++) {
         FFV1Context *fs = s->slice_context[j];
         for (i = 0; i < s->plane_count; i++) {
@@ -232,6 +256,9 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)
         av_freep(&fs->sample_buffer32);
     }
 
+    av_freep(&s->p_image_line_buf);
+    av_freep(&s->c_image_line_buf);
+
     av_freep(&avctx->stats_out);
     for (j = 0; j < s->quant_table_count; j++) {
         av_freep(&s->initial_states[j]);
@@ -245,5 +272,7 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)
     for (i = 0; i < s->max_slice_count; i++)
         av_freep(&s->slice_context[i]);
 
+    ff_obmc_close(&s->obmc);
+
     return 0;
 }
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index c2bae1e..0a901f0 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -42,6 +42,11 @@
 #include "rangecoder.h"
 #include "thread.h"
 
+#define FF_MPV_OFFSET(x) (offsetof(MpegEncContext, x) + offsetof(FFV1Context, obmc.m))
+#include "obmemc.h"
+
+#define MID_STATE 128
+
 #ifdef __INTEL_COMPILER
 #undef av_flatten
 #define av_flatten
@@ -49,6 +54,7 @@
 
 #define MAX_PLANES 4
 #define CONTEXT_SIZE 32
+#define FRAC_BITS 4
 
 #define MAX_QUANT_TABLES 8
 #define MAX_CONTEXT_INPUTS 5
@@ -93,7 +99,7 @@ typedef struct FFV1Context {
     int flags;
     int picture_number;
     int key_frame;
-    ThreadFrame picture, last_picture;
+    ThreadFrame picture, last_picture, residual;
     struct FFV1Context *fsrc;
 
     AVFrame *cur;
@@ -113,11 +119,14 @@ typedef struct FFV1Context {
 
     int use32bit;
 
+    uint16_t *p_image_line_buf, *c_image_line_buf;
+
     int ec;
     int intra;
     int slice_damaged;
     int key_frame_ok;
     int context_model;
+    int p_frame;
 
     int bits_per_raw_sample;
     int packed_at_lsb;
@@ -138,6 +147,9 @@ typedef struct FFV1Context {
     int slice_coding_mode;
     int slice_rct_by_coef;
     int slice_rct_ry_coef;
+
+    OBMCContext obmc;
+    uint8_t block_state[128 + 32*128];
 } FFV1Context;
 
 int ff_ffv1_common_init(AVCodecContext *avctx);
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index d8f35c3..7913baa 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -39,6 +39,75 @@
 #include "mathops.h"
 #include "ffv1.h"
 
+#include "obmc.h"
+
+/* Rebuild a P-frame in place: f->picture becomes (decoded sample + f->obmc.current_picture sample - bias), wrapped to the component depth. */
+static int ff_predict_frame(AVCodecContext *avctx, FFV1Context *f)
+{
+    int ret, i, x, y;
+    AVFrame *curr     = f->picture.f;            /* frame decoded from the bitstream */
+    AVFrame *prev     = f->obmc.current_picture; /* presumably the OBMC prediction -- confirm against predict_plane() */
+    AVFrame *residual = f->residual.f;           /* scratch frame that receives the reconstructed lines */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(prev->format);
+    int width  = f->width;
+    int height = f->height;
+    int has_plane[4] = { 0 };
+    const int cw = AV_CEIL_RSHIFT(width, desc->log2_chroma_w);
+    const int ch = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
+
+    if (f->residual.f)
+        ff_thread_release_buffer(avctx, &f->residual);
+    if ((ret = ff_thread_ref_frame(&f->residual, &f->picture)) < 0)
+        return ret;
+    if ((ret = av_frame_make_writable(f->residual.f)) < 0) {
+        ff_thread_release_buffer(avctx, &f->residual);
+        return ret;
+    }
+
+    for (i = 0; i < desc->nb_components; i++)
+        has_plane[desc->comp[i].plane] = 1;
+
+    for (i = 0; i < desc->nb_components && has_plane[i]; i++)
+        memset(residual->buf[i]->data, 0, residual->buf[i]->size * sizeof(*residual->buf[i]->data));
+
+    for (i = 0; i < desc->nb_components; i++) {
+        const int w1 = (i == 1 || i == 2) ? cw : width;  /* components 1 and 2 use the chroma dimensions */
+        const int h1 = (i == 1 || i == 2) ? ch : height;
+        const int depth = desc->comp[i].depth;
+        const int max_val = 1 << depth;
+
+        memset(f->p_image_line_buf, 0, 2 * width * sizeof(*f->p_image_line_buf));
+        memset(f->c_image_line_buf, 0, 2 * width * sizeof(*f->c_image_line_buf));
+
+        for (y = 0; y < h1; y++) {
+            memset(f->p_image_line_buf, 0, width * sizeof(*f->p_image_line_buf));
+            memset(f->c_image_line_buf, 0, width * sizeof(*f->c_image_line_buf));
+            av_read_image_line(f->c_image_line_buf,
+                               (void *)curr->data,
+                               curr->linesize,
+                               desc,
+                               0, y, i, w1, 0);
+            av_read_image_line(f->p_image_line_buf,
+                              (void *)prev->data,
+                              prev->linesize,
+                              desc,
+                              0, y, i, w1, 0);
+            for (x = 0; x < w1; ++x) { /* inverse of ff_frame_diff() in the encoder: add prev, remove the max_val/4 bias */
+                f->c_image_line_buf[x] = (f->c_image_line_buf[x] + f->p_image_line_buf[x] - (max_val >> 2)) & (max_val - 1);
+            }
+            av_write_image_line(f->c_image_line_buf,
+                                residual->data,
+                                residual->linesize,
+                                desc,
+                                0, y, i, w1);
+        }
+    }
+
+    /* av_frame_copy() returns a negative AVERROR on failure; do not report success in that case. */
+    ret = av_frame_copy(curr, residual);
+    return ret < 0 ? ret : 0;
+}
+
 static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state,
                                                int is_signed)
 {
@@ -97,6 +166,83 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state,
     return ret;
 }
 
+static int decode_q_branch(FFV1Context *f, int level, int x, int y)
+{   /* Decode the block node at (x, y) of quadtree depth `level`, recursing into four children when it is split. Returns 0 or a negative AVERROR. */
+    RangeCoder *const c = &f->slice_context[0]->c;
+    OBMCContext *s = &f->obmc;
+    const int w= s->b_width << s->block_max_depth; /* row stride used to index s->block[] */
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    int trx= (x+1)<<rem_depth;
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block; /* neighbours outside the grid fall back to null_block / left / tl */
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+    int res;
+
+    if(s->keyframe){ /* key frames read nothing here: every block takes the null_block values and is marked BLOCK_INTRA */
+        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
+        return 0;
+    }
+
+    if(level==s->block_max_depth || get_rac(c, &f->block_state[4 + s_context])){ /* leaf: deepest level reached, or the coded flag is set */
+        int type, mx, my;
+        int l = left->color[0];
+        int cb= left->color[1];
+        int cr= left->color[2];
+        unsigned ref = 0;
+        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
+        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
+
+        type= get_rac(c, &f->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
+
+        if(type){ /* intra: colours are coded as deltas against the left neighbour */
+            pred_mv(s, &mx, &my, 0, left, top, tr);
+            l += get_symbol(c, &f->block_state[32], 1);
+            if (f->obmc.nb_planes > 2) { /* chroma deltas are only present with more than two planes */
+                cb += get_symbol(c, &f->block_state[64], 1);
+                cr += get_symbol(c, &f->block_state[96], 1);
+            }
+        }else{ /* inter: optional reference index, then motion vector deltas added to the pred_mv() prediction */
+            if(s->ref_frames > 1)
+                ref = get_symbol(c, &f->block_state[128 + 1024 + 32*ref_context], 0);
+            if (ref >= s->ref_frames) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid ref\n");
+                return AVERROR_INVALIDDATA;
+            }
+            pred_mv(s, &mx, &my, ref, left, top, tr);
+            mx += get_symbol(c, &f->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
+            my += get_symbol(c, &f->block_state[128 + 32*(my_context + 16*!!ref)], 1);
+        }
+        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
+    }else{ /* split: decode the four children one level deeper, stopping at the first error */
+        if ((res = decode_q_branch(f, level+1, 2*x+0, 2*y+0)) < 0 ||
+            (res = decode_q_branch(f, level+1, 2*x+1, 2*y+0)) < 0 ||
+            (res = decode_q_branch(f, level+1, 2*x+0, 2*y+1)) < 0 ||
+            (res = decode_q_branch(f, level+1, 2*x+1, 2*y+1)) < 0)
+            return res;
+    }
+    return 0;
+}
+
+/* Decode every level-0 block in raster order; returns 0 or the first negative error. */
+static int decode_blocks(FFV1Context *s)
+{
+    const int cols = s->obmc.b_width;
+    const int rows = s->obmc.b_height;
+    int bx, by;
+    for (by = 0; by < rows; by++) {
+        for (bx = 0; bx < cols; bx++) {
+            int err = decode_q_branch(s, 0, bx, by);
+            if (err < 0)
+                return err;
+        }
+    }
+    return 0;
+}
+
 #define TYPE int16_t
 #define RENAME(name) name
 #include "ffv1dec_template.c"
@@ -419,6 +565,13 @@ static int read_extra_header(FFV1Context *f)
         if (f->micro_version < 0)
             return AVERROR_INVALIDDATA;
     }
+
+    if (f->version == 3 && f->micro_version > 4 || f->version == 4 && f->micro_version > 2) {
+        f->p_frame = 1;
+        f->micro_version--;
+    } else {
+        f->p_frame = 0;
+    }
     f->ac = get_symbol(c, state, 0);
 
     if (f->ac == AC_RANGE_CUSTOM_TAB) {
@@ -514,7 +667,7 @@ static int read_extra_header(FFV1Context *f)
 static int read_header(FFV1Context *f)
 {
     uint8_t state[CONTEXT_SIZE];
-    int i, j, context_count = -1; //-1 to avoid warning
+    int i, j, ret, context_count = -1; //-1 to avoid warning
     RangeCoder *const c = &f->slice_context[0]->c;
 
     memset(state, 128, sizeof(state));
@@ -669,6 +822,9 @@ static int read_header(FFV1Context *f)
         return AVERROR(ENOSYS);
     }
 
+    if ((ret = ff_obmc_decode_init(&f->obmc)) < 0)
+        return ret;
+
     ff_dlog(f->avctx, "%d %d %d\n",
             f->chroma_h_shift, f->chroma_v_shift, f->avctx->pix_fmt);
     if (f->version < 2) {
@@ -750,6 +906,49 @@ static int read_header(FFV1Context *f)
             }
         }
     }
+
+    return 0;
+}
+
+static int decode_p_header(FFV1Context *f)
+{   /* Parse the per-frame OBMC parameters (mirror of write_p_header() in the encoder); returns 0 or AVERROR_INVALIDDATA. */
+    uint8_t state[CONTEXT_SIZE];
+    int plane_index, max_ref;
+    RangeCoder *const c = &f->slice_context[0]->c;
+
+    memset(state, 128, sizeof(state));
+    if (f->key_frame) {
+        memset(f->block_state, MID_STATE, sizeof(f->block_state));
+        max_ref = get_symbol(c, state, 0) + 1;
+        if (max_ref < 1 || max_ref > MAX_REF_FRAMES) /* untrusted input: obmc.last_pictures[] is handled as MAX_REF_FRAMES entries */
+            return AVERROR_INVALIDDATA;
+        f->obmc.max_ref_frames = max_ref; /* stored only once validated */
+    } else {
+        for(plane_index=0; plane_index<FFMIN(f->obmc.nb_planes, 2); plane_index++){
+            int htaps, i, sum=0;
+            PlaneObmc *p= &f->obmc.plane[plane_index];
+            p->diag_mc = get_rac(c, state);
+            htaps = get_symbol(c, state, 0)*2 + 2;
+            if((unsigned)htaps > HTAPS_MAX || htaps==0)
+                return AVERROR_INVALIDDATA;
+            p->htaps= htaps;
+            for(i= p->htaps/2; i; i--) { /* magnitudes are coded; the sign alternates with the tap index */
+                p->hcoeff[i]= get_symbol(c, state, 0) * (1-2*(i&1));
+                sum += p->hcoeff[i];
+            }
+            p->hcoeff[0]= 32-sum; /* tap 0 is derived, not coded */
+        }
+        f->obmc.plane[2].diag_mc= f->obmc.plane[1].diag_mc; /* plane 2 reuses the settings of plane 1 */
+        f->obmc.plane[2].htaps  = f->obmc.plane[1].htaps;
+        memcpy(f->obmc.plane[2].hcoeff, f->obmc.plane[1].hcoeff, sizeof(f->obmc.plane[1].hcoeff));
+    }
+    f->obmc.mv_scale       = get_symbol(c, state, 0); /* NOTE(review): mv_scale is not range-checked here -- confirm the valid range */
+    f->obmc.block_max_depth= get_symbol(c, state, 0);
+    if(f->obmc.block_max_depth > 1 || f->obmc.block_max_depth < 0){
+        av_log(f->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", f->obmc.block_max_depth);
+        f->obmc.block_max_depth= 0;
+        return AVERROR_INVALIDDATA;
+    }
     return 0;
 }
 
@@ -778,7 +977,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     int buf_size        = avpkt->size;
     FFV1Context *f      = avctx->priv_data;
     RangeCoder *const c = &f->slice_context[0]->c;
-    int i, ret;
+    int i, ret, plane_index;
     uint8_t keystate = 128;
     uint8_t *buf_p;
     AVFrame *p;
@@ -801,8 +1000,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
     p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P
+    f->obmc.current_picture->pict_type = AV_PICTURE_TYPE_I;
     if (get_rac(c, &keystate)) {
         p->key_frame    = 1;
+        f->obmc.keyframe = f->key_frame = 1;
         f->key_frame_ok = 0;
         if ((ret = read_header(f)) < 0)
             return ret;
@@ -814,6 +1015,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
             return AVERROR_INVALIDDATA;
         }
         p->key_frame = 0;
+        f->obmc.keyframe = f->key_frame = 0;
+    }
+
+    if (f->p_frame) {
+        if ((ret = decode_p_header(f)) < 0)
+            return ret;
+
+        p->pict_type = p->key_frame ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+        if ((ret=ff_obmc_common_init_after_header(&f->obmc)) < 0)
+            return ret;
     }
 
     if ((ret = ff_thread_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF)) < 0)
@@ -823,6 +1035,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
                f->version, p->key_frame, f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample);
 
+    if (f->p_frame) {
+        if ((ret = ff_obmc_predecode_frame(&f->obmc)) < 0)
+            return ret;
+
+        if ((ret = decode_blocks(f)) < 0)
+            return ret;
+    }
+
     ff_thread_finish_setup(avctx);
 
     buf_p = buf + buf_size;
@@ -899,6 +1119,33 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
                           fs->slice_height);
         }
     }
+
+    if (f->p_frame) {
+        ff_thread_await_progress(&f->last_picture, INT_MAX, 0);
+
+        av_frame_copy(f->obmc.last_pictures[1], f->last_picture.f);
+
+        for (plane_index=0; plane_index < f->obmc.nb_planes; plane_index++) {
+            PlaneObmc *pc = &f->obmc.plane[plane_index];
+            int w = pc->width;
+            int h = pc->height;
+
+            if(!p->key_frame){
+                memset(f->obmc.spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
+                predict_plane(&f->obmc, f->obmc.spatial_idwt_buffer, plane_index, 1);
+            }
+        }
+
+        if (!p->key_frame) {
+            if ((ret = ff_predict_frame(avctx, f)) < 0) {
+                ff_thread_report_progress(&f->picture, INT_MAX, 0);
+                return ret;
+            }
+        }
+        av_frame_copy(f->obmc.current_picture, f->picture.f);
+    }
+    ff_obmc_release_buffer(&f->obmc);
+
     ff_thread_report_progress(&f->picture, INT_MAX, 0);
 
     f->picture_number++;
@@ -922,10 +1169,18 @@ static int init_thread_copy(AVCodecContext *avctx)
 
     f->picture.f      = NULL;
     f->last_picture.f = NULL;
+    f->residual.f     = NULL;
     f->sample_buffer  = NULL;
     f->max_slice_count = 0;
     f->slice_count = 0;
 
+    f->obmc.current_picture = NULL;
+    for (i = 0; i < MAX_REF_FRAMES; i++)
+        f->obmc.last_pictures[i] = NULL;
+
+    f->p_image_line_buf = NULL;
+    f->c_image_line_buf = NULL;
+
     for (i = 0; i < f->quant_table_count; i++) {
         av_assert0(f->version > 1);
         f->initial_states[i] = av_memdup(f->initial_states[i],
@@ -934,11 +1189,47 @@ static int init_thread_copy(AVCodecContext *avctx)
 
     f->picture.f      = av_frame_alloc();
     f->last_picture.f = av_frame_alloc();
+    f->residual.f     = av_frame_alloc();
+
+    if (!f->picture.f || !f->last_picture.f || !f->residual.f)
+        goto fail;
+
+    f->obmc.current_picture = av_frame_alloc();
+    f->obmc.mconly_picture = av_frame_alloc();
+
+    f->width  = avctx->width;
+    f->height = avctx->height;
+
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, f->obmc.spatial_idwt_buffer, f->width, f->height * sizeof(IDWTELEM), fail);
+
+    for (i = 0; i < MAX_REF_FRAMES; i++)
+        f->obmc.last_pictures[i] = av_frame_alloc();
+
+    int w= AV_CEIL_RSHIFT(avctx->width,  LOG2_MB_SIZE);
+    int h= AV_CEIL_RSHIFT(avctx->height, LOG2_MB_SIZE);
+
+    f->obmc.b_width = w;
+    f->obmc.b_height= h;
+
+    f->obmc.block = av_mallocz_array(w * h,  sizeof(BlockNode) << 2); // FIXME Maybe large
+
+    f->obmc.avctx = avctx;
+
+    f->obmc.chroma_h_shift = f->chroma_h_shift;
+    f->obmc.chroma_v_shift = f->chroma_v_shift;
+
+    f->p_image_line_buf = av_mallocz_array(sizeof(*f->p_image_line_buf), 2 * f->width);
+    f->c_image_line_buf = av_mallocz_array(sizeof(*f->c_image_line_buf), 2 * f->width);
+
+    if (!f->p_image_line_buf || !f->c_image_line_buf)
+        goto fail;
 
     if ((ret = ff_ffv1_init_slice_contexts(f)) < 0)
         return ret;
 
     return 0;
+fail:
+    return AVERROR(ENOMEM);
 }
 #endif
 
@@ -955,6 +1246,7 @@ static void copy_fields(FFV1Context *fsdst, FFV1Context *fssrc, FFV1Context *fsr
     fsdst->colorspace          = fsrc->colorspace;
 
     fsdst->ec                  = fsrc->ec;
+    fsdst->p_frame             = fsrc->p_frame;
     fsdst->intra               = fsrc->intra;
     fsdst->slice_damaged       = fssrc->slice_damaged;
     fsdst->key_frame_ok        = fsrc->key_frame_ok;
@@ -975,23 +1267,48 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 {
     FFV1Context *fsrc = src->priv_data;
     FFV1Context *fdst = dst->priv_data;
-    int i, ret;
+    int i, j, ret;
 
     if (dst == src)
         return 0;
 
     {
-        ThreadFrame picture = fdst->picture, last_picture = fdst->last_picture;
+        ThreadFrame picture = fdst->picture, last_picture = fdst->last_picture, residual = fdst->residual;
+        uint16_t *c_image_line_buf = fdst->c_image_line_buf, *p_image_line_buf = fdst->p_image_line_buf;
         uint8_t (*initial_states[MAX_QUANT_TABLES])[32];
         struct FFV1Context *slice_context[MAX_SLICES];
         memcpy(initial_states, fdst->initial_states, sizeof(fdst->initial_states));
         memcpy(slice_context,  fdst->slice_context , sizeof(fdst->slice_context));
+        AVFrame *current_picture = fdst->obmc.current_picture, *mconly_picture = fdst->obmc.mconly_picture;
+        AVFrame *last_pictures[MAX_REF_FRAMES];
+        BlockNode *block = fdst->obmc.block;
+        uint8_t *scratchbuf = fdst->obmc.scratchbuf;
+        uint8_t *emu_edge_buffer = fdst->obmc.emu_edge_buffer;
+        IDWTELEM *spatial_idwt_buffer = fdst->obmc.spatial_idwt_buffer;
+        for (i = 0; i < MAX_REF_FRAMES; i++)
+            last_pictures[i] = fdst->obmc.last_pictures[i];
 
         memcpy(fdst, fsrc, sizeof(*fdst));
         memcpy(fdst->initial_states, initial_states, sizeof(fdst->initial_states));
         memcpy(fdst->slice_context,  slice_context , sizeof(fdst->slice_context));
         fdst->picture      = picture;
         fdst->last_picture = last_picture;
+        fdst->residual     = residual;
+
+        fdst->p_image_line_buf = p_image_line_buf;
+        fdst->c_image_line_buf = c_image_line_buf;
+
+        fdst->obmc.current_picture   = current_picture;
+        fdst->obmc.mconly_picture    = mconly_picture;
+        for (i = 0; i < MAX_REF_FRAMES; i++)
+            fdst->obmc.last_pictures[i] = last_pictures[i];
+        fdst->obmc.block = block;
+        fdst->obmc.scratchbuf = scratchbuf;
+        fdst->obmc.emu_edge_buffer = emu_edge_buffer;
+        fdst->obmc.spatial_idwt_buffer = spatial_idwt_buffer;
+
+        fdst->obmc.avctx = dst;
+
         for (i = 0; i<fdst->num_h_slices * fdst->num_v_slices; i++) {
             FFV1Context *fssrc = fsrc->slice_context[i];
             FFV1Context *fsdst = fdst->slice_context[i];
@@ -1010,6 +1327,35 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
             return ret;
     }
 
+    for (i = 0; i < MAX_REF_FRAMES; i++)
+        av_frame_ref(fdst->obmc.last_pictures[i], fsrc->obmc.last_pictures[i]);
+
+    av_frame_ref(fdst->obmc.current_picture, fsrc->obmc.current_picture);
+
+    for (i = 0; i < MAX_REF_FRAMES; i++) {
+        for (j=0; j<9; j++) {
+            int is_chroma = !!(j%3);
+            int h = is_chroma ? AV_CEIL_RSHIFT(fsrc->avctx->height, fsrc->chroma_v_shift) : fsrc->avctx->height;
+            int ls = fdst->obmc.last_pictures[i]->linesize[j%3];
+            if (fsrc->obmc.halfpel_plane[i][1+j/3][j%3]) {
+                fdst->obmc.halfpel_plane[i][1+j/3][j%3] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
+                memcpy(
+                    fdst->obmc.halfpel_plane[i][1+j/3][j%3],
+                    fsrc->obmc.halfpel_plane[i][1+j/3][j%3] - EDGE_WIDTH*(1+fsrc->obmc.last_pictures[i]->linesize[j%3]),
+                    ls * (h + 2 * EDGE_WIDTH) * sizeof(*fdst->obmc.halfpel_plane[i][1+j/3][j%3])
+                );
+                fdst->obmc.halfpel_plane[i][1+j/3][j%3] += EDGE_WIDTH * (1 + ls);
+            }
+            fdst->obmc.halfpel_plane[i][0][j%3] = fdst->obmc.last_pictures[i]->data[j%3];
+        }
+    }
+
+    memcpy(
+        fdst->obmc.block,
+        fsrc->obmc.block,
+        (fsrc->obmc.b_width * fsrc->obmc.b_height * (sizeof(BlockNode) << (fsrc->obmc.block_max_depth*2)))
+    );
+
     fdst->fsrc = fsrc;
 
     return 0;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index dae68ae..4596585 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -41,6 +41,8 @@
 #include "mathops.h"
 #include "ffv1.h"
 
+#include "obme.h"
+
 static const int8_t quant5_10bit[256] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
@@ -136,6 +138,75 @@ static const uint8_t ver2_state[256] = {
     241, 243, 242, 244, 245, 246, 247, 248, 249, 250, 251, 252, 252, 253, 254, 255,
 };
 
+static int ff_frame_diff(FFV1Context *f, const AVFrame *pict)
+{   /* Make f->picture the per-sample difference (pict - f->obmc.current_picture + bias), wrapped to the component depth. Returns 0 or a negative AVERROR. */
+    int ret, i, x, y;
+    AVFrame *prev     = f->obmc.current_picture;
+    AVFrame *residual = f->residual.f;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(prev->format);
+    int width  = f->width;
+    int height = f->height;
+    int has_plane[4] = { 0 };
+    const int cw = AV_CEIL_RSHIFT(width, desc->log2_chroma_w);
+    const int ch = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
+
+    if (f->picture.f)
+        av_frame_unref(f->picture.f);
+    if (f->residual.f)
+        av_frame_unref(f->residual.f);
+    if ((ret = av_frame_ref(f->residual.f, pict)) < 0) /* start from a reference to the input frame ... */
+        return ret;
+    if ((ret = av_frame_make_writable(f->residual.f)) < 0) { /* ... and make sure it can be written without touching pict */
+        av_frame_unref(f->residual.f);
+        return ret;
+    }
+
+    for (i = 0; i < desc->nb_components; i++)
+        has_plane[desc->comp[i].plane] = 1;
+
+    for (i = 0; i < desc->nb_components && has_plane[i]; i++)
+        memset(residual->buf[i]->data, 0, residual->buf[i]->size * sizeof(*residual->buf[i]->data));
+
+    for (i = 0; i < desc->nb_components; i++) {
+        const int w1 = (i == 1 || i == 2) ? cw : width; /* components 1 and 2 use the chroma dimensions */
+        const int h1 = (i == 1 || i == 2) ? ch : height;
+
+        const int depth = desc->comp[i].depth;
+        const int max_val = 1 << depth;
+
+        memset(f->p_image_line_buf, 0, 2 * width * sizeof(*f->p_image_line_buf));
+        memset(f->c_image_line_buf, 0, 2 * width * sizeof(*f->c_image_line_buf));
+
+        for (y = 0; y < h1; y++) {
+            memset(f->p_image_line_buf, 0, width * sizeof(*f->p_image_line_buf));
+            memset(f->c_image_line_buf, 0, width * sizeof(*f->c_image_line_buf));
+            av_read_image_line(f->c_image_line_buf,
+                               (void *)pict->data,
+                               pict->linesize,
+                               desc,
+                               0, y, i, w1, 0);
+            av_read_image_line(f->p_image_line_buf,
+                              (void *)prev->data,
+                              prev->linesize,
+                              desc,
+                              0, y, i, w1, 0);
+            for (x = 0; x < w1; ++x) { /* ff_predict_frame() in the decoder undoes this: it adds prev and subtracts the max_val/4 bias */
+                f->c_image_line_buf[x] = (f->c_image_line_buf[x] - f->p_image_line_buf[x] + (max_val >> 2)) & (max_val - 1);
+            }
+            av_write_image_line(f->c_image_line_buf,
+                                residual->data,
+                                residual->linesize,
+                                desc,
+                                0, y, i, w1);
+        }
+    }
+
+    if ((ret = av_frame_ref(f->picture.f, f->residual.f)) < 0) /* f->picture now refers to the residual data */
+        return ret;
+
+    return 0;
+}
+
 static void find_best_state(uint8_t best_state[256][256],
                             const uint8_t one_state[256])
 {
@@ -268,6 +339,162 @@ static inline void put_vlc_symbol(PutBitContext *pb, VlcState *const state,
     update_vlc_state(state, v);
 }
 
+typedef struct RangeEncoderContext { /* private range-coder state attached to ObmcCoderContext.priv_data by init_frame_encoder() */
+    RangeCoder c;                /* private coder; init_frame_encoder() points its output at buffer[] */
+    uint8_t buffer[1024];        /* bytes produced by c until copy_coder() moves them to the real stream */
+    uint8_t state[128 + 32*128]; /* private copy of FFV1Context.block_state */
+    uint8_t *pbbak;              /* slice coder's bytestream position saved by init_frame_encoder() */
+    uint8_t *pbbak_start;        /* slice coder's bytestream_start saved at the same time */
+    int base_bits;               /* baseline subtracted by get_coder_bits() */
+} RangeEncoderContext;
+
+/* Code bit v with context ctx: via the private coder attached to c if there is one, else via the slice coder. */
+static void put_encoder_rac(ObmcCoderContext *c, int ctx, int v)
+{
+    RangeEncoderContext *priv = (RangeEncoderContext *)c->priv_data;
+
+    if (priv) {
+        put_rac(&priv->c, priv->state + ctx, v);
+    } else {
+        FFV1Context *ffv1 = c->avctx->priv_data;
+        put_rac(&ffv1->slice_context[0]->c, ffv1->block_state + ctx, v);
+    }
+}
+
+/* Code integer v (signed when sign != 0) with context ctx, using the same coder selection as put_encoder_rac(). */
+static void put_encoder_symbol(ObmcCoderContext *c, int ctx, int v, int sign)
+{
+    RangeEncoderContext *priv = (RangeEncoderContext *)c->priv_data;
+
+    if (priv) {
+        put_symbol(&priv->c, priv->state + ctx, v, sign);
+    } else {
+        FFV1Context *ffv1 = c->avctx->priv_data;
+        put_symbol(&ffv1->slice_context[0]->c, ffv1->block_state + ctx, v, sign);
+    }
+}
+
+static void ff_ffv1_init_encode_callbacks(ObmcCoderContext *, AVCodecContext *);
+
+static void init_frame_encoder(AVCodecContext *avctx, ObmcCoderContext *c)
+{   /* Attach a fresh RangeEncoderContext to c, seeded from the slice-0 range coder and f->block_state, then install the callbacks. */
+    FFV1Context *f = (FFV1Context *)avctx->priv_data;
+    RangeCoder *const rc = &f->slice_context[0]->c;
+    RangeEncoderContext *coder = av_mallocz(sizeof(RangeEncoderContext)); /* NOTE(review): result is not checked; a failed allocation is dereferenced below and this callback cannot report it */
+    c->priv_data = coder;
+
+    coder->pbbak = rc->bytestream;             /* remember where the real output currently stands */
+    coder->pbbak_start = rc->bytestream_start;
+    coder->base_bits = get_rac_count(rc) - 8*(rc->bytestream - rc->bytestream_start);
+    coder->c = *rc;                            /* continue from the slice coder's current state ... */
+    coder->c.bytestream_start = coder->c.bytestream = coder->buffer; //FIXME end/start? and at the others too (bytestream_end is still the value copied from rc)
+    memcpy(coder->state, f->block_state, sizeof(f->block_state));
+
+    ff_ffv1_init_encode_callbacks(c, avctx);
+}
+
+static void free_coder(ObmcCoderContext *c)
+{   /* Free the private coder state stored in c->priv_data; av_freep() also resets the pointer to NULL. */
+    av_freep(&c->priv_data);
+}
+
+static void copy_coder(ObmcCoderContext *c)
+{   /* Commit the private coder's output: copy its buffered bytes into the real stream and continue the slice coder from its state. Requires c->priv_data. */
+    FFV1Context *f = (FFV1Context *)c->avctx->priv_data;
+    RangeCoder *const rc = &f->slice_context[0]->c;
+    RangeEncoderContext *coder = (RangeEncoderContext *)c->priv_data;
+
+    int len = coder->c.bytestream - coder->c.bytestream_start; /* bytes the private coder has written to coder->buffer */
+
+    memcpy(coder->pbbak, coder->buffer, len); /* NOTE(review): no check that len fits behind pbbak in the destination */
+    *rc = coder->c;
+    rc->bytestream_start= coder->pbbak_start; /* re-point the copied coder at the real output buffer */
+    rc->bytestream= coder->pbbak + len;
+    memcpy(f->block_state, coder->state, sizeof(f->block_state));
+}
+
+static void reset_coder(ObmcCoderContext *c)
+{   /* Load the private coder's range state and context states into the slice coder without copying any buffered bytes. Requires c->priv_data. */
+    FFV1Context *f = (FFV1Context *)c->avctx->priv_data;
+    RangeCoder *const rc = &f->slice_context[0]->c;
+    RangeEncoderContext *coder = (RangeEncoderContext *)c->priv_data;
+
+    *rc = coder->c;
+    rc->bytestream_start= coder->pbbak_start; /* output pointers go back to the values saved by init_frame_encoder() */
+    rc->bytestream= coder->pbbak;
+    memcpy(f->block_state, coder->state, sizeof(f->block_state));
+}
+
+static void put_level_break(ObmcCoderContext *c, int ctx, int v)
+{   /* Callback: code the one-bit flag v with context ctx (decode_q_branch() reads the matching bit to decide leaf vs. split). */
+    put_encoder_rac(c, ctx, v);
+}
+
+static void put_block_type  (struct ObmcCoderContext *c, int ctx, int type)
+{   /* Callback: code the block type as a single bit with context ctx. */
+    put_encoder_rac(c, ctx, type);
+}
+
+static void put_best_ref    (struct ObmcCoderContext *c, int ctx, int best_ref)
+{   /* Callback: code the reference index as an unsigned symbol with context ctx. */
+    put_encoder_symbol(c, ctx, best_ref, 0);
+}
+
+static void put_block_mv    (struct ObmcCoderContext *c, int ctx_mx, int ctx_my, int mx, int my)
+{   /* Callback: code mx then my as signed symbols, each with its own context. */
+    put_encoder_symbol(c, ctx_mx, mx, 1);
+    put_encoder_symbol(c, ctx_my, my, 1);
+}
+
+static void put_block_color (struct ObmcCoderContext *c, int ctx_l, int ctx_cb, int ctx_cr, int l, int cb, int cr)
+{   /* Callback: code l as a signed symbol, followed by cb and cr only when there are more than two planes. */
+    FFV1Context *f = (FFV1Context *)c->avctx->priv_data;
+    put_encoder_symbol(c, ctx_l, l, 1);
+    if (f->obmc.nb_planes > 2) { /* same condition decode_q_branch() uses before reading the chroma values */
+        put_encoder_symbol(c, ctx_cb, cb, 1);
+        put_encoder_symbol(c, ctx_cr, cr, 1);
+    }
+}
+
+static int get_coder_bits(ObmcCoderContext *c)
+{   /* Callback: get_rac_count() of the private coder minus the baseline recorded by init_frame_encoder(). Dereferences c->priv_data unconditionally. */
+    RangeEncoderContext *coder = (RangeEncoderContext *)c->priv_data;
+    return get_rac_count(&coder->c) - coder->base_bits;
+}
+
+/* Bytes between bytestream and bytestream_end of the private coder if c has one, else of the slice coder. */
+static int get_coder_available_bytes(ObmcCoderContext *c)
+{
+    const RangeCoder *active;
+    if (c->priv_data)
+        active = &((RangeEncoderContext *)c->priv_data)->c;
+    else
+        active = &((FFV1Context *)c->avctx->priv_data)->slice_context[0]->c;
+    return active->bytestream_end - active->bytestream;
+}
+
+/* Bind the OBMC coder context c to avctx and install FFV1's range-coder callbacks into it. */
+static void ff_ffv1_init_encode_callbacks(ObmcCoderContext *c, AVCodecContext *avctx)
+{
+    FFV1Context *f = avctx->priv_data; /* take it from the argument: c->avctx is only assigned below and may still be unset */
+    av_assert0(sizeof(f->block_state) >= 256);
+
+    c->avctx            = avctx;
+    c->put_level_break  = put_level_break;
+    c->put_block_type   = put_block_type;
+    c->put_block_color  = put_block_color;
+    c->put_best_ref     = put_best_ref;
+    c->put_block_mv     = put_block_mv;
+
+    c->init_frame_coder = init_frame_encoder;
+    c->reset_coder      = reset_coder;
+    c->copy_coder       = copy_coder;
+    c->free             = free_coder;
+
+    c->get_bits         = get_coder_bits;
+    c->available_bytes  = get_coder_available_bytes;
+}
+
 #define TYPE int16_t
 #define RENAME(name) name
 #include "ffv1enc_template.c"
@@ -388,6 +615,32 @@ static void write_header(FFV1Context *f)
     }
 }
 
+/* Write the per-frame OBMC header fields through slice context 0's range coder. */
+static void write_p_header(FFV1Context *f)
+{
+    RangeCoder *const rc = &f->slice_context[0]->c;
+    uint8_t state[CONTEXT_SIZE];
+    int plane, tap;
+
+    memset(state, 128, sizeof(state));
+
+    if (f->key_frame) {
+        memset(f->block_state, MID_STATE, sizeof(f->block_state));
+        put_symbol(rc, state, f->obmc.max_ref_frames-1, 0);
+    } else { //FIXME update_mc
+        for (plane = 0; plane < FFMIN(f->obmc.nb_planes, 2); plane++) {
+            PlaneObmc *p = &f->obmc.plane[plane];
+            put_rac(rc, state, p->diag_mc);
+            put_symbol(rc, state, p->htaps/2-1, 0);
+            for (tap = p->htaps/2; tap; tap--)
+                put_symbol(rc, state, FFABS(p->hcoeff[tap]), 0);
+        }
+    }
+
+    put_symbol(rc, state, f->obmc.mv_scale, 0);
+    put_symbol(rc, state, f->obmc.block_max_depth, 0);
+}
+
 static int write_extradata(FFV1Context *f)
 {
     RangeCoder *const c = &f->c;
@@ -410,9 +663,9 @@ static int write_extradata(FFV1Context *f)
     put_symbol(c, state, f->version, 0);
     if (f->version > 2) {
         if (f->version == 3) {
-            f->micro_version = 4;
+            f->micro_version = 4 + f->p_frame;
         } else if (f->version == 4)
-            f->micro_version = 2;
+            f->micro_version = 2 + f->p_frame;
         put_symbol(c, state, f->micro_version, 0);
     }
 
@@ -599,6 +852,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             return AVERROR_INVALIDDATA;
         }
         s->version = FFMAX(s->version, 1);
+        s->p_frame = 0;
     case AV_PIX_FMT_GRAY8:
     case AV_PIX_FMT_YA8:
     case AV_PIX_FMT_YUV444P:
@@ -619,6 +873,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             s->bits_per_raw_sample = 8;
         break;
     case AV_PIX_FMT_RGB32:
+        s->p_frame = 0;
         s->colorspace = 1;
         s->transparency = 1;
         s->chroma_planes = 1;
@@ -635,6 +890,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         }
         break;
     case AV_PIX_FMT_0RGB32:
+        s->p_frame = 0;
         s->colorspace = 1;
         s->chroma_planes = 1;
         s->bits_per_raw_sample = 8;
@@ -666,12 +922,14 @@ FF_ENABLE_DEPRECATION_WARNINGS
             }
         }
         s->version = FFMAX(s->version, 1);
+        s->p_frame = 0;
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
         return AVERROR(ENOSYS);
     }
     av_assert0(s->bits_per_raw_sample >= 8);
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
 
     if (s->bits_per_raw_sample > 8) {
         if (s->ac == AC_GOLOMB_RICE) {
@@ -704,6 +962,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
             s->state_transition[i] = c.one_state[i];
     }
 
+    if (avctx->width % 16 || avctx->height % 16) {
+        s->p_frame = 0;
+    }
+
     for (i = 0; i < 256; i++) {
         s->quant_table_count = 2;
         if (s->bits_per_raw_sample <= 8) {
@@ -886,6 +1148,9 @@ slices_ok:
             }
     }
 
+    ff_obmc_encode_init(&s->obmc, avctx);
+    ff_ffv1_init_encode_callbacks(&s->obmc.obmc_coder, avctx);
+
     return 0;
 }
 
@@ -1086,12 +1351,20 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *pict, int *got_packet)
 {
     FFV1Context *f      = avctx->priv_data;
+    if (f->p_frame) {
+        if (f->last_picture.f)
+            av_frame_unref(f->last_picture.f);
+        FFSWAP(ThreadFrame, f->picture, f->last_picture);
+    }
     RangeCoder *const c = &f->slice_context[0]->c;
     AVFrame *const p    = f->picture.f;
     int used_count      = 0;
     uint8_t keystate    = 128;
     uint8_t *buf_p;
-    int i, ret;
+    AVFrame *pic = NULL;
+    const int width  = f->avctx->width;
+    const int height = f->avctx->height;
+    int plane_index, i, ret;
     int64_t maxsize =   AV_INPUT_BUFFER_MIN_SIZE
                       + avctx->width*avctx->height*37LL*4;
 
@@ -1144,12 +1417,35 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if (f->version > 3)
         maxsize = AV_INPUT_BUFFER_MIN_SIZE + avctx->width*avctx->height*3LL*4;
 
+    if (f->p_frame) {
+        maxsize += f->obmc.b_width*f->obmc.b_height*MB_SIZE*MB_SIZE*3;
+    }
+
     if ((ret = ff_alloc_packet2(avctx, pkt, maxsize, 0)) < 0)
         return ret;
 
     ff_init_range_encoder(c, pkt->data, pkt->size);
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
+    if (f->p_frame) {
+        av_frame_copy(f->obmc.input_picture, pict);
+        for (i=0; i < f->obmc.nb_planes; i++)
+        {
+            int hshift = i ? f->chroma_h_shift : 0;
+            int vshift = i ? f->chroma_v_shift : 0;
+            f->obmc.mpvencdsp.draw_edges(f->obmc.input_picture->data[i], f->obmc.input_picture->linesize[i],
+                                    AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift),
+                                    EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
+                                    EDGE_TOP | EDGE_BOTTOM);
+        }
+        emms_c();
+        pic = f->obmc.input_picture;
+        pic->pict_type = pict->pict_type;
+        pic->quality = pict->quality;
+
+        f->obmc.m.picture_number= avctx->frame_number;
+    }
+
     av_frame_unref(p);
     if ((ret = av_frame_ref(p, pict)) < 0)
         return ret;
@@ -1162,11 +1458,64 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
         put_rac(c, &keystate, 1);
         f->key_frame = 1;
+        f->obmc.keyframe = 1;
         f->gob_count++;
         write_header(f);
     } else {
         put_rac(c, &keystate, 0);
         f->key_frame = 0;
+        f->obmc.keyframe = 0;
+    }
+
+    if (f->p_frame) {
+        write_p_header(f);
+
+        f->obmc.m.pict_type = pic->pict_type = f->key_frame ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+        ff_obmc_pre_encode_frame(&f->obmc, avctx, pict);
+
+        ff_obmc_common_init_after_header(&f->obmc);
+
+        f->obmc.m.misc_bits = 8*(c->bytestream - c->bytestream_start);
+        ff_obmc_encode_blocks(&f->obmc, 1);
+        f->obmc.m.mv_bits = 8*(c->bytestream - c->bytestream_start) - f->obmc.m.misc_bits;
+
+        for(plane_index=0; plane_index < f->obmc.nb_planes; plane_index++){
+            PlaneObmc *p= &f->obmc.plane[plane_index];
+            int w= p->width;
+            int h= p->height;
+
+            if(pic->pict_type == AV_PICTURE_TYPE_I) {
+                av_frame_copy(f->obmc.current_picture, pict);
+                break;
+            } else {
+                memset(f->obmc.spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
+                predict_plane(&f->obmc, f->obmc.spatial_idwt_buffer, plane_index, 1);
+            }
+        }
+
+        if (!f->key_frame) {
+            if ((ret = ff_frame_diff(f, pict)) < 0) {
+                return ret;
+            }
+            av_frame_copy(f->obmc.current_picture, pict);
+        }
+
+        ff_obmc_release_buffer(&f->obmc);
+
+        f->obmc.current_picture->coded_picture_number = avctx->frame_number;
+        f->obmc.current_picture->pict_type = pic->pict_type;
+        f->obmc.current_picture->quality = pic->quality;
+        f->obmc.m.frame_bits = 8*(c->bytestream - c->bytestream_start);
+        f->obmc.m.p_tex_bits = f->obmc.m.frame_bits - f->obmc.m.misc_bits - f->obmc.m.mv_bits;
+        f->obmc.m.current_picture.f->display_picture_number =
+        f->obmc.m.current_picture.f->coded_picture_number   = avctx->frame_number;
+        f->obmc.m.current_picture.f->quality                = pic->quality;
+        f->obmc.m.total_bits += 8*(c->bytestream - c->bytestream_start);
+
+        f->obmc.m.last_pict_type = f->obmc.m.pict_type;
+
+        emms_c();
     }
 
     if (f->ac == AC_RANGE_CUSTOM_TAB) {
@@ -1232,19 +1581,34 @@ FF_ENABLE_DEPRECATION_WARNINGS
     pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
     *got_packet = 1;
 
+    if (f->p_frame) {
+        if (f->picture.f)
+            av_frame_unref(f->picture.f);
+        if ((ret = av_frame_ref(f->picture.f, pict)) < 0)
+            return ret;
+        if (f->last_picture.f)
+            av_frame_unref(f->last_picture.f);
+    }
+
     return 0;
 }
 
 static av_cold int encode_close(AVCodecContext *avctx)
 {
+    FFV1Context *f = avctx->priv_data;
+
     ff_ffv1_close(avctx);
+    av_frame_free(&f->obmc.input_picture); /* NOTE(review): assumes ff_ffv1_close() leaves priv_data and obmc.input_picture valid -- confirm */
     return 0;
 }
 
 #define OFFSET(x) offsetof(FFV1Context, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
+    FF_MPV_COMMON_OPTS
+    { "iter",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ITER }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" },
     { "slicecrc", "Protect slices with CRCs", OFFSET(ec), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
+    { "pframe", "Use P frames", OFFSET(p_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "coder", "Coder type", OFFSET(ac), AV_OPT_TYPE_INT,
             { .i64 = 0 }, -2, 2, VE, "coder" },
         { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST,
@@ -1271,6 +1635,8 @@ static const AVClass ffv1_class = {
 #if FF_API_CODER_TYPE
 static const AVCodecDefault ffv1_defaults[] = {
     { "coder", "-1" },
+    { "me_method", "iter" }, /* NOTE(review): only compiled under the enclosing #if FF_API_CODER_TYPE */
+    { "flags", "+qpel+mv4" }, /* NOTE(review): a codec-wide default, not conditional on the "pframe" option -- confirm intended */
     { NULL },
 };
 #endif
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index dc3e6f8..5f1a223 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -627,7 +627,9 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
         c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
         c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
 #endif
-        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
+        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) &&
+            avctx->codec_id != AV_CODEC_ID_SNOW &&
+            avctx->codec_id != AV_CODEC_ID_FFV1) {
             c->sad[0]        = ff_sad16_sse2;
             c->pix_abs[0][0] = ff_sad16_sse2;
             c->pix_abs[0][1] = ff_sad16_x2_sse2;
-- 
2.7.4 (Apple Git-66)