On Fri, Dec 19, 2014 at 02:41:05PM +0000, john.c.harri...@intel.com wrote:
> From: John Harrison <john.c.harri...@intel.com>
> 
> The outstanding lazy request mechanism does not really work well with
> a GPU scheduler. The scheduler expects each work packet, i.e. each
> request structure, to be a complete entity belonging to one and only
> one submitter, whereas the lazy mechanism allows work from many
> different places to be lumped together into a single request. It also
> means that work floats around the system unowned and untracked at
> various points in time. All of this causes headaches for the
> scheduler.
> 
> This patch removes the need for the outstanding lazy request. It
> converts all functions that would otherwise rely on the OLR to manage
> the request explicitly: either by allocating, passing and submitting
> the request themselves if they are the top level owner, or by taking
> the request in as a parameter rather than pulling it out of the magic
> global variable if they are a client. The OLR itself is left in, along
> with a bunch of sanity check asserts that it matches the request being
> passed in as a parameter. However, it should now be safe to remove it
> completely.
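> 
> As a rough illustration only (not intended as the final API), a top
> level owner under this scheme looks something like:
> 
>     struct drm_i915_gem_request *req;
>     int ret;
> 
>     /* Explicitly allocate the request up front... */
>     ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
>     if (ret)
>         return ret;
> 
>     /* ...pass it to any clients doing work on the owner's behalf... */
>     ret = i915_switch_context(req, ring->default_context);
>     if (ret) {
>         i915_gem_request_unreference(req);
>         return ret;
>     }
> 
>     /* ...and submit it once the work packet is complete. */
>     return i915_add_request_no_flush(req);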
> 
> Note that this patch is not intended as a final, shipping,
> isn't-it-gorgeous end product. It is merely a quick hack that I went
> through as the simplest way to work out what the real sequence of
> events and the real ownership of work is in certain circumstances,
> most particularly for display and overlay work. However, I would like
> to get agreement that this is a good direction to go in and that
> removing the OLR would be a good thing. Or, to put it another way: is
> it worth me trying to break this patch into a set of manageable items,
> or do I just abandon it and give up?
> 
> Note also that the patch is based on a tree including the scheduler
> prep-work patches posted earlier. So it will not apply to a clean
> nightly tree.
> 
> Signed-off-by: John Harrison <john.c.harri...@intel.com>

Summarizing offline discussions from a meeting about John's RFC here:

I definitely like where this is going: using requests as the primary
object to submit work to the GPU should simplify our code a lot, and
getting rid of the OLR will remove a lot of the accidental complexity
in GEM. I also looked at some of the details here with John,
specifically how he shuffles the init_hw functions around a bit so
that a single request wraps all the ring init (default ctx, ppgtt, l3
remapping).
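
As a sketch of how I understood the init path after this patch (purely
illustrative, reusing the helpers the patch itself introduces):

    for_each_ring(ring, dev_priv, i) {
        struct drm_i915_gem_request *req;
        int slice;

        /* One request wraps all of the per-ring init work. */
        ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
        if (ret)
            return ret;

        if (ring->id == RCS)
            for (slice = 0; slice < NUM_L3_SLICES(dev); slice++)
                i915_gem_l3_remap(req, slice);      /* l3 remapping */

        ret = i915_gem_context_enable(req);         /* default ctx */
        if (!ret)
            ret = i915_ppgtt_init_ring(req);        /* ppgtt */
        if (ret) {
            i915_gem_request_unreference(req);
            return ret;
        }

        ret = i915_add_request_no_flush(req);       /* single submission */
        if (ret)
            return ret;
    }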

For the details it would be good to discuss this all with Chris, since
he has a working proof of concept for this, just to make sure you know
about all the dragons potentially lurking around.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h              |   29 ++--
>  drivers/gpu/drm/i915/i915_gem.c              |  182 ++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_context.c      |   69 +++-----
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   62 +++----
>  drivers/gpu/drm/i915/i915_gem_gtt.c          |   64 ++++----
>  drivers/gpu/drm/i915/i915_gem_gtt.h          |    3 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.c |   10 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.h |    2 +-
>  drivers/gpu/drm/i915/intel_display.c         |   68 ++++----
>  drivers/gpu/drm/i915/intel_lrc.c             |  145 +++++++++-------
>  drivers/gpu/drm/i915/intel_lrc.h             |    8 +-
>  drivers/gpu/drm/i915/intel_overlay.c         |   58 ++++---
>  drivers/gpu/drm/i915/intel_pm.c              |   33 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  228 ++++++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |   38 ++---
>  15 files changed, 553 insertions(+), 446 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 511f55f..7b4309e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -513,7 +513,7 @@ struct drm_i915_display_funcs {
>       int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
>                         struct drm_framebuffer *fb,
>                         struct drm_i915_gem_object *obj,
> -                       struct intel_engine_cs *ring,
> +                       struct drm_i915_gem_request *req,
>                         uint32_t flags);
>       void (*update_primary_plane)(struct drm_crtc *crtc,
>                                    struct drm_framebuffer *fb,
> @@ -1796,7 +1796,8 @@ struct drm_i915_private {
>       /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>       struct {
>               int (*alloc_request)(struct intel_engine_cs *ring,
> -                                  struct intel_context *ctx);
> +                                  struct intel_context *ctx,
> +                                  struct drm_i915_gem_request **req_out);
>               int (*do_execbuf)(struct i915_execbuffer_params *params,
>                                 struct drm_i915_gem_execbuffer2 *args,
>                                 struct list_head *vmas);
> @@ -2511,10 +2512,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>                            struct drm_file *file_priv);
>  void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> -                                     struct intel_engine_cs *ring);
> +                                     struct drm_i915_gem_request *req);
>  void i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>                                        struct drm_file *file,
> -                                      struct intel_engine_cs *ring,
> +                                      struct drm_i915_gem_request *req,
>                                        struct drm_i915_gem_object *obj);
>  void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj);
>  int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe,
> @@ -2609,9 +2610,9 @@ int __must_check __i915_mutex_lock_interruptible(struct drm_device *dev, const c
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  #endif
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                      struct intel_engine_cs *to, bool add_request);
> +                      struct drm_i915_gem_request *to_req);
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -                          struct intel_engine_cs *ring);
> +                          struct drm_i915_gem_request *req);
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>                        struct drm_device *dev,
>                        struct drm_mode_create_dumb *args);
> @@ -2678,19 +2679,19 @@ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_init(struct drm_device *dev);
>  int i915_gem_init_rings(struct drm_device *dev);
>  int __must_check i915_gem_init_hw(struct drm_device *dev);
> -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
> +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>  void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
> -int __i915_add_request(struct intel_engine_cs *ring,
> +int __i915_add_request(struct drm_i915_gem_request *req,
>                      struct drm_file *file,
>                      struct drm_i915_gem_object *batch_obj,
>                      bool flush_caches);
> -#define i915_add_request(ring) \
> -     __i915_add_request(ring, NULL, NULL, true)
> -#define i915_add_request_no_flush(ring) \
> -     __i915_add_request(ring, NULL, NULL, false)
> +#define i915_add_request(req) \
> +     __i915_add_request(req, NULL, NULL, true)
> +#define i915_add_request_no_flush(req) \
> +     __i915_add_request(req, NULL, NULL, false)
>  int __i915_wait_request(struct drm_i915_gem_request *req,
>                       unsigned reset_counter,
>                       bool interruptible,
> @@ -2810,9 +2811,9 @@ int __must_check i915_gem_context_init(struct drm_device *dev);
>  void i915_gem_context_fini(struct drm_device *dev);
>  void i915_gem_context_reset(struct drm_device *dev);
>  int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
> -int i915_gem_context_enable(struct drm_i915_private *dev_priv);
> +int i915_gem_context_enable(struct drm_i915_gem_request *req);
>  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
> -int i915_switch_context(struct intel_engine_cs *ring,
> +int i915_switch_context(struct drm_i915_gem_request *req,
>                       struct intel_context *to);
>  struct intel_context *
>  i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1d2cbfb..dbfb4e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1178,7 +1178,7 @@ i915_gem_check_olr(struct drm_i915_gem_request *req)
>  
>       ret = 0;
>       if (req == req->ring->outstanding_lazy_request)
> -             ret = i915_add_request(req->ring);
> +             ret = i915_add_request(req);
>  
>       return ret;
>  }
> @@ -2294,17 +2294,16 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  
>  static void
>  i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> -                            struct intel_engine_cs *ring)
> +                            struct drm_i915_gem_request *req)
>  {
> -     struct drm_i915_gem_request *req;
> -     struct intel_engine_cs *old_ring;
> +     struct intel_engine_cs *new_ring, *old_ring;
>  
> -     BUG_ON(ring == NULL);
> +     BUG_ON(req == NULL);
>  
> -     req = intel_ring_get_request(ring);
> +     new_ring = i915_gem_request_get_ring(req);
>       old_ring = i915_gem_request_get_ring(obj->last_read_req);
>  
> -     if (old_ring != ring && obj->last_write_req) {
> +     if (old_ring != new_ring && obj->last_write_req) {
>               /* Keep the request relative to the current ring */
>               i915_gem_request_assign(&obj->last_write_req, req);
>       }
> @@ -2315,17 +2314,17 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>               obj->active = 1;
>       }
>  
> -     list_move_tail(&obj->ring_list, &ring->active_list);
> +     list_move_tail(&obj->ring_list, &new_ring->active_list);
>  
> -     //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, ring->name, obj, req);
> +     //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, new_ring->name, obj, req);
>       i915_gem_request_assign(&obj->last_read_req, req);
>  }
>  
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -                          struct intel_engine_cs *ring)
> +                          struct drm_i915_gem_request *req)
>  {
>       list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -     return i915_gem_object_move_to_active(vma->obj, ring);
> +     return i915_gem_object_move_to_active(vma->obj, req);
>  }
>  
>  static void
> @@ -2440,26 +2439,35 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>       return 0;
>  }
>  
> -int __i915_add_request(struct intel_engine_cs *ring,
> +int __i915_add_request(struct drm_i915_gem_request *request,
>                      struct drm_file *file,
>                      struct drm_i915_gem_object *obj,
>                      bool flush_caches)
>  {
> -     struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -     struct drm_i915_gem_request *request;
> +     struct intel_engine_cs *ring;
> +     struct drm_i915_private *dev_priv;
>       struct intel_ringbuffer *ringbuf;
>       u32 request_ring_position, request_start;
>       int ret;
>  
> -     request = ring->outstanding_lazy_request;
> +     /*printk( KERN_ERR "<%s> request %c %d:%d, OLR %c %d:%d\n",
> +             request ? request->ring->name : "???",
> +             request ? '=' : '?',
> +             request ? request->uniq : -1,
> +             request ? request->seqno : 0,
> +             request->ring->outstanding_lazy_request ? '=' : '?',
> +             request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->uniq : -1,
> +             request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->seqno : 0);*/
> +     //dump_stack();
> +
>       if (WARN_ON(request == NULL))
>               return -ENOMEM;
>  
> -     if (i915.enable_execlists) {
> -             struct intel_context *ctx = request->ctx;
> -             ringbuf = ctx->engine[ring->id].ringbuf;
> -     } else
> -             ringbuf = ring->buffer;
> +     ring = request->ring;
> +     dev_priv = ring->dev->dev_private;
> +     ringbuf = request->ringbuf;
> +
> +     WARN_ON(request != ring->outstanding_lazy_request);
>  
>       request_start = intel_ring_get_tail(ringbuf);
>       /*
> @@ -2471,9 +2479,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
>        */
>       if (flush_caches) {
>               if (i915.enable_execlists)
> -                     ret = logical_ring_flush_all_caches(ringbuf);
> +                     ret = logical_ring_flush_all_caches(request);
>               else
> -                     ret = intel_ring_flush_all_caches(ring);
> +                     ret = intel_ring_flush_all_caches(request);
>               if (ret)
>                       return ret;
>       }
> @@ -2488,9 +2496,9 @@ int __i915_add_request(struct intel_engine_cs *ring,
>       request_ring_position = intel_ring_get_tail(ringbuf);
>  
>       if (i915.enable_execlists)
> -             ret = ring->emit_request(ringbuf);
> +             ret = ring->emit_request(request);
>       else
> -             ret = ring->add_request(ring);
> +             ret = ring->add_request(request);
>       if (ret)
>               return ret;
>  
> @@ -2504,7 +2512,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>        * inactive_list and lose its active reference. Hence we do not need
>        * to explicitly hold another reference here.
>        */
> -     request->batch_obj = obj;
> +     if (obj)
> +             request->batch_obj = obj;
>  
>       if (!i915.enable_execlists) {
>               /* Hold a reference to the current context so that we can inspect
> @@ -2744,6 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>  #endif
>  
>       /* This may not have been flushed before the reset, so clean it now */
> +     WARN_ON(ring->outstanding_lazy_request);
>       i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>  }
>  
> @@ -3114,8 +3124,6 @@ out:
>   *
>   * @obj: object which may be in use on another ring.
>   * @to: ring we wish to use the object on. May be NULL.
> - * @add_request: do we need to add a request to track operations
> - *    submitted on ring with sync_to function
>   *
>   * This code is meant to abstract object synchronization with the GPU.
>   * Calling with NULL implies synchronizing the object with the CPU
> @@ -3125,8 +3133,9 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                  struct intel_engine_cs *to, bool add_request)
> +                  struct drm_i915_gem_request *to_req)
>  {
> +     struct intel_engine_cs *to = to_req->ring;
>       struct intel_engine_cs *from;
>       u32 seqno;
>       int ret, idx;
> @@ -3152,7 +3161,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>               return ret;
>  
>       trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
> -     ret = to->semaphore.sync_to(to, from, seqno);
> +     ret = to->semaphore.sync_to(to_req, from, seqno);
>       if (!ret) {
>               /* We use last_read_req because sync_to()
>                * might have just caused seqno wrap under
> @@ -3160,8 +3169,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>                */
>               from->semaphore.sync_seqno[idx] =
>                               i915_gem_request_get_seqno(obj->last_read_req);
> -             if (add_request)
> -                     i915_add_request_no_flush(to);
>       }
>  
>       return ret;
> @@ -3266,18 +3273,23 @@ int i915_gpu_idle(struct drm_device *dev)
>       /* Flush everything onto the inactive list. */
>       for_each_ring(ring, dev_priv, i) {
>               if (!i915.enable_execlists) {
> -                     ret = i915_switch_context(ring, ring->default_context);
> +                     struct drm_i915_gem_request *req;
> +
> +                     ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
>                       if (ret)
>                               return ret;
> -             }
>  
> -             /* Make sure the context switch (if one actually happened)
> -              * gets wrapped up and finished rather than hanging around
> -              * and confusing things later. */
> -             if (ring->outstanding_lazy_request) {
> -                     ret = i915_add_request(ring);
> -                     if (ret)
> +                     ret = i915_switch_context(req, ring->default_context);
> +                     if (ret) {
> +                             i915_gem_request_unreference(req);
>                               return ret;
> +                     }
> +
> +                     ret = i915_add_request_no_flush(req);
> +                     if (ret) {
> +                             i915_gem_request_unreference(req);
> +                             return ret;
> +                     }
>               }
>  
>               ret = intel_ring_idle(ring);
> @@ -4099,8 +4111,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>       bool was_pin_display;
>       int ret;
>  
> -     if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
> -             ret = i915_gem_object_sync(obj, pipelined, true);
> +     if (pipelined && (pipelined != i915_gem_request_get_ring(obj->last_read_req))) {
> +             struct drm_i915_private *dev_priv = pipelined->dev->dev_private;
> +             struct drm_i915_gem_request *req;
> +
> +             ret = dev_priv->gt.alloc_request(pipelined, pipelined->default_context, &req);
> +             if (ret)
> +                     return ret;
> +
> +             ret = i915_gem_object_sync(obj, req);
> +             if (ret)
> +                     return ret;
> +
> +             ret = i915_add_request_no_flush(req);
>               if (ret)
>                       return ret;
>       }
> @@ -4771,8 +4794,9 @@ err:
>       return ret;
>  }
>  
> -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
> +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_device *dev = ring->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
> @@ -4782,7 +4806,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
>       if (!HAS_L3_DPF(dev) || !remap_info)
>               return 0;
>  
> -     ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
> +     ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
>       if (ret)
>               return ret;
>  
> @@ -4962,37 +4986,67 @@ i915_gem_init_hw(struct drm_device *dev)
>        */
>       init_unused_rings(dev);
>  
> +     BUG_ON(!dev_priv->ring[RCS].default_context);
> +
> +     ret = i915_ppgtt_init_hw(dev);
> +     if (ret) {
> +             DRM_ERROR("PPGTT enable failed %d\n", ret);
> +             i915_gem_cleanup_ringbuffer(dev);
> +             return ret;
> +     }
> +
>       for_each_ring(ring, dev_priv, i) {
> +             struct drm_i915_gem_request *req;
> +
>               ret = ring->init_hw(ring);
>               if (ret)
>                       return ret;
> -     }
>  
> -     for (i = 0; i < NUM_L3_SLICES(dev); i++)
> -             i915_gem_l3_remap(&dev_priv->ring[RCS], i);
> +             if (!ring->default_context)
> +                     continue;
>  
> -     /*
> -      * XXX: Contexts should only be initialized once. Doing a switch to the
> -      * default context switch however is something we'd like to do after
> -      * reset or thaw (the latter may not actually be necessary for HW, but
> -      * goes with our code better). Context switching requires rings (for
> -      * the do_switch), but before enabling PPGTT. So don't move this.
> -      */
> -     ret = i915_gem_context_enable(dev_priv);
> -     if (ret && ret != -EIO) {
> -             DRM_ERROR("Context enable failed %d\n", ret);
> -             i915_gem_cleanup_ringbuffer(dev);
> +             ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +             if (ret)
> +                     return ret;
>  
> -             return ret;
> -     }
> +             if (ring->id == RCS) {
> +                     for (i = 0; i < NUM_L3_SLICES(dev); i++)
> +                             i915_gem_l3_remap(req, i);
> +             }
>  
> -     ret = i915_ppgtt_init_hw(dev);
> -     if (ret && ret != -EIO) {
> -             DRM_ERROR("PPGTT enable failed %d\n", ret);
> -             i915_gem_cleanup_ringbuffer(dev);
> +             /*
> +              * XXX: Contexts should only be initialized once. Doing a switch to the
> +              * default context switch however is something we'd like to do after
> +              * reset or thaw (the latter may not actually be necessary for HW, but
> +              * goes with our code better). Context switching requires rings (for
> +              * the do_switch), but before enabling PPGTT. So don't move this.
> +              */
> +             ret = i915_gem_context_enable(req);
> +             if (ret && ret != -EIO) {
> +                     DRM_ERROR("Context enable failed %d\n", ret);
> +                     i915_gem_request_unreference(req);
> +                     i915_gem_cleanup_ringbuffer(dev);
> +
> +                     return ret;
> +             }
> +
> +             ret = i915_ppgtt_init_ring(req);
> +             if (ret && ret != -EIO) {
> +                     DRM_ERROR("PPGTT enable failed %d\n", ret);
> +                     i915_gem_request_unreference(req);
> +                     i915_gem_cleanup_ringbuffer(dev);
> +                     return ret;
> +             }
> +
> +             ret = i915_add_request_no_flush(req);
> +             if (ret) {
> +                     DRM_ERROR("Add request failed: %d\n", ret);
> +                     i915_gem_request_unreference(req);
> +                     i915_gem_cleanup_ringbuffer(dev);
> +                     return ret;
> +             }
>       }
>  
> -     return ret;
> +     return 0;
>  }
>  
>  int i915_gem_init(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index c5e1bfc..72e280b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -401,41 +401,23 @@ void i915_gem_context_fini(struct drm_device *dev)
>       i915_gem_context_unreference(dctx);
>  }
>  
> -int i915_gem_context_enable(struct drm_i915_private *dev_priv)
> +int i915_gem_context_enable(struct drm_i915_gem_request *req)
>  {
> -     struct intel_engine_cs *ring;
> -     int ret, i;
> -
> -     BUG_ON(!dev_priv->ring[RCS].default_context);
> +     struct intel_engine_cs *ring = req->ring;
> +     int ret;
>  
>       if (i915.enable_execlists) {
> -             for_each_ring(ring, dev_priv, i) {
> -                     if (ring->init_context) {
> -                             ret = ring->init_context(ring,
> -                                             ring->default_context);
> -                             if (ret) {
> -                                     DRM_ERROR("ring init context: %d\n",
> -                                                     ret);
> -                                     return ret;
> -                             }
> -                     }
> -             }
> +             if (ring->init_context == NULL)
> +                     return 0;
>  
> +             ret = ring->init_context(req, ring->default_context);
>       } else
> -             for_each_ring(ring, dev_priv, i) {
> -                     ret = i915_switch_context(ring, ring->default_context);
> -                     if (ret)
> -                             return ret;
> -
> -                     /* Make sure the context switch (if one actually happened)
> -                      * gets wrapped up and finished rather than hanging around
> -                      * and confusing things later. */
> -                     if (ring->outstanding_lazy_request) {
> -                             ret = i915_add_request_no_flush(ring);
> -                             if (ret)
> -                                     return ret;
> -                     }
> -             }
> +             ret = i915_switch_context(req, ring->default_context);
> +
> +     if (ret) {
> +             DRM_ERROR("ring init context: %d\n", ret);
> +             return ret;
> +     }
>  
>       return 0;
>  }
> @@ -488,10 +470,11 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
>  }
>  
>  static inline int
> -mi_set_context(struct intel_engine_cs *ring,
> +mi_set_context(struct drm_i915_gem_request *req,
>              struct intel_context *new_context,
>              u32 hw_flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       u32 flags = hw_flags | MI_MM_SPACE_GTT;
>       int ret;
>  
> @@ -501,7 +484,7 @@ mi_set_context(struct intel_engine_cs *ring,
>        * itlb_before_ctx_switch.
>        */
>       if (IS_GEN6(ring->dev)) {
> -             ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
> +             ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
>               if (ret)
>                       return ret;
>       }
> @@ -510,7 +493,7 @@ mi_set_context(struct intel_engine_cs *ring,
>       if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
>               flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -540,9 +523,10 @@ mi_set_context(struct intel_engine_cs *ring,
>       return ret;
>  }
>  
> -static int do_switch(struct intel_engine_cs *ring,
> +static int do_switch(struct drm_i915_gem_request *req,
>                    struct intel_context *to)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_i915_private *dev_priv = ring->dev->dev_private;
>       struct intel_context *from = ring->last_context;
>       u32 hw_flags = 0;
> @@ -577,7 +561,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  
>       if (to->ppgtt) {
>               trace_switch_mm(ring, to);
> -             ret = to->ppgtt->switch_mm(to->ppgtt, ring);
> +             ret = to->ppgtt->switch_mm(to->ppgtt, req);
>               if (ret)
>                       goto unpin_out;
>       }
> @@ -608,7 +592,7 @@ static int do_switch(struct intel_engine_cs *ring,
>       if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
>               hw_flags |= MI_RESTORE_INHIBIT;
>  
> -     ret = mi_set_context(ring, to, hw_flags);
> +     ret = mi_set_context(req, to, hw_flags);
>       if (ret)
>               goto unpin_out;
>  
> @@ -616,7 +600,7 @@ static int do_switch(struct intel_engine_cs *ring,
>               if (!(to->remap_slice & (1<<i)))
>                       continue;
>  
> -             ret = i915_gem_l3_remap(ring, i);
> +             ret = i915_gem_l3_remap(req, i);
>               /* If it failed, try again next round */
>               if (ret)
>                       DRM_DEBUG_DRIVER("L3 remapping failed\n");
> @@ -632,7 +616,7 @@ static int do_switch(struct intel_engine_cs *ring,
>        */
>       if (from != NULL) {
>               from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> -             i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
> +             i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
>               /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>                * whole damn pipeline, we don't need to explicitly mark the
>                * object dirty. The only exception is that the context must be
> @@ -658,12 +642,12 @@ done:
>  
>       if (uninitialized) {
>               if (ring->init_context) {
> -                     ret = ring->init_context(ring, to);
> +                     ret = ring->init_context(req, to);
>                       if (ret)
>                               DRM_ERROR("ring init context: %d\n", ret);
>               }
>  
> -             ret = i915_gem_render_state_init(ring);
> +             ret = i915_gem_render_state_init(req);
>               if (ret)
>                       DRM_ERROR("init render state: %d\n", ret);
>       }
> @@ -690,9 +674,10 @@ unpin_out:
>   * switched by writing to the ELSP and requests keep a reference to their
>   * context.
>   */
> -int i915_switch_context(struct intel_engine_cs *ring,
> +int i915_switch_context(struct drm_i915_gem_request *req,
>                       struct intel_context *to)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  
>       WARN_ON(i915.enable_execlists);
> @@ -708,7 +693,7 @@ int i915_switch_context(struct intel_engine_cs *ring,
>               return 0;
>       }
>  
> -     return do_switch(ring, to);
> +     return do_switch(req, to);
>  }
>  
>  static bool contexts_enabled(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index ca31673..5caa2a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -822,7 +822,7 @@ err:
>  }
>  
>  static int
> -i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
> +i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>                               struct list_head *vmas)
>  {
>       struct i915_vma *vma;
> @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  
>       list_for_each_entry(vma, vmas, exec_list) {
>               struct drm_i915_gem_object *obj = vma->obj;
> -             ret = i915_gem_object_sync(obj, ring, false);
> +             ret = i915_gem_object_sync(obj, req);
>               if (ret)
>                       return ret;
>  
> @@ -843,7 +843,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>       }
>  
>       if (flush_chipset)
> -             i915_gem_chipset_flush(ring->dev);
> +             i915_gem_chipset_flush(req->ring->dev);
>  
>       if (flush_domains & I915_GEM_DOMAIN_GTT)
>               wmb();
> @@ -941,9 +941,9 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
>  
>  void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> -                                struct intel_engine_cs *ring)
> +                                struct drm_i915_gem_request *req)
>  {
> -     struct drm_i915_gem_request *req = intel_ring_get_request(ring);
> +     struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>       struct i915_vma *vma;
>  
>       list_for_each_entry(vma, vmas, exec_list) {
> @@ -957,7 +957,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>                       obj->base.pending_read_domains |= obj->base.read_domains;
>               obj->base.read_domains = obj->base.pending_read_domains;
>  
> -             i915_vma_move_to_active(vma, ring);
> +             i915_vma_move_to_active(vma, req);
>               if (obj->base.write_domain) {
>                       obj->dirty = 1;
>                       i915_gem_request_assign(&obj->last_write_req, req);
> @@ -983,20 +983,21 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  void
>  i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>                                   struct drm_file *file,
> -                                 struct intel_engine_cs *ring,
> +                                 struct drm_i915_gem_request *req,
>                                   struct drm_i915_gem_object *obj)
>  {
>       /* Unconditionally force add_request to emit a full flush. */
> -     ring->gpu_caches_dirty = true;
> +     req->ring->gpu_caches_dirty = true;
>  
>       /* Add a breadcrumb for the completion of the batch buffer */
> -     (void)__i915_add_request(ring, file, obj, true);
> +     (void)__i915_add_request(req, file, obj, true);
>  }
>  
>  static int
>  i915_reset_gen7_sol_offsets(struct drm_device *dev,
> -                         struct intel_engine_cs *ring)
> +                         struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       int ret, i;
>  
> @@ -1005,7 +1006,7 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>               return -EINVAL;
>       }
>  
> -     ret = intel_ring_begin(ring, 4 * 3);
> +     ret = intel_ring_begin(req, 4 * 3);
>       if (ret)
>               return ret;
>  
> @@ -1021,10 +1022,11 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>  }
>  
>  static int
> -i915_emit_box(struct intel_engine_cs *ring,
> +i915_emit_box(struct drm_i915_gem_request *req,
>             struct drm_clip_rect *box,
>             int DR1, int DR4)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
> @@ -1035,7 +1037,7 @@ i915_emit_box(struct intel_engine_cs *ring,
>       }
>  
>       if (INTEL_INFO(ring->dev)->gen >= 4) {
> -             ret = intel_ring_begin(ring, 4);
> +             ret = intel_ring_begin(req, 4);
>               if (ret)
>                       return ret;
>  
> @@ -1044,7 +1046,7 @@ i915_emit_box(struct intel_engine_cs *ring,
>               intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
>               intel_ring_emit(ring, DR4);
>       } else {
> -             ret = intel_ring_begin(ring, 6);
> +             ret = intel_ring_begin(req, 6);
>               if (ret)
>                       return ret;
>  
> @@ -1151,11 +1153,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>               goto error;
>       }
>  
> -     ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
> +     ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
>       if (ret)
>               goto error;
>  
> -     i915_gem_execbuffer_move_to_active(vmas, ring);
> +     i915_gem_execbuffer_move_to_active(vmas, params->request);
>  
>       /* Make sure the OLR hasn't advanced (which would indicate a flush
>        * of the work in progress which in turn would be a Bad Thing). */
> @@ -1200,18 +1202,18 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>       /* Unconditionally invalidate gpu caches and ensure that we do flush
>        * any residual writes from the previous batch.
>        */
> -     ret = intel_ring_invalidate_all_caches(ring);
> +     ret = intel_ring_invalidate_all_caches(params->request);
>       if (ret)
>               goto error;
>  
>       /* Switch to the correct context for the batch */
> -     ret = i915_switch_context(ring, params->ctx);
> +     ret = i915_switch_context(params->request, params->ctx);
>       if (ret)
>               goto error;
>  
>       if (ring == &dev_priv->ring[RCS] &&
>                       params->instp_mode != dev_priv->relative_constants_mode) {
> -             ret = intel_ring_begin(ring, 4);
> +             ret = intel_ring_begin(params->request, 4);
>               if (ret)
>                       goto error;
>  
> @@ -1225,7 +1227,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>       }
>  
>       if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) {
> -             ret = i915_reset_gen7_sol_offsets(params->dev, ring);
> +             ret = i915_reset_gen7_sol_offsets(params->dev, params->request);
>               if (ret)
>                       goto error;
>       }
> @@ -1236,29 +1238,31 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>  
>       if (params->cliprects) {
>               for (i = 0; i < params->args_num_cliprects; i++) {
> -                     ret = i915_emit_box(ring, &params->cliprects[i],
> -                                         params->args_DR1, params->args_DR4);
> +                     ret = i915_emit_box(params->request,
> +                                         &params->cliprects[i],
> +                                         params->args_DR1,
> +                                         params->args_DR4);
>                       if (ret)
>                               goto error;
>  
> -                     ret = ring->dispatch_execbuffer(ring,
> +                     ret = ring->dispatch_execbuffer(params->request,
>                                                       exec_start, exec_len,
>                                                       params->dispatch_flags);
>                       if (ret)
>                               goto error;
>               }
>       } else {
> -             ret = ring->dispatch_execbuffer(ring,
> +             ret = ring->dispatch_execbuffer(params->request,
>                                               exec_start, exec_len,
>                                               params->dispatch_flags);
>               if (ret)
>                       goto error;
>       }
>  
> -     trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags);
> +     trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>  
> -     i915_gem_execbuffer_retire_commands(params->dev, params->file, ring,
> -                                         params->batch_obj);
> +     i915_gem_execbuffer_retire_commands(params->dev, params->file,
> +                                         params->request, params->batch_obj);
>  
>  error:
>       /* intel_gpu_busy should also get a ref, so it will free when the device
> @@ -1490,10 +1494,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>               params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
>  
>       /* Allocate a request for this batch buffer nice and early. */
> -     ret = dev_priv->gt.alloc_request(ring, ctx);
> +     ret = dev_priv->gt.alloc_request(ring, ctx, &params->request);
>       if (ret)
>               goto err;
> -     params->request = ring->outstanding_lazy_request;
> +     WARN_ON(params->request != ring->outstanding_lazy_request);
>  
>       /* Save assorted stuff away to pass through to *_submission_final() */
>       params->dev                     = dev;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7eead93..776776e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -213,14 +213,15 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
>  }
>  
>  /* Broadwell Page Directory Pointer Descriptors */
> -static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
> -                        uint64_t val)
> +static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry,
> +                       uint64_t val)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       BUG_ON(entry >= 4);
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -236,7 +237,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
>  }
>  
>  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -                       struct intel_engine_cs *ring)
> +                       struct drm_i915_gem_request *req)
>  {
>       int i, ret;
>  
> @@ -245,7 +246,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  
>       for (i = used_pd - 1; i >= 0; i--) {
>               dma_addr_t addr = ppgtt->pd_dma_addr[i];
> -             ret = gen8_write_pdp(ring, i, addr);
> +             ret = gen8_write_pdp(req, i, addr);
>               if (ret)
>                       return ret;
>       }
> @@ -710,16 +711,17 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
>  }
>  
>  static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -                      struct intel_engine_cs *ring)
> +                      struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       /* NB: TLBs must be flushed and invalidated before a switch */
> -     ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +     ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>       if (ret)
>               return ret;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -735,16 +737,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  }
>  
>  static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -                       struct intel_engine_cs *ring)
> +                       struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       /* NB: TLBs must be flushed and invalidated before a switch */
> -     ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +     ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>       if (ret)
>               return ret;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -758,7 +761,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  
>       /* XXX: RCS is the only one to auto invalidate the TLBs? */
>       if (ring->id != RCS) {
> -             ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +             ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>               if (ret)
>                       return ret;
>       }
> @@ -767,8 +770,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  }
>  
>  static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -                       struct intel_engine_cs *ring)
> +                       struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_device *dev = ppgtt->base.dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>  
> @@ -1125,11 +1129,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  
>  int i915_ppgtt_init_hw(struct drm_device *dev)
>  {
> -     struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct intel_engine_cs *ring;
> -     struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> -     int i, ret = 0;
> -
>       /* In the case of execlists, PPGTT is enabled by the context descriptor
>        * and the PDPs are contained within the context itself.  We don't
>        * need to do anything here. */
> @@ -1148,25 +1147,20 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
>       else
>               WARN_ON(1);
>  
> -     if (ppgtt) {
> -             for_each_ring(ring, dev_priv, i) {
> -                     ret = ppgtt->switch_mm(ppgtt, ring);
> -                     if (ret != 0)
> -                             return ret;
> -
> -                     /* Make sure the context switch (if one actually happened)
> -                      * gets wrapped up and finished rather than hanging around
> -                      * and confusing things later. */
> -                     if (ring->outstanding_lazy_request) {
> -                             ret = i915_add_request_no_flush(ring);
> -                             if (ret)
> -                                     return ret;
> -                     }
> -             }
> -     }
> +     return 0;
> +}
>  
> -     return ret;
> +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
> +{
> +     struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
> +     struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
> +
> +     if (!ppgtt)
> +             return 0;
> +
> +     return ppgtt->switch_mm(ppgtt, req);
>  }
> +
>  struct i915_hw_ppgtt *
>  i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index dd849df..bee3e2a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -267,7 +267,7 @@ struct i915_hw_ppgtt {
>  
>       int (*enable)(struct i915_hw_ppgtt *ppgtt);
>       int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> -                      struct intel_engine_cs *ring);
> +                      struct drm_i915_gem_request *req);
>       void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
>  };
>  
> @@ -278,6 +278,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev);
>  
>  int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
>  int i915_ppgtt_init_hw(struct drm_device *dev);
> +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
>  void i915_ppgtt_release(struct kref *kref);
>  struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
>                                       struct drm_i915_file_private *fpriv);
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index aba39c3..0e0c23fe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -152,8 +152,9 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
>       return 0;
>  }
>  
> -int i915_gem_render_state_init(struct intel_engine_cs *ring)
> +int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>       struct render_state so;
>       int ret;
>  
> @@ -164,16 +165,17 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>       if (so.rodata == NULL)
>               return 0;
>  
> -     ret = ring->dispatch_execbuffer(ring,
> +     ret = ring->dispatch_execbuffer(req,
>                                       so.ggtt_offset,
>                                       so.rodata->batch_items * 4,
>                                       I915_DISPATCH_SECURE);
>       if (ret)
>               goto out;
>  
> -     i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +     i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
>  
> -     ret = __i915_add_request(ring, NULL, so.obj, true);
> +//   ret = __i915_add_request(req, NULL, so.obj, true);
> +     req->batch_obj = so.obj;
>       /* __i915_add_request moves object to inactive if it fails */
>  out:
>       i915_gem_render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
> index c44961e..7aa7372 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.h
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
> @@ -39,7 +39,7 @@ struct render_state {
>       int gen;
>  };
>  
> -int i915_gem_render_state_init(struct intel_engine_cs *ring);
> +int i915_gem_render_state_init(struct drm_i915_gem_request *req);
>  void i915_gem_render_state_fini(struct render_state *so);
>  int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
>                                 struct render_state *so);
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index f0cf421..c0b0e37 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9089,14 +9089,15 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
>                                struct drm_crtc *crtc,
>                                struct drm_framebuffer *fb,
>                                struct drm_i915_gem_object *obj,
> -                              struct intel_engine_cs *ring,
> +                              struct drm_i915_gem_request *req,
>                                uint32_t flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>       u32 flip_mask;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -9116,7 +9117,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
>       intel_ring_emit(ring, 0); /* aux display base address, unused */
>  
>       intel_mark_page_flip_active(intel_crtc);
> -     i915_add_request_no_flush(ring);
> +     i915_add_request_no_flush(req);
>       return 0;
>  }
>  
> @@ -9124,14 +9125,15 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
>                                struct drm_crtc *crtc,
>                                struct drm_framebuffer *fb,
>                                struct drm_i915_gem_object *obj,
> -                              struct intel_engine_cs *ring,
> +                              struct drm_i915_gem_request *req,
>                                uint32_t flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>       u32 flip_mask;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -9148,7 +9150,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
>       intel_ring_emit(ring, MI_NOOP);
>  
>       intel_mark_page_flip_active(intel_crtc);
> -     i915_add_request_no_flush(ring);
> +     i915_add_request_no_flush(req);
>       return 0;
>  }
>  
> @@ -9156,15 +9158,16 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
>                                struct drm_crtc *crtc,
>                                struct drm_framebuffer *fb,
>                                struct drm_i915_gem_object *obj,
> -                              struct intel_engine_cs *ring,
> +                              struct drm_i915_gem_request *req,
>                                uint32_t flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>       uint32_t pf, pipesrc;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -9187,7 +9190,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
>       intel_ring_emit(ring, pf | pipesrc);
>  
>       intel_mark_page_flip_active(intel_crtc);
> -     i915_add_request_no_flush(ring);
> +     i915_add_request_no_flush(req);
>       return 0;
>  }
>  
> @@ -9195,15 +9198,16 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
>                                struct drm_crtc *crtc,
>                                struct drm_framebuffer *fb,
>                                struct drm_i915_gem_object *obj,
> -                              struct intel_engine_cs *ring,
> +                              struct drm_i915_gem_request *req,
>                                uint32_t flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>       uint32_t pf, pipesrc;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -9223,7 +9227,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
>       intel_ring_emit(ring, pf | pipesrc);
>  
>       intel_mark_page_flip_active(intel_crtc);
> -     i915_add_request_no_flush(ring);
> +     i915_add_request_no_flush(req);
>       return 0;
>  }
>  
> @@ -9231,9 +9235,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>                                struct drm_crtc *crtc,
>                                struct drm_framebuffer *fb,
>                                struct drm_i915_gem_object *obj,
> -                              struct intel_engine_cs *ring,
> +                              struct drm_i915_gem_request *req,
>                                uint32_t flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>       uint32_t plane_bit = 0;
>       int len, ret;
> @@ -9275,11 +9280,11 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>        * then do the cacheline alignment, and finally emit the
>        * MI_DISPLAY_FLIP.
>        */
> -     ret = intel_ring_cacheline_align(ring);
> +     ret = intel_ring_cacheline_align(req);
>       if (ret)
>               return ret;
>  
> -     ret = intel_ring_begin(ring, len);
> +     ret = intel_ring_begin(req, len);
>       if (ret)
>               return ret;
>  
> @@ -9318,7 +9323,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>       intel_ring_emit(ring, (MI_NOOP));
>  
>       intel_mark_page_flip_active(intel_crtc);
> -     i915_add_request_no_flush(ring);
> +     i915_add_request_no_flush(req);
>       return 0;
>  }
>  
> @@ -9474,9 +9479,10 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>                                struct drm_crtc *crtc,
>                                struct drm_framebuffer *fb,
>                                struct drm_i915_gem_object *obj,
> -                              struct intel_engine_cs *ring,
> +                              struct drm_i915_gem_request *req,
>                                uint32_t flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>       uint32_t plane = 0, stride;
>       int ret;
> @@ -9508,7 +9514,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>               return -ENODEV;
>       }
>  
> -     ret = intel_ring_begin(ring, 10);
> +     ret = intel_ring_begin(req, 10);
>       if (ret)
>               return ret;
>  
> @@ -9528,7 +9534,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
>       intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
>  
>       intel_mark_page_flip_active(intel_crtc);
> -     i915_add_request_no_flush(ring);
> +     i915_add_request_no_flush(req);
>  
>       return 0;
>  }
> @@ -9537,7 +9543,7 @@ static int intel_default_queue_flip(struct drm_device *dev,
>                                   struct drm_crtc *crtc,
>                                   struct drm_framebuffer *fb,
>                                   struct drm_i915_gem_object *obj,
> -                                 struct intel_engine_cs *ring,
> +                                 struct drm_i915_gem_request *req,
>                                   uint32_t flags)
>  {
>       return -ENODEV;
> @@ -9729,22 +9735,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>               i915_gem_request_assign(&work->flip_queued_req,
>                                       obj->last_write_req);
>       } else {
> -             ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
> +             struct drm_i915_gem_request *req;
> +
> +             ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +             if (ret)
> +                     return ret;
> +
> +             i915_gem_request_assign(&work->flip_queued_req, req);
> +
> +             ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req,
>                                                  page_flip_flags);
>               if (ret)
>                       goto cleanup_unpin;
> -
> -             /* Borked: need to get the seqno for the request submitted in
> -              * 'queue_flip()' above. However, either the request has been
> -              * posted already and the seqno is gone (q_f calls add_request),
> -              * or the request never gets posted and is merged into whatever
> -              * render comes along next (q_f calls ring_advance).
> -              *
> -              * On the other hand, seqnos are going away soon anyway! So
> -              * hopefully the problem will disappear...
> -              */
> -             i915_gem_request_assign(&work->flip_queued_req,
> -                                     ring->outstanding_lazy_request ? 
> intel_ring_get_request(ring) : NULL);
>       }
>  
>       work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
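
Good to see the "Borked" comment go. With the request allocated and
assigned to work->flip_queued_req before queue_flip() runs, there is no
longer any guessing about whether q_f posted the request or left it to
be merged into later work. The ownership pattern this hunk establishes,
sketched using only calls from this patch:

	struct drm_i915_gem_request *req;

	/* the top-level owner creates the request up front ... */
	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	/* ... takes its tracking reference ... */
	i915_gem_request_assign(&work->flip_queued_req, req);

	/* ... and the backend just emits into it and submits */
	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req,
					   page_flip_flags);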
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 80cb87e..5077a77 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -203,6 +203,10 @@ enum {
>  };
>  #define GEN8_CTX_ID_SHIFT 32
>  
> +static int intel_logical_ring_begin(struct drm_i915_gem_request *req,
> +                                 int num_dwords);
> +static int intel_lr_context_render_state_init(struct drm_i915_gem_request 
> *req,
> +                                           struct intel_context *ctx);
>  static int intel_lr_context_pin(struct intel_engine_cs *ring,
>               struct intel_context *ctx);
>  
> @@ -587,9 +591,9 @@ static int execlists_context_queue(struct intel_engine_cs 
> *ring,
>       return 0;
>  }
>  
> -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer 
> *ringbuf)
> +static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request 
> *req)
>  {
> -     struct intel_engine_cs *ring = ringbuf->ring;
> +     struct intel_engine_cs *ring = req->ring;
>       uint32_t flush_domains;
>       int ret;
>  
> @@ -597,7 +601,7 @@ static int logical_ring_invalidate_all_caches(struct 
> intel_ringbuffer *ringbuf)
>       if (ring->gpu_caches_dirty)
>               flush_domains = I915_GEM_GPU_DOMAINS;
>  
> -     ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
> +     ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
>       if (ret)
>               return ret;
>  
> @@ -605,10 +609,9 @@ static int logical_ring_invalidate_all_caches(struct 
> intel_ringbuffer *ringbuf)
>       return 0;
>  }
>  
> -static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
> +static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>                                struct list_head *vmas)
>  {
> -     struct intel_engine_cs *ring = ringbuf->ring;
>       struct i915_vma *vma;
>       uint32_t flush_domains = 0;
>       bool flush_chipset = false;
> @@ -617,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer 
> *ringbuf,
>       list_for_each_entry(vma, vmas, exec_list) {
>               struct drm_i915_gem_object *obj = vma->obj;
>  
> -             ret = i915_gem_object_sync(obj, ring, true);
> +             ret = i915_gem_object_sync(obj, req);
>               if (ret)
>                       return ret;
>  
> @@ -657,7 +660,6 @@ int intel_execlists_submission(struct 
> i915_execbuffer_params *params,
>       struct drm_device       *dev = params->dev;
>       struct intel_engine_cs  *ring = params->ring;
>       struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct intel_ringbuffer *ringbuf = 
> params->ctx->engine[ring->id].ringbuf;
>       int ret;
>  
>       params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> @@ -706,11 +708,11 @@ int intel_execlists_submission(struct 
> i915_execbuffer_params *params,
>               return -EINVAL;
>       }
>  
> -     ret = execlists_move_to_gpu(ringbuf, vmas);
> +     ret = execlists_move_to_gpu(params->request, vmas);
>       if (ret)
>               return ret;
>  
> -     i915_gem_execbuffer_move_to_active(vmas, ring);
> +     i915_gem_execbuffer_move_to_active(vmas, params->request);
>  
>       ret = dev_priv->gt.do_execfinal(params);
>       if (ret)
> @@ -742,13 +744,13 @@ int intel_execlists_submission_final(struct 
> i915_execbuffer_params *params)
>       /* Unconditionally invalidate gpu caches and ensure that we do flush
>        * any residual writes from the previous batch.
>        */
> -     ret = logical_ring_invalidate_all_caches(ringbuf);
> +     ret = logical_ring_invalidate_all_caches(params->request);
>       if (ret)
>               return ret;
>  
>       if (ring == &dev_priv->ring[RCS] &&
>           params->instp_mode != dev_priv->relative_constants_mode) {
> -             ret = intel_logical_ring_begin(ringbuf, 4);
> +             ret = intel_logical_ring_begin(params->request, 4);
>               if (ret)
>                       return ret;
>  
> @@ -764,13 +766,14 @@ int intel_execlists_submission_final(struct 
> i915_execbuffer_params *params)
>       exec_start = params->batch_obj_vm_offset +
>                    params->args_batch_start_offset;
>  
> -     ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags);
> +     ret = ring->emit_bb_start(params->request, exec_start, 
> params->dispatch_flags);
>       if (ret)
>               return ret;
>  
> -     trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), 
> params->dispatch_flags);
> +     trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>  
> -     i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, 
> params->batch_obj);
> +     i915_gem_execbuffer_retire_commands(params->dev, params->file,
> +                                         params->request, params->batch_obj);
>  
>       return 0;
>  }
> @@ -827,15 +830,15 @@ void intel_logical_ring_stop(struct intel_engine_cs 
> *ring)
>       I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
>  }
>  
> -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
> +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
>  {
> -     struct intel_engine_cs *ring = ringbuf->ring;
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       if (!ring->gpu_caches_dirty)
>               return 0;
>  
> -     ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
> +     ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
>       if (ret)
>               return ret;
>  
> @@ -910,13 +913,17 @@ void intel_lr_context_unpin(struct intel_engine_cs 
> *ring,
>  }
>  
>  int intel_logical_ring_alloc_request(struct intel_engine_cs *ring,
> -                                  struct intel_context *ctx)
> +                                  struct intel_context *ctx,
> +                                  struct drm_i915_gem_request **req_out)
>  {
>       struct drm_i915_gem_request *request;
>       struct drm_i915_private *dev_private = ring->dev->dev_private;
>       int ret;
>  
> -     if (ring->outstanding_lazy_request)
> +     if (!req_out)
> +             return -EINVAL;
> +
> +     if ((*req_out = ring->outstanding_lazy_request) != NULL)
>               return 0;
>  
>       request = kzalloc(sizeof(*request), GFP_KERNEL);
> @@ -953,7 +960,7 @@ int intel_logical_ring_alloc_request(struct 
> intel_engine_cs *ring,
>       i915_gem_context_reference(request->ctx);
>       request->ringbuf = ctx->engine[ring->id].ringbuf;
>  
> -     ring->outstanding_lazy_request = request;
> +     *req_out = ring->outstanding_lazy_request = request;
>       return 0;
>  }
>  
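The double assignment above keeps the OLR alive purely as a transitional
alias: a second caller arriving while a request is outstanding gets that
request handed back rather than a fresh one, so converted and
unconverted paths still agree on which request is being built. Callers
end up with this shape (a sketch, only using functions from this patch):

	struct drm_i915_gem_request *req;

	ret = intel_logical_ring_alloc_request(ring, ctx, &req);
	if (ret)
		return ret;

	/* ... intel_logical_ring_begin()/emit against req ... */

	ret = i915_add_request_no_flush(req);

Once the OLR is deleted, the early-out and the aliased assignment both
collapse into plain request creation.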
> @@ -1090,7 +1097,7 @@ static int logical_ring_prepare(struct intel_ringbuffer 
> *ringbuf, int bytes)
>  /**
>   * intel_logical_ring_begin() - prepare the logical ringbuffer to accept 
> some commands
>   *
> - * @ringbuf: Logical ringbuffer.
> + * @request: The request to start some new work for
>   * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
>   *
>   * The ringbuffer might not be ready to accept the commands right away 
> (maybe it needs to
> @@ -1100,8 +1107,9 @@ static int logical_ring_prepare(struct intel_ringbuffer 
> *ringbuf, int bytes)
>   *
>   * Return: non-zero if the ringbuffer is not ready to be written to.
>   */
> -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int 
> num_dwords)
> +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, int 
> num_dwords)
>  {
> +     struct intel_ringbuffer *ringbuf = req->ringbuf;
>       struct intel_engine_cs *ring = ringbuf->ring;
>       struct drm_device *dev = ring->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -1116,38 +1124,28 @@ int intel_logical_ring_begin(struct intel_ringbuffer 
> *ringbuf, int num_dwords)
>       if (ret)
>               return ret;
>  
> -     if(!ring->outstanding_lazy_request) {
> -             printk(KERN_INFO "%s:%d> 
> \x1B[31;1mring->outstanding_lazy_request = 0x%p\x1B[0m\n", __func__, 
> __LINE__, ring->outstanding_lazy_request);
> -             dump_stack();
> -     }
> -
> -     /* Preallocate the olr before touching the ring */
> -     ret = intel_logical_ring_alloc_request(ring, ringbuf->FIXME_lrc_ctx);
> -     if (ret)
> -             return ret;
> -
>       ringbuf->space -= num_dwords * sizeof(uint32_t);
>       return 0;
>  }
>  
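Dropping the preallocation (and the FIXME_lrc_ctx lookup with it) is the
key inversion here: intel_logical_ring_begin() can no longer conjure up
a request, so one must exist before anything touches the ring. Roughly:

	/* must come first now: no request, no ring access */
	ret = dev_priv->gt.alloc_request(ring, ctx, &req);
	if (ret)
		return ret;

	ret = intel_logical_ring_begin(req, num_dwords);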
> -static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
> +static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request 
> *req,
>                                              struct intel_context *ctx)
>  {
>       int ret, i;
> -     struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> -     struct drm_device *dev = ring->dev;
> +     struct intel_ringbuffer *ringbuf = req->ringbuf;
> +     struct drm_device *dev = req->ring->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct i915_workarounds *w = &dev_priv->workarounds;
>  
>       if (WARN_ON(w->count == 0))
>               return 0;
>  
> -     ring->gpu_caches_dirty = true;
> -     ret = logical_ring_flush_all_caches(ringbuf);
> +     req->ring->gpu_caches_dirty = true;
> +     ret = logical_ring_flush_all_caches(req);
>       if (ret)
>               return ret;
>  
> -     ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
> +     ret = intel_logical_ring_begin(req, w->count * 2 + 2);
>       if (ret)
>               return ret;
>  
> @@ -1160,8 +1158,8 @@ static int intel_logical_ring_workarounds_emit(struct 
> intel_engine_cs *ring,
>  
>       intel_logical_ring_advance(ringbuf);
>  
> -     ring->gpu_caches_dirty = true;
> -     ret = logical_ring_flush_all_caches(ringbuf);
> +     req->ring->gpu_caches_dirty = true;
> +     ret = logical_ring_flush_all_caches(req);
>       if (ret)
>               return ret;
>  
> @@ -1210,13 +1208,14 @@ static int gen8_init_render_ring(struct 
> intel_engine_cs *ring)
>       return init_workarounds_ring(ring);
>  }
>  
> -static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
>                             u64 offset, unsigned flags)
>  {
> +     struct intel_ringbuffer *ringbuf = req->ringbuf;
>       bool ppgtt = !(flags & I915_DISPATCH_SECURE);
>       int ret;
>  
> -     ret = intel_logical_ring_begin(ringbuf, 4);
> +     ret = intel_logical_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -1263,17 +1262,18 @@ static void gen8_logical_ring_put_irq(struct 
> intel_engine_cs *ring)
>       spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
>  }
>  
> -static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_flush(struct drm_i915_gem_request *req,
>                          u32 invalidate_domains,
>                          u32 unused)
>  {
> +     struct intel_ringbuffer *ringbuf = req->ringbuf;
>       struct intel_engine_cs *ring = ringbuf->ring;
>       struct drm_device *dev = ring->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       uint32_t cmd;
>       int ret;
>  
> -     ret = intel_logical_ring_begin(ringbuf, 4);
> +     ret = intel_logical_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -1301,10 +1301,11 @@ static int gen8_emit_flush(struct intel_ringbuffer 
> *ringbuf,
>       return 0;
>  }
>  
> -static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
> +static int gen8_emit_flush_render(struct drm_i915_gem_request *req,
>                                 u32 invalidate_domains,
>                                 u32 flush_domains)
>  {
> +     struct intel_ringbuffer *ringbuf = req->ringbuf;
>       struct intel_engine_cs *ring = ringbuf->ring;
>       u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>       u32 flags = 0;
> @@ -1328,7 +1329,7 @@ static int gen8_emit_flush_render(struct 
> intel_ringbuffer *ringbuf,
>               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>       }
>  
> -     ret = intel_logical_ring_begin(ringbuf, 6);
> +     ret = intel_logical_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -1353,13 +1354,14 @@ static void gen8_set_seqno(struct intel_engine_cs 
> *ring, u32 seqno)
>       intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
>  }
>  
> -static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
> +static int gen8_emit_request(struct drm_i915_gem_request *req)
>  {
> +     struct intel_ringbuffer *ringbuf = req->ringbuf;
>       struct intel_engine_cs *ring = ringbuf->ring;
>       u32 cmd;
>       int ret;
>  
> -     ret = intel_logical_ring_begin(ringbuf, 6);
> +     ret = intel_logical_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -1371,8 +1373,7 @@ static int gen8_emit_request(struct intel_ringbuffer 
> *ringbuf)
>                               (ring->status_page.gfx_addr +
>                               (I915_GEM_HWS_INDEX << 
> MI_STORE_DWORD_INDEX_SHIFT)));
>       intel_logical_ring_emit(ringbuf, 0);
> -     intel_logical_ring_emit(ringbuf,
> -             i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +     intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(req));
>       intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
>       intel_logical_ring_emit(ringbuf, MI_NOOP);
>       intel_logical_ring_advance_and_submit(ringbuf);
> @@ -1380,16 +1381,20 @@ static int gen8_emit_request(struct intel_ringbuffer 
> *ringbuf)
>       return 0;
>  }
>  
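This hunk is the payoff in miniature: the emission code reads the seqno
from the request it was handed instead of fishing it out of ring-global
state. Condensed before/after:

	/* before: implicit, via the ring */
	seqno = i915_gem_request_get_seqno(ring->outstanding_lazy_request);

	/* after: explicit, via the request being built */
	seqno = i915_gem_request_get_seqno(req);

The same substitution repeats below in gen6_add_request(),
pc_render_add_request(), i9xx_add_request() and the semaphore signal
paths.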
> -static int gen8_init_rcs_context(struct intel_engine_cs *ring,
> -                    struct intel_context *ctx)
> +static int gen8_init_rcs_context(struct drm_i915_gem_request *req,
> +                              struct intel_context *ctx)
>  {
>       int ret;
>  
> -     ret = intel_logical_ring_workarounds_emit(ring, ctx);
> +     ret = intel_logical_ring_workarounds_emit(req, ctx);
>       if (ret)
>               return ret;
>  
> -     return intel_lr_context_render_state_init(ring, ctx);
> +     ret = intel_lr_context_render_state_init(req, ctx);
> +     if (ret)
> +             return ret;
> +
> +     return 0;
>  }
>  
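Tiny nit: unless more steps are planned after the render state init, the
new tail is a long-winded spelling of

	return intel_lr_context_render_state_init(req, ctx);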
>  /**
> @@ -1409,6 +1414,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs 
> *ring)
>  
>       intel_logical_ring_stop(ring);
>       WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
> +     WARN_ON(ring->outstanding_lazy_request);
>       i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>  
>       if (ring->cleanup)
> @@ -1648,10 +1654,10 @@ cleanup_render_ring:
>       return ret;
>  }
>  
> -int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
> -                                    struct intel_context *ctx)
> +static int intel_lr_context_render_state_init(struct drm_i915_gem_request 
> *req,
> +                                           struct intel_context *ctx)
>  {
> -     struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> +     struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>       struct render_state so;
>       struct drm_i915_file_private *file_priv = ctx->file_priv;
>       struct drm_file *file = file_priv ? file_priv->file : NULL;
> @@ -1664,15 +1670,13 @@ int intel_lr_context_render_state_init(struct 
> intel_engine_cs *ring,
>       if (so.rodata == NULL)
>               return 0;
>  
> -     ret = ring->emit_bb_start(ringbuf,
> -                     so.ggtt_offset,
> -                     I915_DISPATCH_SECURE);
> +     ret = ring->emit_bb_start(req, so.ggtt_offset, I915_DISPATCH_SECURE);
>       if (ret)
>               goto out;
>  
> -     i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +     i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
>  
> -     ret = __i915_add_request(ring, file, so.obj, true);
> +     ret = __i915_add_request(req, file, so.obj, true);
>       /* intel_logical_ring_add_request moves object to inactive if it
>        * fails */
>  out:
> @@ -1883,6 +1887,7 @@ static void lrc_setup_hardware_status_page(struct 
> intel_engine_cs *ring,
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>                                    struct intel_engine_cs *ring)
>  {
> +     struct drm_i915_private *dev_priv = ring->dev->dev_private;
>       const bool is_global_default_ctx = (ctx == ring->default_context);
>       struct drm_device *dev = ring->dev;
>       struct drm_i915_gem_object *ctx_obj;
> @@ -1964,13 +1969,27 @@ int intel_lr_context_deferred_create(struct 
> intel_context *ctx,
>               lrc_setup_hardware_status_page(ring, ctx_obj);
>       else if (ring->id == RCS && !ctx->rcs_initialized) {
>               if (ring->init_context) {
> -                     ret = ring->init_context(ring, ctx);
> +                     struct drm_i915_gem_request *req;
> +
> +                     ret = dev_priv->gt.alloc_request(ring, ctx, &req);
> +                     if (ret)
> +                             return ret;
> +
> +                     ret = ring->init_context(req, ctx);
>                       if (ret) {
>                               DRM_ERROR("ring init context: %d\n", ret);
> +                             i915_gem_request_unreference(req);
>                               ctx->engine[ring->id].ringbuf = NULL;
>                               ctx->engine[ring->id].state = NULL;
>                               goto error;
>                       }
> +
> +                     ret = i915_add_request_no_flush(req);
> +                     if (ret) {
> +                             DRM_ERROR("ring init context (add request): %d\n", ret);
> +                             i915_gem_request_unreference(req);
> +                             goto error;
> +                     }
>               }
>  
>               ctx->rcs_initialized = true;
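One thing to double-check in the error handling above: alloc_request()
also parks the request as ring->outstanding_lazy_request, so calling
i915_gem_request_unreference(req) on failure without clearing that alias
could leave the ring pointing at freed memory. A more defensive
teardown, assuming the OLR alias holds the only reference at this point:

	if (ret) {
		DRM_ERROR("ring init context: %d\n", ret);
		/* drop the OLR alias, which also drops its reference */
		i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
		ctx->engine[ring->id].ringbuf = NULL;
		ctx->engine[ring->id].state = NULL;
		goto error;
	}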
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
> b/drivers/gpu/drm/i915/intel_lrc.h
> index ea083d9..a2981ba 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -35,12 +35,13 @@
>  
>  /* Logical Rings */
>  int __must_check intel_logical_ring_alloc_request(struct intel_engine_cs 
> *ring,
> -                                               struct intel_context *ctx);
> +                                               struct intel_context *ctx,
> +                                               struct drm_i915_gem_request 
> **req_out);
>  void intel_logical_ring_stop(struct intel_engine_cs *ring);
>  void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
>  int intel_logical_rings_init(struct drm_device *dev);
>  
> -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
> +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
>  void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
>  /**
>   * intel_logical_ring_advance() - advance the ringbuffer tail
> @@ -63,11 +64,8 @@ static inline void intel_logical_ring_emit(struct 
> intel_ringbuffer *ringbuf,
>       iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
>       ringbuf->tail += 4;
>  }
> -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int 
> num_dwords);
>  
>  /* Logical Ring Contexts */
> -int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
> -                                    struct intel_context *ctx);
>  void intel_lr_context_free(struct intel_context *ctx);
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>                                    struct intel_engine_cs *ring);
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c 
> b/drivers/gpu/drm/i915/intel_overlay.c
> index 973c9de..2d2ce59 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -209,17 +209,15 @@ static void intel_overlay_unmap_regs(struct 
> intel_overlay *overlay,
>  }
>  
>  static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> +                                      struct drm_i915_gem_request *req,
>                                        void (*tail)(struct intel_overlay *))
>  {
>       struct drm_device *dev = overlay->dev;
> -     struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct intel_engine_cs *ring = &dev_priv->ring[RCS];
>       int ret;
>  
>       BUG_ON(overlay->last_flip_req);
> -     i915_gem_request_assign(&overlay->last_flip_req,
> -                                          ring->outstanding_lazy_request);
> -     ret = i915_add_request(ring);
> +     i915_gem_request_assign(&overlay->last_flip_req, req);
> +     ret = i915_add_request(overlay->last_flip_req);
>       if (ret)
>               return ret;
>  
> @@ -239,6 +237,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>       struct drm_device *dev = overlay->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +     struct drm_i915_gem_request *req;
>       int ret;
>  
>       BUG_ON(overlay->active);
> @@ -246,17 +245,21 @@ static int intel_overlay_on(struct intel_overlay 
> *overlay)
>  
>       WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
>       if (ret)
>               return ret;
>  
> -     intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
> -     intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
> -     intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
> -     intel_ring_emit(ring, MI_NOOP);
> -     intel_ring_advance(ring);
> +     ret = intel_ring_begin(req, 4);
> +     if (ret)
> +             return ret;
> +
> +     intel_ring_emit(req->ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
> +     intel_ring_emit(req->ring, overlay->flip_addr | OFC_UPDATE);
> +     intel_ring_emit(req->ring, MI_WAIT_FOR_EVENT | 
> MI_WAIT_FOR_OVERLAY_FLIP);
> +     intel_ring_emit(req->ring, MI_NOOP);
> +     intel_ring_advance(req->ring);
>  
> -     return intel_overlay_do_wait_request(overlay, NULL);
> +     return intel_overlay_do_wait_request(overlay, req, NULL);
>  }
>  
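All the overlay paths now follow the same alloc -> begin -> emit ->
submit shape, which shows the ownership model off nicely. One wrinkle:
if intel_ring_begin() fails after the allocation, the function returns
with the fresh request still parked as the ring's OLR, which is exactly
the floating unowned work this series is trying to kill. A hedged sketch
of the cleanup, again assuming the OLR alias holds the only reference:

	ret = intel_ring_begin(req, 4);
	if (ret) {
		/* don't leave the new request dangling on the ring */
		i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
		return ret;
	}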
>  /* overlay needs to be enabled in OCMD reg */
> @@ -266,6 +269,7 @@ static int intel_overlay_continue(struct intel_overlay 
> *overlay,
>       struct drm_device *dev = overlay->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +     struct drm_i915_gem_request *req;
>       u32 flip_addr = overlay->flip_addr;
>       u32 tmp;
>       int ret;
> @@ -280,7 +284,11 @@ static int intel_overlay_continue(struct intel_overlay 
> *overlay,
>       if (tmp & (1 << 17))
>               DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +     if (ret)
> +             return ret;
> +
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
> @@ -289,9 +297,8 @@ static int intel_overlay_continue(struct intel_overlay 
> *overlay,
>       intel_ring_advance(ring);
>  
>       WARN_ON(overlay->last_flip_req);
> -     i915_gem_request_assign(&overlay->last_flip_req,
> -                                          ring->outstanding_lazy_request);
> -     return i915_add_request(ring);
> +     i915_gem_request_assign(&overlay->last_flip_req, req);
> +     return i915_add_request(req);
>  }
>  
>  static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
> @@ -326,6 +333,7 @@ static int intel_overlay_off(struct intel_overlay 
> *overlay)
>       struct drm_device *dev = overlay->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +     struct drm_i915_gem_request *req;
>       u32 flip_addr = overlay->flip_addr;
>       int ret;
>  
> @@ -337,7 +345,11 @@ static int intel_overlay_off(struct intel_overlay 
> *overlay)
>        * of the hw. Do it in both cases */
>       flip_addr |= OFC_UPDATE;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
> +     if (ret)
> +             return ret;
> +
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -359,7 +371,7 @@ static int intel_overlay_off(struct intel_overlay 
> *overlay)
>       }
>       intel_ring_advance(ring);
>  
> -     return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> +     return intel_overlay_do_wait_request(overlay, req, 
> intel_overlay_off_tail);
>  }
>  
>  /* recover from an interruption due to a signal
> @@ -404,7 +416,13 @@ static int intel_overlay_release_old_vid(struct 
> intel_overlay *overlay)
>  
>       if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
>               /* synchronous slowpath */
> -             ret = intel_ring_begin(ring, 2);
> +             struct drm_i915_gem_request *req;
> +
> +             ret = dev_priv->gt.alloc_request(ring, ring->default_context, 
> &req);
> +             if (ret)
> +                     return ret;
> +
> +             ret = intel_ring_begin(req, 2);
>               if (ret)
>                       return ret;
>  
> @@ -412,7 +430,7 @@ static int intel_overlay_release_old_vid(struct 
> intel_overlay *overlay)
>               intel_ring_emit(ring, MI_NOOP);
>               intel_ring_advance(ring);
>  
> -             ret = intel_overlay_do_wait_request(overlay,
> +             ret = intel_overlay_do_wait_request(overlay, req,
>                                                   
> intel_overlay_release_old_vid_tail);
>               if (ret)
>                       return ret;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 78911e2..5905fa5 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5506,6 +5506,7 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>  {
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +     struct drm_i915_gem_request *req = NULL;
>       bool was_interruptible;
>       int ret;
>  
> @@ -5524,16 +5525,17 @@ static void ironlake_enable_rc6(struct drm_device 
> *dev)
>       was_interruptible = dev_priv->mm.interruptible;
>       dev_priv->mm.interruptible = false;
>  
> +     ret = dev_priv->gt.alloc_request(ring, NULL, &req);
> +     if (ret)
> +             goto err;
> +
>       /*
>        * GPU can automatically power down the render unit if given a page
>        * to save state.
>        */
> -     ret = intel_ring_begin(ring, 6);
> -     if (ret) {
> -             ironlake_teardown_rc6(dev);
> -             dev_priv->mm.interruptible = was_interruptible;
> -             return;
> -     }
> +     ret = intel_ring_begin(req, 6);
> +     if (ret)
> +             goto err;
>  
>       intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
>       intel_ring_emit(ring, MI_SET_CONTEXT);
> @@ -5547,6 +5549,11 @@ static void ironlake_enable_rc6(struct drm_device *dev)
>       intel_ring_emit(ring, MI_FLUSH);
>       intel_ring_advance(ring);
>  
> +     ret = i915_add_request_no_flush(req);
> +     if (ret)
> +             goto err;
> +     req = NULL;
> +
>       /*
>        * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
>        * does an implicit flush, combined with MI_FLUSH above, it should be
> @@ -5554,16 +5561,20 @@ static void ironlake_enable_rc6(struct drm_device 
> *dev)
>        */
>       ret = intel_ring_idle(ring);
>       dev_priv->mm.interruptible = was_interruptible;
> -     if (ret) {
> -             DRM_ERROR("failed to enable ironlake power savings\n");
> -             ironlake_teardown_rc6(dev);
> -             return;
> -     }
> +     if (ret)
> +             goto err;
>  
>       I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | 
> PWRCTX_EN);
>       I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
>  
>       intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
> +
> +     return;
> +
> +err:
> +     DRM_ERROR("failed to enable ironlake power savings\n");
> +     ironlake_teardown_rc6(dev);
> +     dev_priv->mm.interruptible = was_interruptible;
> +     if (req)
> +             i915_gem_request_unreference(req);
>  }
>  
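Worth calling out: the req = NULL after submission is what keeps the
error path honest, since a request handed to i915_add_request_no_flush()
now belongs to the ring and must not be unreferenced again by the
caller. The pattern, as I read it:

	ret = i915_add_request_no_flush(req);
	if (ret)
		goto err;
	req = NULL;	/* ownership passed on; err path skips the unref */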
>  static unsigned long intel_pxfreq(u32 vidfreq)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b60e59b..e6e7bb5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring)
>  }
>  
>  static int
> -gen2_render_ring_flush(struct intel_engine_cs *ring,
> +gen2_render_ring_flush(struct drm_i915_gem_request *req,
>                      u32      invalidate_domains,
>                      u32      flush_domains)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       u32 cmd;
>       int ret;
>  
> @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
>       if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
>               cmd |= MI_READ_FLUSH;
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
> @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen4_render_ring_flush(struct intel_engine_cs *ring,
> +gen4_render_ring_flush(struct drm_i915_gem_request *req,
>                      u32      invalidate_domains,
>                      u32      flush_domains)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_device *dev = ring->dev;
>       u32 cmd;
>       int ret;
> @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
>           (IS_G4X(dev) || IS_GEN5(dev)))
>               cmd |= MI_INVALIDATE_ISP;
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
> @@ -212,12 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
>   * really our business.  That leaves only stall at scoreboard.
>   */
>  static int
> -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
> +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -230,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs 
> *ring)
>       intel_ring_emit(ring, MI_NOOP);
>       intel_ring_advance(ring);
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -246,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct 
> intel_engine_cs *ring)
>  }
>  
>  static int
> -gen6_render_ring_flush(struct intel_engine_cs *ring,
> -                         u32 invalidate_domains, u32 flush_domains)
> +gen6_render_ring_flush(struct drm_i915_gem_request *req,
> +                       u32 invalidate_domains, u32 flush_domains)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       u32 flags = 0;
>       u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>       int ret;
>  
>       /* Force SNB workarounds for PIPE_CONTROL flushes */
> -     ret = intel_emit_post_sync_nonzero_flush(ring);
> +     ret = intel_emit_post_sync_nonzero_flush(req);
>       if (ret)
>               return ret;
>  
> @@ -284,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
>               flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
>       }
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -298,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
> +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -316,14 +321,15 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs 
> *ring)
>       return 0;
>  }
>  
> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
> +static int gen7_ring_fbc_flush(struct drm_i915_gem_request *req, u32 value)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       if (!ring->fbc_dirty)
>               return 0;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>       /* WaFbcNukeOn3DBlt:ivb/hsw */
> @@ -340,9 +346,10 @@ static int gen7_ring_fbc_flush(struct intel_engine_cs 
> *ring, u32 value)
>  }
>  
>  static int
> -gen7_render_ring_flush(struct intel_engine_cs *ring,
> +gen7_render_ring_flush(struct drm_i915_gem_request *req,
>                      u32 invalidate_domains, u32 flush_domains)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       u32 flags = 0;
>       u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>       int ret;
> @@ -381,10 +388,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>               /* Workaround: we must issue a pipe_control with CS-stall bit
>                * set before a pipe_control command that has the state cache
>                * invalidate bit set. */
> -             gen7_render_ring_cs_stall_wa(ring);
> +             gen7_render_ring_cs_stall_wa(req);
>       }
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -395,18 +402,19 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>       intel_ring_advance(ring);
>  
>       if (!invalidate_domains && flush_domains)
> -             return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> +             return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
>  
>       return 0;
>  }
>  
>  static int
> -gen8_emit_pipe_control(struct intel_engine_cs *ring,
> +gen8_emit_pipe_control(struct drm_i915_gem_request *req,
>                      u32 flags, u32 scratch_addr)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -422,11 +430,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -gen8_render_ring_flush(struct intel_engine_cs *ring,
> +gen8_render_ring_flush(struct drm_i915_gem_request *req,
>                      u32 invalidate_domains, u32 flush_domains)
>  {
>       u32 flags = 0;
> -     u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +     u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>       int ret;
>  
>       flags |= PIPE_CONTROL_CS_STALL;
> @@ -446,7 +454,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>  
>               /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
> -             ret = gen8_emit_pipe_control(ring,
> +             ret = gen8_emit_pipe_control(req,
>                                            PIPE_CONTROL_CS_STALL |
>                                            PIPE_CONTROL_STALL_AT_SCOREBOARD,
>                                            0);
> @@ -454,12 +462,12 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>                       return ret;
>       }
>  
> -     ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
> +     ret = gen8_emit_pipe_control(req, flags, scratch_addr);
>       if (ret)
>               return ret;
>  
>       if (!invalidate_domains && flush_domains)
> -             return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> +             return gen7_ring_fbc_flush(req, FBC_REND_NUKE);
>  
>       return 0;
>  }
> @@ -670,9 +678,10 @@ err:
>       return ret;
>  }
>  
> -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
> +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req,
>                                      struct intel_context *ctx)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret, i;
>       struct drm_device *dev = ring->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -682,11 +691,11 @@ static int intel_ring_workarounds_emit(struct 
> intel_engine_cs *ring,
>               return 0;
>  
>       ring->gpu_caches_dirty = true;
> -     ret = intel_ring_flush_all_caches(ring);
> +     ret = intel_ring_flush_all_caches(req);
>       if (ret)
>               return ret;
>  
> -     ret = intel_ring_begin(ring, (w->count * 2 + 2));
> +     ret = intel_ring_begin(req, (w->count * 2 + 2));
>       if (ret)
>               return ret;
>  
> @@ -700,7 +709,7 @@ static int intel_ring_workarounds_emit(struct 
> intel_engine_cs *ring,
>       intel_ring_advance(ring);
>  
>       ring->gpu_caches_dirty = true;
> -     ret = intel_ring_flush_all_caches(ring);
> +     ret = intel_ring_flush_all_caches(req);
>       if (ret)
>               return ret;
>  
> @@ -898,10 +907,11 @@ static void render_ring_cleanup(struct intel_engine_cs 
> *ring)
>       intel_fini_pipe_control(ring);
>  }
>  
> -static int gen8_rcs_signal(struct intel_engine_cs *signaller,
> +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
>                          unsigned int num_dwords)
>  {
>  #define MBOX_UPDATE_DWORDS 8
> +     struct intel_engine_cs *signaller = signaller_req->ring;
>       struct drm_device *dev = signaller->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *waiter;
> @@ -911,7 +921,7 @@ static int gen8_rcs_signal(struct intel_engine_cs 
> *signaller,
>       num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
>  #undef MBOX_UPDATE_DWORDS
>  
> -     ret = intel_ring_begin(signaller, num_dwords);
> +     ret = intel_ring_begin(signaller_req, num_dwords);
>       if (ret)
>               return ret;
>  
> @@ -921,8 +931,7 @@ static int gen8_rcs_signal(struct intel_engine_cs 
> *signaller,
>               if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
>                       continue;
>  
> -             seqno = i915_gem_request_get_seqno(
> -                                        signaller->outstanding_lazy_request);
> +             seqno = i915_gem_request_get_seqno(signaller_req);
>               intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
>               intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
>                                          PIPE_CONTROL_QW_WRITE |
> @@ -939,10 +948,11 @@ static int gen8_rcs_signal(struct intel_engine_cs 
> *signaller,
>       return 0;
>  }
>  
> -static int gen8_xcs_signal(struct intel_engine_cs *signaller,
> +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
>                          unsigned int num_dwords)
>  {
>  #define MBOX_UPDATE_DWORDS 6
> +     struct intel_engine_cs *signaller = signaller_req->ring;
>       struct drm_device *dev = signaller->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *waiter;
> @@ -952,7 +962,7 @@ static int gen8_xcs_signal(struct intel_engine_cs 
> *signaller,
>       num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
>  #undef MBOX_UPDATE_DWORDS
>  
> -     ret = intel_ring_begin(signaller, num_dwords);
> +     ret = intel_ring_begin(signaller_req, num_dwords);
>       if (ret)
>               return ret;
>  
> @@ -962,8 +972,7 @@ static int gen8_xcs_signal(struct intel_engine_cs 
> *signaller,
>               if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
>                       continue;
>  
> -             seqno = i915_gem_request_get_seqno(
> -                                        signaller->outstanding_lazy_request);
> +             seqno = i915_gem_request_get_seqno(signaller_req);
>               intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
>                                          MI_FLUSH_DW_OP_STOREDW);
>               intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
> @@ -978,9 +987,10 @@ static int gen8_xcs_signal(struct intel_engine_cs 
> *signaller,
>       return 0;
>  }
>  
> -static int gen6_signal(struct intel_engine_cs *signaller,
> +static int gen6_signal(struct drm_i915_gem_request *signaller_req,
>                      unsigned int num_dwords)
>  {
> +     struct intel_engine_cs *signaller = signaller_req->ring;
>       struct drm_device *dev = signaller->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *useless;
> @@ -991,15 +1001,14 @@ static int gen6_signal(struct intel_engine_cs 
> *signaller,
>       num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
>  #undef MBOX_UPDATE_DWORDS
>  
> -     ret = intel_ring_begin(signaller, num_dwords);
> +     ret = intel_ring_begin(signaller_req, num_dwords);
>       if (ret)
>               return ret;
>  
>       for_each_ring(useless, dev_priv, i) {
>               u32 mbox_reg = signaller->semaphore.mbox.signal[i];
>               if (mbox_reg != GEN6_NOSYNC) {
> -                     u32 seqno = i915_gem_request_get_seqno(
> -                                        signaller->outstanding_lazy_request);
> +                     u32 seqno = i915_gem_request_get_seqno(signaller_req);
>                       intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>                       intel_ring_emit(signaller, mbox_reg);
>                       intel_ring_emit(signaller, seqno);
> @@ -1016,29 +1025,28 @@ static int gen6_signal(struct intel_engine_cs 
> *signaller,
>  /**
>   * gen6_add_request - Update the semaphore mailbox registers
>   *
> - * @ring - ring that is adding a request
> - * @seqno - return seqno stuck into the ring
> + * @request - request to write to the ring
>   *
>   * Update the mailbox registers in the *other* rings with the current seqno.
>   * This acts like a signal in the canonical semaphore.
>   */
>  static int
> -gen6_add_request(struct intel_engine_cs *ring)
> +gen6_add_request(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       if (ring->semaphore.signal)
> -             ret = ring->semaphore.signal(ring, 4);
> +             ret = ring->semaphore.signal(req, 4);
>       else
> -             ret = intel_ring_begin(ring, 4);
> +             ret = intel_ring_begin(req, 4);
>  
>       if (ret)
>               return ret;
>  
>       intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>       intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -     intel_ring_emit(ring,
> -                 i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +     intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>       intel_ring_emit(ring, MI_USER_INTERRUPT);
>       __intel_ring_advance(ring);
>  
> @@ -1061,14 +1069,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct 
> drm_device *dev,
>   */
>  
>  static int
> -gen8_ring_sync(struct intel_engine_cs *waiter,
> +gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
>              struct intel_engine_cs *signaller,
>              u32 seqno)
>  {
> +     struct intel_engine_cs *waiter = waiter_req->ring;
>       struct drm_i915_private *dev_priv = waiter->dev->dev_private;
>       int ret;
>  
> -     ret = intel_ring_begin(waiter, 4);
> +     ret = intel_ring_begin(waiter_req, 4);
>       if (ret)
>               return ret;
>  
> @@ -1086,10 +1095,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter,
>  }
>  
>  static int
> -gen6_ring_sync(struct intel_engine_cs *waiter,
> +gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
>              struct intel_engine_cs *signaller,
>              u32 seqno)
>  {
> +     struct intel_engine_cs *waiter = waiter_req->ring;
>       u32 dw1 = MI_SEMAPHORE_MBOX |
>                 MI_SEMAPHORE_COMPARE |
>                 MI_SEMAPHORE_REGISTER;
> @@ -1104,7 +1114,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter,
>  
>       WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
>  
> -     ret = intel_ring_begin(waiter, 4);
> +     ret = intel_ring_begin(waiter_req, 4);
>       if (ret)
>               return ret;
>  
> @@ -1135,8 +1145,9 @@ do {                                                    
>                 \
>  } while (0)
>  
>  static int
> -pc_render_add_request(struct intel_engine_cs *ring)
> +pc_render_add_request(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
>       int ret;
>  
> @@ -1148,7 +1159,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>        * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
>        * memory before requesting an interrupt.
>        */
> -     ret = intel_ring_begin(ring, 32);
> +     ret = intel_ring_begin(req, 32);
>       if (ret)
>               return ret;
>  
> @@ -1156,8 +1167,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>                       PIPE_CONTROL_WRITE_FLUSH |
>                       PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
>       intel_ring_emit(ring, ring->scratch.gtt_offset | 
> PIPE_CONTROL_GLOBAL_GTT);
> -     intel_ring_emit(ring,
> -                 i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +     intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>       intel_ring_emit(ring, 0);
>       PIPE_CONTROL_FLUSH(ring, scratch_addr);
>       scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -1176,8 +1186,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>                       PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
>                       PIPE_CONTROL_NOTIFY);
>       intel_ring_emit(ring, ring->scratch.gtt_offset | 
> PIPE_CONTROL_GLOBAL_GTT);
> -     intel_ring_emit(ring,
> -                 i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +     intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>       intel_ring_emit(ring, 0);
>       __intel_ring_advance(ring);
>  
> @@ -1390,13 +1399,14 @@ void intel_ring_setup_status_page(struct 
> intel_engine_cs *ring)
>  }
>  
>  static int
> -bsd_ring_flush(struct intel_engine_cs *ring,
> +bsd_ring_flush(struct drm_i915_gem_request *req,
>              u32     invalidate_domains,
>              u32     flush_domains)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
> @@ -1407,18 +1417,18 @@ bsd_ring_flush(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -i9xx_add_request(struct intel_engine_cs *ring)
> +i9xx_add_request(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
>       intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>       intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -     intel_ring_emit(ring,
> -                 i915_gem_request_get_seqno(ring->outstanding_lazy_request));
> +     intel_ring_emit(ring, i915_gem_request_get_seqno(req));
>       intel_ring_emit(ring, MI_USER_INTERRUPT);
>       __intel_ring_advance(ring);
>  
> @@ -1550,13 +1560,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring)
>  }
>  
>  static int
> -i965_dispatch_execbuffer(struct intel_engine_cs *ring,
> +i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
>                        u64 offset, u32 length,
>                        unsigned flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
> @@ -1575,14 +1586,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
>  #define I830_TLB_ENTRIES (2)
>  #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
>  static int
> -i830_dispatch_execbuffer(struct intel_engine_cs *ring,
> -                             u64 offset, u32 len,
> -                             unsigned flags)
> +i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
> +                      u64 offset, u32 len,
> +                      unsigned flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       u32 cs_offset = ring->scratch.gtt_offset;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 6);
> +     ret = intel_ring_begin(req, 6);
>       if (ret)
>               return ret;
>  
> @@ -1599,7 +1611,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>               if (len > I830_BATCH_LIMIT)
>                       return -ENOSPC;
>  
> -             ret = intel_ring_begin(ring, 6 + 2);
> +             ret = intel_ring_begin(req, 6 + 2);
>               if (ret)
>                       return ret;
>  
> @@ -1622,7 +1634,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>               offset = cs_offset;
>       }
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -1636,13 +1648,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>  }
>  
>  static int
> -i915_dispatch_execbuffer(struct intel_engine_cs *ring,
> +i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
>                        u64 offset, u32 len,
>                        unsigned flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
> @@ -1885,6 +1898,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs 
> *ring)
>  
>       intel_unpin_ringbuffer_obj(ringbuf);
>       intel_destroy_ringbuffer_obj(ringbuf);
> +     WARN_ON(ring->outstanding_lazy_request);
>       i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
>  
>       if (ring->cleanup)
> @@ -2007,8 +2021,9 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>       int ret;
>  
>       /* We need to add any requests required to flush the objects and ring */
> +     WARN_ON(ring->outstanding_lazy_request);
>       if (ring->outstanding_lazy_request) {
> -             ret = i915_add_request(ring);
> +             ret = i915_add_request(ring->outstanding_lazy_request);
>               if (ret)
>                       return ret;
>       }
> @@ -2025,13 +2040,18 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>  }
>  
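The WARN_ON above the existing fallback reads as one of the promised
sanity asserts: once nothing fires it, the whole block (and the OLR
field itself) can go, i.e. the eventual follow-up is just

	-	WARN_ON(ring->outstanding_lazy_request);
	-	if (ring->outstanding_lazy_request) {
	-		ret = i915_add_request(ring->outstanding_lazy_request);
	-		if (ret)
	-			return ret;
	-	}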
>  int
> -intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context 
> *ctx)
> +intel_ring_alloc_request(struct intel_engine_cs *ring,
> +                      struct intel_context *ctx,
> +                      struct drm_i915_gem_request **req_out)
>  {
>       int ret;
>       struct drm_i915_gem_request *request;
>       struct drm_i915_private *dev_private = ring->dev->dev_private;
>  
> -     if (ring->outstanding_lazy_request)
> +     if (!req_out)
> +             return -EINVAL;
> +
> +     if ((*req_out = ring->outstanding_lazy_request) != NULL)
>               return 0;
>  
>       request = kzalloc(sizeof(*request), GFP_KERNEL);
> @@ -2053,7 +2073,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring, 
> struct intel_context *ctx
>       spewThisReq(request, "\x1B[32mCreated: %d:%d, ref => %d\x1B[0m", 
> request->uniq, request->seqno, request->ref.refcount.counter);
>  
>       //printk(KERN_INFO "%s:%d> <%s> OLR = 0x%p, uniq = %d, seqno = %d\n", 
> __func__, __LINE__, ring->name, request, request->uniq, request->seqno);
> -     ring->outstanding_lazy_request = request;
> +     *req_out = ring->outstanding_lazy_request = request;
>       return 0;
>  }
>  
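After this conversion the legacy and execlists allocators are
structurally identical (same req_out contract, same OLR aliasing), so
they look like candidates for one shared entry point eventually. Purely
hypothetical sketch of what that could look like:

	/* hypothetical: a single allocator dispatching via dev_priv->gt */
	static int i915_gem_request_alloc(struct intel_engine_cs *ring,
					  struct intel_context *ctx,
					  struct drm_i915_gem_request **req_out)
	{
		struct drm_i915_private *dev_priv = ring->dev->dev_private;

		return dev_priv->gt.alloc_request(ring, ctx, req_out);
	}

The spewThisReq()/printk leftovers visible in the context here
presumably get dropped with the prep patches before any of this lands.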
> @@ -2078,9 +2098,10 @@ static int __intel_ring_prepare(struct intel_engine_cs 
> *ring,
>       return 0;
>  }
>  
> -int intel_ring_begin(struct intel_engine_cs *ring,
> +int intel_ring_begin(struct drm_i915_gem_request *req,
>                    int num_dwords)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_i915_private *dev_priv = ring->dev->dev_private;
>       int ret;
>  
> @@ -2093,18 +2114,14 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>       if (ret)
>               return ret;
>  
> -     /* Preallocate the olr before touching the ring */
> -     ret = intel_ring_alloc_request(ring, NULL);
> -     if (ret)
> -             return ret;
> -
>       ring->buffer->space -= num_dwords * sizeof(uint32_t);
>       return 0;
>  }
>  
>  /* Align the ring tail to a cacheline boundary */
> -int intel_ring_cacheline_align(struct intel_engine_cs *ring)
> +int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / 
> sizeof(uint32_t);
>       int ret;
>  
> @@ -2112,7 +2129,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs 
> *ring)
>               return 0;
>  
>       num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
> -     ret = intel_ring_begin(ring, num_dwords);
> +     ret = intel_ring_begin(req, num_dwords);
>       if (ret)
>               return ret;
>  
> @@ -2176,13 +2193,14 @@ static void gen6_bsd_ring_write_tail(struct 
> intel_engine_cs *ring,
>                  _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
>  }
>  
> -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
> +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
>                              u32 invalidate, u32 flush)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       uint32_t cmd;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -2212,14 +2230,15 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs 
> *ring,
>  }
>  
>  static int
> -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>                             u64 offset, u32 len,
>                             unsigned flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -2234,13 +2253,14 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
> *ring,
>  }
>  
>  static int
> -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>                             u64 offset, u32 len,
>                             unsigned flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
> @@ -2256,13 +2276,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs 
> *ring,
>  }
>  
>  static int
> -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
> +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>                             u64 offset, u32 len,
>                             unsigned flags)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 2);
> +     ret = intel_ring_begin(req, 2);
>       if (ret)
>               return ret;
>  
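
Since dispatch_execbuffer is now keyed off the request, the execbuffer
path presumably ends up doing something like the following (sketch
only, the i915_gem_execbuffer.c side isn't quoted here and the local
variable names are assumptions):

	/* req carries the engine, so no separate ring parameter */
	ret = req->ring->dispatch_execbuffer(req, exec_start, exec_len,
					     dispatch_flags);
	if (ret)
		return ret;
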
> @@ -2278,15 +2299,16 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
>  
>  /* Blitter support (SandyBridge+) */
>  
> -static int gen6_ring_flush(struct intel_engine_cs *ring,
> +static int gen6_ring_flush(struct drm_i915_gem_request *req,
>                          u32 invalidate, u32 flush)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       struct drm_device *dev = ring->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       uint32_t cmd;
>       int ret;
>  
> -     ret = intel_ring_begin(ring, 4);
> +     ret = intel_ring_begin(req, 4);
>       if (ret)
>               return ret;
>  
> @@ -2315,7 +2337,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>  
>       if (!invalidate && flush) {
>               if (IS_GEN7(dev))
> -                     return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
> +                     return gen7_ring_fbc_flush(req, FBC_REND_CACHE_CLEAN);
>               else if (IS_BROADWELL(dev))
>                       dev_priv->fbc.need_sw_cache_clean = true;
>       }
> @@ -2696,14 +2718,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>  }
>  
>  int
> -intel_ring_flush_all_caches(struct intel_engine_cs *ring)
> +intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       int ret;
>  
>       if (!ring->gpu_caches_dirty)
>               return 0;
>  
> -     ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
> +     ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
>       if (ret)
>               return ret;
>  
> @@ -2714,8 +2737,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring)
>  }
>  
>  int
> -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
> +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
>  {
> +     struct intel_engine_cs *ring = req->ring;
>       uint32_t flush_domains;
>       int ret;
>  
> @@ -2723,7 +2747,7 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
>       if (ring->gpu_caches_dirty)
>               flush_domains = I915_GEM_GPU_DOMAINS;
>  
> -     ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
> +     ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
>       if (ret)
>               return ret;
>  
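
Both cache helpers now simply forward the request into the flush vfunc,
so the split between a client that dirties the caches and the request
owner that flushes them would look roughly like this (sketch):

	/* client: mark that the gpu caches need flushing */
	req->ring->gpu_caches_dirty = true;

	/* owner: flush before handing the request to the hardware */
	ret = intel_ring_flush_all_caches(req);
	if (ret)
		return ret;
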
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 48cbb00..a7e47ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -154,15 +154,15 @@ struct  intel_engine_cs {
>  
>       int             (*init_hw)(struct intel_engine_cs *ring);
>  
> -     int             (*init_context)(struct intel_engine_cs *ring,
> +     int             (*init_context)(struct drm_i915_gem_request *req,
>                                       struct intel_context *ctx);
>  
>       void            (*write_tail)(struct intel_engine_cs *ring,
>                                     u32 value);
> -     int __must_check (*flush)(struct intel_engine_cs *ring,
> +     int __must_check (*flush)(struct drm_i915_gem_request *req,
>                                 u32   invalidate_domains,
>                                 u32   flush_domains);
> -     int             (*add_request)(struct intel_engine_cs *ring);
> +     int             (*add_request)(struct drm_i915_gem_request *req);
>       /* Some chipsets are not quite as coherent as advertised and need
>        * an expensive kick to force a true read of the up-to-date seqno.
>        * However, the up-to-date seqno is not always required and the last
> @@ -173,7 +173,7 @@ struct  intel_engine_cs {
>                                    bool lazy_coherency);
>       void            (*set_seqno)(struct intel_engine_cs *ring,
>                                    u32 seqno);
> -     int             (*dispatch_execbuffer)(struct intel_engine_cs *ring,
> +     int             (*dispatch_execbuffer)(struct drm_i915_gem_request *req,
>                                              u64 offset, u32 length,
>                                              unsigned dispatch_flags);
>  #define I915_DISPATCH_SECURE 0x1
> @@ -231,10 +231,10 @@ struct  intel_engine_cs {
>               };
>  
>               /* AKA wait() */
> -             int     (*sync_to)(struct intel_engine_cs *ring,
> -                                struct intel_engine_cs *to,
> +             int     (*sync_to)(struct drm_i915_gem_request *to_req,
> +                                struct intel_engine_cs *from,
>                                  u32 seqno);
> -             int     (*signal)(struct intel_engine_cs *signaller,
> +             int     (*signal)(struct drm_i915_gem_request *signaller_req,
>                                 /* num_dwords needed by caller */
>                                 unsigned int num_dwords);
>       } semaphore;
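
Worth spelling out the sync_to semantics after this change: the waiter
is now named by its request rather than its engine, i.e. to_req stalls
until 'from' has passed the given seqno. A sketch of the presumed call
site (i915_gem_object_sync() or equivalent):

	/* make to_req's engine wait for 'from' to reach seqno */
	ret = to_req->ring->semaphore.sync_to(to_req, from, seqno);
	if (ret)
		return ret;
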
> @@ -245,11 +245,11 @@ struct  intel_engine_cs {
>       struct list_head execlist_retired_req_list;
>       u8 next_context_status_buffer;
>       u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
> -     int             (*emit_request)(struct intel_ringbuffer *ringbuf);
> -     int             (*emit_flush)(struct intel_ringbuffer *ringbuf,
> +     int             (*emit_request)(struct drm_i915_gem_request *req);
> +     int             (*emit_flush)(struct drm_i915_gem_request *req,
>                                     u32 invalidate_domains,
>                                     u32 flush_domains);
> -     int             (*emit_bb_start)(struct intel_ringbuffer *ringbuf,
> +     int             (*emit_bb_start)(struct drm_i915_gem_request *req,
>                                        u64 offset, unsigned flags);
>  
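
Nice side effect: the execlists vfuncs converge on the same
request-first convention as the legacy ring path, so a full cache
flush has the same shape on both (sketch):

	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);	/* legacy */
	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);	/* lrc */
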
>       /**
> @@ -433,10 +433,11 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>  void intel_stop_ring_buffer(struct intel_engine_cs *ring);
>  void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
>  
> -int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
> -int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
> +int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
> +int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
>  int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring,
> -                                       struct intel_context *ctx);
> +                                       struct intel_context *ctx,
> +                                       struct drm_i915_gem_request **req_out);
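
The new **req_out parameter is what makes ownership explicit: the
top-level submitter allocates the request, threads it through every
emission helper, then submits. Roughly (sketch; the final submission
call is an assumption, i915_add_request() still took just the ring at
this point in the series):

	struct drm_i915_gem_request *req;
	int ret;

	ret = intel_ring_alloc_request(ring, ctx, &req);
	if (ret)
		return ret;

	/* emit work against req via the converted helpers ... */
	ret = intel_ring_flush_all_caches(req);
	if (ret)
		return ret;

	return i915_add_request(ring);	/* assumed submission entry point */
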
>  static inline void intel_ring_emit(struct intel_engine_cs *ring,
>                                  u32 data)
>  {
> @@ -457,8 +458,8 @@ void __intel_ring_advance(struct intel_engine_cs *ring);
>  
>  int __must_check intel_ring_idle(struct intel_engine_cs *ring);
>  void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
> -int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
> -int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);
> +int intel_ring_flush_all_caches(struct drm_i915_gem_request *req);
> +int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
>  
>  void intel_fini_pipe_control(struct intel_engine_cs *ring);
>  int intel_init_pipe_control(struct intel_engine_cs *ring);
> @@ -479,11 +480,4 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>       return ringbuf->tail;
>  }
>  
> -static inline struct drm_i915_gem_request *
> -intel_ring_get_request(struct intel_engine_cs *ring)
> -{
> -     BUG_ON(ring->outstanding_lazy_request == NULL);
> -     return ring->outstanding_lazy_request;
> -}
> -
>  #endif /* _INTEL_RINGBUFFER_H_ */
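
And with intel_ring_get_request() gone the last backdoor into the olr
is closed: code that used to fish the request out of the engine behind
the caller's back, i.e.

	request = intel_ring_get_request(ring);

now just uses the req that is passed down explicitly.
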
> -- 
> 1.7.9.5
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
