Re: [Intel-gfx] [PATCH v6 3/3] drm/i915/icl: Implement half float formats

2019-03-13 Thread Maarten Lankhorst
Op 13-03-2019 om 01:38 schreef Kevin Strasser:
> 64 bpp half float formats are supported on hdr planes only and are subject
> to the following restrictions:
>   * 90/270 rotation not supported
>   * Yf Tiling not supported
>   * Frame Buffer Compression not supported
>   * Color Keying not supported
>
> v2:
> - Drop handling pixel normalize register
> - Don't use icl_is_hdr_plane too early
>
> v3:
> - Use refactored icl_is_hdr_plane (Ville)
> - Use u32 instead of uint32_t (Ville)
>
> v6:
> - Rebase and fix merge conflicts
> - Reorganize switch statements to keep RGB grouped separately from YUV
>
> Cc: Uma Shankar 
> Cc: Shashank Sharma 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: dri-de...@lists.freedesktop.org
> Signed-off-by: Kevin Strasser 
> Reviewed-by: Ville Syrjälä 
> Reviewed-by: Maarten Lankhorst 
> Reviewed-by: Adam Jackson 
> ---
>  drivers/gpu/drm/i915/intel_display.c | 22 +++
>  drivers/gpu/drm/i915/intel_sprite.c  | 72 
> ++--
>  2 files changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_display.c 
> b/drivers/gpu/drm/i915/intel_display.c
> index 60fbe3a..eaedf91 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2680,6 +2680,18 @@ int skl_format_to_fourcc(int format, bool rgb_order, 
> bool alpha)
>   return DRM_FORMAT_XBGR2101010;
>   else
>   return DRM_FORMAT_XRGB2101010;
> + case PLANE_CTL_FORMAT_XRGB_16161616F:
> + if (rgb_order) {
> + if (alpha)
> + return DRM_FORMAT_ABGR16161616F;
> + else
> + return DRM_FORMAT_XBGR16161616F;
> + } else {
> + if (alpha)
> + return DRM_FORMAT_ARGB16161616F;
> + else
> + return DRM_FORMAT_XRGB16161616F;
> + }
>   }
>  }
>  
> @@ -3575,6 +3587,12 @@ static u32 skl_plane_ctl_format(u32 pixel_format)
>   return PLANE_CTL_FORMAT_XRGB_2101010;
>   case DRM_FORMAT_XBGR2101010:
>   return PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
> + case DRM_FORMAT_XBGR16161616F:
> + case DRM_FORMAT_ABGR16161616F:
> + return PLANE_CTL_FORMAT_XRGB_16161616F | PLANE_CTL_ORDER_RGBX;
> + case DRM_FORMAT_XRGB16161616F:
> + case DRM_FORMAT_ARGB16161616F:
> + return PLANE_CTL_FORMAT_XRGB_16161616F;
>   case DRM_FORMAT_YUYV:
>   return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
>   case DRM_FORMAT_YVYU:
> @@ -5143,6 +5161,10 @@ static int skl_update_scaler_plane(struct 
> intel_crtc_state *crtc_state,
>   case DRM_FORMAT_ARGB:
>   case DRM_FORMAT_XRGB2101010:
>   case DRM_FORMAT_XBGR2101010:
> + case DRM_FORMAT_XBGR16161616F:
> + case DRM_FORMAT_ABGR16161616F:
> + case DRM_FORMAT_XRGB16161616F:
> + case DRM_FORMAT_ARGB16161616F:
>   case DRM_FORMAT_YUYV:
>   case DRM_FORMAT_YVYU:
>   case DRM_FORMAT_UYVY:
> diff --git a/drivers/gpu/drm/i915/intel_sprite.c 
> b/drivers/gpu/drm/i915/intel_sprite.c
> index 622669f..e00559d 100644
> --- a/drivers/gpu/drm/i915/intel_sprite.c
> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> @@ -1508,8 +1508,6 @@ static int skl_plane_check_fb(const struct 
> intel_crtc_state *crtc_state,
>   /*
>* 90/270 is not allowed with RGB64 16:16:16:16 and
>* Indexed 8-bit. RGB 16-bit 5:6:5 is allowed gen11 onwards.
> -  * TBD: Add RGB64 case once its added in supported format
> -  * list.
>*/
>   switch (fb->format->format) {
>   case DRM_FORMAT_RGB565:
> @@ -1517,6 +1515,10 @@ static int skl_plane_check_fb(const struct 
> intel_crtc_state *crtc_state,
>   break;
>   /* fall through */
>   case DRM_FORMAT_C8:
> + case DRM_FORMAT_XRGB16161616F:
> + case DRM_FORMAT_XBGR16161616F:
> + case DRM_FORMAT_ARGB16161616F:
> + case DRM_FORMAT_ABGR16161616F:
>   DRM_DEBUG_KMS("Unsupported pixel format %s for 
> 90/270!\n",
> drm_get_format_name(fb->format->format,
> &format_name));
> @@ -1837,6 +1839,31 @@ static const uint32_t icl_plane_formats[] = {
>   DRM_FORMAT_Y416,
>  };
>  
> +static const uint32_t icl_hdr_plane_formats[] = {
> + DRM_FORMAT_C8,
> + DRM_FORMAT_RGB565,
> + DRM_FORMAT_XRGB,
> + DRM_FORMAT_XBGR,
> + DRM_FORMAT_ARGB,
> + DRM_FORMAT_ABGR,
> + DRM_FORMAT_XRGB2101010,
> + DRM_FORMAT_XBGR2101010,
> + DRM_FORMAT_XRGB16161616F,
> + DRM_FORMAT_XBGR16161616F,
> + DRM_FORMAT_ARGB16161616F,
> + DRM_FORMAT_ABGR16161616F,
> +   

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Christian König

Am 12.03.19 um 19:02 schrieb Ville Syrjälä:

On Tue, Mar 12, 2019 at 06:37:57PM +0100, Noralf Trønnes wrote:


Den 12.03.2019 18.25, skrev Ville Syrjälä:

On Tue, Mar 12, 2019 at 06:15:24PM +0100, Noralf Trønnes wrote:


Den 12.03.2019 17.17, skrev Ville Syrjälä:

On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:

On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:

This adds support for outputting kernel messages on panic().
A kernel message dumper is used to dump the log. The dumper iterates
over each DRM device and it's crtc's to find suitable framebuffers.

All the other dumpers are run before this one except mtdoops.
Only atomic drivers are supported.

Signed-off-by: Noralf Trønnes 
---
  [...]

diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index f0b34c977ec5..f3274798ecfe 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
 struct drm_file *file_priv, unsigned flags,
 unsigned color, struct drm_clip_rect *clips,
 unsigned num_clips);
+
+   /**
+* @panic_vmap:
+*
+* Optional callback for panic handling.
+*
+* For vmapping the selected framebuffer in a panic context. Must
+* be super careful about locking (only trylocking allowed).
+*
+* RETURNS:
+*
+* NULL if it didn't work out, otherwise an opaque cookie which is
+* passed to @panic_draw_xy. It can be anything: vmap area, structure
+* with more details, just a few flags, ...
+*/
+   void *(*panic_vmap)(struct drm_framebuffer *fb);

FWIW, the panic_vmap hook cannot work in general with the amdgpu/radeon
drivers:

Framebuffers are normally tiled, writing to them with the CPU results in
garbled output.


In which case the driver needs to support the ->panic_draw_xy callback,
or maybe it's possible to make a generic helper for tiled buffers.


With a discrete GPU having a large amount of VRAM, the framebuffer may
not be directly CPU accessible at all.


It would have been nice to know how Windows works around this.


There would need to be a mechanism for switching scanout to a linear,
CPU accessible framebuffer.

I suppose panic_vmap() could just provide a linear temp buffer
to the panic handler, and panic_unmap() could copy the contents
over to the real fb.

That said, this approach of scribbling over the primary plane's
framebuffer has some clear limitations:
* something may overwrite the oops message before the user
   can even read it

When the dumper drm_panic_kmsg_dump() runs, the other CPU's should have
been stopped. See panic().

GPUs etc. may still be executing away.


Would it be safe to stop it in a panic situation? It would ofc be bad to
crash the box even harder.

Some drivers/devices may have working (and hopefully even reliable)
gpu reset, some may not.


Even if GPU reset is working, it certainly doesn't under a panic() 
condition when all other CPUs are already stopped.


I don't see how this approach should ever work reliably.

Christian.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Daniel Vetter
On Tue, Mar 12, 2019 at 11:13:03PM +0100, Ahmed S. Darwish wrote:
> Hi,
> 
> [[ CCing John for the trylock parts ]]
> 
> On Mon, Mar 11, 2019 at 11:33:15PM +0100, Noralf Trønnes wrote:
> >
> >
> > Den 11.03.2019 20.23, skrev Daniel Vetter:
> > > On Mon, Mar 11, 2019 at 06:42:16PM +0100, Noralf Trønnes wrote:
> > >> This adds support for outputting kernel messages on panic().
> > >> A kernel message dumper is used to dump the log. The dumper iterates
> > >> over each DRM device and it's crtc's to find suitable framebuffers.
> > >>
> > >> All the other dumpers are run before this one except mtdoops.
> > >> Only atomic drivers are supported.
> > >>
> > >> Signed-off-by: Noralf Trønnes 
> > >
> > > Bunch of comments/ideas for you or Darwish below, whoever picks this up.
> >
> > Actually it would be nice if Darwish could pick it up since he will do
> > it on i915 which will be useful to a much broader audience.
> > If not I'll respin when I'm done with the drm_fb_helper refactoring.
> >
> 
> Yup, I'll be more than happy to do this.. while preserving all of
> Noralf's authorship and copyright notices of course.
> 
> I guess it can be:
> 
>   - Handle the comments posted by Daniel and others (I'll post
> some questions too)
> 
>   - Add the necessary i915 specific bits
> 
>   - Test, post v3/v4/../vn. Rinse and repeat. Keep it local at
> dri-devel until getting the necessary S-o-Bs.
> 
>   - Post to wider audience (some feedback from distribution folks
> would also be nice, before posting to lkml)
> 
> More comments below..
> 
> [...]
> 
> > >> +
> > >> +static void drm_panic_kmsg_dump(struct kmsg_dumper *dumper,
> > >> +enum kmsg_dump_reason reason)
> > >> +{
> > >> +class_for_each_device(drm_class, NULL, dumper, 
> > >> drm_panic_dev_iter);
> > >
> > > class_for_each_device uses klist, which only uses an irqsave spinlock. I
> > > think that's good enough. Comment to that effect would be good e.g.
> > >
> > >   /* based on klist, which uses only a spin_lock_irqsave, which we
> > >* assume still works */
> > >
> > > If we aim for perfect this should be a trylock still, maybe using our own
> > > device list.
> > >
> 
> I definitely agree here.
> 
> The lock may already be locked either by a stopped CPU, or by the
> very same CPU we execute panic() on (e.g. NMI panic() on the
> printing CPU).
> 
> This is why it's very common for example in serial consoles, which
> are usually careful about re-entrance and panic contexts, to do:
> 
>   xx_console_write(...) {
>   if (oops_in_progress)
>   locked = spin_trylock_irqsave(&port->lock, flags);
>   else
>   spin_lock_irqsave(&port->lock, flags);
>   }
> 
> I'm quite positive we should do the same for panic drm drivers.

Yeah Ideally all the locking in the drm path would be trylock only.

I wonder whether lockdep could help us validate this, with some "don't
allow anything except trylocks in this context". It's easy to audit the
core code with review, but drivers are much tougher. And often end up with
really deep callchains to get at the backing buffers.
> John?
> 
> > >> +}
> > >> +
> > >> +static struct kmsg_dumper drm_panic_kmsg_dumper = {
> > >> +.dump = drm_panic_kmsg_dump,
> > >> +.max_reason = KMSG_DUMP_PANIC,
> > >> +};
> > >> +
> > >> +static ssize_t drm_panic_file_panic_write(struct file *file,
> > >> +  const char __user *user_buf,
> > >> +  size_t count, loff_t *ppos)
> > >> +{
> > >> +unsigned long long val;
> > >> +char buf[24];
> > >> +size_t size;
> > >> +ssize_t ret;
> > >> +
> > >> +size = min(sizeof(buf) - 1, count);
> > >> +if (copy_from_user(buf, user_buf, size))
> > >> +return -EFAULT;
> > >> +
> > >> +buf[size] = '\0';
> > >> +ret = kstrtoull(buf, 0, &val);
> > >> +if (ret)
> > >> +return ret;
> > >> +
> > >> +drm_panic_kmsg_dumper.max_reason = KMSG_DUMP_OOPS;
> > >> +wmb();
> > >> +
> > >> +/* Do a real test with: echo c > /proc/sysrq-trigger */
> > >> +
> > >> +if (val == 0) {
> > >> +pr_info("Test panic screen using kmsg_dump(OOPS)\n");
> > >> +kmsg_dump(KMSG_DUMP_OOPS);
> > >> +} else if (val == 1) {
> > >> +char *null_pointer = NULL;
> > >> +
> > >> +pr_info("Test panic screen using NULL pointer 
> > >> dereference\n");
> > >> +*null_pointer = 1;
> > >> +} else {
> > >> +return -EINVAL;
> > >> +}
> > >
> > > This isn't quite what I had in mind, since it still kills the kernel (like
> > > sysrq-trigger).
> >
> > If val == 0, it doesn't kill the kernel, it only dumps the kernel log.
> > And it doesn't taint the kernel either.
> >
> > > Instead what I had in mind is to recreate the worst
> > > possible panic co

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Daniel Vetter
On Wed, Mar 13, 2019 at 08:49:17AM +0100, John Ogness wrote:
> On 2019-03-12, Ahmed S. Darwish  wrote:
>  +
>  +static void drm_panic_kmsg_dump(struct kmsg_dumper *dumper,
>  +enum kmsg_dump_reason reason)
>  +{
>  +class_for_each_device(drm_class, NULL, dumper, 
>  drm_panic_dev_iter);
> >>>
> >>> class_for_each_device uses klist, which only uses an irqsave
> >>> spinlock. I think that's good enough. Comment to that effect would
> >>> be good e.g.
> >>>
> >>>   /* based on klist, which uses only a spin_lock_irqsave, which we
> >>>* assume still works */
> >>>
> >>> If we aim for perfect this should be a trylock still, maybe using
> >>> our own device list.
> >>>
> >
> > I definitely agree here.
> >
> > The lock may already be locked either by a stopped CPU, or by the
> > very same CPU we execute panic() on (e.g. NMI panic() on the
> > printing CPU).
> >
> > This is why it's very common for example in serial consoles, which
> > are usually careful about re-entrance and panic contexts, to do:
> >
> >   xx_console_write(...) {
> > if (oops_in_progress)
> > locked = spin_trylock_irqsave(&port->lock, flags);
> > else
> > spin_lock_irqsave(&port->lock, flags);
> >   }
> >
> > I'm quite positive we should do the same for panic drm drivers.
> 
> This construction will continue, even if the trylock fails. It only
> makes sense to do this if the driver has a chance of being
> successful. Ignoring locks is a serious issue. I personally am quite
> unhappy that the serial drivers do this, which was part of my motivation
> for the new printk design I'm working on.
> 
> If the driver is not capable of doing something useful on a failed
> trylock, then I recommend just skipping that device. Maybe trying it
> again later after trying all the devices?

Ah yes missed that. If the trylock fails anywhere, we must bail out.

Not sure retrying is useful, my experience from at least drm is that
either you're lucky, and drm wasn't doing anything right when the machine
blew up, and then the trylocks will all go through. Or you're unlucky, and
most likely that means drm itself blew up, and no amount of retrying is
going to help. I wouldn't bother.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Daniel Vetter
On Tue, Mar 12, 2019 at 08:02:56PM +0200, Ville Syrjälä wrote:
> On Tue, Mar 12, 2019 at 06:37:57PM +0100, Noralf Trønnes wrote:
> > 
> > 
> > Den 12.03.2019 18.25, skrev Ville Syrjälä:
> > > On Tue, Mar 12, 2019 at 06:15:24PM +0100, Noralf Trønnes wrote:
> > >>
> > >>
> > >> Den 12.03.2019 17.17, skrev Ville Syrjälä:
> > >>> On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:
> >  On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:
> > > This adds support for outputting kernel messages on panic().
> > > A kernel message dumper is used to dump the log. The dumper iterates
> > > over each DRM device and it's crtc's to find suitable framebuffers.
> > >
> > > All the other dumpers are run before this one except mtdoops.
> > > Only atomic drivers are supported.
> > >
> > > Signed-off-by: Noralf Trønnes 
> > > ---
> > >  [...]
> > >
> > > diff --git a/include/drm/drm_framebuffer.h 
> > > b/include/drm/drm_framebuffer.h
> > > index f0b34c977ec5..f3274798ecfe 100644
> > > --- a/include/drm/drm_framebuffer.h
> > > +++ b/include/drm/drm_framebuffer.h
> > > @@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
> > >struct drm_file *file_priv, unsigned flags,
> > >unsigned color, struct drm_clip_rect *clips,
> > >unsigned num_clips);
> > > +
> > > + /**
> > > +  * @panic_vmap:
> > > +  *
> > > +  * Optional callback for panic handling.
> > > +  *
> > > +  * For vmapping the selected framebuffer in a panic context. 
> > > Must
> > > +  * be super careful about locking (only trylocking allowed).
> > > +  *
> > > +  * RETURNS:
> > > +  *
> > > +  * NULL if it didn't work out, otherwise an opaque cookie which 
> > > is
> > > +  * passed to @panic_draw_xy. It can be anything: vmap area, 
> > > structure
> > > +  * with more details, just a few flags, ...
> > > +  */
> > > + void *(*panic_vmap)(struct drm_framebuffer *fb);
> > 
> >  FWIW, the panic_vmap hook cannot work in general with the amdgpu/radeon
> >  drivers:
> > 
> >  Framebuffers are normally tiled, writing to them with the CPU results 
> >  in
> >  garbled output.
> > 
> > >>
> > >> In which case the driver needs to support the ->panic_draw_xy callback,
> > >> or maybe it's possible to make a generic helper for tiled buffers.

I've proposed somewhere else that we rename panic_vmap to panic_prepare,
and the vmap pointer to an abstract cookie. Then the driver can do
whatever it wants too, e.g. in ->panic_prepare it does a few trylocks to
get at the buffer and make sure it can set up a temporary pte to write
into it page-by-page. ->panic_draw_xy can then do whatever it needs to do,
using the opaque void *cookie.

And if the trylock fails you just return NULL from ->panic_prepare

And ->panic_cleanup would be just to clean up the mess for the validation
use-case when running this from debugfs.

> > >>
> >  With a discrete GPU having a large amount of VRAM, the framebuffer may
> >  not be directly CPU accessible at all.
> > 
> > >>
> > >> I would have been nice to know how Windows works around this.
> > >>
> > 
> >  There would need to be a mechanism for switching scanout to a linear,
> >  CPU accessible framebuffer.
> > >>>
> > >>> I suppose panic_vmap() could just provide a linear temp buffer
> > >>> to the panic handler, and panic_unmap() could copy the contents
> > >>> over to the real fb.
> > >>>
> > >>> That said, this approach of scribbling over the primary plane's
> > >>> framebuffer has some clear limitations:
> > >>> * something may overwrite the oops message before the user
> > >>>   can even read it
> > >>
> > >> When the dumper drm_panic_kmsg_dump() runs, the other CPU's should have
> > >> been stopped. See panic().
> > > 
> > > GPUs etc. may still be executing away.
> > > 
> > 
> > Would it be safe to stop it in a panic situation? It would ofc be bad to
> > crash the box even harder.
> 
> Some drivers/devices may have working (and hopefully even reliable)
> gpu reset, some may not.

I don't think touching the gpu is a good idea. Even disabling planes and
all that feels risky. And there's really not much working anymore in panic
context, we can't even schedule a worker/timer to redraw the panic output
a bit later.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Michel Dänzer
On 2019-03-12 6:15 p.m., Noralf Trønnes wrote:
> 
> 
> Den 12.03.2019 17.17, skrev Ville Syrjälä:
>> On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:
>>> On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:
 This adds support for outputting kernel messages on panic().
 A kernel message dumper is used to dump the log. The dumper iterates
 over each DRM device and it's crtc's to find suitable framebuffers.

 All the other dumpers are run before this one except mtdoops.
 Only atomic drivers are supported.

 Signed-off-by: Noralf Trønnes 
 ---
  [...]

 diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
 index f0b34c977ec5..f3274798ecfe 100644
 --- a/include/drm/drm_framebuffer.h
 +++ b/include/drm/drm_framebuffer.h
 @@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
 struct drm_file *file_priv, unsigned flags,
 unsigned color, struct drm_clip_rect *clips,
 unsigned num_clips);
 +
 +  /**
 +   * @panic_vmap:
 +   *
 +   * Optional callback for panic handling.
 +   *
 +   * For vmapping the selected framebuffer in a panic context. Must
 +   * be super careful about locking (only trylocking allowed).
 +   *
 +   * RETURNS:
 +   *
 +   * NULL if it didn't work out, otherwise an opaque cookie which is
 +   * passed to @panic_draw_xy. It can be anything: vmap area, structure
 +   * with more details, just a few flags, ...
 +   */
 +  void *(*panic_vmap)(struct drm_framebuffer *fb);
>>>
>>> FWIW, the panic_vmap hook cannot work in general with the amdgpu/radeon
>>> drivers:
>>>
>>> Framebuffers are normally tiled, writing to them with the CPU results in
>>> garbled output.
>>>
> 
> In which case the driver needs to support the ->panic_draw_xy callback,
> or maybe it's possible to make a generic helper for tiled buffers.

I'm afraid that won't help, at least not without porting big chunks of
https://gitlab.freedesktop.org/mesa/mesa/tree/master/src/amd/addrlib
into the kernel, none of which will be used for anything else.


>>> There would need to be a mechanism for switching scanout to a linear,
>>> CPU accessible framebuffer.
>>
>> I suppose panic_vmap() could just provide a linear temp buffer
>> to the panic handler, and panic_unmap() could copy the contents
>> over to the real fb.

Copy how? Using a GPU engine?


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set

2019-03-13 Thread Patchwork
== Series Details ==

Series: drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set
URL   : https://patchwork.freedesktop.org/series/57882/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5736 -> Patchwork_12441


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://patchwork.freedesktop.org/api/1.0/series/57882/revisions/1/mbox/

Known issues


  Here are the changes found in Patchwork_12441 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_basic@gtt-bsd2:
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] +57

  * igt@gem_ringfill@basic-default-fd:
- fi-elk-e7500:   NOTRUN -> SKIP [fdo#109271] +73

  * igt@kms_busy@basic-flip-a:
- fi-bsw-n3050:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1

  * igt@kms_busy@basic-flip-c:
- fi-blb-e6850:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-elk-e7500:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_chamelium@hdmi-crc-fast:
- fi-bsw-n3050:   NOTRUN -> SKIP [fdo#109271] +62

  * igt@kms_chamelium@vga-edid-read:
- fi-skl-6600u:   NOTRUN -> SKIP [fdo#109271] +41

  * igt@kms_frontbuffer_tracking@basic:
- fi-icl-u3:  PASS -> FAIL [fdo#103167]
- fi-byt-clapper: NOTRUN -> FAIL [fdo#103167]

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
- fi-byt-clapper: NOTRUN -> FAIL [fdo#103191] / [fdo#107362] +1

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-c:
- fi-blb-e6850:   NOTRUN -> SKIP [fdo#109271] +48

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
- fi-blb-e6850:   NOTRUN -> INCOMPLETE [fdo#107718]

  * igt@prime_vgem@basic-fence-flip:
- fi-gdg-551: PASS -> FAIL [fdo#103182] +1

  
 Possible fixes 

  * igt@i915_selftest@live_execlists:
- fi-apl-guc: INCOMPLETE [fdo#103927] / [fdo#109720] -> PASS

  * igt@kms_busy@basic-flip-a:
- fi-gdg-551: FAIL [fdo#103182] -> PASS

  
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109720]: https://bugs.freedesktop.org/show_bug.cgi?id=109720


Participating hosts (43 -> 42)
--

  Additional (5): fi-bsw-n3050 fi-elk-e7500 fi-blb-e6850 fi-byt-clapper 
fi-skl-6600u 
  Missing(6): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks 
fi-bsw-cyan fi-pnv-d510 


Build changes
-

* Linux: CI_DRM_5736 -> Patchwork_12441

  CI_DRM_5736: d0b266074ff4ac02e49b3a0af1781551f36bead3 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4882: b8d471f3483bf1482b7e46aefb91dab5b7a25110 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12441: a723f9b03d087c669c5d21931ed8939485a5603c @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a723f9b03d08 drm/i915: Stop needlessly acquiring wakeref for 
debugfs/drop_caches_set

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12441/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Noralf Trønnes


Den 12.03.2019 23.13, skrev Ahmed S. Darwish:
> Hi,
> 
> [[ CCing John for the trylock parts ]]
> 
> On Mon, Mar 11, 2019 at 11:33:15PM +0100, Noralf Trønnes wrote:
>>
>>
>> Den 11.03.2019 20.23, skrev Daniel Vetter:
>>> On Mon, Mar 11, 2019 at 06:42:16PM +0100, Noralf Trønnes wrote:
 This adds support for outputting kernel messages on panic().
 A kernel message dumper is used to dump the log. The dumper iterates
 over each DRM device and it's crtc's to find suitable framebuffers.

 All the other dumpers are run before this one except mtdoops.
 Only atomic drivers are supported.

 Signed-off-by: Noralf Trønnes 
>>>
>>> Bunch of comments/ideas for you or Darwish below, whoever picks this up.
>>
>> Actually it would be nice if Darwish could pick it up since he will do
>> it on i915 which will be useful to a much broader audience.
>> If not I'll respin when I'm done with the drm_fb_helper refactoring.
>>
> 
> Yup, I'll be more than happy to do this.. 

Thanks for doing that.

Noralf.

> while preserving all of
> Noralf's authorship and copyright notices of course.
> 
> I guess it can be:
> 
>   - Handle the comments posted by Daniel and others (I'll post
> some questions too)
> 
>   - Add the necessary i915 specific bits
> 
>   - Test, post v3/v4/../vn. Rinse and repeat. Keep it local at
> dri-devel until getting the necessary S-o-Bs.
> 
>   - Post to wider audience (some feedback from distribution folks
> would also be nice, before posting to lkml)
> 
> More comments below..
> 
> [...]
> 
 +
 +static void drm_panic_kmsg_dump(struct kmsg_dumper *dumper,
 +  enum kmsg_dump_reason reason)
 +{
 +  class_for_each_device(drm_class, NULL, dumper, drm_panic_dev_iter);
>>>
>>> class_for_each_device uses klist, which only uses an irqsave spinlock. I
>>> think that's good enough. Comment to that effect would be good e.g.
>>>
>>> /* based on klist, which uses only a spin_lock_irqsave, which we
>>>  * assume still works */
>>>
>>> If we aim for perfect this should be a trylock still, maybe using our own
>>> device list.
>>>
> 
> I definitely agree here.
> 
> The lock may already be locked either by a stopped CPU, or by the
> very same CPU we execute panic() on (e.g. NMI panic() on the
> printing CPU).
> 
> This is why it's very common for example in serial consoles, which
> are usually careful about re-entrance and panic contexts, to do:
> 
>   xx_console_write(...) {
>   if (oops_in_progress)
>   locked = spin_trylock_irqsave(&port->lock, flags);
>   else
>   spin_lock_irqsave(&port->lock, flags);
>   }
> 
> I'm quite positive we should do the same for panic drm drivers.
> John?
> 
 +}
 +
 +static struct kmsg_dumper drm_panic_kmsg_dumper = {
 +  .dump = drm_panic_kmsg_dump,
 +  .max_reason = KMSG_DUMP_PANIC,
 +};
 +
 +static ssize_t drm_panic_file_panic_write(struct file *file,
 +const char __user *user_buf,
 +size_t count, loff_t *ppos)
 +{
 +  unsigned long long val;
 +  char buf[24];
 +  size_t size;
 +  ssize_t ret;
 +
 +  size = min(sizeof(buf) - 1, count);
 +  if (copy_from_user(buf, user_buf, size))
 +  return -EFAULT;
 +
 +  buf[size] = '\0';
 +  ret = kstrtoull(buf, 0, &val);
 +  if (ret)
 +  return ret;
 +
 +  drm_panic_kmsg_dumper.max_reason = KMSG_DUMP_OOPS;
 +  wmb();
 +
 +  /* Do a real test with: echo c > /proc/sysrq-trigger */
 +
 +  if (val == 0) {
 +  pr_info("Test panic screen using kmsg_dump(OOPS)\n");
 +  kmsg_dump(KMSG_DUMP_OOPS);
 +  } else if (val == 1) {
 +  char *null_pointer = NULL;
 +
 +  pr_info("Test panic screen using NULL pointer dereference\n");
 +  *null_pointer = 1;
 +  } else {
 +  return -EINVAL;
 +  }
>>>
>>> This isn't quite what I had in mind, since it still kills the kernel (like
>>> sysrq-trigger).
>>
>> If val == 0, it doesn't kill the kernel, it only dumps the kernel log.
>> And it doesn't taint the kernel either.
>>
>>> Instead what I had in mind is to recreate the worst
>>> possible panic context as much as feasible (disabling interrupts should be
>>> a good start, maybe we can even do an nmi callback), and then call our
>>> panic implementation. That way we can test the panic handler in a
>>> non-destructive way (i.e. aside from last dmesg lines printed to the
>>> screen nothing bad happens to the kernel: No real panic, no oops, no
>>> tainting).
>>
>> The interrupt case I can do, nmi I have no idea.
>>
> 
> I agree too. Disabling interrupts + CONFIG_DEBUG_ATOMIC_SLEEP
> would be a nice non-destructive test-case emulation.
> 
> thanks!
> 
> --
> darwi
> http://darwish.chasingpointers.com
> 
___
Intel-gfx 

[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/selftests: Provide stub reset functions

2019-03-13 Thread Patchwork
== Series Details ==

Series: drm/i915/selftests: Provide stub reset functions
URL   : https://patchwork.freedesktop.org/series/57884/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5736 -> Patchwork_12442


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://patchwork.freedesktop.org/api/1.0/series/57884/revisions/1/mbox/

Known issues


  Here are the changes found in Patchwork_12442 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_basic@gtt-bsd2:
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] +57

  * igt@gem_exec_basic@readonly-vebox:
- fi-blb-e6850:   NOTRUN -> SKIP [fdo#109271] +27

  * igt@gem_exec_suspend@basic-s4-devices:
- fi-blb-e6850:   NOTRUN -> INCOMPLETE [fdo#107718]

  * igt@gem_ringfill@basic-default-fd:
- fi-elk-e7500:   NOTRUN -> SKIP [fdo#109271] +73

  * igt@i915_pm_rpm@basic-pci-d3-state:
- fi-bsw-kefka:   PASS -> SKIP [fdo#109271]

  * igt@i915_pm_rpm@basic-rte:
- fi-bsw-kefka:   PASS -> FAIL [fdo#108800]

  * igt@kms_busy@basic-flip-a:
- fi-bsw-n3050:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1

  * igt@kms_busy@basic-flip-c:
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-elk-e7500:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_chamelium@hdmi-crc-fast:
- fi-bsw-n3050:   NOTRUN -> SKIP [fdo#109271] +62

  * igt@kms_chamelium@vga-edid-read:
- fi-skl-6600u:   NOTRUN -> SKIP [fdo#109271] +41

  * igt@kms_frontbuffer_tracking@basic:
- fi-byt-clapper: NOTRUN -> FAIL [fdo#103167]

  * igt@prime_vgem@basic-fence-flip:
- fi-gdg-551: PASS -> DMESG-FAIL [fdo#103182]

  
 Warnings 

  * igt@i915_selftest@live_contexts:
- fi-icl-u3:  DMESG-FAIL [fdo#108569] -> INCOMPLETE [fdo#108569]

  
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108800]: https://bugs.freedesktop.org/show_bug.cgi?id=108800
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278


Participating hosts (43 -> 40)
--

  Additional (5): fi-bsw-n3050 fi-elk-e7500 fi-blb-e6850 fi-byt-clapper 
fi-skl-6600u 
  Missing(8): fi-kbl-soraka fi-ilk-m540 fi-bdw-5557u fi-hsw-4200u 
fi-byt-squawks fi-bsw-cyan fi-skl-6260u fi-icl-y 


Build changes
-

* Linux: CI_DRM_5736 -> Patchwork_12442

  CI_DRM_5736: d0b266074ff4ac02e49b3a0af1781551f36bead3 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4882: b8d471f3483bf1482b7e46aefb91dab5b7a25110 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12442: eafea28552384f0a3b190149d8c75ace11d7f8fe @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

eafea2855238 drm/i915/selftests: Provide stub reset functions

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12442/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 02/13] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-03-08 14:33:02)
> 
> On 08/03/2019 14:12, Chris Wilson wrote:
> > +int i915_user_extensions(struct i915_user_extension __user *ext,
> > +  const i915_user_extension_fn *tbl,
> > +  unsigned long count,
> > +  void *data)
> > +{
> > + unsigned int stackdepth = 512;
> 
> I have doubts about usefulness of trying to impose some limit now. And 
> also reservations about using the name stack. But both are irrelevant 
> implementation details at this stage so meh.

We need defence against malicious userspace doing
struct i915_user_extension ext = {
.next_extension = &ext,
};
so sadly some limit is required.

> > +
> > + while (ext) {
> > + int err;
> > + u64 x;
> > +
> > + if (!stackdepth--) /* recursion vs useful flexibility */
> > + return -EINVAL;
> > +
> > + if (get_user(x, &ext->name))
> > + return -EFAULT;
> > +
> > + err = -EINVAL;
> > + if (x < count && tbl[x])
> > + err = tbl[x](ext, data);
> 
> How about:
> 
> put_user(err, &ext->result);
> 
> And:
> 
> struct i915_user_extension {
> __u64 next_extension;
> __u64 name;
> __u32 result;
> __u32 mbz;
> };
> 
> So we add the ability for each extension to store its exit code giving 
> userspace opportunity to know which one failed.
> 
> With this I would be satisfied usability is future proof enough.

I'm sorely tempted. The biggest objection I have is this defeats the
elegance of a read-only chain. So who would actually use it?

err = gem_context_create_ext(&chain);
if (err) {
struct i915_user_extension *ext = (struct i915_user_extension *)chain;
while (ext && !ext->result)
ext = (struct i915_user_extension *)ext->next_extension;
if (ext)
fprintf(stderr, "context creation failed at extension: %lld", 
ext->name);
}

What exactly are they going to do? They are not going to do anything
like
while (err) {
ext = first_faulty_ext(&chain);
switch (ext->name) {
case ...:  do_fixup_A(ext);
}
err = gem_context_create_ext(&chain);
}

I'm not really seeing how they benefit over, and above, handling the
ioctl error by printing out the entire erroneous struct and chain, and
falling back to avoiding that ioctl.

I think what you really want is a per-application/fd debug log, so that
we can dump the actual errors as they arise (without leaking them into
the general syslog).
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 02/13] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Tvrtko Ursulin


On 13/03/2019 10:50, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2019-03-08 14:33:02)


On 08/03/2019 14:12, Chris Wilson wrote:

+int i915_user_extensions(struct i915_user_extension __user *ext,
+  const i915_user_extension_fn *tbl,
+  unsigned long count,
+  void *data)
+{
+ unsigned int stackdepth = 512;


I have doubts about usefulness of trying to impose some limit now. And
also reservations about using the name stack. But both are irrelevant
implementation details at this stage so meh.


We need defence against malicious userspace doing
struct i915_user_extension ext = {
.next_extension = &ext,
};
so sadly some limit is required.


Oh yes, good point. I wasn't thinking maliciously enough.

A possible alternative solution could be, in conjunction with the result 
field from below, to only allow visiting any extension once. It would 
require reserving some value as meaning "not visited". Probably zero, so 
non-zero in result would immediately fail the chain, but would also I 
think mean we only support negative values in result as output, mapping 
zeros to one.



+
+ while (ext) {
+ int err;
+ u64 x;
+
+ if (!stackdepth--) /* recursion vs useful flexibility */
+ return -EINVAL;
+
+ if (get_user(x, &ext->name))
+ return -EFAULT;
+
+ err = -EINVAL;
+ if (x < count && tbl[x])
+ err = tbl[x](ext, data);


How about:

 put_user(err, &ext->result);

And:

struct i915_user_extension {
 __u64 next_extension;
 __u64 name;
 __u32 result;
 __u32 mbz;
};

So we add the ability for each extension to store its exit code giving
userspace opportunity to know which one failed.

With this I would be satisfied usability is future proof enough.


I'm sorely tempted. The biggest objection I have is this defeats the
elegance of a read-only chain. So who would actually use it?

err = gem_context_create_ext(&chain);
if (err) {
struct i915_user_extension *ext = (struct i915_user_extension *)chain;
while (ext && !ext->result)
ext = (struct i915_user_extension *)ext->next_extension;
if (ext)
fprintf(stderr, "context creation failed at extension: %lld", 
ext->name);
}

What exactly are they going to do? They are not going to do anything
like
while (err) {
ext = first_faulty_ext(&chain);
switch (ext->name) {
case ...:  do_fixup_A(ext);
}
err = gem_context_create_ext(&chain);
}

I'm not really seeing how they benefit over, and above, handling the
ioctl error by printing out the entire erroneous struct and chain, and
falling back to avoiding that ioctl.

I think what you really want is a per-application/fd debug log, so that
we can dump the actual errors as they arise (without leaking them into
the general syslog).


Maybe.. could be an extension of the existing problem of "What EINVAL 
you mean exactly?" indeed.


I don't see a problem with writing back though?

Regards,

Tvrtko




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v6 3/3] drm/i915/icl: Implement half float formats

2019-03-13 Thread Maarten Lankhorst
Op 13-03-2019 om 08:25 schreef Maarten Lankhorst:
> Op 13-03-2019 om 01:38 schreef Kevin Strasser:
>> 64 bpp half float formats are supported on hdr planes only and are subject
>> to the following restrictions:
>>   * 90/270 rotation not supported
>>   * Yf Tiling not supported
>>   * Frame Buffer Compression not supported
>>   * Color Keying not supported
>>
>> v2:
>> - Drop handling pixel normalize register
>> - Don't use icl_is_hdr_plane too early
>>
>> v3:
>> - Use refactored icl_is_hdr_plane (Ville)
>> - Use u32 instead of uint32_t (Ville)
>>
>> v6:
>> - Rebase and fix merge conflicts
>> - Reorganize switch statements to keep RGB grouped separately from YUV
>>
>> Cc: Uma Shankar 
>> Cc: Shashank Sharma 
>> Cc: David Airlie 
>> Cc: Daniel Vetter 
>> Cc: dri-de...@lists.freedesktop.org
>> Signed-off-by: Kevin Strasser 
>> Reviewed-by: Ville Syrjälä 
>> Reviewed-by: Maarten Lankhorst 
>> Reviewed-by: Adam Jackson 
>> ---
>>  drivers/gpu/drm/i915/intel_display.c | 22 +++
>>  drivers/gpu/drm/i915/intel_sprite.c  | 72 
>> ++--
>>  2 files changed, 91 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_display.c 
>> b/drivers/gpu/drm/i915/intel_display.c
>> index 60fbe3a..eaedf91 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -2680,6 +2680,18 @@ int skl_format_to_fourcc(int format, bool rgb_order, 
>> bool alpha)
>>  return DRM_FORMAT_XBGR2101010;
>>  else
>>  return DRM_FORMAT_XRGB2101010;
>> +case PLANE_CTL_FORMAT_XRGB_16161616F:
>> +if (rgb_order) {
>> +if (alpha)
>> +return DRM_FORMAT_ABGR16161616F;
>> +else
>> +return DRM_FORMAT_XBGR16161616F;
>> +} else {
>> +if (alpha)
>> +return DRM_FORMAT_ARGB16161616F;
>> +else
>> +return DRM_FORMAT_XRGB16161616F;
>> +}
>>  }
>>  }
>>  
>> @@ -3575,6 +3587,12 @@ static u32 skl_plane_ctl_format(u32 pixel_format)
>>  return PLANE_CTL_FORMAT_XRGB_2101010;
>>  case DRM_FORMAT_XBGR2101010:
>>  return PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
>> +case DRM_FORMAT_XBGR16161616F:
>> +case DRM_FORMAT_ABGR16161616F:
>> +return PLANE_CTL_FORMAT_XRGB_16161616F | PLANE_CTL_ORDER_RGBX;
>> +case DRM_FORMAT_XRGB16161616F:
>> +case DRM_FORMAT_ARGB16161616F:
>> +return PLANE_CTL_FORMAT_XRGB_16161616F;
>>  case DRM_FORMAT_YUYV:
>>  return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
>>  case DRM_FORMAT_YVYU:
>> @@ -5143,6 +5161,10 @@ static int skl_update_scaler_plane(struct 
>> intel_crtc_state *crtc_state,
>>  case DRM_FORMAT_ARGB8888:
>>  case DRM_FORMAT_XRGB2101010:
>>  case DRM_FORMAT_XBGR2101010:
>> +case DRM_FORMAT_XBGR16161616F:
>> +case DRM_FORMAT_ABGR16161616F:
>> +case DRM_FORMAT_XRGB16161616F:
>> +case DRM_FORMAT_ARGB16161616F:
>>  case DRM_FORMAT_YUYV:
>>  case DRM_FORMAT_YVYU:
>>  case DRM_FORMAT_UYVY:
>> diff --git a/drivers/gpu/drm/i915/intel_sprite.c 
>> b/drivers/gpu/drm/i915/intel_sprite.c
>> index 622669f..e00559d 100644
>> --- a/drivers/gpu/drm/i915/intel_sprite.c
>> +++ b/drivers/gpu/drm/i915/intel_sprite.c
>> @@ -1508,8 +1508,6 @@ static int skl_plane_check_fb(const struct 
>> intel_crtc_state *crtc_state,
>>  /*
>>   * 90/270 is not allowed with RGB64 16:16:16:16 and
>>   * Indexed 8-bit. RGB 16-bit 5:6:5 is allowed gen11 onwards.
>> - * TBD: Add RGB64 case once its added in supported format
>> - * list.
>>   */
>>  switch (fb->format->format) {
>>  case DRM_FORMAT_RGB565:
>> @@ -1517,6 +1515,10 @@ static int skl_plane_check_fb(const struct 
>> intel_crtc_state *crtc_state,
>>  break;
>>  /* fall through */
>>  case DRM_FORMAT_C8:
>> +case DRM_FORMAT_XRGB16161616F:
>> +case DRM_FORMAT_XBGR16161616F:
>> +case DRM_FORMAT_ARGB16161616F:
>> +case DRM_FORMAT_ABGR16161616F:
>>  DRM_DEBUG_KMS("Unsupported pixel format %s for 
>> 90/270!\n",
>>drm_get_format_name(fb->format->format,
>>&format_name));
>> @@ -1837,6 +1839,31 @@ static const uint32_t icl_plane_formats[] = {
>>  DRM_FORMAT_Y416,
>>  };
>>  
>> +static const uint32_t icl_hdr_plane_formats[] = {
>> +DRM_FORMAT_C8,
>> +DRM_FORMAT_RGB565,
>> +DRM_FORMAT_XRGB8888,
>> +DRM_FORMAT_XBGR8888,
>> +DRM_FORMAT_ARGB8888,
>> +DRM_FORMAT_ABGR8888,
>> +DRM_FORMAT_XRGB2101010,
>> +DRM_FORMAT_XBGR2101010,
>> +DRM_FORMAT_XRGB1616161

Re: [Intel-gfx] [PATCH 02/13] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-03-13 11:13:10)
> 
> On 13/03/2019 10:50, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-03-08 14:33:02)
> >>
> >> On 08/03/2019 14:12, Chris Wilson wrote:
> >>> +int i915_user_extensions(struct i915_user_extension __user *ext,
> >>> +  const i915_user_extension_fn *tbl,
> >>> +  unsigned long count,
> >>> +  void *data)
> >>> +{
> >>> + unsigned int stackdepth = 512;
> >>
> >> I have doubts about usefulness of trying to impose some limit now. And
> >> also reservations about using the name stack. But both are irrelevant
> >> implementation details at this stage so meh.
> > 
> > We need defence against malicious userspace doing
> >   struct i915_user_extension ext = {
> >   .next_extension = &ext,
> >   };
> > so sadly some limit is required.
> 
> Oh yes, good point. I wasn't thinking maliciously enough.
> 
> A possible alternative solution could be, in conjunction with the result 
> field from below, to only allow visiting any extension once. It would 
> require reserving some value as meaning "not visited". Probably zero, so 
> non-zero in result would immediately fail the chain, but would also I 
> think mean we only support negative values in result as output, mapping 
> zeros to one.

I've avoided using the struct itself for markup so far.
Ugh, it would also mean that userspace has to sanitize the extension
chain between uses.

> >>> +
> >>> + while (ext) {
> >>> + int err;
> >>> + u64 x;
> >>> +
> >>> + if (!stackdepth--) /* recursion vs useful flexibility */
> >>> + return -EINVAL;
> >>> +
> >>> + if (get_user(x, &ext->name))
> >>> + return -EFAULT;
> >>> +
> >>> + err = -EINVAL;
> >>> + if (x < count && tbl[x])
> >>> + err = tbl[x](ext, data);
> >>
> >> How about:
> >>
> >>  put_user(err, &ext->result);
> >>
> >> And:
> >>
> >> struct i915_user_extension {
> >>  __u64 next_extension;
> >>  __u64 name;
> >>  __u32 result;
> >>  __u32 mbz;
> >> };
> >>
> >> So we add the ability for each extension to store its exit code giving
> >> userspace opportunity to know which one failed.
> >>
> >> With this I would be satisfied usability is future proof enough.
> > 
> > I'm sorely tempted. The biggest objection I have is this defeats the
> > elegance of a read-only chain. So who would actually use it?
> > 
> > err = gem_context_create_ext(&chain);
> > if (err) {
> >   struct i915_user_extension *ext = (struct i915_user_extension *)chain;
> >   while (ext && !ext->result)
> >   ext = (struct i915_user_extension *)ext->next_extension;
> >   if (ext)
> >   fprintf(stderr, "context creation failed at extension: %lld", 
> > ext->name);
> > }
> > 
> > What exactly are they going to do? They are not going to do anything
> > like
> >   while (err) {
> >   ext = first_faulty_ext(&chain);
> >   switch (ext->name) {
> >   case ...:  do_fixup_A(ext);
> >   }
> >   err = gem_context_create_ext(&chain);
> >   }
> > 
> > I'm not really seeing how they benefit over, and above, handling the
> > ioctl error by printing out the entire erroneous struct and chain, and
> > falling back to avoiding that ioctl.
> > 
> > I think what you really want is a per-application/fd debug log, so that
> > we can dump the actual errors as they arise (without leaking them into
> > the general syslog).
> 
> Maybe.. could be an extension of the existing problem of "What EINVAL 
> you mean exactly?" indeed.
> 
> I don't see a problem with writing back though?

Writing anything gives me the heebie-jeebies. If we keep it a read-only
struct, we can never be tricked into overwriting something important.

It also makes it harder for userspace to reuse as they have to clear the
result field?
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PULL] topic/hdr-formats

2019-03-13 Thread Maarten Lankhorst
Hey Sean and Joonas,

One more pull request for the hdr-formats topic branch. FP16 support
is now also implemented.

Can this be pulled to drm-misc-next and dinq?

~Maarten

topic/hdr-formats-2019-03-13:
Add support for floating point half-width formats.
The following changes since commit 296e9b19eff6157e1e4f130fa436e105c45725e9:

  drm/i915/icl: Enabling Y2xx and Y4xx (xx:10/12/16) formats for universal 
planes (2019-03-05 12:49:00 +0100)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-misc tags/topic/hdr-formats-2019-03-13

for you to fetch changes up to a94bed60cb73962f344ead14b2ee7613280432c6:

  drm/i915/icl: Implement half float formats (2019-03-13 11:23:12 +0100)


Add support for floating point half-width formats.


Kevin Strasser (3):
  drm/fourcc: Add 64 bpp half float formats
  drm/i915: Refactor icl_is_hdr_plane
  drm/i915/icl: Implement half float formats

 drivers/gpu/drm/drm_fourcc.c |  4 ++
 drivers/gpu/drm/i915/intel_atomic.c  |  3 +-
 drivers/gpu/drm/i915/intel_display.c | 29 +-
 drivers/gpu/drm/i915/intel_drv.h |  7 ++--
 drivers/gpu/drm/i915/intel_sprite.c  | 78 +---
 include/uapi/drm/drm_fourcc.h| 11 +
 6 files changed, 120 insertions(+), 12 deletions(-)
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 02/13] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Tvrtko Ursulin


On 13/03/2019 11:21, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2019-03-13 11:13:10)


On 13/03/2019 10:50, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2019-03-08 14:33:02)


On 08/03/2019 14:12, Chris Wilson wrote:

+int i915_user_extensions(struct i915_user_extension __user *ext,
+  const i915_user_extension_fn *tbl,
+  unsigned long count,
+  void *data)
+{
+ unsigned int stackdepth = 512;


I have doubts about usefulness of trying to impose some limit now. And
also reservations about using the name stack. But both are irrelevant
implementation details at this stage so meh.


We need defence against malicious userspace doing
   struct i915_user_extension ext = {
   .next_extension = &ext,
   };
so sadly some limit is required.


Oh yes, good point. I wasn't thinking maliciously enough.

A possible alternative solution could be, in conjunction with the result
field from below, to only allow visiting any extension once. It would
require reserving some value as meaning "not visited". Probably zero, so
non-zero in result would immediately fail the chain, but would also I
think mean we only support negative values in result as output, mapping
zeros to one.


I've avoided using the struct itself for markup so far.
Ugh, it would also mean that userspace has to sanitize the extension
chain between uses.


+
+ while (ext) {
+ int err;
+ u64 x;
+
+ if (!stackdepth--) /* recursion vs useful flexibility */
+ return -EINVAL;
+
+ if (get_user(x, &ext->name))
+ return -EFAULT;
+
+ err = -EINVAL;
+ if (x < count && tbl[x])
+ err = tbl[x](ext, data);


How about:

  put_user(err, &ext->result);

And:

struct i915_user_extension {
  __u64 next_extension;
  __u64 name;
  __u32 result;
  __u32 mbz;
};

So we add the ability for each extension to store its exit code giving
userspace opportunity to know which one failed.

With this I would be satisfied usability is future proof enough.


I'm sorely tempted. The biggest objection I have is this defeats the
elegance of a read-only chain. So who would actually use it?

err = gem_context_create_ext(&chain);
if (err) {
   struct i915_user_extension *ext = (struct i915_user_extension *)chain;
   while (ext && !ext->result)
   ext = (struct i915_user_extension *)ext->next_extension;
   if (ext)
   fprintf(stderr, "context creation failed at extension: %lld", 
ext->name);
}

What exactly are they going to do? They are not going to do anything
like
   while (err) {
   ext = first_faulty_ext(&chain);
   switch (ext->name) {
   case ...:  do_fixup_A(ext);
   }
   err = gem_context_create_ext(&chain);
   }

I'm not really seeing how they benefit over, and above, handling the
ioctl error by printing out the entire erroneous struct and chain, and
falling back to avoiding that ioctl.

I think what you really want is a per-application/fd debug log, so that
we can dump the actual errors as they arise (without leaking them into
the general syslog).


Maybe.. could be an extension of the existing problem of "What EINVAL
you mean exactly?" indeed.

I don't see a problem with writing back though?


Writing anything gives me the heebie-jeebies. If we keep it a read-only
struct, we can never be tricked into overwriting something important.

It also makes it harder for userspace to reuse as they have to clear the
result field?


Yeah.. nothing then.

Shall we only reserve some space with a flags and some rsvd fields just 
in case it will need to change/grow?


Regards,

Tvrtko





___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 02/13] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-03-13 11:35:55)
[snip]
> Shall we only reserve some space with a flags and some rsvd fields just 
> in case it will need to change/grow?

The only thing that occurs to me is to exchange the next pointer with a
table of next[] (C++ here we come). But I ask myself, could any
extension like not be part of the next layer?

That is if any particular extension needs to chain up to more than one
iface, it can call each itself:

struct hypothetical_extension {
struct i915_user_extension base;

u64 iface1_extension;
u64 iface2_extension;
...
u64 ifaceN_extension;
}

? So far I haven't thought of anything I can't weasel my way out by
punting the problem to the caller :)
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set

2019-03-13 Thread Patchwork
== Series Details ==

Series: drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set
URL   : https://patchwork.freedesktop.org/series/57882/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5736_full -> Patchwork_12441_full


Summary
---

  **WARNING**

  Minor unknown changes coming with Patchwork_12441_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12441_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_12441_full:

### IGT changes ###

 Warnings 

  * igt@i915_suspend@forcewake:
- shard-hsw:  ( 2 PASS ) -> PASS +50

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_mmap_gtt@forked-medium-copy-odd:
- {shard-iclb}:   NOTRUN -> INCOMPLETE

  * {igt@kms_atomic@plane_primary_overlay_zpos}:
- {shard-iclb}:   NOTRUN -> SKIP

  * {igt@kms_plane@pixel-format-pipe-c-planes}:
- {shard-iclb}:   INCOMPLETE [fdo#107713] -> FAIL

  
Known issues


  Here are the changes found in Patchwork_12441_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_ctx_param@invalid-param-set:
- shard-snb:  NOTRUN -> FAIL [fdo#109674]

  * igt@i915_pm_rpm@gem-execbuf-stress-extra-wait:
- shard-snb:  NOTRUN -> SKIP [fdo#109271] +220

  * igt@i915_pm_rpm@modeset-lpsp:
- shard-skl:  PASS -> INCOMPLETE [fdo#107807]

  * igt@kms_atomic_transition@3x-modeset-transitions-nonblocking:
- shard-snb:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +21

  * igt@kms_atomic_transition@4x-modeset-transitions-nonblocking:
- shard-hsw:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_busy@basic-modeset-f:
- shard-glk:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1

  * igt@kms_busy@extended-modeset-hang-oldfb-render-e:
- shard-skl:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +5
- shard-apl:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1

  * igt@kms_busy@extended-pageflip-hang-newfb-render-a:
- shard-apl:  NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_busy@extended-pageflip-hang-oldfb-render-d:
- shard-kbl:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-b:
- shard-glk:  NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_ccs@pipe-b-crc-sprite-planes-basic:
- shard-apl:  PASS -> FAIL [fdo#107725] / [fdo#108145]

  * igt@kms_chamelium@hdmi-crc-fast:
- shard-kbl:  NOTRUN -> SKIP [fdo#109271] +19

  * igt@kms_color@pipe-b-degamma:
- shard-apl:  PASS -> FAIL [fdo#104782]

  * igt@kms_cursor_crc@cursor-128x42-random:
- shard-glk:  NOTRUN -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-256x256-suspend:
- shard-apl:  PASS -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_cursor_crc@cursor-256x85-sliding:
- shard-skl:  NOTRUN -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x21-sliding:
- shard-apl:  PASS -> FAIL [fdo#103232] +6

  * igt@kms_cursor_crc@cursor-64x64-suspend:
- shard-skl:  PASS -> INCOMPLETE [fdo#104108]

  * igt@kms_cursor_crc@cursor-alpha-opaque:
- shard-apl:  PASS -> FAIL [fdo#109350]

  * igt@kms_draw_crc@draw-method-xrgb2101010-mmap-gtt-xtiled:
- shard-skl:  PASS -> FAIL [fdo#103184]

  * igt@kms_fbcon_fbt@psr-suspend:
- shard-skl:  NOTRUN -> FAIL [fdo#103833]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-onoff:
- shard-apl:  PASS -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@fbc-1p-rte:
- shard-glk:  PASS -> FAIL [fdo#103167] / [fdo#105682]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-draw-blt:
- shard-glk:  PASS -> FAIL [fdo#103167] +7

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-indfb-draw-blt:
- shard-skl:  NOTRUN -> SKIP [fdo#109271] +57

  * igt@kms_frontbuffer_tracking@fbcpsr-stridechange:
- shard-skl:  NOTRUN -> FAIL [fdo#105683]

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
- shard-kbl:  PASS -> INCOMPLETE [fdo#103665]

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
- shard-skl:  NOTRUN -> FAIL [fdo#107815] / [fdo#108145] +1

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
- shard-glk:  NOTRUN -> FAIL [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
- shard-skl:  PASS -> FAIL [fdo#107815]

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-x:

Re: [Intel-gfx] [PATCH 02/13] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Tvrtko Ursulin


On 13/03/2019 11:46, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2019-03-13 11:35:55)
[snip]

Shall we only reserve some space with a flags and some rsvd fields just
in case it will need to change/grow?


The only thing that occurs to me is to exchange the next pointer with a
table of next[] (C++ here we come). But I ask myself, could any
extension like not be part of the next layer?

That is if any particular extension needs to chain up to more than one
iface, it can call each itself:

struct hypothetical_extension {
struct i915_user_extension base;

u64 iface1_extension;
u64 iface2_extension;
...
u64 ifaceN_extension;
}

? So far I haven't thought of anything I can't weasel my way out by
punting the problem to the caller :)


Just to make sure we are on the same page, I was thinking of:

struct i915_user_extension {
__u64 next_extension;
__u64 name;
__u32 flags;
__u32 rsvd[7];
};

So we could add things like:

/* Store each extension return code in rsvd[0]. */
#define I915_USER_EXTENSION_STORE_RESULT (1)

/* Only check whether extensions are known by the driver. */
#define I915_USER_EXTENSION_DRY_RUN (2)

And things like that. Because we are putting in a generic extension 
mechanism I am worried that if it itself turns out to have some 
limitation we will not have wiggle room to extend it.


Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 02/13] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-03-13 13:11:09)
> 
> On 13/03/2019 11:46, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-03-13 11:35:55)
> > [snip]
> >> Shall we only reserve some space with a flags and some rsvd fields just
> >> in case it will need to change/grow?
> > 
> > The only thing that occurs to me is to exchange the next pointer with a
> > table of next[] (C++ here we come). But I ask myself, could any
> > extension like not be part of the next layer?
> > 
> > That is if any particular extension needs to chain up to more than one
> > iface, it can call each itself:
> > 
> > struct hypothetical_extension {
> >   struct i915_user_extension base;
> > 
> >   u64 iface1_extension;
> >   u64 iface2_extension;
> >   ...
> >   u64 ifaceN_extension;
> > }
> > 
> > ? So far I haven't thought of anything I can't weasel my way out by
> > punting the problem to the caller :)
> 
> Just to make sure we are on the same page, I was thinking of:
> 
> struct i915_user_extension {
> __u64 next_extension;
> __u64 name;
> __u32 flags;
> __u32 rsvd[7];
> };
> 
> So we could add things like:
> 
> /* Store each extension return code in rsvd[0]. */
> #define I915_USER_EXTENSION_STORE_RESULT (1)
> 
> /* Only check whether extensions are known by the driver. */
> #define I915_USER_EXTENSION_DRY_RUN (2)
> 
> And things like that. Because we are putting in a generic extension 
> mechanism I am worried that if it itself turns out to have some 
> limitation we will not have wiggle room to extend it.

u64 next;
u32 name;
u32 flags;
u32 rsvd[4];

Maybe... That's a cacheline.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [CI] drm/i915: Introduce a context barrier callback

2019-03-13 Thread Jani Nikula
On Sat, 09 Mar 2019, Chris Wilson  wrote:
> In the next patch, we will want to update live state within a context.
> As this state may be in use by the GPU and we haven't been explicitly
> tracking its activity, we instead attach it to a request we send down
> the context setup with its new state and on retiring that request
> cleanup the old state as we then know that it is no longer live.
>
> Signed-off-by: Chris Wilson 
> Reviewed-by: Tvrtko Ursulin 

This, or more precisely commit 85fddf0b0027 ("drm/i915: Introduce a
context barrier callback"), breaks build for

CONFIG_DRM_I915_WERROR=y
CONFIG_DRM_I915_SELFTEST=n

with

  CC [M]  drivers/gpu/drm/i915/i915_gem_context.o
drivers/gpu/drm/i915/i915_gem_context.c:698:12: error: ‘context_barrier_task’ 
defined but not used [-Werror=unused-function]
 static int context_barrier_task(struct i915_gem_context *ctx,
^~~~

Please fix.


BR,
Jani.

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c   |  74 
>  .../gpu/drm/i915/selftests/i915_gem_context.c | 106 ++
>  2 files changed, 180 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index f9a21a891aa4..b6370225dcb5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -677,6 +677,80 @@ last_request_on_engine(struct i915_timeline *timeline,
>   return NULL;
>  }
>  
> +struct context_barrier_task {
> + struct i915_active base;
> + void (*task)(void *data);
> + void *data;
> +};
> +
> +static void cb_retire(struct i915_active *base)
> +{
> + struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
> +
> + if (cb->task)
> + cb->task(cb->data);
> +
> + i915_active_fini(&cb->base);
> + kfree(cb);
> +}
> +
> +I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
> +static int context_barrier_task(struct i915_gem_context *ctx,
> + unsigned long engines,
> + void (*task)(void *data),
> + void *data)
> +{
> + struct drm_i915_private *i915 = ctx->i915;
> + struct context_barrier_task *cb;
> + struct intel_context *ce;
> + intel_wakeref_t wakeref;
> + int err = 0;
> +
> + lockdep_assert_held(&i915->drm.struct_mutex);
> + GEM_BUG_ON(!task);
> +
> + cb = kmalloc(sizeof(*cb), GFP_KERNEL);
> + if (!cb)
> + return -ENOMEM;
> +
> + i915_active_init(i915, &cb->base, cb_retire);
> + i915_active_acquire(&cb->base);
> +
> + wakeref = intel_runtime_pm_get(i915);
> + list_for_each_entry(ce, &ctx->active_engines, active_link) {
> + struct intel_engine_cs *engine = ce->engine;
> + struct i915_request *rq;
> +
> + if (!(ce->engine->mask & engines))
> + continue;
> +
> + if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
> +engine->mask)) {
> + err = -ENXIO;
> + break;
> + }
> +
> + rq = i915_request_alloc(engine, ctx);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + break;
> + }
> +
> + err = i915_active_ref(&cb->base, rq->fence.context, rq);
> + i915_request_add(rq);
> + if (err)
> + break;
> + }
> + intel_runtime_pm_put(i915, wakeref);
> +
> + cb->task = err ? NULL : task; /* caller needs to unwind instead */
> + cb->data = data;
> +
> + i915_active_release(&cb->base);
> +
> + return err;
> +}
> +
>  int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
> unsigned long mask)
>  {
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c 
> b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 5b8614b2fbe4..4399ef9ebf15 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1594,10 +1594,116 @@ static int igt_switch_to_kernel_context(void *arg)
>   return err;
>  }
>  
> +static void mock_barrier_task(void *data)
> +{
> + unsigned int *counter = data;
> +
> + ++*counter;
> +}
> +
> +static int mock_context_barrier(void *arg)
> +{
> +#undef pr_fmt
> +#define pr_fmt(x) "context_barrier_task():" # x
> + struct drm_i915_private *i915 = arg;
> + struct i915_gem_context *ctx;
> + struct i915_request *rq;
> + intel_wakeref_t wakeref;
> + unsigned int counter;
> + int err;
> +
> + /*
> +  * The context barrier provides us with a callback after it emits
> +  * a request; useful for retiring old state after loading new.
> +  */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> +
> + ctx = mock_context(i915, "mock");
> + if (IS_ERR(ctx)) {
> 

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Ville Syrjälä
On Wed, Mar 13, 2019 at 10:35:08AM +0100, Michel Dänzer wrote:
> On 2019-03-12 6:15 p.m., Noralf Trønnes wrote:
> > 
> > 
> > Den 12.03.2019 17.17, skrev Ville Syrjälä:
> >> On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:
> >>> On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:
>  This adds support for outputting kernel messages on panic().
>  A kernel message dumper is used to dump the log. The dumper iterates
>  over each DRM device and it's crtc's to find suitable framebuffers.
> 
>  All the other dumpers are run before this one except mtdoops.
>  Only atomic drivers are supported.
> 
>  Signed-off-by: Noralf Trønnes 
>  ---
>   [...]
> 
>  diff --git a/include/drm/drm_framebuffer.h 
>  b/include/drm/drm_framebuffer.h
>  index f0b34c977ec5..f3274798ecfe 100644
>  --- a/include/drm/drm_framebuffer.h
>  +++ b/include/drm/drm_framebuffer.h
>  @@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
>    struct drm_file *file_priv, unsigned flags,
>    unsigned color, struct drm_clip_rect *clips,
>    unsigned num_clips);
>  +
>  +/**
>  + * @panic_vmap:
>  + *
>  + * Optional callback for panic handling.
>  + *
>  + * For vmapping the selected framebuffer in a panic context. 
>  Must
>  + * be super careful about locking (only trylocking allowed).
>  + *
>  + * RETURNS:
>  + *
>  + * NULL if it didn't work out, otherwise an opaque cookie which 
>  is
>  + * passed to @panic_draw_xy. It can be anything: vmap area, 
>  structure
>  + * with more details, just a few flags, ...
>  + */
>  +void *(*panic_vmap)(struct drm_framebuffer *fb);
> >>>
> >>> FWIW, the panic_vmap hook cannot work in general with the amdgpu/radeon
> >>> drivers:
> >>>
> >>> Framebuffers are normally tiled, writing to them with the CPU results in
> >>> garbled output.
> >>>
> > 
> > In which case the driver needs to support the ->panic_draw_xy callback,
> > or maybe it's possible to make a generic helper for tiled buffers.
> 
> I'm afraid that won't help, at least not without porting big chunks of
> https://gitlab.freedesktop.org/mesa/mesa/tree/master/src/amd/addrlib
> into the kernel, none of which will be used for anything else.
> 
> 
> >>> There would need to be a mechanism for switching scanout to a linear,
> >>> CPU accessible framebuffer.
> >>
> >> I suppose panic_vmap() could just provide a linear temp buffer
> >> to the panic handler, and panic_unmap() could copy the contents
> >> over to the real fb.
> 
> Copy how? Using a GPU engine?

CPU maybe? Though I suppose that won't work if the buffer isn't CPU
accesible :/

-- 
Ville Syrjälä
Intel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Christian König

Am 13.03.19 um 14:31 schrieb Ville Syrjälä:

On Wed, Mar 13, 2019 at 10:35:08AM +0100, Michel Dänzer wrote:

On 2019-03-12 6:15 p.m., Noralf Trønnes wrote:


Den 12.03.2019 17.17, skrev Ville Syrjälä:

On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:

On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:

This adds support for outputting kernel messages on panic().
A kernel message dumper is used to dump the log. The dumper iterates
over each DRM device and it's crtc's to find suitable framebuffers.

All the other dumpers are run before this one except mtdoops.
Only atomic drivers are supported.

Signed-off-by: Noralf Trønnes 
---
  [...]

diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index f0b34c977ec5..f3274798ecfe 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
 struct drm_file *file_priv, unsigned flags,
 unsigned color, struct drm_clip_rect *clips,
 unsigned num_clips);
+
+   /**
+* @panic_vmap:
+*
+* Optional callback for panic handling.
+*
+* For vmapping the selected framebuffer in a panic context. Must
+* be super careful about locking (only trylocking allowed).
+*
+* RETURNS:
+*
+* NULL if it didn't work out, otherwise an opaque cookie which is
+* passed to @panic_draw_xy. It can be anything: vmap area, structure
+* with more details, just a few flags, ...
+*/
+   void *(*panic_vmap)(struct drm_framebuffer *fb);

FWIW, the panic_vmap hook cannot work in general with the amdgpu/radeon
drivers:

Framebuffers are normally tiled, writing to them with the CPU results in
garbled output.


In which case the driver needs to support the ->panic_draw_xy callback,
or maybe it's possible to make a generic helper for tiled buffers.

I'm afraid that won't help, at least not without porting big chunks of
https://gitlab.freedesktop.org/mesa/mesa/tree/master/src/amd/addrlib
into the kernel, none of which will be used for anything else.



There would need to be a mechanism for switching scanout to a linear,
CPU accessible framebuffer.

I suppose panic_vmap() could just provide a linear temp buffer
to the panic handler, and panic_unmap() could copy the contents
over to the real fb.

Copy how? Using a GPU engine?

CPU maybe? Though I suppose that won't work if the buffer isn't CPU
accessible :/


Well we do have a debug path for accessing invisible memory with the CPU.

E.g. three registers: DATA and auto increment OFFSET_LO/HI. So you can 
just read/write DATA over and over again if you want to access some memory.


But turning off tiling etc. is still extremely tricky when the system is 
already unstable.


I mean from two miles high it looks like a nice to have feature, but up 
close is a different picture...


Christian.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 05/17] drm/i915/selftests: Provide stub reset functions

2019-03-13 Thread Chris Wilson
If a test fails, we quite often mark the device as wedged. Provide the
stub functions so that we can wedge the mock device, and avoid exploding
on test failures.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109981
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/selftests/mock_engine.c | 36 
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c 
b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 639d36eb904a..61744819172b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -198,6 +198,37 @@ static void mock_submit_request(struct i915_request 
*request)
spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
 
+static void mock_reset_prepare(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+{
+   GEM_BUG_ON(stalled);
+}
+
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_cancel_requests(struct intel_engine_cs *engine)
+{
+   struct i915_request *request;
+   unsigned long flags;
+
+   spin_lock_irqsave(&engine->timeline.lock, flags);
+
+   /* Mark all submitted requests as skipped. */
+   list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+   if (!i915_request_signaled(request))
+   dma_fence_set_error(&request->fence, -EIO);
+
+   i915_request_mark_complete(request);
+   }
+
+   spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
const char *name,
int id)
@@ -223,6 +254,11 @@ struct intel_engine_cs *mock_engine(struct 
drm_i915_private *i915,
engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
engine->base.submit_request = mock_submit_request;
 
+   engine->base.reset.prepare = mock_reset_prepare;
+   engine->base.reset.reset = mock_reset;
+   engine->base.reset.finish = mock_reset_finish;
+   engine->base.cancel_requests = mock_cancel_requests;
+
if (i915_timeline_init(i915,
   &engine->base.timeline,
   engine->base.name,
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 15/17] drm/i915: Extend execution fence to support a callback

2019-03-13 Thread Chris Wilson
In the next patch, we will want to configure the slave request
depending on which physical engine the master request is executed on.
For this, we introduce a callback from the execute fence to convey this
information.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_request.c | 84 +++--
 drivers/gpu/drm/i915/i915_request.h |  4 ++
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 2382339172b4..0a46f8113f5c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -38,6 +38,8 @@ struct execute_cb {
struct list_head link;
struct irq_work work;
struct i915_sw_fence *fence;
+   void (*hook)(struct i915_request *rq, struct dma_fence *signal);
+   struct i915_request *signal;
 };
 
 static struct i915_global_request {
@@ -343,6 +345,17 @@ static void irq_execute_cb(struct irq_work *wrk)
kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+   struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+   cb->hook(container_of(cb->fence, struct i915_request, submit),
+&cb->signal->fence);
+   i915_request_put(cb->signal);
+
+   irq_execute_cb(wrk);
+}
+
 static void __notify_execute_cb(struct i915_request *rq)
 {
struct execute_cb *cb;
@@ -369,14 +382,19 @@ static void __notify_execute_cb(struct i915_request *rq)
 }
 
 static int
-i915_request_await_execution(struct i915_request *rq,
-struct i915_request *signal,
-gfp_t gfp)
+__i915_request_await_execution(struct i915_request *rq,
+  struct i915_request *signal,
+  void (*hook)(struct i915_request *rq,
+   struct dma_fence *signal),
+  gfp_t gfp)
 {
struct execute_cb *cb;
 
-   if (i915_request_is_active(signal))
+   if (i915_request_is_active(signal)) {
+   if (hook)
+   hook(rq, &signal->fence);
return 0;
+   }
 
cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
if (!cb)
@@ -386,8 +404,18 @@ i915_request_await_execution(struct i915_request *rq,
i915_sw_fence_await(cb->fence);
init_irq_work(&cb->work, irq_execute_cb);
 
+   if (hook) {
+   cb->hook = hook;
+   cb->signal = i915_request_get(signal);
+   cb->work.func = irq_execute_cb_hook;
+   }
+
spin_lock_irq(&signal->lock);
if (i915_request_is_active(signal)) {
+   if (hook) {
+   hook(rq, &signal->fence);
+   i915_request_put(signal);
+   }
i915_sw_fence_complete(cb->fence);
kmem_cache_free(global.slab_execute_cbs, cb);
} else {
@@ -790,7 +818,7 @@ emit_semaphore_wait(struct i915_request *to,
return err;
 
/* Only submit our spinner after the signaler is running! */
-   err = i915_request_await_execution(to, from, gfp);
+   err = __i915_request_await_execution(to, from, NULL, gfp);
if (err)
return err;
 
@@ -910,6 +938,52 @@ i915_request_await_dma_fence(struct i915_request *rq, 
struct dma_fence *fence)
return 0;
 }
 
+int
+i915_request_await_execution(struct i915_request *rq,
+struct dma_fence *fence,
+void (*hook)(struct i915_request *rq,
+ struct dma_fence *signal))
+{
+   struct dma_fence **child = &fence;
+   unsigned int nchild = 1;
+   int ret;
+
+   if (dma_fence_is_array(fence)) {
+   struct dma_fence_array *array = to_dma_fence_array(fence);
+
+   /* XXX Error for signal-on-any fence arrays */
+
+   child = array->fences;
+   nchild = array->num_fences;
+   GEM_BUG_ON(!nchild);
+   }
+
+   do {
+   fence = *child++;
+   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+   continue;
+
+   /*
+* We don't squash repeated fence dependencies here as we
+* want to run our callback in all cases.
+*/
+
+   if (dma_fence_is_i915(fence))
+   ret = __i915_request_await_execution(rq,
+to_request(fence),
+hook,
+I915_FENCE_GFP);
+   else
+   ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+   I915_FENCE_TIMEOUT,
+ 

[Intel-gfx] [PATCH 09/17] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction

2019-03-13 Thread Chris Wilson
It can be useful to have a single ioctl to create a context with all
the initial parameters instead of a series of create + setparam + setparam
ioctls. This extension to create context allows any of the parameters
to be passed in as a linked list to be applied to the newly constructed
context.

v2: Make a local copy of user setparam (Tvrtko)
v3: Use flags to detect availability of extension interface

Signed-off-by: Chris Wilson 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c |   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c | 439 +---
 include/uapi/drm/i915_drm.h | 180 +-
 3 files changed, 342 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5d53efc4c5d9..93e41c937d96 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3110,7 +3110,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, 
intel_sprite_set_colorkey_ioctl, DRM_MASTER),
DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
-   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, 
i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, 
i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, 
i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, 
i915_gem_context_reset_stats_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 71464ae91d61..07c097ad83ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
@@ -1094,192 +1095,6 @@ static int set_ppgtt(struct i915_gem_context *ctx,
return err;
 }
 
-static bool client_is_banned(struct drm_i915_file_private *file_priv)
-{
-   return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
-}
-
-int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
-   struct drm_i915_private *i915 = to_i915(dev);
-   struct drm_i915_gem_context_create *args = data;
-   struct drm_i915_file_private *file_priv = file->driver_priv;
-   struct i915_gem_context *ctx;
-   int ret;
-
-   if (!DRIVER_CAPS(i915)->has_logical_contexts)
-   return -ENODEV;
-
-   if (args->pad != 0)
-   return -EINVAL;
-
-   ret = i915_terminally_wedged(i915);
-   if (ret)
-   return ret;
-
-   if (client_is_banned(file_priv)) {
-   DRM_DEBUG("client %s[%d] banned from creating ctx\n",
- current->comm,
- pid_nr(get_task_pid(current, PIDTYPE_PID)));
-
-   return -EIO;
-   }
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
-
-   ctx = i915_gem_create_context(i915, file_priv);
-   mutex_unlock(&dev->struct_mutex);
-   if (IS_ERR(ctx))
-   return PTR_ERR(ctx);
-
-   GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
-
-   args->ctx_id = ctx->user_handle;
-   DRM_DEBUG("HW context %d created\n", args->ctx_id);
-
-   return 0;
-}
-
-int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
-  struct drm_file *file)
-{
-   struct drm_i915_gem_context_destroy *args = data;
-   struct drm_i915_file_private *file_priv = file->driver_priv;
-   struct i915_gem_context *ctx;
-   int ret;
-
-   if (args->pad != 0)
-   return -EINVAL;
-
-   if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
-   return -ENOENT;
-
-   ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-   if (!ctx)
-   return -ENOENT;
-
-   ret = mutex_lock_interruptible(&dev->struct_mutex);
-   if (ret)
-   goto out;
-
-   __destroy_hw_context(ctx, file_priv);
-   mutex_unlock(&dev->struct_mutex);
-
-out:
-   i915_gem_context_put(ctx);
-   return 0;
-}
-
-static int get_sseu(struct i915_gem_context *ctx,
-   struct drm_i915_gem_context_param *args)
-{
-   struct drm_i915_gem_context_param_sseu user_sseu;
-   struct intel_engine_cs *engine;
-   struct intel_context *ce;
-
-   if (args->size == 0)
-   goto out;
-   else if (args->size < sizeof(user_sseu))
-   return -EINVAL;
-
-   if (copy_from_user(&user_sseu, u64_to

[Intel-gfx] [PATCH 17/17] drm/i915: Allow specification of parallel execbuf

2019-03-13 Thread Chris Wilson
There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate secondary execbuf to only become ready simultaneously with
the first, so that with all things idle the second execbufs are executed
in parallel with the first. The key difference here between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement).

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherit all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.

Suggested-by: Tvrtko Ursulin 
Testcase: igt/gem_exec_fence/parallel
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +-
 include/uapi/drm/i915_drm.h| 17 ++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 93e41c937d96..afdfced262e6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -421,6 +421,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
case I915_PARAM_HAS_EXEC_CAPTURE:
case I915_PARAM_HAS_EXEC_BATCH_FIRST:
case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+   case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
/* For the time being all of these are always true;
 * if some supported hardware does not have one of these
 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 70a26f0a9f1e..064c649f3f46 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2282,6 +2282,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
+   struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL;
intel_wakeref_t wakeref;
int out_fence_fd = -1;
@@ -2325,11 +2326,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+   if (in_fence) {
+   err = -EINVAL;
+   goto err_in_fence;
+   }
+
+   exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+   if (!exec_fence) {
+   err = -EINVAL;
+   goto err_in_fence;
+   }
+   }
+
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
err = out_fence_fd;
-   goto err_in_fence;
+   goto err_exec_fence;
}
}
 
@@ -2461,6 +2475,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_request;
}
 
+   if (exec_fence) {
+   err = i915_request_await_execution(eb.request, exec_fence,
+  eb.engine->bond_execute);
+   if (err < 0)
+   goto err_request;
+   }
+
if (fences) {
err = await_fence_array(&eb, fences);
if (err)
@@ -2521,6 +2542,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_out_fence:
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
+err_exec_fence:
+   dma_fence_put(exec_fence);
 err_in_fence:
dma_fence_put(in_fence);
return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ed33b8af8692..dbab4d365a6d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -593,6 +593,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT   52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1115,7 +1121,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I

[Intel-gfx] [PATCH 12/17] drm/i915: Allow a context to define its set of engines

2019-03-13 Thread Chris Wilson
Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, that may be sparse and
even be heterogeneous within a class (not all video decoders created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, order fully
controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
execbuf. Its use will be revealed in the next patch.

v2: Fixup freeing of local on success of get_engines()
v3: Allow empty engines[]

Testcase: igt/gem_ctx_engines
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem_context.c   | 223 +-
 drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c|  19 +-
 drivers/gpu/drm/i915/i915_utils.h |  23 ++
 include/uapi/drm/i915_drm.h   |  42 +++-
 5 files changed, 298 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index bac548584091..07377b75b563 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,7 +86,9 @@
  */
 
 #include 
+
 #include 
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
@@ -101,6 +103,21 @@ static struct i915_global_gem_context {
struct kmem_cache *slab_luts;
 } global;
 
+static struct intel_engine_cs *
+lookup_user_engine(struct i915_gem_context *ctx,
+  unsigned long flags, u16 class, u16 instance)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+   if (flags & LOOKUP_USER_INDEX) {
+   if (instance >= ctx->nengine)
+   return NULL;
+
+   return ctx->engines[instance];
+   }
+
+   return intel_engine_lookup_user(ctx->i915, class, instance);
+}
+
 struct i915_lut_handle *i915_lut_handle_alloc(void)
 {
return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
@@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context 
*ctx)
release_hw_id(ctx);
i915_ppgtt_put(ctx->ppgtt);
 
+   kfree(ctx->engines);
+
rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
intel_context_put(it);
 
@@ -1371,9 +1390,9 @@ static int set_sseu(struct i915_gem_context *ctx,
if (user_sseu.flags || user_sseu.rsvd)
return -EINVAL;
 
-   engine = intel_engine_lookup_user(i915,
- user_sseu.engine_class,
- user_sseu.engine_instance);
+   engine = lookup_user_engine(ctx, 0,
+   user_sseu.engine_class,
+   user_sseu.engine_instance);
if (!engine)
return -EINVAL;
 
@@ -1391,9 +1410,163 @@ static int set_sseu(struct i915_gem_context *ctx,
 
args->size = sizeof(user_sseu);
 
+   return 0;
+};
+
+struct set_engines {
+   struct i915_gem_context *ctx;
+   struct intel_engine_cs **engines;
+   unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+   const struct drm_i915_gem_context_param *args)
+{
+   struct i915_context_param_engines __user *user;
+   struct set_engines set = { .ctx = ctx };
+   u64 size, extensions;
+   unsigned int n;
+   int err;
+
+   user = u64_to_user_ptr(args->value);
+   size = args->size;
+   if (!size)
+   goto out;
+
+   BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
+   if (size < sizeof(*user) ||
+   !IS_ALIGNED(size, sizeof(*user->class_instance)))
+   return -EINVAL;
+
+   set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
+   if (set.nengine > I915_EXEC_RING_MASK + 1)
+   return -EINVAL;
+
+   set.engines = kmalloc_array(set.nengine,
+   sizeof(*set.engines),
+   GFP_KERNEL);
+   if (!set.engines)
+   return -ENOMEM;
+
+   for (n = 0; n < set.nengine; n++) {
+   u16 class, inst;
+
+   if (get_user(class, &user->class_instance

[Intel-gfx] [PATCH 07/17] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Chris Wilson
An idea for extending uABI inspired by Vulkan's extension chains.
Instead of expanding the data struct for each ioctl every time we need
to add a new feature, define an extension chain instead. As we add
optional interfaces to control the ioctl, we define a new extension
struct that can be linked into the ioctl data only when required by the
user. The key advantage being able to ignore large control structs for
optional interfaces/extensions, while being able to process them in a
consistent manner.

In comparison to other extensible ioctls, the key difference is the
use of a linked chain of extension structs vs an array of tagged
pointers. For example,

struct drm_amdgpu_cs_chunk {
__u32   chunk_id;
__u32   length_dw;
__u64   chunk_data;
};

struct drm_amdgpu_cs_in {
__u32   ctx_id;
__u32   bo_list_handle;
__u32   num_chunks;
__u32   _pad;
__u64   chunks;
};

allows userspace to pass in array of pointers to extension structs, but
must therefore keep constructing that array along side the command stream.
In dynamic situations like that, a linked list is preferred and does not
suffer from extra cache line misses, as the extension structs themselves
must still be loaded separately from the chunks array.

v2: Apply the tail call optimisation directly to nip the worry of stack
overflow in the bud.
v3: Defend against recursion.

Opens:
- do we include the result as an out-field in each chain?
struct i915_user_extension {
__u64 next_extension;
__u64 name;
__s32 result;
__u32 mbz; /* reserved for future use */
};
* Undecided, so provision some room for future expansion.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/i915_user_extensions.c | 59 +
 drivers/gpu/drm/i915/i915_user_extensions.h | 20 +++
 drivers/gpu/drm/i915/i915_utils.h   | 12 +
 include/uapi/drm/i915_drm.h | 22 
 5 files changed, 114 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 68fecf355471..60de05f3fa60 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -46,6 +46,7 @@ i915-y := i915_drv.o \
  i915_sw_fence.o \
  i915_syncmap.o \
  i915_sysfs.o \
+ i915_user_extensions.o \
  intel_csr.o \
  intel_device_info.o \
  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c 
b/drivers/gpu/drm/i915/i915_user_extensions.c
new file mode 100644
index ..d28c95221db4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -0,0 +1,59 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include "i915_user_extensions.h"
+#include "i915_utils.h"
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+const i915_user_extension_fn *tbl,
+unsigned long count,
+void *data)
+{
+   unsigned int stackdepth = 512;
+
+   while (ext) {
+   int i, err;
+   u64 x;
+
+   if (!stackdepth--) /* recursion vs useful flexibility */
+   return -E2BIG;
+
+   err = check_user_mbz(&ext->flags);
+   if (err)
+   return err;
+
+   for (i = 0; i < ARRAY_SIZE(ext->rsvd); i++) {
+   err = check_user_mbz(&ext->rsvd[i]);
+   if (err)
+   return err;
+   }
+
+   if (get_user(x, &ext->name))
+   return -EFAULT;
+
+   err = -EINVAL;
+   if (x < count) {
+   x = array_index_nospec(x, count);
+   if (tbl[x])
+   err = tbl[x](ext, data);
+   }
+   if (err)
+   return err;
+
+   if (get_user(x, &ext->next_extension))
+   return -EFAULT;
+
+   ext = u64_to_user_ptr(x);
+   }
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h 
b/drivers/gpu/drm/i915/i915_user_extensions.h
new file mode 100644
index ..313a510b068a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef I915_USER_EXTENSIONS_H
+#define I915_USER_EXTENSIONS_H
+
+struct i915_user_extension;
+
+typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
+ 

[Intel-gfx] [PATCH 08/17] drm/i915: Create/destroy VM (ppGTT) for use with contexts

2019-03-13 Thread Chris Wilson
In preparation to making the ppGTT binding for a context explicit (to
facilitate reusing the same ppGTT between different contexts), allow the
user to create and destroy named ppGTT.

v2: Replace global barrier for swapping over the ppgtt and tlbs with a
local context barrier (Tvrtko)
v3: serialise with struct_mutex; it's lazy but required dammit
v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
similar, turned out different!)

v2: Fix up test unwind for aliasing-ppgtt (snb)
v3: Tighten language for uapi struct drm_i915_gem_vm_control.
v4: Patch the context image for runtime ppgtt switching!

Testcase: igt/gem_ctx_param/vm
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c   |   2 +
 drivers/gpu/drm/i915/i915_drv.h   |   3 +
 drivers/gpu/drm/i915/i915_gem_context.c   | 322 +-
 drivers/gpu/drm/i915/i915_gem_context.h   |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  30 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h   |  17 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
 .../gpu/drm/i915/selftests/i915_gem_context.c | 237 ++---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
 include/uapi/drm/i915_drm.h   |  43 +++
 11 files changed, 594 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0d743907e7bc..5d53efc4c5d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3121,6 +3121,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, 
DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, 
i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, 
DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, 
DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, 
DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dc63303225fc..4675355916ff 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -218,6 +218,9 @@ struct drm_i915_file_private {
} mm;
struct idr context_idr;
 
+   struct mutex vm_lock;
+   struct idr vm_idr;
+
unsigned int bsd_engine;
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 4af51b689cbd..71464ae91d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -120,12 +120,15 @@ static void lut_close(struct i915_gem_context *ctx)
list_del(&lut->obj_link);
i915_lut_handle_free(lut);
}
+   INIT_LIST_HEAD(&ctx->handles_list);
 
rcu_read_lock();
radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
struct i915_vma *vma = rcu_dereference_raw(*slot);
 
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+   vma->open_count--;
__i915_gem_object_release_unless_active(vma->obj);
}
rcu_read_unlock();
@@ -306,7 +309,7 @@ static void context_close(struct i915_gem_context *ctx)
 */
lut_close(ctx);
if (ctx->ppgtt)
-   i915_ppgtt_close(&ctx->ppgtt->vm);
+   i915_ppgtt_close(ctx->ppgtt);
 
ctx->file_priv = ERR_PTR(-EBADF);
i915_gem_context_put(ctx);
@@ -417,6 +420,32 @@ static void __destroy_hw_context(struct i915_gem_context 
*ctx,
context_close(ctx);
 }
 
+static struct i915_hw_ppgtt *
+__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
+{
+   struct i915_hw_ppgtt *old = ctx->ppgtt;
+
+   i915_ppgtt_open(ppgtt);
+   ctx->ppgtt = i915_ppgtt_get(ppgtt);
+
+   ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
+
+   return old;
+}
+
+static void __assign_ppgtt(struct i915_gem_context *ctx,
+  struct i915_hw_ppgtt *ppgtt)
+{
+   if (ppgtt == ctx->ppgtt)
+   return;
+
+   ppgtt = __set_ppgtt(ctx, ppgtt);
+   if (ppgtt) {
+   i915_ppgtt_close(ppgtt);
+   i915_ppgtt_put(ppgtt);
+   }
+}
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv)
@@ -443,8 +472,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
return ERR_CAST(ppgtt);
}
 
-   ctx->ppgtt = ppgtt;
-   ctx->desc_template = default_desc_template(dev_priv, ppgtt);
+   __assign_ppgtt(ctx, ppgtt);
+   i

[Intel-gfx] [PATCH 06/17] drm/i915: Switch to use HWS indices rather than addresses

2019-03-13 Thread Chris Wilson
If we use the STORE_DATA_INDEX function we can use a fixed offset and
avoid having to lookup up the engine HWS address. A step closer to being
able to emit the final breadcrumb during request_add rather than later
in the submission interrupt handler.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
 drivers/gpu/drm/i915/intel_lrc.c| 17 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++--
 4 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4a5727233419..c4ad73980988 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -583,7 +583,8 @@ static void inject_preempt_context(struct work_struct *work)
} else {
cs = gen8_emit_ggtt_write(cs,
  GUC_PREEMPT_FINISHED,
- addr);
+ addr,
+ 0);
*cs++ = MI_NOOP;
*cs++ = MI_NOOP;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 44e75bc520c1..5669823f6901 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -173,12 +173,6 @@ static void execlists_init_reg_state(u32 *reg_state,
 struct intel_engine_cs *engine,
 struct intel_ring *ring);
 
-static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
-{
-   return (i915_ggtt_offset(engine->status_page.vma) +
-   I915_GEM_HWS_HANGCHECK_ADDR);
-}
-
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
return rb_entry(rb, struct i915_priolist, node);
@@ -2212,11 +2206,14 @@ static u32 *gen8_emit_fini_breadcrumb(struct 
i915_request *request, u32 *cs)
 {
cs = gen8_emit_ggtt_write(cs,
  request->fence.seqno,
- request->timeline->hwsp_offset);
+ request->timeline->hwsp_offset,
+ 0);
 
cs = gen8_emit_ggtt_write(cs,
  
intel_engine_next_hangcheck_seqno(request->engine),
- intel_hws_hangcheck_address(request->engine));
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ MI_FLUSH_DW_STORE_INDEX);
+
 
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2240,8 +2237,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
 
cs = gen8_emit_ggtt_write_rcs(cs,
  
intel_engine_next_hangcheck_seqno(request->engine),
- 
intel_hws_hangcheck_address(request->engine),
- 0);
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ PIPE_CONTROL_STORE_DATA_INDEX);
 
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a021c9545649..9e7ad17b5250 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,12 +43,6 @@
  */
 #define LEGACY_REQUEST_SIZE 200
 
-static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
-{
-   return (i915_ggtt_offset(engine->status_page.vma) +
-   I915_GEM_HWS_HANGCHECK_ADDR);
-}
-
 unsigned int intel_ring_update_space(struct intel_ring *ring)
 {
unsigned int space;
@@ -317,8 +311,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request 
*rq, u32 *cs)
*cs++ = rq->fence.seqno;
 
*cs++ = GFX_OP_PIPE_CONTROL(4);
-   *cs++ = PIPE_CONTROL_QW_WRITE;
-   *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+   *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX;
+   *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
*cs++ = MI_USER_INTERRUPT;
@@ -423,8 +417,10 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request 
*rq, u32 *cs)
*cs++ = rq->fence.seqno;
 
*cs++ = GFX_OP_PIPE_CONTROL(4);
-   *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
-   *cs++ = hws_hangcheck_address(rq->engine);
+   *cs++ = (PIPE_CONTROL_QW_WRITE |
+PIPE_CONTROL_STORE_DATA_INDEX |
+PIPE_CONTROL_GLOBAL_GTT_IVB);
+   *cs++ = I915_GEM_HWS_HANGCHECK_ADDR;

[Intel-gfx] [PATCH 14/17] drm/i915: Load balancing across a virtual engine

2019-03-13 Thread Chris Wilson
Having allowed the user to define a set of engines that they will want
to only use, we go one step further and allow them to bind those engines
into a single virtual instance. Submitting a batch to the virtual engine
will then forward it to any one of the set in a manner as best to
distribute load.  The virtual engine has a single timeline across all
engines (it operates as a single queue), so it is not able to concurrently
run batches across multiple engines by itself; that is left up to the user
to submit multiple concurrent batches to multiple queues. Multiple users
will be load balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always takes priority over equal-priority tasks.
Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
and hopefully the virtual and real engines are not then congested (i.e.
all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

- We only drip feed single requests through each virtual engine and onto
the physical engines, even if there was enough work to fill all ELSP,
leaving small stalls with an idle CS event at the end of every request.
Could we be greedy and fill both slots? Being lazy is virtuous for load
distribution on less-than-full workloads though.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.

sseu: Lift the restriction to allow sseu to be reconfigured on virtual
engines composed of RENDER_CLASS (rcs).

v2: macroize check_user_mbz()
v3: Cancel virtual engines on wedging
v4: Commence commenting

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem.h|   5 +
 drivers/gpu/drm/i915/i915_gem_context.c| 128 -
 drivers/gpu/drm/i915/i915_scheduler.c  |  18 +-
 drivers/gpu/drm/i915/i915_timeline_types.h |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h  |   8 +
 drivers/gpu/drm/i915/intel_lrc.c   | 570 -
 drivers/gpu/drm/i915/intel_lrc.h   |  11 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 165 ++
 include/uapi/drm/i915_drm.h|  30 ++
 9 files changed, 917 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 74a2ddc1b52f..dbcea6e29d48 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -91,4 +91,9 @@ static inline bool __tasklet_is_enabled(const struct 
tasklet_struct *t)
return !atomic_read(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+   return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 7ae28622b709..98763d3f1b12 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,6 +86,7 @@
  */
 
 #include 
+#include 
 
 #include 
 
@@ -94,6 +95,7 @@
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
+#include "intel_lrc.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -241,6 +243,20 @@ static void release_hw_id(struct i915_gem_context *ctx)
mutex_unlock(&i915->contexts.mutex);
 }
 
+static void free_engines(struct intel_engine_cs **engines, int count)
+{
+   int i;
+
+   if (ZERO_OR_NULL_PTR(engines))
+   return;
+
+   /* We own the veng we created; regular engines are ignored */
+   for (i = 0; i < count; i++)
+   intel_virtual_engine_destroy(engines[i]);
+
+   kfree(engines);
+}
+
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
struct intel_context *it, *n;
@@ -251,8 +267,7 @@ static void i915_gem_context_free(struct i915_gem_context 
*

[Intel-gfx] [PATCH 01/17] drm/i915: Hold a ref to the ring while retiring

2019-03-13 Thread Chris Wilson
As the final request on a ring may hold the reference to this ring (via
retiring the last pinned context), we may find ourselves chasing a
dangling pointer on completion of the list.

A quick solution is to hold a reference to the ring itself as we retire
along it so that we only free it after we stop dereferencing it.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_request.c  |  6 +-
 drivers/gpu/drm/i915/intel_engine_types.h|  2 ++
 drivers/gpu/drm/i915/intel_lrc.c |  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  9 +
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 13 -
 drivers/gpu/drm/i915/selftests/mock_engine.c |  1 +
 6 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 9533a85cb0b3..0a3d94517d0a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1332,8 +1332,12 @@ void i915_retire_requests(struct drm_i915_private *i915)
if (!i915->gt.active_requests)
return;
 
-   list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link)
+   list_for_each_entry_safe(ring, tmp,
+&i915->gt.active_rings, active_link) {
+   intel_ring_get(ring); /* last rq holds reference! */
ring_retire_requests(ring);
+   intel_ring_put(ring);
+   }
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h 
b/drivers/gpu/drm/i915/intel_engine_types.h
index b0aa1f0d4e47..88ed7ba8886f 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -9,6 +9,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -51,6 +52,7 @@ struct intel_engine_hangcheck {
 };
 
 struct intel_ring {
+   struct kref ref;
struct i915_vma *vma;
void *vaddr;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dc3de09c7586..00fa4a3bc9a3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1236,7 +1236,7 @@ static void execlists_submit_request(struct i915_request 
*request)
 
 static void __execlists_context_fini(struct intel_context *ce)
 {
-   intel_ring_free(ce->ring);
+   intel_ring_put(ce->ring);
 
GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
i915_gem_object_put(ce->state->obj);
@@ -2869,7 +2869,7 @@ static int execlists_context_deferred_alloc(struct 
intel_context *ce,
return 0;
 
 error_ring_free:
-   intel_ring_free(ring);
+   intel_ring_put(ring);
 error_deref_obj:
i915_gem_object_put(ctx_obj);
return ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f26f5cc1584c..ebac752e092d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1311,6 +1311,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
if (!ring)
return ERR_PTR(-ENOMEM);
 
+   kref_init(&ring->ref);
INIT_LIST_HEAD(&ring->request_list);
ring->timeline = i915_timeline_get(timeline);
 
@@ -1335,9 +1336,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
return ring;
 }
 
-void
-intel_ring_free(struct intel_ring *ring)
+void intel_ring_free(struct kref *ref)
 {
+   struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
struct drm_i915_gem_object *obj = ring->vma->obj;
 
i915_vma_close(ring->vma);
@@ -1591,7 +1592,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs 
*engine)
 err_unpin:
intel_ring_unpin(ring);
 err_ring:
-   intel_ring_free(ring);
+   intel_ring_put(ring);
 err:
intel_engine_cleanup_common(engine);
return err;
@@ -1605,7 +1606,7 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
(I915_READ_MODE(engine) & MODE_IDLE) == 0);
 
intel_ring_unpin(engine->buffer);
-   intel_ring_free(engine->buffer);
+   intel_ring_put(engine->buffer);
 
if (engine->cleanup)
engine->cleanup(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e612bdca9fd9..a57489fcb302 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -231,7 +231,18 @@ int intel_ring_pin(struct intel_ring *ring);
 void intel_ring_reset(struct intel_ring *ring, u32 tail);
 unsigned int intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct intel_ring *ring);
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+   kref_get(&ring->ref);
+   return ring;
+}
+
+static inline void intel_ring_put(struct in

[Intel-gfx] [PATCH 16/17] drm/i915/execlists: Virtual engine bonding

2019-03-13 Thread Chris Wilson
Some users require that when a master batch is executed on one particular
engine, a companion batch is run simultaneously on a specific slave
engine. For this purpose, we introduce virtual engine bonding, allowing
maps of master:slaves to be constructed to constrain which physical
engines a virtual engine may select given a fence on a master engine.

For the moment, we continue to ignore the issue of preemption deferring
the master request for later. Ideally, we would like to then also remove
the slave and run something else rather than have it stall the pipeline.
With load balancing, we should be able to move workload around it, but
there is a similar stall on the master pipeline while it may wait for
the slave to be executed. At the cost of more latency for the bonded
request, it may be interesting to launch both on their engines in
lockstep. (Bubbles abound.)

Opens: Also what about bonding an engine as its own master? It doesn't
break anything internally, so allow the silliness.

v2: Emancipate the bonds
v3: Couple in delayed scheduling for the selftests
v4: Handle invalid mutually exclusive bonding
v5: Mention what the uapi does

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c   |  50 +
 drivers/gpu/drm/i915/i915_request.c   |   1 +
 drivers/gpu/drm/i915/i915_request.h   |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h |   7 +
 drivers/gpu/drm/i915/intel_lrc.c  | 143 ++
 drivers/gpu/drm/i915/intel_lrc.h  |   4 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c| 185 ++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
 include/uapi/drm/i915_drm.h   |  33 
 9 files changed, 427 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 98763d3f1b12..0ec78c386473 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1513,8 +1513,58 @@ set_engines__load_balance(struct i915_user_extension 
__user *base, void *data)
return 0;
 }
 
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+   struct i915_context_engines_bond __user *ext =
+   container_of_user(base, typeof(*ext), base);
+   const struct set_engines *set = data;
+   struct intel_engine_cs *master;
+   u32 class, instance, siblings;
+   u16 idx;
+   int err;
+
+   if (get_user(idx, &ext->virtual_index))
+   return -EFAULT;
+
+   if (idx >= set->nengine)
+   return -EINVAL;
+
+   idx = array_index_nospec(idx, set->nengine);
+   if (!set->engines[idx])
+   return -EINVAL;
+
+   /*
+* A non-virtual engine has 0 siblings to choose between; and submit
+* fence will always be directed to the one engine.
+*/
+   if (!intel_engine_is_virtual(set->engines[idx]))
+   return 0;
+
+   err = check_user_mbz(&ext->mbz);
+   if (err)
+   return err;
+
+   if (get_user(class, &ext->master_class))
+   return -EFAULT;
+
+   if (get_user(instance, &ext->master_instance))
+   return -EFAULT;
+
+   master = intel_engine_lookup_user(set->ctx->i915, class, instance);
+   if (!master)
+   return -EINVAL;
+
+   if (get_user(siblings, &ext->sibling_mask))
+   return -EFAULT;
+
+   return intel_virtual_engine_attach_bond(set->engines[idx],
+   master, siblings);
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+   [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 0a46f8113f5c..9ce710baa452 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -743,6 +743,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct 
i915_gem_context *ctx)
rq->batch = NULL;
rq->capture_list = NULL;
rq->waitboost = false;
+   rq->execution_mask = ~0u;
 
/*
 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h
index d4f6b2940130..862b25930de0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -145,6 +145,7 @@ struct i915_request {
 */
struct i915_sched_node sched;
struct i915_dependency dep;
+   unsigned int execution_mask;
 
/*
 * A convenience pointer to the current breadcrumb value stored in
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h 
b/drivers/gpu/drm/i915/intel_engine_types.h
index 322fbda65190..1da35509d811 100644
--- a/drivers/gpu/drm/i915/intel_engine_t

[Intel-gfx] [PATCH 04/17] drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set

2019-03-13 Thread Chris Wilson
We only need to acquire a wakeref for ourselves for a few operations, as
most either already acquire their own wakeref or imply a wakeref. In
particular, it is i915_gem_set_wedged() that needed us to present it
with a wakeref, which is incongruous with its "use anywhere" ability.

Suggested-by: Yokoyama, Caz 
Signed-off-by: Chris Wilson 
Cc: Yokoyama, Caz 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 12 
 drivers/gpu/drm/i915/i915_reset.c   |  4 +++-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 6a90558de213..08683dca7775 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3888,12 +3888,9 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
struct drm_i915_private *i915 = data;
-   intel_wakeref_t wakeref;
-   int ret = 0;
 
DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
  val, val & DROP_ALL);
-   wakeref = intel_runtime_pm_get(i915);
 
if (val & DROP_RESET_ACTIVE &&
wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))
@@ -3902,9 +3899,11 @@ i915_drop_caches_set(void *data, u64 val)
/* No need to check and wait for gpu resets, only libdrm auto-restarts
 * on ioctls on -EAGAIN. */
if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
+   int ret;
+
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (ret)
-   goto out;
+   return ret;
 
if (val & DROP_ACTIVE)
ret = i915_gem_wait_for_idle(i915,
@@ -3943,10 +3942,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_FREED)
i915_gem_drain_freed_objects(i915);
 
-out:
-   intel_runtime_pm_put(i915, wakeref);
-
-   return ret;
+   return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
diff --git a/drivers/gpu/drm/i915/i915_reset.c 
b/drivers/gpu/drm/i915/i915_reset.c
index 3c08e08837d0..955c22b8dfc7 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -861,9 +861,11 @@ static void __i915_gem_set_wedged(struct drm_i915_private 
*i915)
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
struct i915_gpu_error *error = &i915->gpu_error;
+   intel_wakeref_t wakeref;
 
mutex_lock(&error->wedge_mutex);
-   __i915_gem_set_wedged(i915);
+   with_intel_runtime_pm(i915, wakeref)
+   __i915_gem_set_wedged(i915);
mutex_unlock(&error->wedge_mutex);
 }
 
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 02/17] drm/i915: Lock the gem_context->active_list while dropping the link

2019-03-13 Thread Chris Wilson
On unpinning the intel_context, we remove it from the active list
inside the GEM context. This list is supposed to be guarded by the GEM
context mutex, so remember to take it!

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_context.c | 15 +++
 drivers/gpu/drm/i915/intel_lrc.c |  3 ---
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ---
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_context.c 
b/drivers/gpu/drm/i915/intel_context.c
index 5a16c9bb2778..0ab894a058f6 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -165,13 +165,13 @@ intel_context_pin(struct i915_gem_context *ctx,
if (err)
goto err;
 
+   i915_gem_context_get(ctx);
+   GEM_BUG_ON(ce->gem_context != ctx);
+
mutex_lock(&ctx->mutex);
list_add(&ce->active_link, &ctx->active_engines);
mutex_unlock(&ctx->mutex);
 
-   i915_gem_context_get(ctx);
-   GEM_BUG_ON(ce->gem_context != ctx);
-
smp_mb__before_atomic(); /* flush pin before it is visible */
}
 
@@ -194,9 +194,16 @@ void intel_context_unpin(struct intel_context *ce)
/* We may be called from inside intel_context_pin() to evict another */
mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
 
-   if (likely(atomic_dec_and_test(&ce->pin_count)))
+   if (likely(atomic_dec_and_test(&ce->pin_count))) {
ce->ops->unpin(ce);
 
+   mutex_lock(&ce->gem_context->mutex);
+   list_del(&ce->active_link);
+   mutex_unlock(&ce->gem_context->mutex);
+
+   i915_gem_context_put(ce->gem_context);
+   }
+
mutex_unlock(&ce->pin_mutex);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 00fa4a3bc9a3..e0fb8853477c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1283,9 +1283,6 @@ static void execlists_context_unpin(struct intel_context 
*ce)
ce->state->obj->pin_global--;
i915_gem_object_unpin_map(ce->state->obj);
i915_vma_unpin(ce->state);
-
-   list_del(&ce->active_link);
-   i915_gem_context_put(ce->gem_context);
 }
 
 static int __context_pin(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ebac752e092d..175070ea0f50 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1434,9 +1434,6 @@ static void ring_context_unpin(struct intel_context *ce)
 {
__context_unpin_ppgtt(ce->gem_context);
__context_unpin(ce);
-
-   list_del(&ce->active_link);
-   i915_gem_context_put(ce->gem_context);
 }
 
 static struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c 
b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 881450c694e9..7641b74ada98 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -126,8 +126,6 @@ static void hw_delay_complete(struct timer_list *t)
 static void mock_context_unpin(struct intel_context *ce)
 {
mock_timeline_unpin(ce->ring->timeline);
-   list_del(&ce->active_link);
-   i915_gem_context_put(ce->gem_context);
 }
 
 static void mock_context_destroy(struct intel_context *ce)
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 11/17] drm/i915: Allow userspace to clone contexts on creation

2019-03-13 Thread Chris Wilson
A usecase arose out of handling context recovery in mesa, whereby they
wish to recreate a context with fresh logical state but preserving all
other details of the original. Currently, they create a new context and
iterate over which bits they want to copy across, but it would be much more
convenient if they were able to just pass in a target context to clone
during creation. This essentially extends the setparam during creation
to pull the details from a target context instead of the user supplied
parameters.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 103 
 include/uapi/drm/i915_drm.h |  14 
 2 files changed, 117 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 4e4b0b5c4be0..bac548584091 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1488,8 +1488,111 @@ static int create_setparam(struct i915_user_extension 
__user *ext, void *data)
return ctx_setparam(data, &local.setparam);
 }
 
+static int clone_sseu(struct i915_gem_context *dst,
+ struct i915_gem_context *src)
+{
+   const struct intel_sseu default_sseu =
+   intel_device_default_sseu(dst->i915);
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
+   for_each_engine(engine, dst->i915, id) {
+   struct intel_context *ce;
+   struct intel_sseu sseu;
+
+   ce = intel_context_lookup(src, engine);
+   if (!ce)
+   continue;
+
+   sseu = ce->sseu;
+   if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
+   continue;
+
+   ce = intel_context_pin_lock(dst, engine);
+   if (IS_ERR(ce))
+   return PTR_ERR(ce);
+
+   ce->sseu = sseu;
+   intel_context_pin_unlock(ce);
+   }
+
+   return 0;
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+   struct drm_i915_gem_context_create_ext_clone local;
+   struct i915_gem_context *dst = data;
+   struct i915_gem_context *src;
+   int err;
+
+   if (copy_from_user(&local, ext, sizeof(local)))
+   return -EFAULT;
+
+   if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+   return -EINVAL;
+
+   if (local.rsvd)
+   return -EINVAL;
+
+   if (local.clone_id == dst->user_handle) /* good guess! denied. */
+   return -ENOENT;
+
+   rcu_read_lock();
+   src = __i915_gem_context_lookup_rcu(dst->file_priv, local.clone_id);
+   rcu_read_unlock();
+   if (!src)
+   return -ENOENT;
+
+   GEM_BUG_ON(src == dst);
+
+   if (local.flags & I915_CONTEXT_CLONE_FLAGS)
+   dst->user_flags = src->user_flags;
+
+   if (local.flags & I915_CONTEXT_CLONE_SCHED)
+   dst->sched = src->sched;
+
+   if (local.flags & I915_CONTEXT_CLONE_SSEU) {
+   err = clone_sseu(dst, src);
+   if (err)
+   return err;
+   }
+
+   if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
+   if (dst->timeline)
+   i915_timeline_put(dst->timeline);
+   dst->timeline = i915_timeline_get(src->timeline);
+   }
+
+   if (local.flags & I915_CONTEXT_CLONE_VM) {
+   struct i915_hw_ppgtt *ppgtt;
+
+   do {
+   ppgtt = READ_ONCE(src->ppgtt);
+   if (!ppgtt)
+   break;
+
+   if (!kref_get_unless_zero(&ppgtt->ref))
+   continue;
+
+   if (ppgtt == READ_ONCE(src->ppgtt))
+   break;
+
+   i915_ppgtt_put(ppgtt);
+   } while (1);
+
+   if (ppgtt) {
+   __assign_ppgtt(dst, ppgtt);
+   i915_ppgtt_put(ppgtt);
+   }
+   }
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+   [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5206d0006043..9714520f43da 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1581,6 +1581,20 @@ struct drm_i915_gem_context_create_ext_setparam {
struct drm_i915_gem_context_param setparam;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+   struct i915_user_extension base;
+   __u32 clone_id;
+   __u32 flags;
+#define I915_CONTEXT_CLONE_FLAGS   (1u << 0)
+#define I915_CONTEXT_CLONE_SCHED   (1u << 1)
+#define 

[Intel-gfx] [PATCH 03/17] drm/i915: Hold a reference to the active HW context

2019-03-13 Thread Chris Wilson
For virtual engines, we need to keep the HW context alive while it
remains in use. For regular HW contexts, they are created and kept alive
until the end of the GEM context. For simplicity, generalise the
requirements and keep an active reference to each HW context.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c  |  2 +-
 drivers/gpu/drm/i915/intel_context.c |  6 ++
 drivers/gpu/drm/i915/intel_context.h | 11 +++
 drivers/gpu/drm/i915/intel_context_types.h   |  6 +-
 drivers/gpu/drm/i915/intel_lrc.c |  4 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  4 +++-
 drivers/gpu/drm/i915/selftests/mock_engine.c |  7 ++-
 7 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index b6370225dcb5..4af51b689cbd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -232,7 +232,7 @@ static void i915_gem_context_free(struct i915_gem_context 
*ctx)
i915_ppgtt_put(ctx->ppgtt);
 
rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
-   it->ops->destroy(it);
+   intel_context_put(it);
 
kfree(ctx->name);
put_pid(ctx->pid);
diff --git a/drivers/gpu/drm/i915/intel_context.c 
b/drivers/gpu/drm/i915/intel_context.c
index 0ab894a058f6..8931e0fee873 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -172,6 +172,7 @@ intel_context_pin(struct i915_gem_context *ctx,
list_add(&ce->active_link, &ctx->active_engines);
mutex_unlock(&ctx->mutex);
 
+   intel_context_get(ce);
smp_mb__before_atomic(); /* flush pin before it is visible */
}
 
@@ -192,6 +193,7 @@ void intel_context_unpin(struct intel_context *ce)
return;
 
/* We may be called from inside intel_context_pin() to evict another */
+   intel_context_get(ce);
mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
 
if (likely(atomic_dec_and_test(&ce->pin_count))) {
@@ -202,9 +204,11 @@ void intel_context_unpin(struct intel_context *ce)
mutex_unlock(&ce->gem_context->mutex);
 
i915_gem_context_put(ce->gem_context);
+   intel_context_put(ce);
}
 
mutex_unlock(&ce->pin_mutex);
+   intel_context_put(ce);
 }
 
 static void intel_context_retire(struct i915_active_request *active,
@@ -221,6 +225,8 @@ intel_context_init(struct intel_context *ce,
   struct i915_gem_context *ctx,
   struct intel_engine_cs *engine)
 {
+   kref_init(&ce->ref);
+
ce->gem_context = ctx;
ce->engine = engine;
ce->ops = engine->cops;
diff --git a/drivers/gpu/drm/i915/intel_context.h 
b/drivers/gpu/drm/i915/intel_context.h
index 9546d932406a..ebc861b1a49e 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -73,4 +73,15 @@ static inline void __intel_context_pin(struct intel_context 
*ce)
 
 void intel_context_unpin(struct intel_context *ce);
 
+static inline struct intel_context *intel_context_get(struct intel_context *ce)
+{
+   kref_get(&ce->ref);
+   return ce;
+}
+
+static inline void intel_context_put(struct intel_context *ce)
+{
+   kref_put(&ce->ref, ce->ops->destroy);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h 
b/drivers/gpu/drm/i915/intel_context_types.h
index 6dc9b4b9067b..624729a35875 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -7,6 +7,7 @@
 #ifndef __INTEL_CONTEXT_TYPES__
 #define __INTEL_CONTEXT_TYPES__
 
+#include 
 #include 
 #include 
 #include 
@@ -22,7 +23,8 @@ struct intel_ring;
 struct intel_context_ops {
int (*pin)(struct intel_context *ce);
void (*unpin)(struct intel_context *ce);
-   void (*destroy)(struct intel_context *ce);
+
+   void (*destroy)(struct kref *kref);
 };
 
 /*
@@ -36,6 +38,8 @@ struct intel_sseu {
 };
 
 struct intel_context {
+   struct kref ref;
+
struct i915_gem_context *gem_context;
struct intel_engine_cs *engine;
struct intel_engine_cs *active;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e0fb8853477c..44e75bc520c1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1242,8 +1242,10 @@ static void __execlists_context_fini(struct 
intel_context *ce)
i915_gem_object_put(ce->state->obj);
 }
 
-static void execlists_context_destroy(struct intel_context *ce)
+static void execlists_context_destroy(struct kref *kref)
 {
+   struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+
GEM_BUG_ON(intel_context_is_pinned(ce));
 
if (ce->state)
diff --git a/drivers/gpu/drm/i915/intel_r

[Intel-gfx] [PATCH 10/17] drm/i915: Allow contexts to share a single timeline across all engines

2019-03-13 Thread Chris Wilson
Previously, our view has been always to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forget about this detail entirely and hope no
one notices - a fair ploy if the client can only directly control one
engine themselves ;)

In the next patch, we will want to construct a set of engines that
operate as one, that have a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which engine to execute on based on load.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.

v2: Move the specialised timeline ordering to its own function.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c   | 32 ++--
 drivers/gpu/drm/i915/i915_gem_context_types.h |  2 +
 drivers/gpu/drm/i915/i915_request.c   | 80 +--
 drivers/gpu/drm/i915/i915_request.h   |  5 +-
 drivers/gpu/drm/i915/i915_sw_fence.c  | 39 +++--
 drivers/gpu/drm/i915/i915_sw_fence.h  | 13 ++-
 drivers/gpu/drm/i915/intel_lrc.c  |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 18 +++--
 drivers/gpu/drm/i915/selftests/mock_context.c |  2 +-
 include/uapi/drm/i915_drm.h   |  3 +-
 10 files changed, 149 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 07c097ad83ee..4e4b0b5c4be0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context 
*ctx)
rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
intel_context_put(it);
 
+   if (ctx->timeline)
+   i915_timeline_put(ctx->timeline);
+
kfree(ctx->name);
put_pid(ctx->pid);
 
@@ -449,12 +452,17 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
-   struct drm_i915_file_private *file_priv)
+   struct drm_i915_file_private *file_priv,
+   unsigned int flags)
 {
struct i915_gem_context *ctx;
 
lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+   if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+   !HAS_EXECLISTS(dev_priv))
+   return ERR_PTR(-EINVAL);
+
/* Reap the most stale context */
contexts_free_first(dev_priv);
 
@@ -477,6 +485,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
i915_ppgtt_put(ppgtt);
}
 
+   if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+   struct i915_timeline *timeline;
+
+   timeline = i915_timeline_create(dev_priv, ctx->name, NULL);
+   if (IS_ERR(timeline)) {
+   __destroy_hw_context(ctx, file_priv);
+   return ERR_CAST(timeline);
+   }
+
+   ctx->timeline = timeline;
+   }
+
trace_i915_context_create(ctx);
 
return ctx;
@@ -505,7 +525,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
if (ret)
return ERR_PTR(ret);
 
-   ctx = i915_gem_create_context(to_i915(dev), NULL);
+   ctx = i915_gem_create_context(to_i915(dev), NULL, 0);
if (IS_ERR(ctx))
goto out;
 
@@ -541,7 +561,7 @@ i915_gem_context_create_kernel(struct drm_i915_private 
*i915, int prio)
struct i915_gem_context *ctx;
int err;
 
-   ctx = i915_gem_create_context(i915, NULL);
+   ctx = i915_gem_create_context(i915, NULL, 0);
if (IS_ERR(ctx))
return ctx;
 
@@ -673,7 +693,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
idr_init_base(&file_priv->vm_idr, 1);
 
mutex_lock(&i915->drm.struct_mutex);
-   ctx = i915_gem_create_context(i915, file_priv);
+   ctx = i915_gem_create_context(i915, file_priv, 0);
mutex_unlock(&i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
idr_destroy(&file_priv->context_idr);
@@ -789,7 +809,7 @@ last_request_on_engine(struct i915_timeline *timeline,
 
rq = i915_active_request_raw(&timeline->last_request,
 &engine->i915->drm.struct_mutex);
-   if (rq && rq->engine == engine) {
+   if (rq && rq->engine->mask & engine->mask) {
GEM_TRACE("last request for %s on engine %s: %llx:%ll

[Intel-gfx] [PATCH 13/17] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]

2019-03-13 Thread Chris Wilson
Allow the user to specify a local engine index (as opposed to
class:index) that they can use to refer to a preset engine inside the
ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES.
This will be useful for setting SSEU parameters on virtual engines that
are local to the context and do not have a valid global class:instance
lookup.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 24 
 include/uapi/drm/i915_drm.h |  3 ++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 07377b75b563..7ae28622b709 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1375,6 +1375,7 @@ static int set_sseu(struct i915_gem_context *ctx,
struct drm_i915_gem_context_param_sseu user_sseu;
struct intel_engine_cs *engine;
struct intel_sseu sseu;
+   unsigned long lookup;
int ret;
 
if (args->size < sizeof(user_sseu))
@@ -1387,10 +1388,17 @@ static int set_sseu(struct i915_gem_context *ctx,
   sizeof(user_sseu)))
return -EFAULT;
 
-   if (user_sseu.flags || user_sseu.rsvd)
+   if (user_sseu.rsvd)
return -EINVAL;
 
-   engine = lookup_user_engine(ctx, 0,
+   if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+   return -EINVAL;
+
+   lookup = 0;
+   if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+   lookup |= LOOKUP_USER_INDEX;
+
+   engine = lookup_user_engine(ctx, lookup,
user_sseu.engine_class,
user_sseu.engine_instance);
if (!engine)
@@ -1899,6 +1907,7 @@ static int get_sseu(struct i915_gem_context *ctx,
struct drm_i915_gem_context_param_sseu user_sseu;
struct intel_engine_cs *engine;
struct intel_context *ce;
+   unsigned long lookup;
 
if (args->size == 0)
goto out;
@@ -1909,10 +1918,17 @@ static int get_sseu(struct i915_gem_context *ctx,
   sizeof(user_sseu)))
return -EFAULT;
 
-   if (user_sseu.flags || user_sseu.rsvd)
+   if (user_sseu.rsvd)
return -EINVAL;
 
-   engine = lookup_user_engine(ctx, 0,
+   if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+   return -EINVAL;
+
+   lookup = 0;
+   if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+   lookup |= LOOKUP_USER_INDEX;
+
+   engine = lookup_user_engine(ctx, lookup,
user_sseu.engine_class,
user_sseu.engine_instance);
if (!engine)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6dde864e14e7..e17c7375248c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1567,9 +1567,10 @@ struct drm_i915_gem_context_param_sseu {
__u16 engine_instance;
 
/*
-* Unused for now. Must be cleared to zero.
+* Unknown flags must be cleared to zero.
 */
__u32 flags;
+#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
 
/*
 * Mask of slices to enable for the context. Valid values are a subset
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/selftests: Provide stub reset functions

2019-03-13 Thread Patchwork
== Series Details ==

Series: drm/i915/selftests: Provide stub reset functions
URL   : https://patchwork.freedesktop.org/series/57884/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5736_full -> Patchwork_12442_full


Summary
---

  **WARNING**

  Minor unknown changes coming with Patchwork_12442_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12442_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_12442_full:

### IGT changes ###

 Warnings 

  * igt@i915_suspend@forcewake:
- shard-hsw:  ( 2 PASS ) -> PASS +50

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@kms_atomic@plane_primary_overlay_zpos}:
- {shard-iclb}:   NOTRUN -> SKIP

  * {igt@kms_plane@pixel-format-pipe-c-planes-source-clamping}:
- shard-apl:  PASS -> FAIL

  
Known issues


  Here are the changes found in Patchwork_12442_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_ctx_param@invalid-param-set:
- shard-snb:  NOTRUN -> FAIL [fdo#109674]

  * igt@kms_atomic_transition@3x-modeset-transitions-nonblocking:
- shard-snb:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +11

  * igt@kms_busy@basic-modeset-f:
- shard-glk:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1

  * igt@kms_busy@extended-modeset-hang-oldfb-render-e:
- shard-skl:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1
- shard-apl:  NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1

  * igt@kms_busy@extended-pageflip-hang-newfb-render-a:
- shard-apl:  NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-b:
- shard-glk:  NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic:
- shard-glk:  PASS -> FAIL [fdo#108145]

  * igt@kms_color@pipe-a-legacy-gamma:
- shard-glk:  PASS -> FAIL [fdo#104782] / [fdo#108145]

  * igt@kms_cursor_crc@cursor-128x42-random:
- shard-glk:  NOTRUN -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x21-sliding:
- shard-apl:  PASS -> FAIL [fdo#103232] +1

  * igt@kms_cursor_crc@cursor-64x64-suspend:
- shard-apl:  PASS -> INCOMPLETE [fdo#103927]

  * igt@kms_draw_crc@draw-method-xrgb2101010-mmap-gtt-xtiled:
- shard-skl:  PASS -> FAIL [fdo#103184]

  * igt@kms_flip@flip-vs-expired-vblank:
- shard-skl:  PASS -> FAIL [fdo#105363]

  * igt@kms_flip@plain-flip-fb-recreate:
- shard-skl:  PASS -> FAIL [fdo#100368]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-cpu:
- shard-apl:  PASS -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-pwrite:
- shard-glk:  NOTRUN -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@fbc-1p-rte:
- shard-glk:  PASS -> FAIL [fdo#103167] / [fdo#105682]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-wc:
- shard-glk:  PASS -> FAIL [fdo#103167] +4

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-shrfb-pgflip-blt:
- shard-snb:  NOTRUN -> SKIP [fdo#109271] +150

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
- shard-kbl:  PASS -> INCOMPLETE [fdo#103665]

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
- shard-glk:  NOTRUN -> FAIL [fdo#108145]

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-none:
- shard-glk:  PASS -> FAIL [fdo#103166] +1
- shard-apl:  PASS -> FAIL [fdo#103166]

  * igt@kms_rotation_crc@primary-x-tiled-reflect-x-0:
- shard-glk:  NOTRUN -> SKIP [fdo#109271] +18

  * igt@kms_setmode@basic:
- shard-hsw:  PASS -> FAIL [fdo#99912]

  * igt@prime_vgem@coherency-gtt:
- shard-apl:  NOTRUN -> SKIP [fdo#109271] +25

  * igt@prime_vgem@sync-bsd1:
- shard-skl:  NOTRUN -> SKIP [fdo#109271] +19

  
 Possible fixes 

  * igt@gem_exec_big:
- shard-hsw:  TIMEOUT [fdo#107936] -> PASS

  * igt@gem_exec_capture@capture-blt:
- {shard-iclb}:   FAIL [fdo#109960] -> PASS

  * igt@gem_exec_suspend@basic-s4-devices:
- {shard-iclb}:   DMESG-FAIL [fdo#109960] -> PASS

  * igt@gem_mmap_gtt@hang:
- {shard-iclb}:   FAIL [fdo#109677] -> PASS

  * igt@i915_pm_rpm@i2c:
- {shard-iclb}:   DMESG-WARN [fdo#109982] -> PASS

  * igt@i915_selftest@live_contexts:
- {shard-iclb}:   INCOMPLETE [fdo#108569] -> PASS

  * igt@kms_atomic_transition@1x-modeset-t

[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v4,1/3] drm/i915/vbt: Parse and use the new field with PSR2 TP2/3 wakeup time

2019-03-13 Thread Patchwork
== Series Details ==

Series: series starting with [v4,1/3] drm/i915/vbt: Parse and use the new field 
with PSR2 TP2/3 wakeup time
URL   : https://patchwork.freedesktop.org/series/57896/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
337a8cd36aa7 drm/i915/vbt: Parse and use the new field with PSR2 TP2/3 wakeup 
time
-:67: WARNING:LONG_LINE: line over 100 characters
#67: FILE: drivers/gpu/drm/i915/intel_bios.c:786:
+   dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us = 
dev_priv->vbt.psr.tp2_tp3_wakeup_time_us;

total: 0 errors, 1 warnings, 0 checks, 63 lines checked
161af2393b0b drm/i915/psr: Move logic to get TPS registers values to another 
function
fa0a48728c55 drm/i915/icl+: Always use TPS2 or TPS3 when exiting PSR1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [v4,1/3] drm/i915/vbt: Parse and use the new field with PSR2 TP2/3 wakeup time

2019-03-13 Thread Patchwork
== Series Details ==

Series: series starting with [v4,1/3] drm/i915/vbt: Parse and use the new field 
with PSR2 TP2/3 wakeup time
URL   : https://patchwork.freedesktop.org/series/57896/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/vbt: Parse and use the new field with PSR2 TP2/3 wakeup time
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3553:16: warning: expression 
using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3554:16: warning: expression 
using sizeof(void)

Commit: drm/i915/psr: Move logic to get TPS registers values to another function
-O:drivers/gpu/drm/i915/intel_psr.c:446:27: warning: expression using 
sizeof(void)
-O:drivers/gpu/drm/i915/intel_psr.c:451:23: warning: expression using 
sizeof(void)
-O:drivers/gpu/drm/i915/intel_psr.c:451:23: warning: expression using 
sizeof(void)
+drivers/gpu/drm/i915/intel_psr.c:478:27: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_psr.c:483:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_psr.c:483:23: warning: expression using sizeof(void)

Commit: drm/i915/icl+: Always use TPS2 or TPS3 when exiting PSR1
Okay!

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 16/17] drm/i915/execlists: Virtual engine bonding

2019-03-13 Thread Chris Wilson
Quoting Chris Wilson (2019-03-13 13:39:33)
> +   if (flags & BOND_SCHEDULE) {
> +   onstack_fence_init(&fence);
> +   err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
> +  &fence,
> +  GFP_KERNEL);
> +   }
> +   i915_request_add(rq[0]);
> +   if (err)

Missed the git add: if (err < 0) !!!
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/execlists: Virtual engine bonding

2019-03-13 Thread Chris Wilson
Some users require that when a master batch is executed on one particular
engine, a companion batch is run simultaneously on a specific slave
engine. For this purpose, we introduce virtual engine bonding, allowing
maps of master:slaves to be constructed to constrain which physical
engines a virtual engine may select given a fence on a master engine.

For the moment, we continue to ignore the issue of preemption deferring
the master request for later. Ideally, we would like to then also remove
the slave and run something else rather than have it stall the pipeline.
With load balancing, we should be able to move workload around it, but
there is a similar stall on the master pipeline while it may wait for
the slave to be executed. At the cost of more latency for the bonded
request, it may be interesting to launch both on their engines in
lockstep. (Bubbles abound.)

Opens: Also what about bonding an engine as its own master? It doesn't
break anything internally, so allow the silliness.

v2: Emancipate the bonds
v3: Couple in delayed scheduling for the selftests
v4: Handle invalid mutually exclusive bonding
v5: Mention what the uapi does

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c   |  50 +
 drivers/gpu/drm/i915/i915_request.c   |   1 +
 drivers/gpu/drm/i915/i915_request.h   |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h |   7 +
 drivers/gpu/drm/i915/intel_lrc.c  | 143 ++
 drivers/gpu/drm/i915/intel_lrc.h  |   4 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c| 185 ++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
 include/uapi/drm/i915_drm.h   |  33 
 9 files changed, 427 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 98763d3f1b12..0ec78c386473 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1513,8 +1513,58 @@ set_engines__load_balance(struct i915_user_extension 
__user *base, void *data)
return 0;
 }
 
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+   struct i915_context_engines_bond __user *ext =
+   container_of_user(base, typeof(*ext), base);
+   const struct set_engines *set = data;
+   struct intel_engine_cs *master;
+   u32 class, instance, siblings;
+   u16 idx;
+   int err;
+
+   if (get_user(idx, &ext->virtual_index))
+   return -EFAULT;
+
+   if (idx >= set->nengine)
+   return -EINVAL;
+
+   idx = array_index_nospec(idx, set->nengine);
+   if (!set->engines[idx])
+   return -EINVAL;
+
+   /*
+* A non-virtual engine has 0 siblings to choose between; and submit
+* fence will always be directed to the one engine.
+*/
+   if (!intel_engine_is_virtual(set->engines[idx]))
+   return 0;
+
+   err = check_user_mbz(&ext->mbz);
+   if (err)
+   return err;
+
+   if (get_user(class, &ext->master_class))
+   return -EFAULT;
+
+   if (get_user(instance, &ext->master_instance))
+   return -EFAULT;
+
+   master = intel_engine_lookup_user(set->ctx->i915, class, instance);
+   if (!master)
+   return -EINVAL;
+
+   if (get_user(siblings, &ext->sibling_mask))
+   return -EFAULT;
+
+   return intel_virtual_engine_attach_bond(set->engines[idx],
+   master, siblings);
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+   [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 0a46f8113f5c..9ce710baa452 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -743,6 +743,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct 
i915_gem_context *ctx)
rq->batch = NULL;
rq->capture_list = NULL;
rq->waitboost = false;
+   rq->execution_mask = ~0u;
 
/*
 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h
index d4f6b2940130..862b25930de0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -145,6 +145,7 @@ struct i915_request {
 */
struct i915_sched_node sched;
struct i915_dependency dep;
+   unsigned int execution_mask;
 
/*
 * A convenience pointer to the current breadcrumb value stored in
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h 
b/drivers/gpu/drm/i915/intel_engine_types.h
index 322fbda65190..1da35509d811 100644
--- a/drivers/gpu/drm/i915/intel_engine_t

[Intel-gfx] [PATCH 18/39] drm/i915/execlists: Skip direct submission if only lite-restore

2019-03-13 Thread Chris Wilson
If resubmitting the active context, simply skip the submission as
performing the submission from the interrupt handler has higher
throughput than continually provoking lite-restores. If however, we find
ourselves with a new client, we check whether or not we can dequeue into
the second port or to resolve preemption.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c | 24 +++-
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 79ab4bc543fd..19af14bab38e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1443,12 +1443,26 @@ static void __submit_queue_imm(struct intel_engine_cs 
*engine)
tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static bool inflight(const struct intel_engine_execlists *execlists,
+const struct i915_request *rq)
 {
-   if (prio > engine->execlists.queue_priority_hint) {
-   engine->execlists.queue_priority_hint = prio;
+   const struct i915_request *active = port_request(execlists->port);
+
+   return active && active->hw_context == rq->hw_context;
+}
+
+static void submit_queue(struct intel_engine_cs *engine,
+const struct i915_request *rq)
+{
+   struct intel_engine_execlists *execlists = &engine->execlists;
+
+   if (rq_prio(rq) <= execlists->queue_priority_hint)
+   return;
+
+   execlists->queue_priority_hint = rq_prio(rq);
+
+   if (!inflight(execlists, rq))
__submit_queue_imm(engine);
-   }
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1464,7 +1478,7 @@ static void execlists_submit_request(struct i915_request 
*request)
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
GEM_BUG_ON(list_empty(&request->sched.link));
 
-   submit_queue(engine, rq_prio(request));
+   submit_queue(engine, request);
 
spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 32/39] drm/i915: Drop the deferred active reference

2019-03-13 Thread Chris Wilson
An old optimisation to reduce the number of atomics per batch sadly
relies on struct_mutex for coordination. In order to remove struct_mutex
from serialising object/context closing, always taking and releasing an
active reference on first use / last use greatly simplifies the locking.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 13 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h| 24 +--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  8 ---
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  3 +--
 .../i915/gem/selftests/i915_gem_coherency.c   |  4 ++--
 .../drm/i915/gem/selftests/i915_gem_context.c | 11 +
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c|  2 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +-
 drivers/gpu/drm/i915/i915_vma.c   | 15 +---
 drivers/gpu/drm/i915/intel_engine_cs.c|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c   |  3 +--
 drivers/gpu/drm/i915/selftests/i915_request.c |  8 ---
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  9 +--
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  9 +--
 .../drm/i915/selftests/intel_workarounds.c|  3 ---
 18 files changed, 26 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f3c55c99eb1e..af96b57f9d1d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -129,7 +129,7 @@ static void lut_close(struct i915_gem_context *ctx)
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
 
vma->open_count--;
-   __i915_gem_object_release_unless_active(vma->obj);
+   i915_vma_put(vma);
}
rcu_read_unlock();
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ebed1898a6cc..d21a7095092c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -156,7 +156,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, 
struct drm_file *file)
list_del(&lut->ctx_link);
 
i915_lut_handle_free(lut);
-   __i915_gem_object_release_unless_active(obj);
+   i915_gem_object_put(obj);
}
 
mutex_unlock(&i915->drm.struct_mutex);
@@ -348,17 +348,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
 }
 
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
-{
-   lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-   if (!i915_gem_object_has_active_reference(obj) &&
-   i915_gem_object_is_active(obj))
-   i915_gem_object_set_active_reference(obj);
-   else
-   i915_gem_object_put(obj);
-}
-
 static inline enum fb_op_origin
 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index d229a8d675d1..afc665359e58 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -161,31 +161,9 @@ i915_gem_object_needs_async_cancel(const struct 
drm_i915_gem_object *obj)
 static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
-   return obj->active_count;
+   return READ_ONCE(obj->active_count);
 }
 
-static inline bool
-i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
-{
-   return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
-{
-   lockdep_assert_held(&obj->base.dev->struct_mutex);
-   __set_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
-{
-   lockdep_assert_held(&obj->base.dev->struct_mutex);
-   __clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
-
 static inline bool
 i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da6a33e2395f..9f61220795a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -120,14 +120,6 @@ struct drm_i915_gem_object {
struct list_head batch_pool_link;
I915_SELFTEST_DECLARE(struct list_head st_link);
 
-   unsigned long flags;
-
-   /**
-* Have we taken a reference for the object for incomplete GPU

[Intel-gfx] [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation

2019-03-13 Thread Chris Wilson
A usecase arose out of handling context recovery in mesa, whereby they
wish to recreate a context with fresh logical state but preserving all
other details of the original. Currently, they create a new context and
iterate over which bits they want to copy across, but it would much more
convenient if they were able to just pass in a target context to clone
during creation. This essentially extends the setparam during creation
to pull the details from a target context instead of the user supplied
parameters.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 103 
 include/uapi/drm/i915_drm.h |  14 
 2 files changed, 117 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 4e4b0b5c4be0..bac548584091 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1488,8 +1488,111 @@ static int create_setparam(struct i915_user_extension 
__user *ext, void *data)
return ctx_setparam(data, &local.setparam);
 }
 
+static int clone_sseu(struct i915_gem_context *dst,
+ struct i915_gem_context *src)
+{
+   const struct intel_sseu default_sseu =
+   intel_device_default_sseu(dst->i915);
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
+   for_each_engine(engine, dst->i915, id) {
+   struct intel_context *ce;
+   struct intel_sseu sseu;
+
+   ce = intel_context_lookup(src, engine);
+   if (!ce)
+   continue;
+
+   sseu = ce->sseu;
+   if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
+   continue;
+
+   ce = intel_context_pin_lock(dst, engine);
+   if (IS_ERR(ce))
+   return PTR_ERR(ce);
+
+   ce->sseu = sseu;
+   intel_context_pin_unlock(ce);
+   }
+
+   return 0;
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+   struct drm_i915_gem_context_create_ext_clone local;
+   struct i915_gem_context *dst = data;
+   struct i915_gem_context *src;
+   int err;
+
+   if (copy_from_user(&local, ext, sizeof(local)))
+   return -EFAULT;
+
+   if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+   return -EINVAL;
+
+   if (local.rsvd)
+   return -EINVAL;
+
+   if (local.clone_id == dst->user_handle) /* good guess! denied. */
+   return -ENOENT;
+
+   rcu_read_lock();
+   src = __i915_gem_context_lookup_rcu(dst->file_priv, local.clone_id);
+   rcu_read_unlock();
+   if (!src)
+   return -ENOENT;
+
+   GEM_BUG_ON(src == dst);
+
+   if (local.flags & I915_CONTEXT_CLONE_FLAGS)
+   dst->user_flags = src->user_flags;
+
+   if (local.flags & I915_CONTEXT_CLONE_SCHED)
+   dst->sched = src->sched;
+
+   if (local.flags & I915_CONTEXT_CLONE_SSEU) {
+   err = clone_sseu(dst, src);
+   if (err)
+   return err;
+   }
+
+   if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
+   if (dst->timeline)
+   i915_timeline_put(dst->timeline);
+   dst->timeline = i915_timeline_get(src->timeline);
+   }
+
+   if (local.flags & I915_CONTEXT_CLONE_VM) {
+   struct i915_hw_ppgtt *ppgtt;
+
+   do {
+   ppgtt = READ_ONCE(src->ppgtt);
+   if (!ppgtt)
+   break;
+
+   if (!kref_get_unless_zero(&ppgtt->ref))
+   continue;
+
+   if (ppgtt == READ_ONCE(src->ppgtt))
+   break;
+
+   i915_ppgtt_put(ppgtt);
+   } while (1);
+
+   if (ppgtt) {
+   __assign_ppgtt(dst, ppgtt);
+   i915_ppgtt_put(ppgtt);
+   }
+   }
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+   [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5206d0006043..9714520f43da 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1581,6 +1581,20 @@ struct drm_i915_gem_context_create_ext_setparam {
struct drm_i915_gem_context_param setparam;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+   struct i915_user_extension base;
+   __u32 clone_id;
+   __u32 flags;
+#define I915_CONTEXT_CLONE_FLAGS   (1u << 0)
+#define I915_CONTEXT_CLONE_SCHED   (1u << 1)
+#define 

[Intel-gfx] [PATCH 27/39] drm/i915: Pull scatterlist utils out of i915_gem.h

2019-03-13 Thread Chris Wilson
Our scatterlist utility routines can be pulled out of i915_gem.h for a
bit more decluttering.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|   1 +
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_phys.c  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   1 +
 .../drm/i915/gem/selftests/huge_gem_object.c  |   2 +
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h   | 110 ---
 drivers/gpu/drm/i915/i915_gem.c   |  30 +---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |   1 +
 drivers/gpu/drm/i915/i915_scatterlist.c   |  39 ++
 drivers/gpu/drm/i915/i915_scatterlist.h   | 128 ++
 drivers/gpu/drm/i915/selftests/i915_vma.c |   1 +
 drivers/gpu/drm/i915/selftests/scatterlist.c  |   1 +
 18 files changed, 186 insertions(+), 140 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.c
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c12350df7793..84dfdf90e0e7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ i915-y := i915_drv.o \
  i915_params.o \
  i915_pci.o \
  i915_reset.o \
+ i915_scatterlist.o \
  i915_suspend.o \
  i915_sw_fence.o \
  i915_syncmap.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 47db261a98c0..b5d86cfadd46 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -11,6 +11,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index d40adb3bbe29..d072d0cbce06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -14,6 +14,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_gem.h"
+#include "../i915_scatterlist.h"
 #include "../i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index a594f48db28e..5c1a3cf2c33f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -7,6 +7,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 struct sg_table *pages,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c 
b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 1bf3e0afcba2..22d185301578 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -16,6 +16,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4bcd861046c1..7159688cd303 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -10,6 +10,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 /*
  * Move pages to appropriate lru and release the pagevec, decrementing the
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12ef6d8fb2dc..0ae793d997a5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -15,6 +15,7 @@
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
+#include "../i915_scatterlist.h"
 #include "../i915_trace.h"
 #include "../intel_drv.h"
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c 
b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 824f3761314c..c03781f8b435 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -4,6 +4,8 @@
  * Copyright © 2016 Intel Corporation
  */
 
+#include "../../i915_scatterlist.h"
+
 #include "huge_gem_object.h"
 
 static void huge_free_pages(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 67a9d551bb0e..d9f31d3a4f24 100644
--- a/drivers/gpu/dr

[Intel-gfx] [PATCH 33/39] drm/i915: Move object close under its own lock

2019-03-13 Thread Chris Wilson
Use i915_gem_object_lock() to guard the LUT and active reference to
allow us to break free of struct_mutex for handling GEM_CLOSE.

Testcase: igt/gem_close_race
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 16 +++---
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  6 ---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 22 +---
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 +--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 -
 .../gpu/drm/i915/gem/selftests/mock_context.c |  1 -
 drivers/gpu/drm/i915/i915_drv.h   |  4 +-
 drivers/gpu/drm/i915/i915_gem.c   |  1 +
 drivers/gpu/drm/i915/i915_vma.c   | 41 ++-
 drivers/gpu/drm/i915/i915_vma.h   | 15 +++---
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 11 files changed, 97 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index af96b57f9d1d..7b7aea2d1877 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -112,24 +112,17 @@ void i915_lut_handle_free(struct i915_lut_handle *lut)
 
 static void lut_close(struct i915_gem_context *ctx)
 {
-   struct i915_lut_handle *lut, *ln;
struct radix_tree_iter iter;
void __rcu **slot;
 
-   list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
-   list_del(&lut->obj_link);
-   i915_lut_handle_free(lut);
-   }
-   INIT_LIST_HEAD(&ctx->handles_list);
+   lockdep_assert_held(&ctx->mutex);
 
rcu_read_lock();
radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
struct i915_vma *vma = rcu_dereference_raw(*slot);
 
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
-
vma->open_count--;
-   i915_vma_put(vma);
}
rcu_read_unlock();
 }
@@ -312,6 +305,8 @@ void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+   mutex_lock(&ctx->mutex);
+
i915_gem_context_set_closed(ctx);
 
/*
@@ -330,6 +325,8 @@ static void context_close(struct i915_gem_context *ctx)
i915_ppgtt_close(ctx->ppgtt);
 
ctx->file_priv = ERR_PTR(-EBADF);
+
+   mutex_unlock(&ctx->mutex);
i915_gem_context_put(ctx);
 }
 
@@ -380,7 +377,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
spin_lock_init(&ctx->hw_contexts_lock);
 
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-   INIT_LIST_HEAD(&ctx->handles_list);
INIT_LIST_HEAD(&ctx->hw_id_link);
 
/* Default context will never have a file_priv */
@@ -1100,7 +1096,9 @@ static int set_ppgtt(struct i915_gem_context *ctx,
goto unlock;
 
/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+   mutex_lock(&ctx->mutex);
lut_close(ctx);
+   mutex_unlock(&ctx->mutex);
 
old = __set_ppgtt(ctx, ppgtt);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 47ed1f65cc06..333eeacef5e3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -177,12 +177,6 @@ struct i915_gem_context {
 * per vm, which may be one per context or shared with the global GTT)
 */
struct radix_tree_root handles_vma;
-
-   /** handles_list: reverse list of all the rbtree entries in use for
-* this context, which allows us to free all the allocations on
-* context close.
-*/
-   struct list_head handles_list;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 900417510fe0..1a9b14361e34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -803,9 +803,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
unsigned int i, batch;
int err;
 
-   if (unlikely(i915_gem_context_is_closed(eb->ctx)))
-   return -ENOENT;
-
if (unlikely(i915_gem_context_is_banned(eb->ctx)))
return -EIO;
 
@@ -814,6 +811,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
batch = eb_batch_index(eb);
 
+   mutex_lock(&eb->ctx->mutex);
+   if (unlikely(i915_gem_context_is_closed(eb->ctx))) {
+   err = -ENOENT;
+   goto err_ctx;
+   }
+
for (i = 0; i < eb->buffer_count; i++) {
u32 handle = eb->exec[i].handle;
struct i915_lut_handle *lut;
@@ -850,10 +853,13 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
/* transfer ref to ctx */
if (!vma->open_count++)
i915_

[Intel-gfx] [PATCH 02/39] drm/i915: Lock the gem_context->active_list while dropping the link

2019-03-13 Thread Chris Wilson
On unpinning the intel_context, we remove it from the active list
inside the GEM context. This list is supposed to be guarded by the GEM
context mutex, so remember to take it!

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_context.c | 15 +++
 drivers/gpu/drm/i915/intel_lrc.c |  3 ---
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ---
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_context.c 
b/drivers/gpu/drm/i915/intel_context.c
index 5a16c9bb2778..0ab894a058f6 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -165,13 +165,13 @@ intel_context_pin(struct i915_gem_context *ctx,
if (err)
goto err;
 
+   i915_gem_context_get(ctx);
+   GEM_BUG_ON(ce->gem_context != ctx);
+
mutex_lock(&ctx->mutex);
list_add(&ce->active_link, &ctx->active_engines);
mutex_unlock(&ctx->mutex);
 
-   i915_gem_context_get(ctx);
-   GEM_BUG_ON(ce->gem_context != ctx);
-
smp_mb__before_atomic(); /* flush pin before it is visible */
}
 
@@ -194,9 +194,16 @@ void intel_context_unpin(struct intel_context *ce)
/* We may be called from inside intel_context_pin() to evict another */
mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
 
-   if (likely(atomic_dec_and_test(&ce->pin_count)))
+   if (likely(atomic_dec_and_test(&ce->pin_count))) {
ce->ops->unpin(ce);
 
+   mutex_lock(&ce->gem_context->mutex);
+   list_del(&ce->active_link);
+   mutex_unlock(&ce->gem_context->mutex);
+
+   i915_gem_context_put(ce->gem_context);
+   }
+
mutex_unlock(&ce->pin_mutex);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 00fa4a3bc9a3..e0fb8853477c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1283,9 +1283,6 @@ static void execlists_context_unpin(struct intel_context 
*ce)
ce->state->obj->pin_global--;
i915_gem_object_unpin_map(ce->state->obj);
i915_vma_unpin(ce->state);
-
-   list_del(&ce->active_link);
-   i915_gem_context_put(ce->gem_context);
 }
 
 static int __context_pin(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ebac752e092d..175070ea0f50 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1434,9 +1434,6 @@ static void ring_context_unpin(struct intel_context *ce)
 {
__context_unpin_ppgtt(ce->gem_context);
__context_unpin(ce);
-
-   list_del(&ce->active_link);
-   i915_gem_context_put(ce->gem_context);
 }
 
 static struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c 
b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 881450c694e9..7641b74ada98 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -126,8 +126,6 @@ static void hw_delay_complete(struct timer_list *t)
 static void mock_context_unpin(struct intel_context *ce)
 {
mock_timeline_unpin(ce->ring->timeline);
-   list_del(&ce->active_link);
-   i915_gem_context_put(ce->gem_context);
 }
 
 static void mock_context_destroy(struct intel_context *ce)
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 29/39] drm/i915: Move GEM object waiting to its own file

2019-03-13 Thread Chris Wilson
Continuing the decluttering of i915_gem.c by moving the object wait
decomposition into its own file.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h |   8 +
 drivers/gpu/drm/i915/gem/i915_gem_wait.c   | 276 +
 drivers/gpu/drm/i915/i915_drv.h|  17 --
 drivers/gpu/drm/i915/i915_gem.c| 254 ---
 5 files changed, 285 insertions(+), 271 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_wait.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 89fb4eaca4fb..2e78fbe79280 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -91,6 +91,7 @@ i915-y += \
  gem/i915_gem_stolen.o \
  gem/i915_gem_tiling.o \
  gem/i915_gem_userptr.o \
+ gem/i915_gem_wait.o \
  gem/i915_gemfs.o \
  i915_active.o \
  i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 08016f1c9505..d229a8d675d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -427,4 +427,12 @@ static inline void __start_cpu_write(struct 
drm_i915_gem_object *obj)
obj->cache_dirty = true;
 }
 
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+unsigned int flags,
+long timeout);
+int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ const struct i915_sched_attr *attr);
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c 
b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
new file mode 100644
index ..e30a247d21f2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -0,0 +1,276 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include 
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../intel_ringbuffer.h"
+
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+  unsigned int flags,
+  long timeout)
+{
+   BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
+
+   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+   return timeout;
+
+   if (dma_fence_is_i915(fence))
+   return i915_request_wait(to_request(fence), flags, timeout);
+
+   return dma_fence_wait_timeout(fence,
+ flags & I915_WAIT_INTERRUPTIBLE,
+ timeout);
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+unsigned int flags,
+long timeout)
+{
+   unsigned int seq = __read_seqcount_begin(&resv->seq);
+   struct dma_fence *excl;
+   bool prune_fences = false;
+
+   if (flags & I915_WAIT_ALL) {
+   struct dma_fence **shared;
+   unsigned int count, i;
+   int ret;
+
+   ret = reservation_object_get_fences_rcu(resv,
+   &excl, &count, &shared);
+   if (ret)
+   return ret;
+
+   for (i = 0; i < count; i++) {
+   timeout = i915_gem_object_wait_fence(shared[i],
+flags, timeout);
+   if (timeout < 0)
+   break;
+
+   dma_fence_put(shared[i]);
+   }
+
+   for (; i < count; i++)
+   dma_fence_put(shared[i]);
+   kfree(shared);
+
+   /*
+* If both shared fences and an exclusive fence exist,
+* then by construction the shared fences must be later
+* than the exclusive fence. If we successfully wait for
+* all the shared fences, we know that the exclusive fence
+* must all be signaled. If all the shared fences are
+* signaled, we can prune the array and recover the
+* floating references on the fences/requests.
+*/
+   prune_fences = count && timeout >= 0;
+   } else {
+   excl = reservation_object_get_excl_rcu(resv);
+   }
+
+   if (excl && timeout >= 0)
+   timeout = i915_gem_object_wait_fence(excl, flags, timeout);
+
+   dma_fence_put(excl);
+
+   /*
+* Opportunistically prune the fences iff we know they have *all* been
+* signaled and that the reservation object has not been changed (i.e.
+* no new fences have been added).
+*/
+   if 

[Intel-gfx] [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring

2019-03-13 Thread Chris Wilson
As the final request on a ring may hold the reference to this ring (via
retiring the last pinned context), we may find ourselves chasing a
dangling pointer on completion of the list.

A quick solution is to hold a reference to the ring itself as we retire
along it so that we only free it after we stop dereferencing it.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_request.c  |  6 +-
 drivers/gpu/drm/i915/intel_engine_types.h|  2 ++
 drivers/gpu/drm/i915/intel_lrc.c |  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  9 +
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 13 -
 drivers/gpu/drm/i915/selftests/mock_engine.c |  1 +
 6 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 9533a85cb0b3..0a3d94517d0a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1332,8 +1332,12 @@ void i915_retire_requests(struct drm_i915_private *i915)
if (!i915->gt.active_requests)
return;
 
-   list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link)
+   list_for_each_entry_safe(ring, tmp,
+&i915->gt.active_rings, active_link) {
+   intel_ring_get(ring); /* last rq holds reference! */
ring_retire_requests(ring);
+   intel_ring_put(ring);
+   }
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h 
b/drivers/gpu/drm/i915/intel_engine_types.h
index b0aa1f0d4e47..88ed7ba8886f 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -9,6 +9,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -51,6 +52,7 @@ struct intel_engine_hangcheck {
 };
 
 struct intel_ring {
+   struct kref ref;
struct i915_vma *vma;
void *vaddr;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dc3de09c7586..00fa4a3bc9a3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1236,7 +1236,7 @@ static void execlists_submit_request(struct i915_request 
*request)
 
 static void __execlists_context_fini(struct intel_context *ce)
 {
-   intel_ring_free(ce->ring);
+   intel_ring_put(ce->ring);
 
GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
i915_gem_object_put(ce->state->obj);
@@ -2869,7 +2869,7 @@ static int execlists_context_deferred_alloc(struct 
intel_context *ce,
return 0;
 
 error_ring_free:
-   intel_ring_free(ring);
+   intel_ring_put(ring);
 error_deref_obj:
i915_gem_object_put(ctx_obj);
return ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f26f5cc1584c..ebac752e092d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1311,6 +1311,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
if (!ring)
return ERR_PTR(-ENOMEM);
 
+   kref_init(&ring->ref);
INIT_LIST_HEAD(&ring->request_list);
ring->timeline = i915_timeline_get(timeline);
 
@@ -1335,9 +1336,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
return ring;
 }
 
-void
-intel_ring_free(struct intel_ring *ring)
+void intel_ring_free(struct kref *ref)
 {
+   struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
struct drm_i915_gem_object *obj = ring->vma->obj;
 
i915_vma_close(ring->vma);
@@ -1591,7 +1592,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs 
*engine)
 err_unpin:
intel_ring_unpin(ring);
 err_ring:
-   intel_ring_free(ring);
+   intel_ring_put(ring);
 err:
intel_engine_cleanup_common(engine);
return err;
@@ -1605,7 +1606,7 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
(I915_READ_MODE(engine) & MODE_IDLE) == 0);
 
intel_ring_unpin(engine->buffer);
-   intel_ring_free(engine->buffer);
+   intel_ring_put(engine->buffer);
 
if (engine->cleanup)
engine->cleanup(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e612bdca9fd9..a57489fcb302 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -231,7 +231,18 @@ int intel_ring_pin(struct intel_ring *ring);
 void intel_ring_reset(struct intel_ring *ring, u32 tail);
 unsigned int intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct intel_ring *ring);
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+   kref_get(&ring->ref);
+   return ring;
+}
+
+static inline void intel_ring_put(struct in

[Intel-gfx] [PATCH 20/39] drm/i915: Pull GEM ioctls interface to its own file

2019-03-13 Thread Chris Wilson
Declutter i915_drv/gem.h by moving the ioctl API into its own header.

Signed-off-by: Chris Wilson 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h| 52 +++
 .../gem/test_i915_gem_ioctls_standalone.c |  7 +++
 drivers/gpu/drm/i915/i915_drv.c   |  2 +
 drivers/gpu/drm/i915/i915_drv.h   | 38 --
 drivers/gpu/drm/i915/i915_gem.c   |  2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c|  2 +
 drivers/gpu/drm/i915/i915_gem_tiling.c|  3 ++
 drivers/gpu/drm/i915/i915_gem_userptr.c   | 12 +++--
 9 files changed, 77 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 87fb8c21510e..2f84fac02578 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,6 +59,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+   gem/test_i915_gem_ioctls_standalone.o \
gem/test_i915_gem_object_types_standalone.o \
test_i915_active_types_standalone.o \
test_i915_gem_context_types_standalone.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h 
b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
new file mode 100644
index ..ddc7f2a52b3e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef I915_GEM_IOCTLS_H
+#define I915_GEM_IOCTLS_H
+
+struct drm_device;
+struct drm_file;
+
+int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file);
+int i915_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file);
+int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file);
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file);
+int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file);
+int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
+struct drm_file *file);
+int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file);
+int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
+struct drm_file *file);
+int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file);
+int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c 
b/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c
new file mode 100644
index ..df5f2b43d81c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index afdfced262e6..fa89e36d12d6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,6 +47,8 @@
 #include 
 #include 
 
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index faa8836abf4a..8069549da11c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2739,46 +2739

[Intel-gfx] [PATCH 15/39] drm/i915: Extend execution fence to support a callback

2019-03-13 Thread Chris Wilson
In the next patch, we will want to configure the slave request
depending on which physical engine the master request is executed on.
For this, we introduce a callback from the execute fence to convey this
information.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_request.c | 84 +++--
 drivers/gpu/drm/i915/i915_request.h |  4 ++
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 2382339172b4..0a46f8113f5c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -38,6 +38,8 @@ struct execute_cb {
struct list_head link;
struct irq_work work;
struct i915_sw_fence *fence;
+   void (*hook)(struct i915_request *rq, struct dma_fence *signal);
+   struct i915_request *signal;
 };
 
 static struct i915_global_request {
@@ -343,6 +345,17 @@ static void irq_execute_cb(struct irq_work *wrk)
kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+   struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+   cb->hook(container_of(cb->fence, struct i915_request, submit),
+&cb->signal->fence);
+   i915_request_put(cb->signal);
+
+   irq_execute_cb(wrk);
+}
+
 static void __notify_execute_cb(struct i915_request *rq)
 {
struct execute_cb *cb;
@@ -369,14 +382,19 @@ static void __notify_execute_cb(struct i915_request *rq)
 }
 
 static int
-i915_request_await_execution(struct i915_request *rq,
-struct i915_request *signal,
-gfp_t gfp)
+__i915_request_await_execution(struct i915_request *rq,
+  struct i915_request *signal,
+  void (*hook)(struct i915_request *rq,
+   struct dma_fence *signal),
+  gfp_t gfp)
 {
struct execute_cb *cb;
 
-   if (i915_request_is_active(signal))
+   if (i915_request_is_active(signal)) {
+   if (hook)
+   hook(rq, &signal->fence);
return 0;
+   }
 
cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
if (!cb)
@@ -386,8 +404,18 @@ i915_request_await_execution(struct i915_request *rq,
i915_sw_fence_await(cb->fence);
init_irq_work(&cb->work, irq_execute_cb);
 
+   if (hook) {
+   cb->hook = hook;
+   cb->signal = i915_request_get(signal);
+   cb->work.func = irq_execute_cb_hook;
+   }
+
spin_lock_irq(&signal->lock);
if (i915_request_is_active(signal)) {
+   if (hook) {
+   hook(rq, &signal->fence);
+   i915_request_put(signal);
+   }
i915_sw_fence_complete(cb->fence);
kmem_cache_free(global.slab_execute_cbs, cb);
} else {
@@ -790,7 +818,7 @@ emit_semaphore_wait(struct i915_request *to,
return err;
 
/* Only submit our spinner after the signaler is running! */
-   err = i915_request_await_execution(to, from, gfp);
+   err = __i915_request_await_execution(to, from, NULL, gfp);
if (err)
return err;
 
@@ -910,6 +938,52 @@ i915_request_await_dma_fence(struct i915_request *rq, 
struct dma_fence *fence)
return 0;
 }
 
+int
+i915_request_await_execution(struct i915_request *rq,
+struct dma_fence *fence,
+void (*hook)(struct i915_request *rq,
+ struct dma_fence *signal))
+{
+   struct dma_fence **child = &fence;
+   unsigned int nchild = 1;
+   int ret;
+
+   if (dma_fence_is_array(fence)) {
+   struct dma_fence_array *array = to_dma_fence_array(fence);
+
+   /* XXX Error for signal-on-any fence arrays */
+
+   child = array->fences;
+   nchild = array->num_fences;
+   GEM_BUG_ON(!nchild);
+   }
+
+   do {
+   fence = *child++;
+   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+   continue;
+
+   /*
+* We don't squash repeated fence dependencies here as we
+* want to run our callback in all cases.
+*/
+
+   if (dma_fence_is_i915(fence))
+   ret = __i915_request_await_execution(rq,
+to_request(fence),
+hook,
+I915_FENCE_GFP);
+   else
+   ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+   I915_FENCE_TIMEOUT,
+ 

[Intel-gfx] [PATCH 38/39] drm/i915/execlists: Preempt-to-busy

2019-03-13 Thread Chris Wilson
When using a global seqno, we required a precise stop-the-world event to
handle preemption and unwind the global seqno counter. To accomplish
this, we would preempt to a special out-of-band context and wait for the
machine to report that it was idle. Given an idle machine, we could very
precisely see which requests had completed and which we needed to feed
back into the run queue.

However, now that we have scrapped the global seqno, we no longer need
to precisely unwind the global counter and only track requests by their
per-context seqno. This allows us to loosely unwind inflight requests
while scheduling a preemption, with the enormous caveat that the
requests we put back on the run queue are still _inflight_ (until the
preemption request is complete). This makes request tracking much more
messy, as at any point then we can see a completed request that we
believe is not currently scheduled for execution. We also have to be
careful not to rewind RING_TAIL past RING_HEAD on preempting to the
running context, and for this we use a semaphore to prevent completion
of the request before continuing.

To accomplish this feat, we change how we track requests scheduled to
the HW. Instead of appending our requests onto a single list as we
submit, we track each submission to ELSP as its own block. Then upon
receiving the CS preemption event, we promote the pending block to the
inflight block (discarding what was previously being tracked). As normal
CS completion events arrive, we then remove stale entries from the
inflight tracker.

fixme: stats
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c|  19 +-
 drivers/gpu/drm/i915/i915_request.c  |   6 +
 drivers/gpu/drm/i915/i915_scheduler.c|  15 +-
 drivers/gpu/drm/i915/i915_utils.h|  13 +
 drivers/gpu/drm/i915/intel_context_types.h   |   5 +
 drivers/gpu/drm/i915/intel_engine_cs.c   |  58 +-
 drivers/gpu/drm/i915/intel_engine_types.h|  52 +-
 drivers/gpu/drm/i915/intel_guc_submission.c  | 171 +++---
 drivers/gpu/drm/i915/intel_hangcheck.c   |   2 +
 drivers/gpu/drm/i915/intel_lrc.c | 577 ---
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  62 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c   |   2 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c |   1 +
 14 files changed, 416 insertions(+), 569 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e08a9afee7cd..e763e1142ce9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -607,7 +607,7 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-   return HAS_LOGICAL_RING_PREEMPTION(i915);
+   return USES_GUC_SUBMISSION(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6a5d27dc6338..fa9827eed8ef 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1234,10 +1234,10 @@ static void error_record_engine_registers(struct 
i915_gpu_state *error,
}
 }
 
-static void record_request(struct i915_request *request,
+static void record_request(const struct i915_request *request,
   struct drm_i915_error_request *erq)
 {
-   struct i915_gem_context *ctx = request->gem_context;
+   const struct i915_gem_context *ctx = request->gem_context;
 
erq->flags = request->fence.flags;
erq->context = request->fence.context;
@@ -1301,20 +1301,15 @@ static void engine_record_requests(struct 
intel_engine_cs *engine,
ee->num_requests = count;
 }
 
-static void error_record_engine_execlists(struct intel_engine_cs *engine,
+static void error_record_engine_execlists(const struct intel_engine_cs *engine,
  struct drm_i915_error_engine *ee)
 {
const struct intel_engine_execlists * const execlists = 
&engine->execlists;
-   unsigned int n;
+   struct i915_request * const *port = execlists->active;
+   unsigned int n = 0;
 
-   for (n = 0; n < execlists_num_ports(execlists); n++) {
-   struct i915_request *rq = port_request(&execlists->port[n]);
-
-   if (!rq)
-   break;
-
-   record_request(rq, &ee->execlist[n]);
-   }
+   while (*port)
+   record_request(*port++, &ee->execlist[n++]);
 
ee->num_ports = n;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 7365a86a448a..c29fb6f7ef4a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -212,6 +212,12 @@ static bool i915_request_retire(struct i915_request *rq)
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
   

[Intel-gfx] [PATCH 24/39] drm/i915: Move mmap and friends to its own file

2019-03-13 Thread Chris Wilson
Continuing the decluttering of i915_gem.c, it is now the turn of do_mmap
and the fault handlers

Signed-off-by: Chris Wilson 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 514 
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  56 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|   7 +
 .../drm/i915/gem/selftests/i915_gem_mman.c| 507 
 drivers/gpu/drm/i915/i915_drv.h   |   1 -
 drivers/gpu/drm/i915/i915_gem.c   | 570 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c|   2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  | 489 ---
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 10 files changed, 1101 insertions(+), 1047 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_mman.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 8e6ef54f2497..c2804efe4e5a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
  gem/i915_gem_object.o \
+ gem/i915_gem_mman.o \
  gem/i915_gem_pages.o \
  gem/i915_gem_phys.o \
  gem/i915_gem_shmem.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index ..92a2b9cd879c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,514 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+#include "../i915_drv.h"
+#include "../intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+ unsigned long addr, unsigned long size)
+{
+   if (vma->vm_file != filp)
+   return false;
+
+   return vma->vm_start == addr &&
+  (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ *  it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it 
doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look a this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on, hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver 
only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file)
+{
+   struct drm_i915_gem_mmap *args = data;
+   struct drm_i915_gem_object *obj;
+   unsigned long addr;
+
+   if (args->flags & ~(I915_MMAP_WC))
+   return -EINVAL;
+
+   if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+   return -ENODEV;
+
+   obj = i915_gem_object_lookup(file, args->handle);
+   if (!obj)
+   return -ENOENT;
+
+   /* prime objects have no backing filp to GEM mmap
+* pages from.
+*/
+   if (!obj->base.filp) {
+   i915_gem_object_put(obj);
+   return -ENXIO;
+   }
+
+   addr = vm_mmap(obj->base.filp, 0, args->size,
+  PROT_READ | PROT_WRITE, MAP_SHARED,
+  args->offset);
+   if (IS_ERR_VALUE(addr))
+   goto err;
+
+   if (args->flags & I915_MMAP_WC) {
+   struct mm_struct *mm = current->mm;
+   struct vm_area_struct *vma;
+
+   if (down_write_killable(&mm->mmap_sem)) {
+   i915_gem_object_put(obj);
+   return -EINTR;
+   }
+   vma = find_vma(mm, addr);
+   if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+   vma->vm_page_prot =
+   
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+   else
+   addr = -ENOMEM;
+   up_write(&mm->mmap_sem);
+   if (IS_ERR_VALUE(addr))
+   goto err;
+
+   /* This may race, but that's ok, it only gets set */
+   WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+   }
+   i915_gem_object_put(obj);
+
+   args->addr_ptr = (u64)addr;
+

[Intel-gfx] [PATCH 05/39] drm/i915/selftests: Provide stub reset functions

2019-03-13 Thread Chris Wilson
If a test fails, we quite often mark the device as wedged. Provide the
stub functions so that we can wedge the mock device, and avoid exploding
on test failures.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109981
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/selftests/mock_engine.c | 36 
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c 
b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 639d36eb904a..61744819172b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -198,6 +198,37 @@ static void mock_submit_request(struct i915_request 
*request)
spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
 
+static void mock_reset_prepare(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+{
+   GEM_BUG_ON(stalled);
+}
+
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_cancel_requests(struct intel_engine_cs *engine)
+{
+   struct i915_request *request;
+   unsigned long flags;
+
+   spin_lock_irqsave(&engine->timeline.lock, flags);
+
+   /* Mark all submitted requests as skipped. */
+   list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+   if (!i915_request_signaled(request))
+   dma_fence_set_error(&request->fence, -EIO);
+
+   i915_request_mark_complete(request);
+   }
+
+   spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
const char *name,
int id)
@@ -223,6 +254,11 @@ struct intel_engine_cs *mock_engine(struct 
drm_i915_private *i915,
engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
engine->base.submit_request = mock_submit_request;
 
+   engine->base.reset.prepare = mock_reset_prepare;
+   engine->base.reset.reset = mock_reset;
+   engine->base.reset.finish = mock_reset_finish;
+   engine->base.cancel_requests = mock_cancel_requests;
+
if (i915_timeline_init(i915,
   &engine->base.timeline,
   engine->base.name,
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 30/39] drm/i915: Move GEM object busy checking to its own file

2019-03-13 Thread Chris Wilson
Continuing the decluttering of i915_gem.c by moving the object busy
checking into its own file.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile|   1 +
 drivers/gpu/drm/i915/gem/i915_gem_busy.c | 137 +++
 drivers/gpu/drm/i915/i915_gem.c  | 127 -
 3 files changed, 138 insertions(+), 127 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_busy.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2e78fbe79280..50bc8f344000 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -75,6 +75,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+ gem/i915_gem_busy.o \
  gem/i915_gem_clflush.o \
  gem/i915_gem_context.o \
  gem/i915_gem_dmabuf.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c 
b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
new file mode 100644
index ..c06ba7bdf7b0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -0,0 +1,137 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../intel_ringbuffer.h"
+
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
+{
+   if (id == I915_ENGINE_CLASS_INVALID)
+   return 0x;
+
+   GEM_BUG_ON(id >= 16);
+   return 0x1 << id;
+}
+
+static __always_inline unsigned int __busy_write_id(unsigned int id)
+{
+   /*
+* The uABI guarantees an active writer is also amongst the read
+* engines. This would be true if we accessed the activity tracking
+* under the lock, but as we perform the lookup of the object and
+* its activity locklessly we can not guarantee that the last_write
+* being active implies that we have set the same engine flag from
+* last_read - hence we always set both read and write busy for
+* last_write.
+*/
+   if (id == I915_ENGINE_CLASS_INVALID)
+   return 0x;
+
+   return (id + 1) | __busy_read_flag(id);
+}
+
+static __always_inline unsigned int
+__busy_set_if_active(const struct dma_fence *fence,
+unsigned int (*flag)(unsigned int id))
+{
+   const struct i915_request *rq;
+
+   /*
+* We have to check the current hw status of the fence as the uABI
+* guarantees forward progress. We could rely on the idle worker
+* to eventually flush us, but to minimise latency just ask the
+* hardware.
+*
+* Note we only report on the status of native fences.
+*/
+   if (!dma_fence_is_i915(fence))
+   return 0;
+
+   /* opencode to_request() in order to avoid const warnings */
+   rq = container_of(fence, const struct i915_request, fence);
+   if (i915_request_completed(rq))
+   return 0;
+
+   return flag(rq->engine->uabi_class);
+}
+
+static __always_inline unsigned int
+busy_check_reader(const struct dma_fence *fence)
+{
+   return __busy_set_if_active(fence, __busy_read_flag);
+}
+
+static __always_inline unsigned int
+busy_check_writer(const struct dma_fence *fence)
+{
+   if (!fence)
+   return 0;
+
+   return __busy_set_if_active(fence, __busy_write_id);
+}
+
+int
+i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file)
+{
+   struct drm_i915_gem_busy *args = data;
+   struct drm_i915_gem_object *obj;
+   struct reservation_object_list *list;
+   unsigned int seq;
+   int err;
+
+   err = -ENOENT;
+   rcu_read_lock();
+   obj = i915_gem_object_lookup_rcu(file, args->handle);
+   if (!obj)
+   goto out;
+
+   /*
+* A discrepancy here is that we do not report the status of
+* non-i915 fences, i.e. even though we may report the object as idle,
+* a call to set-domain may still stall waiting for foreign rendering.
+* This also means that wait-ioctl may report an object as busy,
+* where busy-ioctl considers it idle.
+*
+* We trade the ability to warn of foreign fences to report on which
+* i915 engines are active for the object.
+*
+* Alternatively, we can trade that extra information on read/write
+* activity with
+*  args->busy =
+*  !reservation_object_test_signaled_rcu(obj->resv, true);
+* to report the overall busyness. This is what the wait-ioctl does.
+*
+*/
+retry:
+   seq = raw_read_seqcount(&obj->resv->seq);
+
+   /* Translate the exclusive fence to the READ *and* WRITE engine */
+   args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
+
+   /* Translate shared fences to READ set of engines */
+   list = rcu_dereference(obj->resv->fence);
+   if 

[Intel-gfx] [PATCH 16/39] drm/i915/execlists: Virtual engine bonding

2019-03-13 Thread Chris Wilson
Some users require that when a master batch is executed on one particular
engine, a companion batch is run simultaneously on a specific slave
engine. For this purpose, we introduce virtual engine bonding, allowing
maps of master:slaves to be constructed to constrain which physical
engines a virtual engine may select given a fence on a master engine.

For the moment, we continue to ignore the issue of preemption deferring
the master request for later. Ideally, we would like to then also remove
the slave and run something else rather than have it stall the pipeline.
With load balancing, we should be able to move workload around it, but
there is a similar stall on the master pipeline while it may wait for
the slave to be executed. At the cost of more latency for the bonded
request, it may be interesting to launch both on their engines in
lockstep. (Bubbles abound.)

Opens: Also what about bonding an engine as its own master? It doesn't
break anything internally, so allow the silliness.

v2: Emancipate the bonds
v3: Couple in delayed scheduling for the selftests
v4: Handle invalid mutually exclusive bonding
v5: Mention what the uapi does

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c   |  50 +
 drivers/gpu/drm/i915/i915_request.c   |   1 +
 drivers/gpu/drm/i915/i915_request.h   |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h |   7 +
 drivers/gpu/drm/i915/intel_lrc.c  | 143 ++
 drivers/gpu/drm/i915/intel_lrc.h  |   4 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c| 185 ++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
 include/uapi/drm/i915_drm.h   |  33 
 9 files changed, 427 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 98763d3f1b12..0ec78c386473 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1513,8 +1513,58 @@ set_engines__load_balance(struct i915_user_extension 
__user *base, void *data)
return 0;
 }
 
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+   struct i915_context_engines_bond __user *ext =
+   container_of_user(base, typeof(*ext), base);
+   const struct set_engines *set = data;
+   struct intel_engine_cs *master;
+   u32 class, instance, siblings;
+   u16 idx;
+   int err;
+
+   if (get_user(idx, &ext->virtual_index))
+   return -EFAULT;
+
+   if (idx >= set->nengine)
+   return -EINVAL;
+
+   idx = array_index_nospec(idx, set->nengine);
+   if (!set->engines[idx])
+   return -EINVAL;
+
+   /*
+* A non-virtual engine has 0 siblings to choose between; and submit
+* fence will always be directed to the one engine.
+*/
+   if (!intel_engine_is_virtual(set->engines[idx]))
+   return 0;
+
+   err = check_user_mbz(&ext->mbz);
+   if (err)
+   return err;
+
+   if (get_user(class, &ext->master_class))
+   return -EFAULT;
+
+   if (get_user(instance, &ext->master_instance))
+   return -EFAULT;
+
+   master = intel_engine_lookup_user(set->ctx->i915, class, instance);
+   if (!master)
+   return -EINVAL;
+
+   if (get_user(siblings, &ext->sibling_mask))
+   return -EFAULT;
+
+   return intel_virtual_engine_attach_bond(set->engines[idx],
+   master, siblings);
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+   [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 0a46f8113f5c..9ce710baa452 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -743,6 +743,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct 
i915_gem_context *ctx)
rq->batch = NULL;
rq->capture_list = NULL;
rq->waitboost = false;
+   rq->execution_mask = ~0u;
 
/*
 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h
index d4f6b2940130..862b25930de0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -145,6 +145,7 @@ struct i915_request {
 */
struct i915_sched_node sched;
struct i915_dependency dep;
+   unsigned int execution_mask;
 
/*
 * A convenience pointer to the current breadcrumb value stored in
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h 
b/drivers/gpu/drm/i915/intel_engine_types.h
index 322fbda65190..1da35509d811 100644
--- a/drivers/gpu/drm/i915/intel_engine_t

[Intel-gfx] [PATCH 35/39] drm/i915: Keep contexts pinned until after the next kernel context switch

2019-03-13 Thread Chris Wilson
We need to keep the context image pinned in memory until after the GPU
has finished writing into it. Since it continues to write as we signal
the final breadcrumb, we need to keep it pinned until the request after
it is complete. Currently we know the order in which requests execute on
each engine, and so to remove that presumption we need to identify a
request/context-switch we know must occur after our completion. Any
request queued after the signal must imply a context switch, for
simplicity we use a fresh request from the kernel context.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 35 +++-
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
 drivers/gpu/drm/i915/i915_active.c| 72 +++
 drivers/gpu/drm/i915/i915_active.h|  4 +
 drivers/gpu/drm/i915/i915_drv.h   |  1 +
 drivers/gpu/drm/i915/i915_gem.c   | 47 --
 drivers/gpu/drm/i915/i915_gem_evict.c | 25 +++---
 drivers/gpu/drm/i915/i915_request.c   | 15 
 drivers/gpu/drm/i915/i915_reset.c |  5 +-
 drivers/gpu/drm/i915/intel_context.c  | 88 +--
 drivers/gpu/drm/i915/intel_context.h  |  3 +
 drivers/gpu/drm/i915/intel_context_types.h|  6 +-
 drivers/gpu/drm/i915/intel_engine_cs.c| 23 +
 drivers/gpu/drm/i915/intel_engine_types.h | 13 +--
 drivers/gpu/drm/i915/intel_lrc.c  | 64 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c   | 56 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h   |  2 -
 drivers/gpu/drm/i915/selftests/mock_engine.c  | 11 +--
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  8 +-
 19 files changed, 254 insertions(+), 225 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7b7aea2d1877..d9bc05ba3902 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -653,17 +653,6 @@ int i915_gem_contexts_init(struct drm_i915_private 
*dev_priv)
return 0;
 }
 
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
-{
-   struct intel_engine_cs *engine;
-   enum intel_engine_id id;
-
-   lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-   for_each_engine(engine, dev_priv, id)
-   intel_engine_lost_context(engine);
-}
-
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
 {
lockdep_assert_held(&i915->drm.struct_mutex);
@@ -952,6 +941,7 @@ int i915_gem_switch_to_kernel_context(struct 
drm_i915_private *i915,
 I915_FENCE_GFP);
}
 
+   i915_request_add_barriers(rq);
i915_request_add(rq);
}
 
@@ -1189,15 +1179,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct 
intel_sseu sseu)
goto out_add;
}
 
-   /* Order all following requests to be after. */
-   ret = i915_timeline_set_barrier(ce->ring->timeline, rq);
-   if (ret)
-   goto out_add;
-
-   ret = gen8_emit_rpcs_config(rq, ce, sseu);
-   if (ret)
-   goto out_add;
-
/*
 * Guarantee context image and the timeline remains pinned until the
 * modifying request is retired by setting the ce activity tracker.
@@ -1205,9 +1186,17 @@ gen8_modify_rpcs(struct intel_context *ce, struct 
intel_sseu sseu)
 * But we only need to take one pin on the account of it. Or in other
 * words transfer the pinned ce object to tracked active request.
 */
-   if (!i915_active_request_isset(&ce->active_tracker))
-   __intel_context_pin(ce);
-   __i915_active_request_set(&ce->active_tracker, rq);
+   GEM_BUG_ON(i915_active_is_idle(&ce->active));
+   ret = i915_active_ref(&ce->active, rq->fence.context, rq);
+   if (ret)
+   goto out_add;
+
+   /* Order all following requests to be after. */
+   ret = i915_timeline_set_barrier(ce->ring->timeline, rq);
+   if (ret)
+   goto out_add;
+
+   ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 out_add:
i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 107c713073cb..0e813faca451 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -120,7 +120,6 @@ static inline bool i915_gem_context_is_kernel(struct 
i915_gem_context *ctx)
 
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/i915_active.c 
b/drivers/gpu/drm/i915/i915_active.c
index 863ae12707ba..e1a8758fd5d2 100644
--- 

[Intel-gfx] [PATCH 22/39] drm/i915: Move shmem object setup to its own file

2019-03-13 Thread Chris Wilson
Split the plain old shmem object into its own file to start decluttering
i915_gem.c

v2: Lose the confusing, hysterical raisins, suffix of _gtt.

Signed-off-by: Chris Wilson 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 298 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  39 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 494 +++
 drivers/gpu/drm/i915/gvt/cmd_parser.c |   8 +-
 drivers/gpu/drm/i915/i915_drv.h   |  10 -
 drivers/gpu/drm/i915/i915_gem.c   | 811 +-
 drivers/gpu/drm/i915/i915_perf.c  |   2 +-
 drivers/gpu/drm/i915/intel_fbdev.c|   2 +-
 drivers/gpu/drm/i915/intel_guc.c  |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c  |   4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c   |   2 +-
 drivers/gpu/drm/i915/intel_uc_fw.c|   3 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   6 +-
 .../gpu/drm/i915/selftests/i915_gem_dmabuf.c  |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |   4 +-
 16 files changed, 859 insertions(+), 835 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_shmem.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d270f01e1091..54418ce5faac 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
  gem/i915_gem_object.o \
+ gem/i915_gem_shmem.o \
  i915_active.o \
  i915_cmd_parser.o \
  i915_gem_batch_pool.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 8179252bb39b..05efce885961 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -26,6 +26,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
+#include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
struct i915_global base;
@@ -42,6 +43,64 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
return kmem_cache_free(global.slab_objects, obj);
 }
 
+/* some bookkeeping */
+static void i915_gem_info_add_obj(struct drm_i915_private *i915,
+ u64 size)
+{
+   spin_lock(&i915->mm.object_stat_lock);
+   i915->mm.object_count++;
+   i915->mm.object_memory += size;
+   spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void i915_gem_info_remove_obj(struct drm_i915_private *i915,
+u64 size)
+{
+   spin_lock(&i915->mm.object_stat_lock);
+   i915->mm.object_count--;
+   i915->mm.object_memory -= size;
+   spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void
+frontbuffer_retire(struct i915_active_request *active,
+  struct i915_request *request)
+{
+   struct drm_i915_gem_object *obj =
+   container_of(active, typeof(*obj), frontbuffer_write);
+
+   intel_fb_obj_flush(obj, ORIGIN_CS);
+}
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_object_ops *ops)
+{
+   mutex_init(&obj->mm.lock);
+
+   spin_lock_init(&obj->vma.lock);
+   INIT_LIST_HEAD(&obj->vma.list);
+
+   INIT_LIST_HEAD(&obj->lut_list);
+   INIT_LIST_HEAD(&obj->batch_pool_link);
+
+   init_rcu_head(&obj->rcu);
+
+   obj->ops = ops;
+
+   reservation_object_init(&obj->__builtin_resv);
+   obj->resv = &obj->__builtin_resv;
+
+   obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
+   i915_active_request_init(&obj->frontbuffer_write,
+NULL, frontbuffer_retire);
+
+   obj->mm.madv = I915_MADV_WILLNEED;
+   INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+   mutex_init(&obj->mm.get_page.lock);
+
+   i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
+}
+
 /**
  * Mark up the object's coherency levels for a given cache_level
  * @obj: #drm_i915_gem_object
@@ -64,6 +123,245 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
 }
 
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+   struct drm_i915_private *i915 = to_i915(gem->dev);
+   struct drm_i915_gem_object *obj = to_intel_bo(gem);
+   struct drm_i915_file_private *fpriv = file->driver_priv;
+   struct i915_lut_handle *lut, *ln;
+
+   mutex_lock(&i915->drm.struct_mutex);
+
+   list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+   struct i915_gem_context *ctx = lut->ctx;
+   struct i915_vma *vma;
+
+   GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
+   if (ctx->file_priv != fpriv)
+   continue;
+
+   v

[Intel-gfx] [PATCH 28/39] drm/i915: Move GEM object domain management from struct_mutex to local

2019-03-13 Thread Chris Wilson
Use the per-object local lock to control the cache domain of the
individual GEM objects, not struct_mutex. This is a huge leap forward
for us in terms of object-level synchronisation; execbuffers are
coordinated using the ww_mutex and pread/pwrite is finally fully
serialised again.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  10 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c|  70 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 125 +++--
 drivers/gpu/drm/i915/gem/i915_gem_fence.c |  99 ++
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  14 ++
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  14 +-
 .../i915/gem/selftests/i915_gem_coherency.c   |  12 ++
 .../drm/i915/gem/selftests/i915_gem_context.c |  26 
 .../drm/i915/gem/selftests/i915_gem_mman.c|   6 +
 .../drm/i915/gem/selftests/i915_gem_phys.c|   4 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c |   2 +
 drivers/gpu/drm/i915/gvt/scheduler.c  |   8 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c|  23 ++--
 drivers/gpu/drm/i915/i915_gem.c   | 128 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   5 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   2 +
 drivers/gpu/drm/i915/i915_perf.c  |   4 +-
 drivers/gpu/drm/i915/i915_vma.c   |   8 +-
 drivers/gpu/drm/i915/i915_vma.h   |  12 ++
 drivers/gpu/drm/i915/intel_display.c  |   5 +
 drivers/gpu/drm/i915/intel_engine_cs.c|   4 +-
 drivers/gpu/drm/i915/intel_guc_log.c  |   6 +-
 drivers/gpu/drm/i915/intel_lrc.c  |   4 +
 drivers/gpu/drm/i915/intel_overlay.c  |  25 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c   |  10 +-
 drivers/gpu/drm/i915/intel_uc_fw.c|   2 +
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   8 ++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |   4 +-
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |   6 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c|   4 +
 .../drm/i915/selftests/intel_workarounds.c|   8 ++
 36 files changed, 482 insertions(+), 189 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_fence.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 84dfdf90e0e7..89fb4eaca4fb 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -80,6 +80,7 @@ i915-y += \
  gem/i915_gem_dmabuf.o \
  gem/i915_gem_domain.o \
  gem/i915_gem_execbuffer.o \
+ gem/i915_gem_fence.o \
  gem/i915_gem_internal.o \
  gem/i915_gem_object.o \
  gem/i915_gem_mman.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c 
b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 093bfff55a96..efab47250588 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -96,6 +96,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 {
struct clflush *clflush;
 
+   assert_object_held(obj);
+
/*
 * Stolen memory is always coherent with the GPU as it is explicitly
 * marked as wc by the system, or the system is cache-coherent.
@@ -145,9 +147,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object 
*obj,
true, I915_FENCE_TIMEOUT,
I915_FENCE_GFP);
 
-   reservation_object_lock(obj->resv, NULL);
reservation_object_add_excl_fence(obj->resv, &clflush->dma);
-   reservation_object_unlock(obj->resv);
 
i915_sw_fence_commit(&clflush->wait);
} else if (obj->mm.pages) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index b5d86cfadd46..1585e54ef26b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -151,7 +151,6 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
 static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum 
dma_data_direction direction)
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-   struct drm_device *dev = obj->base.dev;
bool write = (direction == DMA_BIDIRECTIONAL || direction == 
DMA_TO_DEVICE);
int err;
 
@@ -159,12 +158,12 @@ static int i915_gem_begin_cpu_access(struct dma_buf 
*dma_buf, enum dma_data_dire
if (err)
return err;
 
-   err = i915_mutex_lock_interruptible(dev);
+   err = i915_gem_object_lock_interruptible(obj);
if (err)
goto out;
 
err = i9

[Intel-gfx] [PATCH 21/39] drm/i915: Move object->pages API to i915_gem_object.[ch]

2019-03-13 Thread Chris Wilson
Currently the code for manipulating the pages on an object is still
residing in i915_gem.c, move it to i915_gem_object.c

Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile |   3 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.c  |   5 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.h  | 119 +++-
 .../gem/test_i915_gem_object_standalone.c |   7 +
 drivers/gpu/drm/i915/i915_drv.h   | 129 +-
 drivers/gpu/drm/i915/i915_globals.c   |   2 +-
 drivers/gpu/drm/i915/i915_vma.h   |   2 +-
 7 files changed, 138 insertions(+), 129 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.h (60%)
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2f84fac02578..d270f01e1091 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -60,6 +60,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
gem/test_i915_gem_ioctls_standalone.o \
+   gem/test_i915_gem_object_standalone.o \
gem/test_i915_gem_object_types_standalone.o \
test_i915_active_types_standalone.o \
test_i915_gem_context_types_standalone.o \
@@ -70,6 +71,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+ gem/i915_gem_object.o \
  i915_active.o \
  i915_cmd_parser.o \
  i915_gem_batch_pool.o \
@@ -82,7 +84,6 @@ i915-y += \
  i915_gem_gtt.o \
  i915_gem_internal.o \
  i915_gem.o \
- i915_gem_object.o \
  i915_gem_render_state.o \
  i915_gem_shrinker.o \
  i915_gem_stolen.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
similarity index 97%
rename from drivers/gpu/drm/i915/i915_gem_object.c
rename to drivers/gpu/drm/i915/gem/i915_gem_object.c
index ac6a5ab84586..8179252bb39b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,9 +22,10 @@
  *
  */
 
-#include "i915_drv.h"
 #include "i915_gem_object.h"
-#include "i915_globals.h"
+
+#include "../i915_drv.h"
+#include "../i915_globals.h"
 
 static struct i915_global_object {
struct i915_global base;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
similarity index 60%
rename from drivers/gpu/drm/i915/i915_gem_object.h
rename to drivers/gpu/drm/i915/gem/i915_gem_object.h
index 509210b1945b..80d866de34a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -13,7 +13,7 @@
 
 #include 
 
-#include "gem/i915_gem_object_types.h"
+#include "i915_gem_object_types.h"
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
@@ -192,6 +192,123 @@ i915_gem_object_get_tile_row_size(const struct 
drm_i915_gem_object *obj)
 int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
   unsigned int tiling, unsigned int stride);
 
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+  unsigned int n, unsigned int *offset);
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+unsigned int n);
+
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+  unsigned int n);
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+   unsigned long n);
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+struct sg_table *pages,
+unsigned int sg_page_sizes);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+   might_lock(&obj->mm.lock);
+
+   if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+   return 0;
+
+   return __i915_gem_object_get_pages(obj);
+}
+
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+   return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+   GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+   atomic_inc(&obj->mm.pages_pin_count);
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+   return atomic_read(&obj->mm.pages_pin_count);
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+   GEM_BUG_ON(!i915_gem_object_has_pag

[Intel-gfx] [PATCH 31/39] drm/i915: Move GEM client throttling to its own file

2019-03-13 Thread Chris Wilson
Continuing the decluttering of i915_gem.c by moving the client self
throttling into its own file.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile|  1 +
 drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 74 
 drivers/gpu/drm/i915/i915_drv.h  |  6 --
 drivers/gpu/drm/i915/i915_gem.c  | 58 ---
 4 files changed, 75 insertions(+), 64 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_throttle.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 50bc8f344000..a783e5b24777 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -90,6 +90,7 @@ i915-y += \
  gem/i915_gem_shmem.o \
  gem/i915_gem_shrinker.o \
  gem/i915_gem_stolen.o \
+ gem/i915_gem_throttle.o \
  gem/i915_gem_tiling.o \
  gem/i915_gem_userptr.o \
  gem/i915_gem_wait.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c 
b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
new file mode 100644
index ..491bc28c175d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -0,0 +1,74 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include 
+
+#include 
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+/*
+ * 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+
+/*
+ * Throttle our rendering by waiting until the ring has completed our requests
+ * emitted over 20 msec ago.
+ *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
+ * This should get us reasonable parallelism between CPU and GPU but also
+ * relatively low latency when blocking on a particular request to finish.
+ */
+int
+i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file)
+{
+   struct drm_i915_file_private *file_priv = file->driver_priv;
+   unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
+   struct i915_request *request, *target = NULL;
+   long ret;
+
+   /* ABI: return -EIO if already wedged */
+   ret = i915_terminally_wedged(to_i915(dev));
+   if (ret)
+   return ret;
+
+   spin_lock(&file_priv->mm.lock);
+   list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
+   if (time_after_eq(request->emitted_jiffies, recent_enough))
+   break;
+
+   if (target) {
+   list_del(&target->client_link);
+   target->file_priv = NULL;
+   }
+
+   target = request;
+   }
+   if (target)
+   i915_request_get(target);
+   spin_unlock(&file_priv->mm.lock);
+
+   if (!target)
+   return 0;
+
+   ret = i915_request_wait(target,
+   I915_WAIT_INTERRUPTIBLE,
+   MAX_SCHEDULE_TIMEOUT);
+   i915_request_put(target);
+
+   return ret < 0 ? ret : 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index efb96b4a5507..2b9d92a1672f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -210,12 +210,6 @@ struct drm_i915_file_private {
struct {
spinlock_t lock;
struct list_head request_list;
-/* 20ms is a fairly arbitrary limit (greater than the average frame time)
- * chosen to prevent the CPU getting more than a frame ahead of the GPU
- * (when using lax throttling for the frontbuffer). We also use it to
- * offer free GPU waitboosts for severely congested workloads.
- */
-#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
} mm;
struct idr context_idr;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1f641a44e9aa..de6db95e812a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1211,57 +1211,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
return 0;
 }
 
-/* Throttle our rendering by waiting until the ring has completed our requests
- * emitted over 20 msec ago.
- *
- * Note that if we were to use the current jiffies each time around the loop,
- * we wouldn't escape the function with any frames outstanding if the time to
- * render a frame was over 20ms.
- *
- * This should get us reasonable parallelism between CPU and GPU but also
- * relatively low latency when blocking on a particular request

[Intel-gfx] [PATCH 39/39] drm/i915: Remove logical HW ID

2019-03-13 Thread Chris Wilson
We only need to keep a unique tag for the active lifetime of the
context, and for as long as we need to identify that context. The HW
uses the tag to determine if it should use a lite-restore (why not the
LRCA?) and passes the tag back for various status identifiers. The only
status we need to track is for OA, so when using perf, we assign the
specific context a unique tag.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 150 --
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
 .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
 .../gpu/drm/i915/gem/selftests/mock_context.c |  10 --
 drivers/gpu/drm/i915/i915_debugfs.c   |   3 -
 drivers/gpu/drm/i915/i915_drv.h   |  11 --
 drivers/gpu/drm/i915/i915_gpu_error.c |  10 +-
 drivers/gpu/drm/i915/i915_gpu_error.h |   1 -
 drivers/gpu/drm/i915/i915_perf.c  |  58 ++-
 drivers/gpu/drm/i915/i915_trace.h |  38 ++---
 drivers/gpu/drm/i915/intel_context_types.h|   1 +
 drivers/gpu/drm/i915/intel_engine_types.h |   3 +
 drivers/gpu/drm/i915/intel_lrc.c  |  29 ++--
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c |   2 +-
 16 files changed, 53 insertions(+), 313 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e763e1142ce9..7d528405fec7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -127,95 +127,6 @@ static void lut_close(struct i915_gem_context *ctx)
rcu_read_unlock();
 }
 
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
-{
-   unsigned int max;
-
-   lockdep_assert_held(&i915->contexts.mutex);
-
-   if (INTEL_GEN(i915) >= 11)
-   max = GEN11_MAX_CONTEXT_HW_ID;
-   else if (USES_GUC_SUBMISSION(i915))
-   /*
-* When using GuC in proxy submission, GuC consumes the
-* highest bit in the context id to indicate proxy submission.
-*/
-   max = MAX_GUC_CONTEXT_HW_ID;
-   else
-   max = MAX_CONTEXT_HW_ID;
-
-   return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
-   struct i915_gem_context *ctx, *cn;
-   LIST_HEAD(pinned);
-   int id = -ENOSPC;
-
-   lockdep_assert_held(&i915->contexts.mutex);
-
-   list_for_each_entry_safe(ctx, cn,
-&i915->contexts.hw_id_list, hw_id_link) {
-   if (atomic_read(&ctx->hw_id_pin_count)) {
-   list_move_tail(&ctx->hw_id_link, &pinned);
-   continue;
-   }
-
-   GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
-   list_del_init(&ctx->hw_id_link);
-   id = ctx->hw_id;
-   break;
-   }
-
-   /*
-* Remember how far we got up on the last repossesion scan, so the
-* list is kept in a "least recently scanned" order.
-*/
-   list_splice_tail(&pinned, &i915->contexts.hw_id_list);
-   return id;
-}
-
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
-{
-   int ret;
-
-   lockdep_assert_held(&i915->contexts.mutex);
-
-   /*
-* We prefer to steal/stall ourselves and our users over that of the
-* entire system. That may be a little unfair to our users, and
-* even hurt high priority clients. The choice is whether to oomkill
-* something else, or steal a context id.
-*/
-   ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-   if (unlikely(ret < 0)) {
-   ret = steal_hw_id(i915);
-   if (ret < 0) /* once again for the correct errno code */
-   ret = new_hw_id(i915, GFP_KERNEL);
-   if (ret < 0)
-   return ret;
-   }
-
-   *out = ret;
-   return 0;
-}
-
-static void release_hw_id(struct i915_gem_context *ctx)
-{
-   struct drm_i915_private *i915 = ctx->i915;
-
-   if (list_empty(&ctx->hw_id_link))
-   return;
-
-   mutex_lock(&i915->contexts.mutex);
-   if (!list_empty(&ctx->hw_id_link)) {
-   ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
-   list_del_init(&ctx->hw_id_link);
-   }
-   mutex_unlock(&i915->contexts.mutex);
-}
-
 static void free_engines(struct intel_engine_cs **engines, int count)
 {
int i;
@@ -238,7 +149,6 @@ static void i915_gem_context_free(struct i915_gem_context 
*ctx)
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
GEM_BUG_ON(!list_empty(&ctx->active_engines));
 
-   release_hw_id(ctx);
i915_ppgtt_put(ctx->ppgtt);
free_engines(c

[Intel-gfx] [PATCH 34/39] drm/i915: Rename intel_context.active to .inflight

2019-03-13 Thread Chris Wilson
Rename the engine this HW context is currently active upon (that we are
flying upon) to disambiguate between the mixture of different active
terms (and prevent conflict in future patches).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_context_types.h |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 22 +++---
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_context_types.h 
b/drivers/gpu/drm/i915/intel_context_types.h
index 624729a35875..f86f4ae8724c 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -42,7 +42,7 @@ struct intel_context {
 
struct i915_gem_context *gem_context;
struct intel_engine_cs *engine;
-   struct intel_engine_cs *active;
+   struct intel_engine_cs *inflight;
 
struct list_head active_link;
struct list_head signal_link;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b9417d031743..f4d98f8530a5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -457,7 +457,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
__i915_request_unsubmit(rq);
unwind_wa_tail(rq);
 
-   GEM_BUG_ON(rq->hw_context->active);
+   GEM_BUG_ON(rq->hw_context->inflight);
 
/*
 * Push the request back into the queue for later resubmission.
@@ -553,17 +553,17 @@ execlists_user_end(struct intel_engine_execlists 
*execlists)
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
-   GEM_BUG_ON(rq->hw_context->active);
+   GEM_BUG_ON(rq->hw_context->inflight);
 
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(rq->engine);
-   rq->hw_context->active = rq->engine;
+   rq->hw_context->inflight = rq->engine;
 }
 
 static inline void
 execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
-   rq->hw_context->active = NULL;
+   rq->hw_context->inflight = NULL;
intel_engine_context_out(rq->engine);
execlists_context_status_change(rq, status);
trace_i915_request_out(rq);
@@ -824,7 +824,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
struct virtual_engine *ve =
rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
struct i915_request *rq = READ_ONCE(ve->request);
-   struct intel_engine_cs *active;
+   struct intel_engine_cs *inflight;
 
if (!rq) { /* lazily cleanup after another engine handled rq */
rb_erase_cached(rb, &execlists->virtual);
@@ -842,8 +842,8 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
 * we reuse the register offsets). This is a very small
 * hystersis on the greedy seelction algorithm.
 */
-   active = READ_ONCE(ve->context.active);
-   if (active && active != engine) {
+   inflight = READ_ONCE(ve->context.inflight);
+   if (inflight && inflight != engine) {
rb = rb_next(rb);
continue;
}
@@ -947,7 +947,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
u32 *regs = ve->context.lrc_reg_state;
unsigned int n;
 
-   GEM_BUG_ON(READ_ONCE(ve->context.active));
+   GEM_BUG_ON(READ_ONCE(ve->context.inflight));
virtual_update_register_offsets(regs, engine);
 
/*
@@ -1519,7 +1519,7 @@ static void execlists_context_unpin(struct intel_context 
*ce)
 * had the chance to run yet; let it run before we teardown the
 * reference it may use.
 */
-   engine = READ_ONCE(ce->active);
+   engine = READ_ONCE(ce->inflight);
if (unlikely(engine)) {
unsigned long flags;
 
@@ -1527,7 +1527,7 @@ static void execlists_context_unpin(struct intel_context 
*ce)
process_csb(engine);
spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
-   GEM_BUG_ON(READ_ONCE(ce->active));
+   GEM_BUG_ON(READ_ONCE(ce->inflight));
}
 
i915_gem_context_unpin_hw_id(ce->gem_context);
@@ -3168,7 +3168,7 @@ static void virtual_context_destroy(struct kref *kref)
unsigned int n;
 
GEM_BUG_ON(ve->request);
-   GEM_BUG_ON(ve->context.active);
+   GEM_BUG_ON(ve->context.inflight);
 
for (n = 0; n < ve->count; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://list

[Intel-gfx] [PATCH 12/39] drm/i915: Allow a context to define its set of engines

2019-03-13 Thread Chris Wilson
Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, that may be sparse and
even be heterogeneous within a class (not all video decoders created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, order fully
controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
execbuf. Its use will be revealed in the next patch.

v2: Fixup freeing of local on success of get_engines()
v3: Allow empty engines[]

Testcase: igt/gem_ctx_engines
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem_context.c   | 223 +-
 drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c|  19 +-
 drivers/gpu/drm/i915/i915_utils.h |  23 ++
 include/uapi/drm/i915_drm.h   |  42 +++-
 5 files changed, 298 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index bac548584091..07377b75b563 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,7 +86,9 @@
  */
 
 #include 
+
 #include 
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
@@ -101,6 +103,21 @@ static struct i915_global_gem_context {
struct kmem_cache *slab_luts;
 } global;
 
+static struct intel_engine_cs *
+lookup_user_engine(struct i915_gem_context *ctx,
+  unsigned long flags, u16 class, u16 instance)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+   if (flags & LOOKUP_USER_INDEX) {
+   if (instance >= ctx->nengine)
+   return NULL;
+
+   return ctx->engines[instance];
+   }
+
+   return intel_engine_lookup_user(ctx->i915, class, instance);
+}
+
 struct i915_lut_handle *i915_lut_handle_alloc(void)
 {
return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
@@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context 
*ctx)
release_hw_id(ctx);
i915_ppgtt_put(ctx->ppgtt);
 
+   kfree(ctx->engines);
+
rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
intel_context_put(it);
 
@@ -1371,9 +1390,9 @@ static int set_sseu(struct i915_gem_context *ctx,
if (user_sseu.flags || user_sseu.rsvd)
return -EINVAL;
 
-   engine = intel_engine_lookup_user(i915,
- user_sseu.engine_class,
- user_sseu.engine_instance);
+   engine = lookup_user_engine(ctx, 0,
+   user_sseu.engine_class,
+   user_sseu.engine_instance);
if (!engine)
return -EINVAL;
 
@@ -1391,9 +1410,163 @@ static int set_sseu(struct i915_gem_context *ctx,
 
args->size = sizeof(user_sseu);
 
+   return 0;
+};
+
+struct set_engines {
+   struct i915_gem_context *ctx;
+   struct intel_engine_cs **engines;
+   unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+   const struct drm_i915_gem_context_param *args)
+{
+   struct i915_context_param_engines __user *user;
+   struct set_engines set = { .ctx = ctx };
+   u64 size, extensions;
+   unsigned int n;
+   int err;
+
+   user = u64_to_user_ptr(args->value);
+   size = args->size;
+   if (!size)
+   goto out;
+
+   BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
+   if (size < sizeof(*user) ||
+   !IS_ALIGNED(size, sizeof(*user->class_instance)))
+   return -EINVAL;
+
+   set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
+   if (set.nengine > I915_EXEC_RING_MASK + 1)
+   return -EINVAL;
+
+   set.engines = kmalloc_array(set.nengine,
+   sizeof(*set.engines),
+   GFP_KERNEL);
+   if (!set.engines)
+   return -ENOMEM;
+
+   for (n = 0; n < set.nengine; n++) {
+   u16 class, inst;
+
+   if (get_user(class, &user->class_instance

[Intel-gfx] [PATCH 36/39] drm/i915: Stop retiring along engine

2019-03-13 Thread Chris Wilson
We no longer track the execution order along the engine and so no longer
need to enforce ordering of retire along the engine.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_request.c | 116 ++--
 1 file changed, 39 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index b71421ae9587..c2df983ac6a9 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -196,72 +196,27 @@ static void free_capture_list(struct i915_request 
*request)
}
 }
 
-static void __retire_engine_request(struct intel_engine_cs *engine,
-   struct i915_request *rq)
-{
-   GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
- __func__, engine->name,
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq));
-
-   GEM_BUG_ON(!i915_request_completed(rq));
-
-   local_irq_disable();
-
-   spin_lock(&engine->timeline.lock);
-   GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
-   list_del_init(&rq->link);
-   spin_unlock(&engine->timeline.lock);
-
-   spin_lock(&rq->lock);
-   i915_request_mark_complete(rq);
-   if (!i915_request_signaled(rq))
-   dma_fence_signal_locked(&rq->fence);
-   if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-   i915_request_cancel_breadcrumb(rq);
-   if (rq->waitboost) {
-   GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-   atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
-   }
-   spin_unlock(&rq->lock);
-
-   local_irq_enable();
-}
-
-static void __retire_engine_upto(struct intel_engine_cs *engine,
-struct i915_request *rq)
-{
-   struct i915_request *tmp;
-
-   if (list_empty(&rq->link))
-   return;
-
-   do {
-   tmp = list_first_entry(&engine->timeline.requests,
-  typeof(*tmp), link);
-
-   GEM_BUG_ON(tmp->engine != engine);
-   __retire_engine_request(engine, tmp);
-   } while (tmp != rq);
-}
-
-static void i915_request_retire(struct i915_request *request)
+static bool i915_request_retire(struct i915_request *rq)
 {
struct i915_active_request *active, *next;
 
+   lockdep_assert_held(&rq->i915->drm.struct_mutex);
+   if (!i915_request_completed(rq))
+   return false;
+
GEM_TRACE("%s fence %llx:%lld, current %d\n",
- request->engine->name,
- request->fence.context, request->fence.seqno,
- hwsp_seqno(request));
+ rq->engine->name,
+ rq->fence.context, rq->fence.seqno,
+ hwsp_seqno(rq));
 
-   lockdep_assert_held(&request->i915->drm.struct_mutex);
-   GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-   GEM_BUG_ON(!i915_request_completed(request));
+   GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+   trace_i915_request_retire(rq);
 
-   trace_i915_request_retire(request);
+   spin_lock_irq(&rq->engine->timeline.lock);
+   list_del(&rq->link);
+   spin_unlock_irq(&rq->engine->timeline.lock);
 
-   advance_ring(request);
-   free_capture_list(request);
+   advance_ring(rq);
 
/*
 * Walk through the active list, calling retire on each. This allows
@@ -273,7 +228,7 @@ static void i915_request_retire(struct i915_request 
*request)
 * pass along the auxiliary information (to avoid dereferencing
 * the node after the callback).
 */
-   list_for_each_entry_safe(active, next, &request->active_list, link) {
+   list_for_each_entry_safe(active, next, &rq->active_list, link) {
/*
 * In microbenchmarks or focusing upon time inside the kernel,
 * we may spend an inordinate amount of time simply handling
@@ -289,19 +244,32 @@ static void i915_request_retire(struct i915_request 
*request)
INIT_LIST_HEAD(&active->link);
RCU_INIT_POINTER(active->request, NULL);
 
-   active->retire(active, request);
+   active->retire(active, rq);
}
 
-   i915_request_remove_from_client(request);
+   spin_lock_irq(&rq->lock);
+   i915_request_mark_complete(rq);
+   if (!i915_request_signaled(rq))
+   dma_fence_signal_locked(&rq->fence);
+   if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+   i915_request_cancel_breadcrumb(rq);
+   if (rq->waitboost) {
+   GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+   atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
+   }
+   spin_unlock_irq(&rq->lock);
+
+   i915_request_remove_from_client(rq);
 
-   intel_context_unpin(request->hw_context);
+ 

[Intel-gfx] [PATCH 09/39] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction

2019-03-13 Thread Chris Wilson
It can be useful to have a single ioctl to create a context with all
the initial parameters instead of a series of create + setparam + setparam
ioctls. This extension to create context allows any of the parameters
to be passed in as a linked list to be applied to the newly constructed
context.

v2: Make a local copy of user setparam (Tvrtko)
v3: Use flags to detect availability of extension interface

Signed-off-by: Chris Wilson 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c |   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c | 439 +---
 include/uapi/drm/i915_drm.h | 180 +-
 3 files changed, 342 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5d53efc4c5d9..93e41c937d96 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3110,7 +3110,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, 
intel_sprite_set_colorkey_ioctl, DRM_MASTER),
DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
-   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, 
i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, 
i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, 
i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, 
i915_gem_context_reset_stats_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 71464ae91d61..07c097ad83ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
@@ -1094,192 +1095,6 @@ static int set_ppgtt(struct i915_gem_context *ctx,
return err;
 }
 
-static bool client_is_banned(struct drm_i915_file_private *file_priv)
-{
-   return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
-}
-
-int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
-{
-   struct drm_i915_private *i915 = to_i915(dev);
-   struct drm_i915_gem_context_create *args = data;
-   struct drm_i915_file_private *file_priv = file->driver_priv;
-   struct i915_gem_context *ctx;
-   int ret;
-
-   if (!DRIVER_CAPS(i915)->has_logical_contexts)
-   return -ENODEV;
-
-   if (args->pad != 0)
-   return -EINVAL;
-
-   ret = i915_terminally_wedged(i915);
-   if (ret)
-   return ret;
-
-   if (client_is_banned(file_priv)) {
-   DRM_DEBUG("client %s[%d] banned from creating ctx\n",
- current->comm,
- pid_nr(get_task_pid(current, PIDTYPE_PID)));
-
-   return -EIO;
-   }
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
-
-   ctx = i915_gem_create_context(i915, file_priv);
-   mutex_unlock(&dev->struct_mutex);
-   if (IS_ERR(ctx))
-   return PTR_ERR(ctx);
-
-   GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
-
-   args->ctx_id = ctx->user_handle;
-   DRM_DEBUG("HW context %d created\n", args->ctx_id);
-
-   return 0;
-}
-
-int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
-  struct drm_file *file)
-{
-   struct drm_i915_gem_context_destroy *args = data;
-   struct drm_i915_file_private *file_priv = file->driver_priv;
-   struct i915_gem_context *ctx;
-   int ret;
-
-   if (args->pad != 0)
-   return -EINVAL;
-
-   if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
-   return -ENOENT;
-
-   ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-   if (!ctx)
-   return -ENOENT;
-
-   ret = mutex_lock_interruptible(&dev->struct_mutex);
-   if (ret)
-   goto out;
-
-   __destroy_hw_context(ctx, file_priv);
-   mutex_unlock(&dev->struct_mutex);
-
-out:
-   i915_gem_context_put(ctx);
-   return 0;
-}
-
-static int get_sseu(struct i915_gem_context *ctx,
-   struct drm_i915_gem_context_param *args)
-{
-   struct drm_i915_gem_context_param_sseu user_sseu;
-   struct intel_engine_cs *engine;
-   struct intel_context *ce;
-
-   if (args->size == 0)
-   goto out;
-   else if (args->size < sizeof(user_sseu))
-   return -EINVAL;
-
-   if (copy_from_user(&user_sseu, u64_to

[Intel-gfx] [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts

2019-03-13 Thread Chris Wilson
In preparation to making the ppGTT binding for a context explicit (to
facilitate reusing the same ppGTT between different contexts), allow the
user to create and destroy named ppGTT.

v2: Replace global barrier for swapping over the ppgtt and tlbs with a
local context barrier (Tvrtko)
v3: serialise with struct_mutex; it's lazy but required dammit
v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
similarly, turned out different!)

v2: Fix up test unwind for aliasing-ppgtt (snb)
v3: Tighten language for uapi struct drm_i915_gem_vm_control.
v4: Patch the context image for runtime ppgtt switching!

Testcase: igt/gem_ctx_param/vm
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c   |   2 +
 drivers/gpu/drm/i915/i915_drv.h   |   3 +
 drivers/gpu/drm/i915/i915_gem_context.c   | 322 +-
 drivers/gpu/drm/i915/i915_gem_context.h   |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  30 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h   |  17 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
 .../gpu/drm/i915/selftests/i915_gem_context.c | 237 ++---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
 include/uapi/drm/i915_drm.h   |  43 +++
 11 files changed, 594 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0d743907e7bc..5d53efc4c5d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3121,6 +3121,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, 
DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, 
i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, 
DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, 
DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, 
DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dc63303225fc..4675355916ff 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -218,6 +218,9 @@ struct drm_i915_file_private {
} mm;
struct idr context_idr;
 
+   struct mutex vm_lock;
+   struct idr vm_idr;
+
unsigned int bsd_engine;
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 4af51b689cbd..71464ae91d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -120,12 +120,15 @@ static void lut_close(struct i915_gem_context *ctx)
list_del(&lut->obj_link);
i915_lut_handle_free(lut);
}
+   INIT_LIST_HEAD(&ctx->handles_list);
 
rcu_read_lock();
radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
struct i915_vma *vma = rcu_dereference_raw(*slot);
 
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+   vma->open_count--;
__i915_gem_object_release_unless_active(vma->obj);
}
rcu_read_unlock();
@@ -306,7 +309,7 @@ static void context_close(struct i915_gem_context *ctx)
 */
lut_close(ctx);
if (ctx->ppgtt)
-   i915_ppgtt_close(&ctx->ppgtt->vm);
+   i915_ppgtt_close(ctx->ppgtt);
 
ctx->file_priv = ERR_PTR(-EBADF);
i915_gem_context_put(ctx);
@@ -417,6 +420,32 @@ static void __destroy_hw_context(struct i915_gem_context 
*ctx,
context_close(ctx);
 }
 
+static struct i915_hw_ppgtt *
+__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
+{
+   struct i915_hw_ppgtt *old = ctx->ppgtt;
+
+   i915_ppgtt_open(ppgtt);
+   ctx->ppgtt = i915_ppgtt_get(ppgtt);
+
+   ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
+
+   return old;
+}
+
+static void __assign_ppgtt(struct i915_gem_context *ctx,
+  struct i915_hw_ppgtt *ppgtt)
+{
+   if (ppgtt == ctx->ppgtt)
+   return;
+
+   ppgtt = __set_ppgtt(ctx, ppgtt);
+   if (ppgtt) {
+   i915_ppgtt_close(ppgtt);
+   i915_ppgtt_put(ppgtt);
+   }
+}
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv)
@@ -443,8 +472,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
return ERR_CAST(ppgtt);
}
 
-   ctx->ppgtt = ppgtt;
-   ctx->desc_template = default_desc_template(dev_priv, ppgtt);
+   __assign_ppgtt(ctx, ppgtt);
+   i

[Intel-gfx] [PATCH 14/39] drm/i915: Load balancing across a virtual engine

2019-03-13 Thread Chris Wilson
Having allowed the user to define a set of engines that they will want
to only use, we go one step further and allow them to bind those engines
into a single virtual instance. Submitting a batch to the virtual engine
will then forward it to any one of the set in a manner as best to
distribute load.  The virtual engine has a single timeline across all
engines (it operates as a single queue), so it is not able to concurrently
run batches across multiple engines by itself; that is left up to the user
to submit multiple concurrent batches to multiple queues. Multiple users
will be load balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so, wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always takes priority over equal-priority tasks.
Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
and hopefully the virtual and real engines are not then congested (i.e.
all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

- We only drip feed single requests through each virtual engine and onto
the physical engines, even if there was enough work to fill all ELSP,
leaving small stalls with an idle CS event at the end of every request.
Could we be greedy and fill both slots? Being lazy is virtuous for load
distribution on less-than-full workloads though.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.

sseu: Lift the restriction to allow sseu to be reconfigured on virtual
engines composed of RENDER_CLASS (rcs).

v2: macroize check_user_mbz()
v3: Cancel virtual engines on wedging
v4: Commence commenting

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem.h|   5 +
 drivers/gpu/drm/i915/i915_gem_context.c| 128 -
 drivers/gpu/drm/i915/i915_scheduler.c  |  18 +-
 drivers/gpu/drm/i915/i915_timeline_types.h |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h  |   8 +
 drivers/gpu/drm/i915/intel_lrc.c   | 570 -
 drivers/gpu/drm/i915/intel_lrc.h   |  11 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 165 ++
 include/uapi/drm/i915_drm.h|  30 ++
 9 files changed, 917 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 74a2ddc1b52f..dbcea6e29d48 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -91,4 +91,9 @@ static inline bool __tasklet_is_enabled(const struct 
tasklet_struct *t)
return !atomic_read(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+   return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 7ae28622b709..98763d3f1b12 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,6 +86,7 @@
  */
 
 #include 
+#include 
 
 #include 
 
@@ -94,6 +95,7 @@
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
+#include "intel_lrc.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -241,6 +243,20 @@ static void release_hw_id(struct i915_gem_context *ctx)
mutex_unlock(&i915->contexts.mutex);
 }
 
+static void free_engines(struct intel_engine_cs **engines, int count)
+{
+   int i;
+
+   if (ZERO_OR_NULL_PTR(engines))
+   return;
+
+   /* We own the veng we created; regular engines are ignored */
+   for (i = 0; i < count; i++)
+   intel_virtual_engine_destroy(engines[i]);
+
+   kfree(engines);
+}
+
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
struct intel_context *it, *n;
@@ -251,8 +267,7 @@ static void i915_gem_context_free(struct i915_gem_context 
*

[Intel-gfx] [PATCH 06/39] drm/i915: Switch to use HWS indices rather than addresses

2019-03-13 Thread Chris Wilson
If we use the STORE_DATA_INDEX function we can use a fixed offset and
avoid having to look up the engine HWS address. A step closer to being
able to emit the final breadcrumb during request_add rather than later
in the submission interrupt handler.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
 drivers/gpu/drm/i915/intel_lrc.c| 17 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++--
 4 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4a5727233419..c4ad73980988 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -583,7 +583,8 @@ static void inject_preempt_context(struct work_struct *work)
} else {
cs = gen8_emit_ggtt_write(cs,
  GUC_PREEMPT_FINISHED,
- addr);
+ addr,
+ 0);
*cs++ = MI_NOOP;
*cs++ = MI_NOOP;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 44e75bc520c1..5669823f6901 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -173,12 +173,6 @@ static void execlists_init_reg_state(u32 *reg_state,
 struct intel_engine_cs *engine,
 struct intel_ring *ring);
 
-static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
-{
-   return (i915_ggtt_offset(engine->status_page.vma) +
-   I915_GEM_HWS_HANGCHECK_ADDR);
-}
-
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
return rb_entry(rb, struct i915_priolist, node);
@@ -2212,11 +2206,14 @@ static u32 *gen8_emit_fini_breadcrumb(struct 
i915_request *request, u32 *cs)
 {
cs = gen8_emit_ggtt_write(cs,
  request->fence.seqno,
- request->timeline->hwsp_offset);
+ request->timeline->hwsp_offset,
+ 0);
 
cs = gen8_emit_ggtt_write(cs,
  
intel_engine_next_hangcheck_seqno(request->engine),
- intel_hws_hangcheck_address(request->engine));
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ MI_FLUSH_DW_STORE_INDEX);
+
 
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2240,8 +2237,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
 
cs = gen8_emit_ggtt_write_rcs(cs,
  
intel_engine_next_hangcheck_seqno(request->engine),
- 
intel_hws_hangcheck_address(request->engine),
- 0);
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ PIPE_CONTROL_STORE_DATA_INDEX);
 
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a021c9545649..9e7ad17b5250 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,12 +43,6 @@
  */
 #define LEGACY_REQUEST_SIZE 200
 
-static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
-{
-   return (i915_ggtt_offset(engine->status_page.vma) +
-   I915_GEM_HWS_HANGCHECK_ADDR);
-}
-
 unsigned int intel_ring_update_space(struct intel_ring *ring)
 {
unsigned int space;
@@ -317,8 +311,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request 
*rq, u32 *cs)
*cs++ = rq->fence.seqno;
 
*cs++ = GFX_OP_PIPE_CONTROL(4);
-   *cs++ = PIPE_CONTROL_QW_WRITE;
-   *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+   *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX;
+   *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
*cs++ = MI_USER_INTERRUPT;
@@ -423,8 +417,10 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request 
*rq, u32 *cs)
*cs++ = rq->fence.seqno;
 
*cs++ = GFX_OP_PIPE_CONTROL(4);
-   *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
-   *cs++ = hws_hangcheck_address(rq->engine);
+   *cs++ = (PIPE_CONTROL_QW_WRITE |
+PIPE_CONTROL_STORE_DATA_INDEX |
+PIPE_CONTROL_GLOBAL_GTT_IVB);
+   *cs++ = I915_GEM_HWS_HANGCHECK_ADDR;

[Intel-gfx] [PATCH 04/39] drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set

2019-03-13 Thread Chris Wilson
We only need to acquire a wakeref for ourselves for a few operations, as
most either already acquire their own wakeref or imply a wakeref. In
particular, it is i915_gem_set_wedged() that needed us to present it
with a wakeref, which is incongruous with its "use anywhere" ability.

Suggested-by: Yokoyama, Caz 
Signed-off-by: Chris Wilson 
Cc: Yokoyama, Caz 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 12 
 drivers/gpu/drm/i915/i915_reset.c   |  4 +++-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 6a90558de213..08683dca7775 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3888,12 +3888,9 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
struct drm_i915_private *i915 = data;
-   intel_wakeref_t wakeref;
-   int ret = 0;
 
DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
  val, val & DROP_ALL);
-   wakeref = intel_runtime_pm_get(i915);
 
if (val & DROP_RESET_ACTIVE &&
wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))
@@ -3902,9 +3899,11 @@ i915_drop_caches_set(void *data, u64 val)
/* No need to check and wait for gpu resets, only libdrm auto-restarts
 * on ioctls on -EAGAIN. */
if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
+   int ret;
+
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (ret)
-   goto out;
+   return ret;
 
if (val & DROP_ACTIVE)
ret = i915_gem_wait_for_idle(i915,
@@ -3943,10 +3942,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_FREED)
i915_gem_drain_freed_objects(i915);
 
-out:
-   intel_runtime_pm_put(i915, wakeref);
-
-   return ret;
+   return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
diff --git a/drivers/gpu/drm/i915/i915_reset.c 
b/drivers/gpu/drm/i915/i915_reset.c
index 3c08e08837d0..955c22b8dfc7 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -861,9 +861,11 @@ static void __i915_gem_set_wedged(struct drm_i915_private 
*i915)
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
struct i915_gpu_error *error = &i915->gpu_error;
+   intel_wakeref_t wakeref;
 
mutex_lock(&error->wedge_mutex);
-   __i915_gem_set_wedged(i915);
+   with_intel_runtime_pm(i915, wakeref)
+   __i915_gem_set_wedged(i915);
mutex_unlock(&error->wedge_mutex);
 }
 
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 19/39] drm/i915: Split GEM object type definition to its own header

2019-03-13 Thread Chris Wilson
For convenience in avoiding inline spaghetti, keep the type definition
as a separate header.

Signed-off-by: Chris Wilson 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile |   3 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 285 +
 .../test_i915_gem_object_types_standalone.c   |   7 +
 drivers/gpu/drm/i915/i915_drv.h   |   3 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.h|   3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h   |   1 +
 drivers/gpu/drm/i915/i915_gem_object.h| 295 +-
 drivers/gpu/drm/i915/intel_engine_types.h |   1 +
 8 files changed, 303 insertions(+), 295 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_types.h
 create mode 100644 
drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 60de05f3fa60..87fb8c21510e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,6 +59,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+   gem/test_i915_gem_object_types_standalone.o \
test_i915_active_types_standalone.o \
test_i915_gem_context_types_standalone.o \
test_i915_timeline_types_standalone.o \
@@ -102,7 +103,7 @@ i915-y += \
  intel_mocs.o \
  intel_ringbuffer.o \
  intel_uncore.o \
- intel_wopcm.o
+ intel_wopcm.o \
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
new file mode 100644
index ..e4b50944f553
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -0,0 +1,285 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_TYPES_H__
+#define __I915_GEM_OBJECT_TYPES_H__
+
+#include 
+
+#include 
+
+#include "../i915_active.h"
+#include "../i915_selftest.h"
+
+struct drm_i915_gem_object;
+
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+   struct list_head obj_link;
+   struct list_head ctx_link;
+   struct i915_gem_context *ctx;
+   u32 handle;
+};
+
+struct drm_i915_gem_object_ops {
+   unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGEBIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE  BIT(1)
+#define I915_GEM_OBJECT_IS_PROXY   BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL   BIT(3)
+
+   /* Interface between the GEM object and its backing storage.
+* get_pages() is called once prior to the use of the associated set
+* of pages before to binding them into the GTT, and put_pages() is
+* called after we no longer need them. As we expect there to be
+* associated cost with migrating pages between the backing storage
+* and making them available for the GPU (e.g. clflush), we may hold
+* onto the pages after they are no longer referenced by the GPU
+* in case they may be used again shortly (for example migrating the
+* pages to a different memory domain within the GTT). put_pages()
+* will therefore most likely be called when the object itself is
+* being released or under memory pressure (where we attempt to
+* reap pages for the shrinker).
+*/
+   int (*get_pages)(struct drm_i915_gem_object *obj);
+   void (*put_pages)(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+
+   int (*pwrite)(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *arg);
+
+   int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+   void (*release)(struct drm_i915_gem_object *obj);
+};
+
+struct drm_i915_gem_object {
+   struct drm_gem_object base;
+
+   const struct drm_i915_gem_object_ops *ops;
+
+   struct {
+   /**
+* @vma.lock: protect the list/tree of vmas
+*/
+   spinlock_t lock;
+
+   /**
+* @vma.list: List of VMAs backed by this object
+*
+* The VMA on this list are ordered by type, all GGTT vma are
+* placed at the head and all ppGTT vma are placed at the tail.
+* The different types of GGTT vma are unordered between
+* themselves, use the @vma.tree (which has a defined order
+* between all VMA) to quickly find an exact match.
+*/
+   struct list_head list;
+
+   /**
+*

[Intel-gfx] [PATCH 03/39] drm/i915: Hold a reference to the active HW context

2019-03-13 Thread Chris Wilson
For virtual engines, we need to keep the HW context alive while it
remains in use. For regular HW contexts, they are created and kept alive
until the end of the GEM context. For simplicity, generalise the
requirements and keep an active reference to each HW context.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c  |  2 +-
 drivers/gpu/drm/i915/intel_context.c |  6 ++
 drivers/gpu/drm/i915/intel_context.h | 11 +++
 drivers/gpu/drm/i915/intel_context_types.h   |  6 +-
 drivers/gpu/drm/i915/intel_lrc.c |  4 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  4 +++-
 drivers/gpu/drm/i915/selftests/mock_engine.c |  7 ++-
 7 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index b6370225dcb5..4af51b689cbd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -232,7 +232,7 @@ static void i915_gem_context_free(struct i915_gem_context 
*ctx)
i915_ppgtt_put(ctx->ppgtt);
 
rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
-   it->ops->destroy(it);
+   intel_context_put(it);
 
kfree(ctx->name);
put_pid(ctx->pid);
diff --git a/drivers/gpu/drm/i915/intel_context.c 
b/drivers/gpu/drm/i915/intel_context.c
index 0ab894a058f6..8931e0fee873 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -172,6 +172,7 @@ intel_context_pin(struct i915_gem_context *ctx,
list_add(&ce->active_link, &ctx->active_engines);
mutex_unlock(&ctx->mutex);
 
+   intel_context_get(ce);
smp_mb__before_atomic(); /* flush pin before it is visible */
}
 
@@ -192,6 +193,7 @@ void intel_context_unpin(struct intel_context *ce)
return;
 
/* We may be called from inside intel_context_pin() to evict another */
+   intel_context_get(ce);
mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
 
if (likely(atomic_dec_and_test(&ce->pin_count))) {
@@ -202,9 +204,11 @@ void intel_context_unpin(struct intel_context *ce)
mutex_unlock(&ce->gem_context->mutex);
 
i915_gem_context_put(ce->gem_context);
+   intel_context_put(ce);
}
 
mutex_unlock(&ce->pin_mutex);
+   intel_context_put(ce);
 }
 
 static void intel_context_retire(struct i915_active_request *active,
@@ -221,6 +225,8 @@ intel_context_init(struct intel_context *ce,
   struct i915_gem_context *ctx,
   struct intel_engine_cs *engine)
 {
+   kref_init(&ce->ref);
+
ce->gem_context = ctx;
ce->engine = engine;
ce->ops = engine->cops;
diff --git a/drivers/gpu/drm/i915/intel_context.h 
b/drivers/gpu/drm/i915/intel_context.h
index 9546d932406a..ebc861b1a49e 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -73,4 +73,15 @@ static inline void __intel_context_pin(struct intel_context 
*ce)
 
 void intel_context_unpin(struct intel_context *ce);
 
+static inline struct intel_context *intel_context_get(struct intel_context *ce)
+{
+   kref_get(&ce->ref);
+   return ce;
+}
+
+static inline void intel_context_put(struct intel_context *ce)
+{
+   kref_put(&ce->ref, ce->ops->destroy);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h 
b/drivers/gpu/drm/i915/intel_context_types.h
index 6dc9b4b9067b..624729a35875 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -7,6 +7,7 @@
 #ifndef __INTEL_CONTEXT_TYPES__
 #define __INTEL_CONTEXT_TYPES__
 
+#include 
 #include 
 #include 
 #include 
@@ -22,7 +23,8 @@ struct intel_ring;
 struct intel_context_ops {
int (*pin)(struct intel_context *ce);
void (*unpin)(struct intel_context *ce);
-   void (*destroy)(struct intel_context *ce);
+
+   void (*destroy)(struct kref *kref);
 };
 
 /*
@@ -36,6 +38,8 @@ struct intel_sseu {
 };
 
 struct intel_context {
+   struct kref ref;
+
struct i915_gem_context *gem_context;
struct intel_engine_cs *engine;
struct intel_engine_cs *active;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e0fb8853477c..44e75bc520c1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1242,8 +1242,10 @@ static void __execlists_context_fini(struct 
intel_context *ce)
i915_gem_object_put(ce->state->obj);
 }
 
-static void execlists_context_destroy(struct intel_context *ce)
+static void execlists_context_destroy(struct kref *kref)
 {
+   struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+
GEM_BUG_ON(intel_context_is_pinned(ce));
 
if (ce->state)
diff --git a/drivers/gpu/drm/i915/intel_r

[Intel-gfx] [PATCH 23/39] drm/i915: Move phys objects to its own file

2019-03-13 Thread Chris Wilson
Continuing the decluttering of i915_gem.c, this time the legacy physical
object.

Signed-off-by: Chris Wilson 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h|   8 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c | 482 
 drivers/gpu/drm/i915/gem/i915_gem_phys.c  | 212 ++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  22 +
 .../drm/i915/gem/selftests/i915_gem_phys.c|  80 ++
 drivers/gpu/drm/i915/i915_drv.h   |   2 -
 drivers/gpu/drm/i915/i915_gem.c   | 685 --
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  54 --
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
 11 files changed, 809 insertions(+), 741 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pages.c
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_phys.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 54418ce5faac..8e6ef54f2497 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,8 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
  gem/i915_gem_object.o \
+ gem/i915_gem_pages.o \
+ gem/i915_gem_phys.o \
  gem/i915_gem_shmem.o \
  i915_active.o \
  i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 05bbb3f33904..ebab3505e51d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -33,11 +33,17 @@ void __i915_gem_object_release_shmem(struct 
drm_i915_gem_object *obj,
 struct sg_table *pages,
 bool needs_clflush);
 
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align);
+
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
 void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
  * @filp: DRM file private date
@@ -231,6 +237,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object 
*obj,
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 struct sg_table *pages,
 unsigned int sg_page_sizes);
+
+int i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __must_check
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e4b50944f553..da6a33e2395f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -52,6 +52,8 @@ struct drm_i915_gem_object_ops {
int (*get_pages)(struct drm_i915_gem_object *obj);
void (*put_pages)(struct drm_i915_gem_object *obj,
  struct sg_table *pages);
+   void (*truncate)(struct drm_i915_gem_object *obj);
+   void (*invalidate)(struct drm_i915_gem_object *obj);
 
int (*pwrite)(struct drm_i915_gem_object *obj,
  const struct drm_i915_gem_pwrite *arg);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
new file mode 100644
index ..a594f48db28e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -0,0 +1,482 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+struct sg_table *pages,
+unsigned int sg_page_sizes)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   unsigned long supported = INTEL_INFO(i915)->page_sizes;
+   int i;
+
+   lockdep_assert_held(&obj->mm.lock);
+
+   obj->mm.get_page.sg_pos = pages->sgl;
+   obj->mm.get_page.sg_idx = 0;
+
+   obj->mm.pages = pages;
+
+   if (i915_gem_object_is_tiled(obj) &&
+   i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+   GEM_BUG_ON(obj->mm.quirked);
+   __i915_gem_object_pin_pages(obj);
+   obj->mm.quirked = true;
+   }
+
+   GEM_BUG_ON(!sg_page_sizes);
+   obj->mm.page_sizes.phys = sg_page_sizes;
+
+   /*
+* Calculate the supported page-sizes which fit into the given
+* sg_page_sizes. This will give

[Intel-gfx] [PATCH 25/39] drm/i915: Move GEM domain management to its own file

2019-03-13 Thread Chris Wilson
Continuing the decluttering of i915_gem.c, that of the read/write
domains, perhaps the biggest of GEM's follies?

Signed-off-by: Chris Wilson 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 764 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  29 +
 drivers/gpu/drm/i915/gvt/cmd_parser.c |   4 +-
 drivers/gpu/drm/i915/gvt/scheduler.c  |   6 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c|   8 +-
 drivers/gpu/drm/i915/i915_drv.h   |  34 -
 drivers/gpu/drm/i915/i915_gem.c   | 757 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c|   4 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   4 +-
 drivers/gpu/drm/i915/intel_drv.h  |   2 +
 drivers/gpu/drm/i915/intel_frontbuffer.h  |   8 +
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   4 +-
 .../drm/i915/selftests/i915_gem_coherency.c   |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |   8 +-
 15 files changed, 831 insertions(+), 810 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_domain.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c2804efe4e5a..c4b78634b5ee 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -71,6 +71,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+ gem/i915_gem_domain.o \
  gem/i915_gem_object.o \
  gem/i915_gem_mman.o \
  gem/i915_gem_pages.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index ..c21ceb08f845
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,764 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem_clflush.h"
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+
+#include "../intel_frontbuffer.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object 
*obj)
+{
+   /*
+* We manually flush the CPU domain so that we can override and
+* force the flush for the display, and perform it asynchronously.
+*/
+   i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+   if (obj->cache_dirty)
+   i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+   obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+   if (!READ_ONCE(obj->pin_global))
+   return;
+
+   mutex_lock(&obj->base.dev->struct_mutex);
+   __i915_gem_object_flush_for_display(obj);
+   mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+   int ret;
+
+   lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+   ret = i915_gem_object_wait(obj,
+  I915_WAIT_INTERRUPTIBLE |
+  I915_WAIT_LOCKED |
+  (write ? I915_WAIT_ALL : 0),
+  MAX_SCHEDULE_TIMEOUT);
+   if (ret)
+   return ret;
+
+   if (obj->write_domain == I915_GEM_DOMAIN_WC)
+   return 0;
+
+   /* Flush and acquire obj->pages so that we are coherent through
+* direct access in memory with previous cached writes through
+* shmemfs and that our cache domain tracking remains valid.
+* For example, if the obj->filp was moved to swap without us
+* being notified and releasing the pages, we would mistakenly
+* continue to assume that the obj remained out of the CPU cached
+* domain.
+*/
+   ret = i915_gem_object_pin_pages(obj);
+   if (ret)
+   return ret;
+
+   i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+   /* Serialise direct access to this object with the barriers for
+* coherent writes from the GPU, by effectively invalidating the
+* WC domain upon first access.
+*/
+   if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+   mb();
+
+   /* It should now be out of any other write domains, and we can update
+* the domain values for our changes.
+*/
+   GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+   obj->read_domains |= I915_GEM_DOMAIN_WC;
+   if (write) {
+   obj->read_domains = I915_GEM_DOMAIN_WC;
+   obj->write_domain = I915_GEM_DOMAIN_WC;
+   obj->m

[Intel-gfx] [PATCH 10/39] drm/i915: Allow contexts to share a single timeline across all engines

2019-03-13 Thread Chris Wilson
Previously, our view has been always to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forget about this detail entirely and hope no
one notices - a fair ploy if the client can only directly control one
engine themselves ;)

In the next patch, we will want to construct a set of engines that
operate as one, that have a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which engine to execute on based on load.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.

v2: Move the specialised timeline ordering to its own function.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c   | 32 ++--
 drivers/gpu/drm/i915/i915_gem_context_types.h |  2 +
 drivers/gpu/drm/i915/i915_request.c   | 80 +--
 drivers/gpu/drm/i915/i915_request.h   |  5 +-
 drivers/gpu/drm/i915/i915_sw_fence.c  | 39 +++--
 drivers/gpu/drm/i915/i915_sw_fence.h  | 13 ++-
 drivers/gpu/drm/i915/intel_lrc.c  |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 18 +++--
 drivers/gpu/drm/i915/selftests/mock_context.c |  2 +-
 include/uapi/drm/i915_drm.h   |  3 +-
 10 files changed, 149 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 07c097ad83ee..4e4b0b5c4be0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context 
*ctx)
rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
intel_context_put(it);
 
+   if (ctx->timeline)
+   i915_timeline_put(ctx->timeline);
+
kfree(ctx->name);
put_pid(ctx->pid);
 
@@ -449,12 +452,17 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
-   struct drm_i915_file_private *file_priv)
+   struct drm_i915_file_private *file_priv,
+   unsigned int flags)
 {
struct i915_gem_context *ctx;
 
lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+   if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+   !HAS_EXECLISTS(dev_priv))
+   return ERR_PTR(-EINVAL);
+
/* Reap the most stale context */
contexts_free_first(dev_priv);
 
@@ -477,6 +485,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
i915_ppgtt_put(ppgtt);
}
 
+   if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+   struct i915_timeline *timeline;
+
+   timeline = i915_timeline_create(dev_priv, ctx->name, NULL);
+   if (IS_ERR(timeline)) {
+   __destroy_hw_context(ctx, file_priv);
+   return ERR_CAST(timeline);
+   }
+
+   ctx->timeline = timeline;
+   }
+
trace_i915_context_create(ctx);
 
return ctx;
@@ -505,7 +525,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
if (ret)
return ERR_PTR(ret);
 
-   ctx = i915_gem_create_context(to_i915(dev), NULL);
+   ctx = i915_gem_create_context(to_i915(dev), NULL, 0);
if (IS_ERR(ctx))
goto out;
 
@@ -541,7 +561,7 @@ i915_gem_context_create_kernel(struct drm_i915_private 
*i915, int prio)
struct i915_gem_context *ctx;
int err;
 
-   ctx = i915_gem_create_context(i915, NULL);
+   ctx = i915_gem_create_context(i915, NULL, 0);
if (IS_ERR(ctx))
return ctx;
 
@@ -673,7 +693,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
idr_init_base(&file_priv->vm_idr, 1);
 
mutex_lock(&i915->drm.struct_mutex);
-   ctx = i915_gem_create_context(i915, file_priv);
+   ctx = i915_gem_create_context(i915, file_priv, 0);
mutex_unlock(&i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
idr_destroy(&file_priv->context_idr);
@@ -789,7 +809,7 @@ last_request_on_engine(struct i915_timeline *timeline,
 
rq = i915_active_request_raw(&timeline->last_request,
 &engine->i915->drm.struct_mutex);
-   if (rq && rq->engine == engine) {
+   if (rq && rq->engine->mask & engine->mask) {
GEM_TRACE("last request for %s on engine %s: %llx:%ll

[Intel-gfx] [PATCH 17/39] drm/i915: Allow specification of parallel execbuf

2019-03-13 Thread Chris Wilson
There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate secondary execbuf to only become ready simultaneously with
the first, so that with all things idle the second execbufs are executed
in parallel with the first. The key difference here between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement).

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherit all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.

Suggested-by: Tvrtko Ursulin 
Testcase: igt/gem_exec_fence/parallel
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +-
 include/uapi/drm/i915_drm.h| 17 ++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 93e41c937d96..afdfced262e6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -421,6 +421,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
case I915_PARAM_HAS_EXEC_CAPTURE:
case I915_PARAM_HAS_EXEC_BATCH_FIRST:
case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+   case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
/* For the time being all of these are always true;
 * if some supported hardware does not have one of these
 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 70a26f0a9f1e..064c649f3f46 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2282,6 +2282,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
+   struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL;
intel_wakeref_t wakeref;
int out_fence_fd = -1;
@@ -2325,11 +2326,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+   if (in_fence) {
+   err = -EINVAL;
+   goto err_in_fence;
+   }
+
+   exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+   if (!exec_fence) {
+   err = -EINVAL;
+   goto err_in_fence;
+   }
+   }
+
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
err = out_fence_fd;
-   goto err_in_fence;
+   goto err_exec_fence;
}
}
 
@@ -2461,6 +2475,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_request;
}
 
+   if (exec_fence) {
+   err = i915_request_await_execution(eb.request, exec_fence,
+  eb.engine->bond_execute);
+   if (err < 0)
+   goto err_request;
+   }
+
if (fences) {
err = await_fence_array(&eb, fences);
if (err)
@@ -2521,6 +2542,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_out_fence:
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
+err_exec_fence:
+   dma_fence_put(exec_fence);
 err_in_fence:
dma_fence_put(in_fence);
return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ed33b8af8692..dbab4d365a6d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -593,6 +593,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT   52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1115,7 +1121,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I

[Intel-gfx] [PATCH 07/39] drm/i915: Introduce the i915_user_extension_method

2019-03-13 Thread Chris Wilson
An idea for extending uABI inspired by Vulkan's extension chains.
Instead of expanding the data struct for each ioctl every time we need
to add a new feature, define an extension chain instead. As we add
optional interfaces to control the ioctl, we define a new extension
struct that can be linked into the ioctl data only when required by the
user. The key advantage being able to ignore large control structs for
optional interfaces/extensions, while being able to process them in a
consistent manner.

In comparison to other extensible ioctls, the key difference is the
use of a linked chain of extension structs vs an array of tagged
pointers. For example,

struct drm_amdgpu_cs_chunk {
__u32   chunk_id;
__u32   length_dw;
__u64   chunk_data;
};

struct drm_amdgpu_cs_in {
__u32   ctx_id;
__u32   bo_list_handle;
__u32   num_chunks;
__u32   _pad;
__u64   chunks;
};

allows userspace to pass in array of pointers to extension structs, but
must therefore keep constructing that array along side the command stream.
In dynamic situations like that, a linked list is preferred and does not
suffer from extra cache line misses as the extension structs themselves
must still be loaded separate to the chunks array.

v2: Apply the tail call optimisation directly to nip the worry of stack
overflow in the bud.
v3: Defend against recursion.

Opens:
- do we include the result as an out-field in each chain?
struct i915_user_extension {
__u64 next_extension;
__u64 name;
__s32 result;
__u32 mbz; /* reserved for future use */
};
* Undecided, so provision some room for future expansion.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/i915_user_extensions.c | 59 +
 drivers/gpu/drm/i915/i915_user_extensions.h | 20 +++
 drivers/gpu/drm/i915/i915_utils.h   | 12 +
 include/uapi/drm/i915_drm.h | 22 
 5 files changed, 114 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 68fecf355471..60de05f3fa60 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -46,6 +46,7 @@ i915-y := i915_drv.o \
  i915_sw_fence.o \
  i915_syncmap.o \
  i915_sysfs.o \
+ i915_user_extensions.o \
  intel_csr.o \
  intel_device_info.o \
  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c 
b/drivers/gpu/drm/i915/i915_user_extensions.c
new file mode 100644
index ..d28c95221db4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -0,0 +1,59 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include "i915_user_extensions.h"
+#include "i915_utils.h"
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+const i915_user_extension_fn *tbl,
+unsigned long count,
+void *data)
+{
+   unsigned int stackdepth = 512;
+
+   while (ext) {
+   int i, err;
+   u64 x;
+
+   if (!stackdepth--) /* recursion vs useful flexibility */
+   return -E2BIG;
+
+   err = check_user_mbz(&ext->flags);
+   if (err)
+   return err;
+
+   for (i = 0; i < ARRAY_SIZE(ext->rsvd); i++) {
+   err = check_user_mbz(&ext->rsvd[i]);
+   if (err)
+   return err;
+   }
+
+   if (get_user(x, &ext->name))
+   return -EFAULT;
+
+   err = -EINVAL;
+   if (x < count) {
+   x = array_index_nospec(x, count);
+   if (tbl[x])
+   err = tbl[x](ext, data);
+   }
+   if (err)
+   return err;
+
+   if (get_user(x, &ext->next_extension))
+   return -EFAULT;
+
+   ext = u64_to_user_ptr(x);
+   }
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h 
b/drivers/gpu/drm/i915/i915_user_extensions.h
new file mode 100644
index ..313a510b068a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef I915_USER_EXTENSIONS_H
+#define I915_USER_EXTENSIONS_H
+
+struct i915_user_extension;
+
+typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
+ 

[Intel-gfx] [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]

2019-03-13 Thread Chris Wilson
Allow the user to specify a local engine index (as opposed to
class:index) that they can use to refer to a preset engine inside the
ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES.
This will be useful for setting SSEU parameters on virtual engines that
are local to the context and do not have a valid global class:instance
lookup.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 24 
 include/uapi/drm/i915_drm.h |  3 ++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 07377b75b563..7ae28622b709 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1375,6 +1375,7 @@ static int set_sseu(struct i915_gem_context *ctx,
struct drm_i915_gem_context_param_sseu user_sseu;
struct intel_engine_cs *engine;
struct intel_sseu sseu;
+   unsigned long lookup;
int ret;
 
if (args->size < sizeof(user_sseu))
@@ -1387,10 +1388,17 @@ static int set_sseu(struct i915_gem_context *ctx,
   sizeof(user_sseu)))
return -EFAULT;
 
-   if (user_sseu.flags || user_sseu.rsvd)
+   if (user_sseu.rsvd)
return -EINVAL;
 
-   engine = lookup_user_engine(ctx, 0,
+   if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+   return -EINVAL;
+
+   lookup = 0;
+   if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+   lookup |= LOOKUP_USER_INDEX;
+
+   engine = lookup_user_engine(ctx, lookup,
user_sseu.engine_class,
user_sseu.engine_instance);
if (!engine)
@@ -1899,6 +1907,7 @@ static int get_sseu(struct i915_gem_context *ctx,
struct drm_i915_gem_context_param_sseu user_sseu;
struct intel_engine_cs *engine;
struct intel_context *ce;
+   unsigned long lookup;
 
if (args->size == 0)
goto out;
@@ -1909,10 +1918,17 @@ static int get_sseu(struct i915_gem_context *ctx,
   sizeof(user_sseu)))
return -EFAULT;
 
-   if (user_sseu.flags || user_sseu.rsvd)
+   if (user_sseu.rsvd)
return -EINVAL;
 
-   engine = lookup_user_engine(ctx, 0,
+   if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+   return -EINVAL;
+
+   lookup = 0;
+   if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+   lookup |= LOOKUP_USER_INDEX;
+
+   engine = lookup_user_engine(ctx, lookup,
user_sseu.engine_class,
user_sseu.engine_instance);
if (!engine)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6dde864e14e7..e17c7375248c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1567,9 +1567,10 @@ struct drm_i915_gem_context_param_sseu {
__u16 engine_instance;
 
/*
-* Unused for now. Must be cleared to zero.
+* Unknown flags must be cleared to zero.
 */
__u32 flags;
+#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
 
/*
 * Mask of slices to enable for the context. Valid values are a subset
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 35/39] drm/i915: Keep contexts pinned until after the next kernel context switch

2019-03-13 Thread Chris Wilson
Quoting Chris Wilson (2019-03-13 14:43:57)
> We need to keep the context image pinned in memory until after the GPU
> has finished writing into it. Since it continues to write as we signal
> the final breadcrumb, we need to keep it pinned until the request after
> it is complete. Currently we know the order in which requests execute on
> each engine, and so to remove that presumption we need to identify a
> request/context-switch we know must occur after our completion. Any
> request queued after the signal must imply a context switch, for
> simplicity we use a fresh request from the kernel context.

I'm not happy with the preallocation scheme for the active barrier yet.
But the general picture is here.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 37/39] drm/i915: Replace engine->timeline with a plain list

2019-03-13 Thread Chris Wilson
To continue the onslaught of removing the assumption of a global
execution ordering, another casualty is the engine->timeline. Without an
actual timeline to track, it is overkill and we can replace it with a
much less grand plain list. We still need a list of requests inflight,
for the simple purpose of finding inflight requests (for retiring,
resetting, preemption etc).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 -
 drivers/gpu/drm/i915/i915_gpu_error.c |  5 +-
 drivers/gpu/drm/i915/i915_request.c   | 43 +++--
 drivers/gpu/drm/i915/i915_request.h   |  2 +-
 drivers/gpu/drm/i915/i915_reset.c | 10 +-
 drivers/gpu/drm/i915/i915_scheduler.c | 37 +++
 drivers/gpu/drm/i915/i915_timeline.c  |  1 -
 drivers/gpu/drm/i915/i915_timeline.h  | 19 
 drivers/gpu/drm/i915/i915_timeline_types.h|  4 -
 drivers/gpu/drm/i915/intel_engine_cs.c| 63 ++--
 drivers/gpu/drm/i915/intel_engine_types.h |  6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |  6 +-
 drivers/gpu/drm/i915/intel_lrc.c  | 96 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c   | 15 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h   |  6 ++
 drivers/gpu/drm/i915/selftests/intel_lrc.c|  1 +
 drivers/gpu/drm/i915/selftests/mock_engine.c  | 17 +---
 .../gpu/drm/i915/selftests/mock_timeline.c|  1 -
 18 files changed, 140 insertions(+), 194 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d9bc05ba3902..e08a9afee7cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -804,8 +804,6 @@ last_request_on_engine(struct i915_timeline *timeline,
 {
struct i915_request *rq;
 
-   GEM_BUG_ON(timeline == &engine->timeline);
-
rq = i915_active_request_raw(&timeline->last_request,
 &engine->i915->drm.struct_mutex);
if (rq && rq->engine->mask & engine->mask) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7c7f69114f4d..6a5d27dc6338 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,7 +1262,7 @@ static void engine_record_requests(struct intel_engine_cs 
*engine,
 
count = 0;
request = first;
-   list_for_each_entry_from(request, &engine->timeline.requests, link)
+   list_for_each_entry_from(request, &engine->active.requests, sched.link)
count++;
if (!count)
return;
@@ -1275,7 +1275,8 @@ static void engine_record_requests(struct intel_engine_cs 
*engine,
 
count = 0;
request = first;
-   list_for_each_entry_from(request, &engine->timeline.requests, link) {
+   list_for_each_entry_from(request,
+&engine->active.requests, sched.link) {
if (count >= ee->num_requests) {
/*
 * If the ring request list was changed in
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index c2df983ac6a9..7365a86a448a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -212,9 +212,9 @@ static bool i915_request_retire(struct i915_request *rq)
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
trace_i915_request_retire(rq);
 
-   spin_lock_irq(&rq->engine->timeline.lock);
-   list_del(&rq->link);
-   spin_unlock_irq(&rq->engine->timeline.lock);
+   spin_lock_irq(&rq->engine->active.lock);
+   list_del(&rq->sched.link);
+   spin_unlock_irq(&rq->engine->active.lock);
 
advance_ring(rq);
 
@@ -260,6 +260,7 @@ static bool i915_request_retire(struct i915_request *rq)
spin_unlock_irq(&rq->lock);
 
i915_request_remove_from_client(rq);
+   list_del(&rq->link);
 
intel_context_unpin(rq->hw_context);
 
@@ -383,28 +384,17 @@ __i915_request_await_execution(struct i915_request *rq,
return 0;
 }
 
-static void move_to_timeline(struct i915_request *request,
-struct i915_timeline *timeline)
-{
-   GEM_BUG_ON(request->timeline == &request->engine->timeline);
-   lockdep_assert_held(&request->engine->timeline.lock);
-
-   spin_lock(&request->timeline->lock);
-   list_move_tail(&request->link, &timeline->requests);
-   spin_unlock(&request->timeline->lock);
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
struct intel_engine_cs *engine = request->engine;
 
-   GEM_TRACE("%s fence %llx:%lld -> current %d\n",
+   GEM_TRACE("%s fence %llx:%lld, current %d\n",
  engine->name,
  request->fence.context, request->fence.seqno,
  hwsp_seqno(request));
 
GEM_BUG_ON(!irqs_disabled());
-   lo

[Intel-gfx] [PATCH 26/39] drm/i915: Move more GEM objects under gem/

2019-03-13 Thread Chris Wilson
Continuing the theme of separating out the GEM clutter.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile | 25 ++-
 .../gpu/drm/i915/{ => gem}/i915_gem_clflush.c | 27 +++
 drivers/gpu/drm/i915/gem/i915_gem_clflush.h   | 20 +
 .../gpu/drm/i915/{ => gem}/i915_gem_context.c | 40 +
 .../gpu/drm/i915/{ => gem}/i915_gem_context.h | 30 +++--
 .../i915/{ => gem}/i915_gem_context_types.h   |  4 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c  | 28 +++-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c|  2 +-
 .../drm/i915/{ => gem}/i915_gem_execbuffer.c  | 39 
 .../drm/i915/{ => gem}/i915_gem_internal.c| 33 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 10 -
 .../drm/i915/{ => gem}/i915_gem_shrinker.c| 25 ++-
 .../gpu/drm/i915/{ => gem}/i915_gem_stolen.c  | 33 --
 .../gpu/drm/i915/{ => gem}/i915_gem_tiling.c  | 31 +++--
 .../gpu/drm/i915/{ => gem}/i915_gem_userptr.c | 30 +++--
 drivers/gpu/drm/i915/{ => gem}/i915_gemfs.c   | 25 ++-
 drivers/gpu/drm/i915/gem/i915_gemfs.h | 16 +++
 .../{ => gem}/selftests/huge_gem_object.c | 22 +
 .../drm/i915/gem/selftests/huge_gem_object.h  | 27 +++
 .../drm/i915/{ => gem}/selftests/huge_pages.c | 33 --
 .../{ => gem}/selftests/i915_gem_coherency.c  | 26 ++-
 .../{ => gem}/selftests/i915_gem_context.c| 41 +
 .../{ => gem}/selftests/i915_gem_dmabuf.c | 26 ++-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  3 +-
 .../{ => gem}/selftests/i915_gem_object.c | 27 +++
 .../i915/{ => gem}/selftests/mock_context.c   | 24 ++
 .../gpu/drm/i915/gem/selftests/mock_context.h | 24 ++
 .../i915/{ => gem}/selftests/mock_dmabuf.c| 22 +
 .../gpu/drm/i915/gem/selftests/mock_dmabuf.h  | 22 +
 .../{ => gem}/selftests/mock_gem_object.h |  7 ++-
 .../gem/test_i915_gem_clflush_standalone.c|  7 +++
 .../gem/test_i915_gem_context_standalone.c|  7 +++
 .../test_i915_gem_context_types_standalone.c  |  0
 .../drm/i915/gem/test_i915_gemfs_standalone.c |  7 +++
 drivers/gpu/drm/i915/gvt/mmio_context.c   |  1 +
 drivers/gpu/drm/i915/gvt/scheduler.c  |  3 ++
 drivers/gpu/drm/i915/i915_drv.c   |  1 +
 drivers/gpu/drm/i915/i915_drv.h   |  2 +-
 drivers/gpu/drm/i915/i915_gem.c   |  9 ++--
 drivers/gpu/drm/i915/i915_gem_clflush.h   | 36 ---
 drivers/gpu/drm/i915/i915_gem_evict.c |  2 +
 drivers/gpu/drm/i915/i915_gemfs.h | 34 --
 drivers/gpu/drm/i915/i915_globals.c   |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |  2 +
 drivers/gpu/drm/i915/i915_perf.c  |  3 ++
 drivers/gpu/drm/i915/i915_request.c   |  3 ++
 drivers/gpu/drm/i915/i915_reset.c |  2 +
 drivers/gpu/drm/i915/intel_context.c  |  3 +-
 drivers/gpu/drm/i915/intel_display.c  |  2 -
 drivers/gpu/drm/i915/intel_engine_cs.c|  1 +
 drivers/gpu/drm/i915/intel_guc_submission.c   |  1 +
 drivers/gpu/drm/i915/intel_lrc.c  |  3 ++
 drivers/gpu/drm/i915/intel_lrc.h  | 14 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c   |  3 ++
 .../gpu/drm/i915/selftests/huge_gem_object.h  | 45 ---
 drivers/gpu/drm/i915/selftests/i915_gem.c |  4 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  3 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  3 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c |  3 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  2 +
 drivers/gpu/drm/i915/selftests/igt_spinner.h  |  1 -
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  5 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c|  2 +-
 .../drm/i915/selftests/intel_workarounds.c|  4 +-
 drivers/gpu/drm/i915/selftests/mock_context.h | 42 -
 drivers/gpu/drm/i915/selftests/mock_dmabuf.h  | 41 -
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  2 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  5 ++-
 69 files changed, 344 insertions(+), 691 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_clflush.c (77%)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.h
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.h (80%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context_types.h (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c (86%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_execbuffer.c (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_internal.c (81%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_shrinker.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_stolen.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_tiling.c (90%)
 rename drivers/gpu/drm/i915/{

[Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v4,1/3] drm/i915/vbt: Parse and use the new field with PSR2 TP2/3 wakeup time

2019-03-13 Thread Patchwork
== Series Details ==

Series: series starting with [v4,1/3] drm/i915/vbt: Parse and use the new field 
with PSR2 TP2/3 wakeup time
URL   : https://patchwork.freedesktop.org/series/57896/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5737 -> Patchwork_12443


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://patchwork.freedesktop.org/api/1.0/series/57896/revisions/1/mbox/

Known issues


  Here are the changes found in Patchwork_12443 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@amdgpu/amd_cs_nop@fork-gfx0:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109315] +17

  * igt@gem_ctx_create@basic-files:
- fi-gdg-551: NOTRUN -> SKIP [fdo#109271] +106

  * igt@gem_exec_basic@gtt-bsd2:
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] +57

  * igt@gem_exec_basic@readonly-bsd1:
- fi-snb-2520m:   NOTRUN -> SKIP [fdo#109271] +57
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109276] +7

  * igt@gem_exec_parse@basic-allowed:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109289] +1

  * igt@i915_selftest@live_contexts:
- fi-icl-u2:  NOTRUN -> DMESG-FAIL [fdo#108569]

  * igt@i915_selftest@live_execlists:
- fi-apl-guc: PASS -> INCOMPLETE [fdo#103927] / [fdo#109720]

  * igt@kms_addfb_basic@addfb25-y-tiled-small:
- fi-byt-n2820:   NOTRUN -> SKIP [fdo#109271] +56

  * igt@kms_busy@basic-flip-a:
- fi-kbl-7567u:   PASS -> SKIP [fdo#109271] / [fdo#109278] +2
- fi-gdg-551: NOTRUN -> FAIL [fdo#103182] +2

  * igt@kms_busy@basic-flip-c:
- fi-blb-e6850:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-gdg-551: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-snb-2520m:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-byt-n2820:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_chamelium@dp-edid-read:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109316] +2

  * igt@kms_chamelium@hdmi-edid-read:
- fi-hsw-peppy:   NOTRUN -> SKIP [fdo#109271] +46

  * igt@kms_chamelium@vga-hpd-fast:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109309] +1

  * igt@kms_force_connector_basic@prune-stale-modes:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109285] +3

  * igt@kms_frontbuffer_tracking@basic:
- fi-icl-u3:  PASS -> FAIL [fdo#103167]
- fi-hsw-peppy:   NOTRUN -> DMESG-FAIL [fdo#102614] / [fdo#107814]
- fi-byt-clapper: NOTRUN -> FAIL [fdo#103167]

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
- fi-byt-clapper: NOTRUN -> FAIL [fdo#103191] / [fdo#107362] +1

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-c:
- fi-blb-e6850:   NOTRUN -> SKIP [fdo#109271] +48

  * igt@runner@aborted:
- fi-apl-guc: NOTRUN -> FAIL [fdo#108622] / [fdo#109720]

  
 Possible fixes 

  * igt@gem_exec_suspend@basic-s3:
- fi-blb-e6850:   INCOMPLETE [fdo#107718] -> PASS

  
  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107814]: https://bugs.freedesktop.org/show_bug.cgi?id=107814
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108622]: https://bugs.freedesktop.org/show_bug.cgi?id=108622
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109309]: https://bugs.freedesktop.org/show_bug.cgi?id=109309
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109316]: https://bugs.freedesktop.org/show_bug.cgi?id=109316
  [fdo#109720]: https://bugs.freedesktop.org/show_bug.cgi?id=109720


Participating hosts (41 -> 41)
--

  Additional (6): fi-hsw-peppy fi-icl-u2 fi-snb-2520m fi-gdg-551 fi-byt-n2820 
fi-byt-clapper 
  Missing(6): fi-kbl-soraka fi-ilk-m540 fi-bxt-dsi fi-hsw-4200u 
fi-ctg-p8600 fi-bdw-samus 


Build changes
-

* Linux: CI_DRM_5737 -> Patchwork_12443

  CI_DRM_5737: d5bb7d77aa77996702426496078a597f30bead58 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4883: b25e06d6ddf2e42044cd9c93b613cbc7339a8c33 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12443: fa0a48728c552161eaeea7c5e1dad96edb6fc003 @ 
git://anongit.freed

Re: [Intel-gfx] [PATCH 1/7] drm/i915: Readout and check csc_mode

2019-03-13 Thread Shankar, Uma


>-Original Message-
>From: Ville Syrjala [mailto:ville.syrj...@linux.intel.com]
>Sent: Tuesday, February 19, 2019 1:02 AM
>To: intel-gfx@lists.freedesktop.org
>Cc: Shankar, Uma ; Roper, Matthew D
>
>Subject: [PATCH 1/7] drm/i915: Readout and check csc_mode
>
>From: Ville Syrjälä 
>
>Add the missing readout and PIPE_CONF_CHECK() for csc_mode.

Looks good to me.
Reviewed-by: Uma Shankar 

>Signed-off-by: Ville Syrjälä 
>---
> drivers/gpu/drm/i915/intel_color.c   | 4 ++--
> drivers/gpu/drm/i915/intel_display.c | 5 +
> 2 files changed, 7 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/intel_color.c 
>b/drivers/gpu/drm/i915/intel_color.c
>index da7a07d5ccea..d813b9d0f5c0 100644
>--- a/drivers/gpu/drm/i915/intel_color.c
>+++ b/drivers/gpu/drm/i915/intel_color.c
>@@ -788,6 +788,8 @@ int intel_color_check(struct intel_crtc_state *crtc_state)
>   if (ret)
>   return ret;
>
>+  crtc_state->csc_mode = 0;
>+
>   /* Always allow legacy gamma LUT with no further checking. */
>   if (!crtc_state->gamma_enable ||
>   crtc_state_is_legacy_gamma(crtc_state)) { @@ -814,8 +816,6 @@ int
>intel_color_check(struct intel_crtc_state *crtc_state)
>   else
>   crtc_state->gamma_mode = GAMMA_MODE_MODE_8BIT;
>
>-  crtc_state->csc_mode = 0;
>-
>   if (INTEL_GEN(dev_priv) >= 11) {
>   if (crtc_state->output_format ==
>INTEL_OUTPUT_FORMAT_YCBCR420 ||
>   crtc_state->output_format ==
>INTEL_OUTPUT_FORMAT_YCBCR444) diff --git
>a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>index afa21daaae51..2e4d33634e0c 100644
>--- a/drivers/gpu/drm/i915/intel_display.c
>+++ b/drivers/gpu/drm/i915/intel_display.c
>@@ -9267,6 +9267,8 @@ static bool ironlake_get_pipe_config(struct intel_crtc
>*crtc,
>   pipe_config->gamma_mode = (tmp & PIPECONF_GAMMA_MODE_MASK_ILK)
>>>
>   PIPECONF_GAMMA_MODE_SHIFT;
>
>+  pipe_config->csc_mode = I915_READ(PIPE_CSC_MODE(crtc->pipe));
>+
>   i9xx_get_pipe_color_config(pipe_config);
>
>   if (I915_READ(PCH_TRANSCONF(crtc->pipe)) & TRANS_ENABLE) { @@ -
>9903,6 +9905,8 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
>   pipe_config->gamma_mode =
>   I915_READ(GAMMA_MODE(crtc->pipe)) &
>GAMMA_MODE_MODE_MASK;
>
>+  pipe_config->csc_mode = I915_READ(PIPE_CSC_MODE(crtc->pipe));
>+
>   if (INTEL_GEN(dev_priv) >= 9) {
>   u32 tmp = I915_READ(SKL_BOTTOM_COLOR(crtc->pipe));
>
>@@ -12146,6 +12150,7 @@ intel_pipe_config_compare(struct drm_i915_private
>*dev_priv,
>   PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate);
>
>   PIPE_CONF_CHECK_X(gamma_mode);
>+  PIPE_CONF_CHECK_X(csc_mode);
>   PIPE_CONF_CHECK_BOOL(gamma_enable);
>   PIPE_CONF_CHECK_BOOL(csc_enable);
>   }
>--
>2.19.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Fix PSR2 selective update corruption after PSR1 setup

2019-03-13 Thread Patchwork
== Series Details ==

Series: drm/i915: Fix PSR2 selective update corruption after PSR1 setup
URL   : https://patchwork.freedesktop.org/series/57900/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Fix PSR2 selective update corruption after PSR1 setup
-O:drivers/gpu/drm/i915/intel_psr.c:501:27: warning: expression using 
sizeof(void)
-O:drivers/gpu/drm/i915/intel_psr.c:503:23: warning: expression using 
sizeof(void)
-O:drivers/gpu/drm/i915/intel_psr.c:503:23: warning: expression using 
sizeof(void)
+drivers/gpu/drm/i915/intel_psr.c:510:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_psr.c:511:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_psr.c:511:23: warning: expression using sizeof(void)

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 2/7] drm/i915: Preocmpute/readout/check CHV CGM mode

2019-03-13 Thread Shankar, Uma


>-Original Message-
>From: Ville Syrjala [mailto:ville.syrj...@linux.intel.com]
>Sent: Tuesday, February 19, 2019 1:02 AM
>To: intel-gfx@lists.freedesktop.org
>Cc: Shankar, Uma ; Roper, Matthew D
>
>Subject: [PATCH 2/7] drm/i915: Preocmpute/readout/check CHV CGM mode

Typo in precompute

>
>From: Ville Syrjälä 
>
>Let's precompte the CGM mode for CHV. And naturally we also read it out and 
>check

Same here.

Rest looks good to me. With the above minor nits fixed.
Reviewed-by: Uma Shankar 

>it.
>
>Signed-off-by: Ville Syrjälä 
>---
> drivers/gpu/drm/i915/intel_color.c   | 28 +---
> drivers/gpu/drm/i915/intel_display.c |  8 +++-
> drivers/gpu/drm/i915/intel_drv.h |  9 +++--
> 3 files changed, 35 insertions(+), 10 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/intel_color.c 
>b/drivers/gpu/drm/i915/intel_color.c
>index d813b9d0f5c0..93428d86510a 100644
>--- a/drivers/gpu/drm/i915/intel_color.c
>+++ b/drivers/gpu/drm/i915/intel_color.c
>@@ -294,7 +294,6 @@ static void cherryview_load_csc_matrix(const struct
>intel_crtc_state *crtc_state
>   struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>   enum pipe pipe = crtc->pipe;
>-  u32 mode;
>
>   if (crtc_state->base.ctm) {
>   const struct drm_color_ctm *ctm = crtc_state->base.ctm->data; @@
>-328,12 +327,7 @@ static void cherryview_load_csc_matrix(const struct
>intel_crtc_state *crtc_state
>   I915_WRITE(CGM_PIPE_CSC_COEFF8(pipe), coeffs[8]);
>   }
>
>-  mode = (crtc_state->base.ctm ? CGM_PIPE_MODE_CSC : 0);
>-  if (!crtc_state_is_legacy_gamma(crtc_state)) {
>-  mode |= (crtc_state->base.degamma_lut ?
>CGM_PIPE_MODE_DEGAMMA : 0) |
>-  (crtc_state->base.gamma_lut ? CGM_PIPE_MODE_GAMMA :
>0);
>-  }
>-  I915_WRITE(CGM_PIPE_MODE(pipe), mode);
>+  I915_WRITE(CGM_PIPE_MODE(pipe), crtc_state->cgm_mode);
> }
>
> /* Loads the legacy palette/gamma unit for the CRTC. */ @@ -753,6 +747,23 @@
>static int check_lut_size(const struct drm_property_blob *lut, int expected)
>   return 0;
> }
>
>+static u32 chv_cgm_mode(const struct intel_crtc_state *crtc_state) {
>+  u32 cgm_mode = 0;
>+
>+  if (crtc_state_is_legacy_gamma(crtc_state))
>+  return 0;
>+
>+  if (crtc_state->base.degamma_lut)
>+  cgm_mode |= CGM_PIPE_MODE_DEGAMMA;
>+  if (crtc_state->base.ctm)
>+  cgm_mode |= CGM_PIPE_MODE_CSC;
>+  if (crtc_state->base.gamma_lut)
>+  cgm_mode |= CGM_PIPE_MODE_GAMMA;
>+
>+  return cgm_mode;
>+}
>+
> int intel_color_check(struct intel_crtc_state *crtc_state)  {
>   struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
>@@ -790,6 +801,9 @@ int intel_color_check(struct intel_crtc_state *crtc_state)
>
>   crtc_state->csc_mode = 0;
>
>+  if (IS_CHERRYVIEW(dev_priv))
>+  crtc_state->cgm_mode = chv_cgm_mode(crtc_state);
>+
>   /* Always allow legacy gamma LUT with no further checking. */
>   if (!crtc_state->gamma_enable ||
>   crtc_state_is_legacy_gamma(crtc_state)) { diff --git
>a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>index 2e4d33634e0c..d2f3174e10d6 100644
>--- a/drivers/gpu/drm/i915/intel_display.c
>+++ b/drivers/gpu/drm/i915/intel_display.c
>@@ -8187,6 +8187,9 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
>   pipe_config->gamma_mode = (tmp &
>PIPECONF_GAMMA_MODE_MASK_I9XX) >>
>   PIPECONF_GAMMA_MODE_SHIFT;
>
>+  if (IS_CHERRYVIEW(dev_priv))
>+  pipe_config->cgm_mode = I915_READ(CGM_PIPE_MODE(crtc-
>>pipe));
>+
>   i9xx_get_pipe_color_config(pipe_config);
>
>   if (INTEL_GEN(dev_priv) < 4)
>@@ -12150,7 +12153,10 @@ intel_pipe_config_compare(struct drm_i915_private
>*dev_priv,
>   PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate);
>
>   PIPE_CONF_CHECK_X(gamma_mode);
>-  PIPE_CONF_CHECK_X(csc_mode);
>+  if (IS_CHERRYVIEW(dev_priv))
>+  PIPE_CONF_CHECK_X(cgm_mode);
>+  else
>+  PIPE_CONF_CHECK_X(csc_mode);
>   PIPE_CONF_CHECK_BOOL(gamma_enable);
>   PIPE_CONF_CHECK_BOOL(csc_enable);
>   }
>diff --git a/drivers/gpu/drm/i915/intel_drv.h 
>b/drivers/gpu/drm/i915/intel_drv.h
>index eec4ed93c335..bbe9cf7e20d6 100644
>--- a/drivers/gpu/drm/i915/intel_drv.h
>+++ b/drivers/gpu/drm/i915/intel_drv.h
>@@ -942,8 +942,13 @@ struct intel_crtc_state {
>   /* Gamma mode programmed on the pipe */
>   u32 gamma_mode;
>
>-  /* CSC mode programmed on the pipe */
>-  u32 csc_mode;
>+  union {
>+  /* CSC mode programmed on the pipe */
>+  u32 csc_mode;
>+
>+  /* CHV CGM mode */
>+  u32 cgm_mode;
>+  };
>
>   /* bitmask of visible planes (enum plane_id) */
>   u8 active

Re: [Intel-gfx] [PATCH 3/7] drm/i915: Extract ilk_csc_limited_range()

2019-03-13 Thread Shankar, Uma


>-Original Message-
>From: Ville Syrjala [mailto:ville.syrj...@linux.intel.com]
>Sent: Tuesday, February 19, 2019 1:02 AM
>To: intel-gfx@lists.freedesktop.org
>Cc: Shankar, Uma ; Roper, Matthew D
>
>Subject: [PATCH 3/7] drm/i915: Extract ilk_csc_limited_range()
>
>From: Ville Syrjälä 
>
>Extract a helper which determines if we need to use the pipe CSC for limited 
>range
>RGB output.
>
>Signed-off-by: Ville Syrjälä 
>---
> drivers/gpu/drm/i915/intel_color.c | 22 ++
> 1 file changed, 14 insertions(+), 8 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/intel_color.c 
>b/drivers/gpu/drm/i915/intel_color.c
>index 93428d86510a..ddc48c0d45ac 100644
>--- a/drivers/gpu/drm/i915/intel_color.c
>+++ b/drivers/gpu/drm/i915/intel_color.c
>@@ -161,22 +161,28 @@ static void ilk_load_ycbcr_conversion_matrix(struct
>intel_crtc *crtc)
>   }
> }
>
>+static bool ilk_csc_limited_range(const struct intel_crtc_state
>+*crtc_state) {
>+  struct drm_i915_private *dev_priv =
>+to_i915(crtc_state->base.crtc->dev);
>+
>+  /*
>+   * FIXME if there's a gamma LUT after the CSC, we should
>+   * do the range compression using the gamma LUT instead.
>+   */
>+  return crtc_state->limited_color_range &&
>+  (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
>+   IS_GEN_RANGE(dev_priv, 9, 10));

We should include Gen8 also to this list. Is it intentional to drop that?
With this fixed or justified reasoning, 
Reviewed-by: Uma Shankar 

>+}
>+
> static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state)  {
>   struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>-  bool limited_color_range = false;
>+  bool limited_color_range = ilk_csc_limited_range(crtc_state);
>   enum pipe pipe = crtc->pipe;
>   u16 coeffs[9] = {};
>   int i;
>
>-  /*
>-   * FIXME if there's a gamma LUT after the CSC, we should
>-   * do the range compression using the gamma LUT instead.
>-   */
>-  if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv))
>-  limited_color_range = crtc_state->limited_color_range;
>-
>   if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ||
>   crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) {
>   ilk_load_ycbcr_conversion_matrix(crtc);
>--
>2.19.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for skl+ cursor DDB allocation fixes

2019-03-13 Thread Patchwork
== Series Details ==

Series: skl+ cursor DDB allocation fixes
URL   : https://patchwork.freedesktop.org/series/57901/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Accept alloc_size == blocks
Okay!

Commit: drm/i915: Don't pass plane state to skl_compute_plane_wm()
Okay!

Commit: drm/i915: Extract skl_compute_wm_params()
Okay!

Commit: drm/i915: Allocate enough DDB for the cursor
+drivers/gpu/drm/i915/intel_pm.c:3955:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_pm.c:3955:16: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/intel_pm.c:4450:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/intel_pm.c:4450:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_pm.c:4450:25: warning: too many warnings
-drivers/gpu/drm/i915/intel_pm.c:4935:30: warning: too many warnings

Commit: drm/i915: Make sure cursor has enough ddb for the selected wm level
Okay!

Commit: drm/i915: Keep plane watermarks enabled more aggressively
Okay!

Commit: drm/i915: Move some variables to tighter scope
-O:drivers/gpu/drm/i915/intel_pm.c:4446:25: warning: expression using 
sizeof(void)
-O:drivers/gpu/drm/i915/intel_pm.c:4446:25: warning: expression using 
sizeof(void)
+drivers/gpu/drm/i915/intel_pm.c:4446:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_pm.c:4446:25: warning: expression using sizeof(void)

Commit: drm/i915: Don't pass pipe_wm around so much
Okay!

Commit: drm/i915: Inline skl_build_pipe_wm() into its only caller
Okay!

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Michel Dänzer
On 2019-03-13 2:37 p.m., Christian König wrote:
> Am 13.03.19 um 14:31 schrieb Ville Syrjälä:
>> On Wed, Mar 13, 2019 at 10:35:08AM +0100, Michel Dänzer wrote:
>>> On 2019-03-12 6:15 p.m., Noralf Trønnes wrote:

 Den 12.03.2019 17.17, skrev Ville Syrjälä:
> On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:
>> On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:
>>> This adds support for outputting kernel messages on panic().
>>> A kernel message dumper is used to dump the log. The dumper iterates
>>> over each DRM device and its CRTCs to find suitable framebuffers.
>>>
>>> All the other dumpers are run before this one except mtdoops.
>>> Only atomic drivers are supported.
>>>
>>> Signed-off-by: Noralf Trønnes 
>>> ---
>>>   [...]
>>>
>>> diff --git a/include/drm/drm_framebuffer.h
>>> b/include/drm/drm_framebuffer.h
>>> index f0b34c977ec5..f3274798ecfe 100644
>>> --- a/include/drm/drm_framebuffer.h
>>> +++ b/include/drm/drm_framebuffer.h
>>> @@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
>>>    struct drm_file *file_priv, unsigned flags,
>>>    unsigned color, struct drm_clip_rect *clips,
>>>    unsigned num_clips);
>>> +
>>> +    /**
>>> + * @panic_vmap:
>>> + *
>>> + * Optional callback for panic handling.
>>> + *
>>> + * For vmapping the selected framebuffer in a panic context.
>>> Must
>>> + * be super careful about locking (only trylocking allowed).
>>> + *
>>> + * RETURNS:
>>> + *
>>> + * NULL if it didn't work out, otherwise an opaque cookie
>>> which is
>>> + * passed to @panic_draw_xy. It can be anything: vmap area,
>>> structure
>>> + * with more details, just a few flags, ...
>>> + */
>>> +    void *(*panic_vmap)(struct drm_framebuffer *fb);
>> FWIW, the panic_vmap hook cannot work in general with the
>> amdgpu/radeon
>> drivers:
>>
>> Framebuffers are normally tiled, writing to them with the CPU
>> results in
>> garbled output.
>>
 In which case the driver needs to support the ->panic_draw_xy callback,
 or maybe it's possible to make a generic helper for tiled buffers.
>>> I'm afraid that won't help, at least not without porting big chunks of
>>> https://gitlab.freedesktop.org/mesa/mesa/tree/master/src/amd/addrlib
>>> into the kernel, none of which will be used for anything else.
>>>
>>>
>> There would need to be a mechanism for switching scanout to a linear,
>> CPU accessible framebuffer.
> I suppose panic_vmap() could just provide a linear temp buffer
> to the panic handler, and panic_unmap() could copy the contents
> over to the real fb.
>>> Copy how? Using a GPU engine?
>> CPU maybe? Though I suppose that won't work if the buffer isn't CPU
>> accessible :/
> 
> Well we do have a debug path for accessing invisible memory with the CPU.
> 
> E.g. three registers: DATA and auto increment OFFSET_LO/HI. So you can
> just read/write DATA over and over again if you want to access some memory.

Right. I assume that'll be very slow, but I guess it could do when the
memory isn't directly CPU accessible.


> But turning off tiling etc. is still extremely tricky when the system is
> already unstable.

Maybe we could add a little hook to the display code, which just
disables tiling for scanout and maybe disables non-primary planes, but
doesn't touch anything else. Harry / Nicholas, does that seem feasible?


I'm coming around from "this is never going to work" to "it might
actually work" with our hardware...


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Christian König

Am 13.03.19 um 16:38 schrieb Michel Dänzer:

On 2019-03-13 2:37 p.m., Christian König wrote:

Am 13.03.19 um 14:31 schrieb Ville Syrjälä:

On Wed, Mar 13, 2019 at 10:35:08AM +0100, Michel Dänzer wrote:

On 2019-03-12 6:15 p.m., Noralf Trønnes wrote:

Den 12.03.2019 17.17, skrev Ville Syrjälä:

On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:

On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:

This adds support for outputting kernel messages on panic().
A kernel message dumper is used to dump the log. The dumper iterates
over each DRM device and its CRTCs to find suitable framebuffers.

All the other dumpers are run before this one except mtdoops.
Only atomic drivers are supported.

Signed-off-by: Noralf Trønnes 
---
   [...]

diff --git a/include/drm/drm_framebuffer.h
b/include/drm/drm_framebuffer.h
index f0b34c977ec5..f3274798ecfe 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
    struct drm_file *file_priv, unsigned flags,
    unsigned color, struct drm_clip_rect *clips,
    unsigned num_clips);
+
+    /**
+ * @panic_vmap:
+ *
+ * Optional callback for panic handling.
+ *
+ * For vmapping the selected framebuffer in a panic context.
Must
+ * be super careful about locking (only trylocking allowed).
+ *
+ * RETURNS:
+ *
+ * NULL if it didn't work out, otherwise an opaque cookie
which is
+ * passed to @panic_draw_xy. It can be anything: vmap area,
structure
+ * with more details, just a few flags, ...
+ */
+    void *(*panic_vmap)(struct drm_framebuffer *fb);

FWIW, the panic_vmap hook cannot work in general with the
amdgpu/radeon
drivers:

Framebuffers are normally tiled, writing to them with the CPU
results in
garbled output.


In which case the driver needs to support the ->panic_draw_xy callback,
or maybe it's possible to make a generic helper for tiled buffers.

I'm afraid that won't help, at least not without porting big chunks of
https://gitlab.freedesktop.org/mesa/mesa/tree/master/src/amd/addrlib
into the kernel, none of which will be used for anything else.



There would need to be a mechanism for switching scanout to a linear,
CPU accessible framebuffer.

I suppose panic_vmap() could just provide a linear temp buffer
to the panic handler, and panic_unmap() could copy the contents
over to the real fb.

Copy how? Using a GPU engine?

CPU maybe? Though I suppose that won't work if the buffer isn't CPU
accessible :/

Well we do have a debug path for accessing invisible memory with the CPU.

E.g. three registers: DATA and auto increment OFFSET_LO/HI. So you can
just read/write DATA over and over again if you want to access some memory.

Right. I assume that'll be very slow, but I guess it could do when the
memory isn't directly CPU accessible.


Just made a quick test and reading 33423360 bytes (4096x2040x4) using 
that interfaces takes about 13 seconds.


IIRC we don't use the auto increment optimization yet, so that can 
probably be improved by a factor of 3 or more.



But turning off tiling etc. is still extremely tricky when the system is
already unstable.

Maybe we could add a little hook to the display code, which just
disables tiling for scanout and maybe disables non-primary planes, but
doesn't touch anything else. Harry / Nicholas, does that seem feasible?


I'm coming around from "this is never going to work" to "it might
actually work" with our hardware...


Yeah, agree. It's a bit tricky, but doable.

Takeaway for Noralf is that this whole vmap on panic won't even remotely 
work. We need to get the data byte by byte without a page mapping if 
that is ever going to fly.


Christian.






___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread John Ogness
On 2019-03-12, Ahmed S. Darwish  wrote:
 +
 +static void drm_panic_kmsg_dump(struct kmsg_dumper *dumper,
 +  enum kmsg_dump_reason reason)
 +{
 +  class_for_each_device(drm_class, NULL, dumper, drm_panic_dev_iter);
>>>
>>> class_for_each_device uses klist, which only uses an irqsave
>>> spinlock. I think that's good enough. Comment to that effect would
>>> be good e.g.
>>>
>>> /* based on klist, which uses only a spin_lock_irqsave, which we
>>>  * assume still works */
>>>
>>> If we aim for perfect this should be a trylock still, maybe using
>>> our own device list.
>>>
>
> I definitely agree here.
>
> The lock may already be locked either by a stopped CPU, or by the
> very same CPU we execute panic() on (e.g. NMI panic() on the
> printing CPU).
>
> This is why it's very common for example in serial consoles, which
> are usually careful about re-entrance and panic contexts, to do:
>
>   xx_console_write(...) {
>   if (oops_in_progress)
>   locked = spin_trylock_irqsave(&port->lock, flags);
>   else
>   spin_lock_irqsave(&port->lock, flags);
>   }
>
> I'm quite positive we should do the same for panic drm drivers.

This construction will continue, even if the trylock fails. It only
makes sense to do this if the driver has a chance of being
successful. Ignoring locks is a serious issue. I personally am quite
unhappy that the serial drivers do this, which was part of my motivation
for the new printk design I'm working on.

If the driver is not capable of doing something useful on a failed
trylock, then I recommend just skipping that device. Maybe trying it
again later after trying all the devices?

John Ogness
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Ahmed S. Darwish
Hi,

[[ CCing John for the trylock parts ]]

On Mon, Mar 11, 2019 at 11:33:15PM +0100, Noralf Trønnes wrote:
>
>
> Den 11.03.2019 20.23, skrev Daniel Vetter:
> > On Mon, Mar 11, 2019 at 06:42:16PM +0100, Noralf Trønnes wrote:
> >> This adds support for outputting kernel messages on panic().
> >> A kernel message dumper is used to dump the log. The dumper iterates
> >> over each DRM device and its CRTCs to find suitable framebuffers.
> >>
> >> All the other dumpers are run before this one except mtdoops.
> >> Only atomic drivers are supported.
> >>
> >> Signed-off-by: Noralf Trønnes 
> >
> > Bunch of comments/ideas for you or Darwish below, whoever picks this up.
>
> Actually it would ne nice if Darwish could pick it up since he will do
> it on i915 which will be useful to a much broader audience.
> If not I'll respin when I'm done with the drm_fb_helper refactoring.
>

Yup, I'll be more than happy to do this.. while preserving all of
Noralf's authorship and copyright notices of course.

I guess it can be:

  - Handle the comments posted by Daniel and others (I'll post
some questions too)

  - Add the necessary i915 specific bits

  - Test, post v3/v4/../vn. Rinse and repeat. Keep it local at
dri-devel until getting the necessary S-o-Bs.

  - Post to wider audience (some feedback from distribution folks
would also be nice, before posting to lkml)

More comments below..

[...]

> >> +
> >> +static void drm_panic_kmsg_dump(struct kmsg_dumper *dumper,
> >> +  enum kmsg_dump_reason reason)
> >> +{
> >> +  class_for_each_device(drm_class, NULL, dumper, drm_panic_dev_iter);
> >
> > class_for_each_device uses klist, which only uses an irqsave spinlock. I
> > think that's good enough. Comment to that effect would be good e.g.
> >
> > /* based on klist, which uses only a spin_lock_irqsave, which we
> >  * assume still works */
> >
> > If we aim for perfect this should be a trylock still, maybe using our own
> > device list.
> >

I definitely agree here.

The lock may already be locked either by a stopped CPU, or by the
very same CPU we execute panic() on (e.g. NMI panic() on the
printing CPU).

This is why it's very common for example in serial consoles, which
are usually careful about re-entrance and panic contexts, to do:

  xx_console_write(...) {
if (oops_in_progress)
locked = spin_trylock_irqsave(&port->lock, flags);
else
spin_lock_irqsave(&port->lock, flags);
  }

I'm quite positive we should do the same for panic drm drivers.
John?

> >> +}
> >> +
> >> +static struct kmsg_dumper drm_panic_kmsg_dumper = {
> >> +  .dump = drm_panic_kmsg_dump,
> >> +  .max_reason = KMSG_DUMP_PANIC,
> >> +};
> >> +
> >> +static ssize_t drm_panic_file_panic_write(struct file *file,
> >> +const char __user *user_buf,
> >> +size_t count, loff_t *ppos)
> >> +{
> >> +  unsigned long long val;
> >> +  char buf[24];
> >> +  size_t size;
> >> +  ssize_t ret;
> >> +
> >> +  size = min(sizeof(buf) - 1, count);
> >> +  if (copy_from_user(buf, user_buf, size))
> >> +  return -EFAULT;
> >> +
> >> +  buf[size] = '\0';
> >> +  ret = kstrtoull(buf, 0, &val);
> >> +  if (ret)
> >> +  return ret;
> >> +
> >> +  drm_panic_kmsg_dumper.max_reason = KMSG_DUMP_OOPS;
> >> +  wmb();
> >> +
> >> +  /* Do a real test with: echo c > /proc/sysrq-trigger */
> >> +
> >> +  if (val == 0) {
> >> +  pr_info("Test panic screen using kmsg_dump(OOPS)\n");
> >> +  kmsg_dump(KMSG_DUMP_OOPS);
> >> +  } else if (val == 1) {
> >> +  char *null_pointer = NULL;
> >> +
> >> +  pr_info("Test panic screen using NULL pointer dereference\n");
> >> +  *null_pointer = 1;
> >> +  } else {
> >> +  return -EINVAL;
> >> +  }
> >
> > This isn't quite what I had in mind, since it still kills the kernel (like
> > sysrq-trigger).
>
> If val == 0, it doesn't kill the kernel, it only dumps the kernel log.
> And it doesn't taint the kernel either.
>
> > Instead what I had in mind is to recreate the worst
> > possible panic context as much as feasible (disabling interrupts should be
> > a good start, maybe we can even do an nmi callback), and then call our
> > panic implementation. That way we can test the panic handler in a
> > non-destructive way (i.e. aside from last dmesg lines printed to the
> > screen nothing bad happens to the kernel: No real panic, no oops, no
> > tainting).
>
> The interrupt case I can do, nmi I have no idea.
>

I agree too. Disabling interrupts + CONFIG_DEBUG_ATOMIC_SLEEP
would be a nice non-destructive test-case emulation.

thanks!

--
darwi
http://darwish.chasingpointers.com
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Fix PSR2 selective update corruption after PSR1 setup

2019-03-13 Thread Patchwork
== Series Details ==

Series: drm/i915: Fix PSR2 selective update corruption after PSR1 setup
URL   : https://patchwork.freedesktop.org/series/57900/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5737 -> Patchwork_12444


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_12444 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12444, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://patchwork.freedesktop.org/api/1.0/series/57900/revisions/1/mbox/

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_12444:

### IGT changes ###

 Possible regressions 

  * igt@i915_selftest@live_hangcheck:
- fi-cfl-8700k:   PASS -> DMESG-FAIL

  
Known issues


  Here are the changes found in Patchwork_12444 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@amdgpu/amd_cs_nop@fork-gfx0:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109315] +17

  * igt@gem_ctx_create@basic-files:
- fi-gdg-551: NOTRUN -> SKIP [fdo#109271] +106

  * igt@gem_exec_basic@gtt-bsd2:
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] +57

  * igt@gem_exec_basic@readonly-bsd1:
- fi-snb-2520m:   NOTRUN -> SKIP [fdo#109271] +57
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109276] +7

  * igt@gem_exec_parse@basic-allowed:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109289] +1

  * igt@i915_module_load@reload:
- fi-blb-e6850:   NOTRUN -> INCOMPLETE [fdo#107718]

  * igt@i915_pm_rpm@module-reload:
- fi-skl-6770hq:  PASS -> FAIL [fdo#108511]

  * igt@i915_selftest@live_contexts:
- fi-icl-u2:  NOTRUN -> DMESG-FAIL [fdo#108569]

  * igt@i915_selftest@live_execlists:
- fi-apl-guc: PASS -> INCOMPLETE [fdo#103927] / [fdo#109720]

  * igt@kms_addfb_basic@addfb25-y-tiled-small:
- fi-byt-n2820:   NOTRUN -> SKIP [fdo#109271] +56

  * igt@kms_busy@basic-flip-a:
- fi-gdg-551: NOTRUN -> FAIL [fdo#103182]

  * igt@kms_busy@basic-flip-c:
- fi-blb-e6850:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-gdg-551: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-snb-2520m:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-byt-n2820:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_chamelium@dp-edid-read:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109316] +2

  * igt@kms_chamelium@hdmi-edid-read:
- fi-hsw-peppy:   NOTRUN -> SKIP [fdo#109271] +46

  * igt@kms_chamelium@vga-hpd-fast:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109309] +1

  * igt@kms_force_connector_basic@prune-stale-modes:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109285] +3

  * igt@kms_frontbuffer_tracking@basic:
- fi-icl-u3:  PASS -> FAIL [fdo#103167]
- fi-hsw-peppy:   NOTRUN -> DMESG-FAIL [fdo#102614] / [fdo#107814]
- fi-icl-u2:  NOTRUN -> FAIL [fdo#103167]
- fi-byt-clapper: NOTRUN -> FAIL [fdo#103167]

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-c:
- fi-blb-e6850:   NOTRUN -> SKIP [fdo#109271] +29

  * igt@prime_vgem@basic-fence-flip:
- fi-gdg-551: NOTRUN -> DMESG-FAIL [fdo#103182]

  * igt@runner@aborted:
- fi-apl-guc: NOTRUN -> FAIL [fdo#108622] / [fdo#109720]

  
 Possible fixes 

  * igt@gem_exec_suspend@basic-s3:
- fi-blb-e6850:   INCOMPLETE [fdo#107718] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
  the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107814]: https://bugs.freedesktop.org/show_bug.cgi?id=107814
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108622]: https://bugs.freedesktop.org/show_bug.cgi?id=108622
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109294]: https://bugs.freedesktop.org/show_bug.cgi?id=109294
  [fdo#109309]:

Re: [Intel-gfx] [PATCH] drm/i915/ddi: Fix default eDP detection on port A

2019-03-13 Thread Jani Nikula
On Thu, 07 Mar 2019, Jani Nikula  wrote:
> On Thu, 07 Mar 2019, Thomas Preston  wrote:
>> Would you like me to resubmit with the suggested changes?
>
> Nah, we can tweak the commit message while applying.

Pushed to dinq, thanks for the patch.

BR,
Jani.

-- 
Jani Nikula, Intel Open Source Graphics Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] ✓ Fi.CI.BAT: success for skl+ cursor DDB allocation fixes

2019-03-13 Thread Patchwork
== Series Details ==

Series: skl+ cursor DDB allocation fixes
URL   : https://patchwork.freedesktop.org/series/57901/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5737 -> Patchwork_12445


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://patchwork.freedesktop.org/api/1.0/series/57901/revisions/1/mbox/

Known issues


  Here are the changes found in Patchwork_12445 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@amdgpu/amd_cs_nop@fork-gfx0:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109315] +17

  * igt@gem_ctx_create@basic-files:
- fi-gdg-551: NOTRUN -> SKIP [fdo#109271] +106

  * igt@gem_exec_basic@gtt-bsd2:
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] +57

  * igt@gem_exec_basic@readonly-bsd1:
- fi-snb-2520m:   NOTRUN -> SKIP [fdo#109271] +57
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109276] +7

  * igt@gem_exec_parse@basic-allowed:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109289] +1

  * igt@gem_exec_suspend@basic-s4-devices:
- fi-blb-e6850:   NOTRUN -> INCOMPLETE [fdo#107718]

  * igt@i915_selftest@live_contexts:
- fi-icl-u2:  NOTRUN -> DMESG-FAIL [fdo#108569]

  * igt@i915_selftest@live_execlists:
- fi-apl-guc: PASS -> INCOMPLETE [fdo#103927] / [fdo#109720]

  * igt@kms_addfb_basic@addfb25-y-tiled-small:
- fi-byt-n2820:   NOTRUN -> SKIP [fdo#109271] +56

  * igt@kms_busy@basic-flip-c:
- fi-byt-clapper: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-gdg-551: NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-snb-2520m:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
- fi-byt-n2820:   NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_chamelium@dp-edid-read:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109316] +2

  * igt@kms_chamelium@hdmi-edid-read:
- fi-hsw-peppy:   NOTRUN -> SKIP [fdo#109271] +46

  * igt@kms_chamelium@vga-hpd-fast:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109309] +1

  * igt@kms_force_connector_basic@prune-stale-modes:
- fi-icl-u2:  NOTRUN -> SKIP [fdo#109285] +3

  * igt@kms_frontbuffer_tracking@basic:
- fi-icl-u3:  PASS -> FAIL [fdo#103167]
- fi-hsw-peppy:   NOTRUN -> DMESG-FAIL [fdo#102614] / [fdo#107814]
- fi-icl-u2:  NOTRUN -> FAIL [fdo#103167]

  * igt@runner@aborted:
- fi-apl-guc: NOTRUN -> FAIL [fdo#108622] / [fdo#109720]

  
 Possible fixes 

  * igt@gem_exec_suspend@basic-s3:
- fi-blb-e6850:   INCOMPLETE [fdo#107718] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
  the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107814]: https://bugs.freedesktop.org/show_bug.cgi?id=107814
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108622]: https://bugs.freedesktop.org/show_bug.cgi?id=108622
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109294]: https://bugs.freedesktop.org/show_bug.cgi?id=109294
  [fdo#109309]: https://bugs.freedesktop.org/show_bug.cgi?id=109309
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109316]: https://bugs.freedesktop.org/show_bug.cgi?id=109316
  [fdo#109638]: https://bugs.freedesktop.org/show_bug.cgi?id=109638
  [fdo#109720]: https://bugs.freedesktop.org/show_bug.cgi?id=109720
  [fdo#110028]: https://bugs.freedesktop.org/show_bug.cgi?id=110028


Participating hosts (41 -> 43)
--

  Additional (7): fi-hsw-peppy fi-icl-u2 fi-snb-2520m fi-gdg-551 fi-icl-y 
fi-byt-n2820 fi-byt-clapper 
  Missing(5): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-ctg-p8600 
fi-bdw-samus 


Build changes
-

* Linux: CI_DRM_5737 -> Patchwork_12445

  CI_DRM_5737: d5bb7d77aa77996702426496078a597f30bead58 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4883: b25e06d6ddf2e42044cd9c93b613cbc7339a8c33 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12445: c8c28fb7a798e26b588ca10925f7f8abd9c9f401 @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

c8c28fb7a798 drm/i915: Inline skl_build_pipe_wm() into its only caller
67f2a40230bd drm/i91

Re: [Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

2019-03-13 Thread Kazlauskas, Nicholas
On 3/13/19 11:54 AM, Christian König wrote:
> Am 13.03.19 um 16:38 schrieb Michel Dänzer:
>> On 2019-03-13 2:37 p.m., Christian König wrote:
>>> Am 13.03.19 um 14:31 schrieb Ville Syrjälä:
 On Wed, Mar 13, 2019 at 10:35:08AM +0100, Michel Dänzer wrote:
> On 2019-03-12 6:15 p.m., Noralf Trønnes wrote:
>> Den 12.03.2019 17.17, skrev Ville Syrjälä:
>>> On Tue, Mar 12, 2019 at 11:47:04AM +0100, Michel Dänzer wrote:
 On 2019-03-11 6:42 p.m., Noralf Trønnes wrote:
> This adds support for outputting kernel messages on panic().
> A kernel message dumper is used to dump the log. The dumper 
> iterates
> over each DRM device and its CRTCs to find suitable 
> framebuffers.
>
> All the other dumpers are run before this one except mtdoops.
> Only atomic drivers are supported.
>
> Signed-off-by: Noralf Trønnes 
> ---
>    [...]
>
> diff --git a/include/drm/drm_framebuffer.h
> b/include/drm/drm_framebuffer.h
> index f0b34c977ec5..f3274798ecfe 100644
> --- a/include/drm/drm_framebuffer.h
> +++ b/include/drm/drm_framebuffer.h
> @@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
>     struct drm_file *file_priv, unsigned flags,
>     unsigned color, struct drm_clip_rect *clips,
>     unsigned num_clips);
> +
> +    /**
> + * @panic_vmap:
> + *
> + * Optional callback for panic handling.
> + *
> + * For vmapping the selected framebuffer in a panic context.
> Must
> + * be super careful about locking (only trylocking allowed).
> + *
> + * RETURNS:
> + *
> + * NULL if it didn't work out, otherwise an opaque cookie
> which is
> + * passed to @panic_draw_xy. It can be anything: vmap area,
> structure
> + * with more details, just a few flags, ...
> + */
> +    void *(*panic_vmap)(struct drm_framebuffer *fb);
 FWIW, the panic_vmap hook cannot work in general with the
 amdgpu/radeon
 drivers:

 Framebuffers are normally tiled, writing to them with the CPU
 results in
 garbled output.

>> In which case the driver needs to support the ->panic_draw_xy 
>> callback,
>> or maybe it's possible to make a generic helper for tiled buffers.
> I'm afraid that won't help, at least not without porting big chunks of
> https://gitlab.freedesktop.org/mesa/mesa/tree/master/src/amd/addrlib
> into the kernel, none of which will be used for anything else.
>
>
 There would need to be a mechanism for switching scanout to a 
 linear,
 CPU accessible framebuffer.
>>> I suppose panic_vmap() could just provide a linear temp buffer
>>> to the panic handler, and panic_unmap() could copy the contents
>>> over to the real fb.
> Copy how? Using a GPU engine?
 CPU maybe? Though I suppose that won't work if the buffer isn't CPU
 accessible :/
>>> Well we do have a debug path for accessing invisible memory with the 
>>> CPU.
>>>
>>> E.g. three registers: DATA and auto increment OFFSET_LO/HI. So you can
>>> just read/write DATA over and over again if you want to access some 
>>> memory.
>> Right. I assume that'll be very slow, but I guess it could do when the
>> memory isn't directly CPU accessible.
> 
> Just made a quick test and reading 33423360 bytes (4096x2040x4) using 
> that interfaces takes about 13 seconds.
> 
> IIRC we don't use the auto increment optimization yet, so that can 
> probably be improved by a factor of 3 or more.
> 
>>> But turning off tiling etc. is still extremely tricky when the system is
>>> already unstable.
>> Maybe we could add a little hook to the display code, which just
>> disables tiling for scanout and maybe disables non-primary planes, but
>> doesn't touch anything else. Harry / Nicholas, does that seem feasible?
>>
>>
>> I'm coming around from "this is never going to work" to "it might
>> actually work" with our hardware...
> 
> Yeah, agree. It's a bit tricky, but doable.

A "disable_tiling" hook or something along those lines could work for 
display. It's a little bit non trivial when you want to start dealing 
with locking and any active DRM commits, but we have a global lock 
around all our hardware programming anyway that makes that easier to 
deal with.

I think we can just re-commit and update the existing hardware state 
with only the tiling info for every plane reset to off. For most buffers 
I don't think we'd have to really consider changing anything else here 
as long as you respect the current FB size and pitch.

Nicholas Kazlauskas

> 
> Takeaway for Noralf is that this whole vmap on panic won't even remotely 
> work. We need to get the data byt

[Intel-gfx] [PATCH] drm/i915: Always kick the execlists tasklet after reset

2019-03-13 Thread Chris Wilson
With direct submission being disabled while the reset in progress, we
have a small window where we may forgo the submission of a new request
and not notice its addition during execlists_reset_finish. To close this
window, always schedule the submission tasklet on coming out of reset to
catch any residual work.

<6> [333.144082] i915: Running intel_hangcheck_live_selftests/igt_reset_engines
<3> [333.296927] i915_reset_engine(rcs0:idle): failed to idle after reset
<6> [333.296932] i915 :00:02.0: [drm] rcs0
<6> [333.296934] i915 :00:02.0: [drm]   Hangcheck 0:a9ddf7a5 [4157 ms]
<6> [333.296936] i915 :00:02.0: [drm]   Reset count: 36048 (global 754)
<6> [333.296938] i915 :00:02.0: [drm]   Requests:
<6> [333.296997] i915 :00:02.0: [drm]   RING_START: 0x
<6> [333.296999] i915 :00:02.0: [drm]   RING_HEAD:  0x
<6> [333.297001] i915 :00:02.0: [drm]   RING_TAIL:  0x
<6> [333.297003] i915 :00:02.0: [drm]   RING_CTL:   0x
<6> [333.297005] i915 :00:02.0: [drm]   RING_MODE:  0x0200 [idle]
<6> [333.297007] i915 :00:02.0: [drm]   RING_IMR: feff
<6> [333.297010] i915 :00:02.0: [drm]   ACTHD:  0x_
<6> [333.297012] i915 :00:02.0: [drm]   BBADDR: 0x_
<6> [333.297015] i915 :00:02.0: [drm]   DMA_FADDR: 0x_
<6> [333.297017] i915 :00:02.0: [drm]   IPEIR: 0x
<6> [333.297019] i915 :00:02.0: [drm]   IPEHR: 0x
<6> [333.297021] i915 :00:02.0: [drm]   Execlist status: 0x0001 

<6> [333.297023] i915 :00:02.0: [drm]   Execlist CSB read 5, write 5 
[mmio:7], tasklet queued? no (enabled)
<6> [333.297025] i915 :00:02.0: [drm]   ELSP[0] idle
<6> [333.297027] i915 :00:02.0: [drm]   ELSP[1] idle
<6> [333.297028] i915 :00:02.0: [drm]   HW active? 0x0
<6> [333.297044] i915 :00:02.0: [drm]   Queue priority hint: 
-8186
<6> [333.297067] i915 :00:02.0: [drm]   Q  2afac:5f2+  
prio=-8186 @ 50ms: (null)
<6> [333.297068] i915 :00:02.0: [drm] HWSP:
<6> [333.297071] i915 :00:02.0: [drm] []    
    
<6> [333.297073] i915 :00:02.0: [drm] *
<6> [333.297075] i915 :00:02.0: [drm] [0040] 0001  0018 
0002 0001  0018 
<6> [333.297077] i915 :00:02.0: [drm] [0060] 0001  8002 
0002    0005
<6> [333.297079] i915 :00:02.0: [drm] [0080]    
    
<6> [333.297081] i915 :00:02.0: [drm] *
<6> [333.297083] i915 :00:02.0: [drm] [00c0]    
 a9ddf7a5   
<6> [333.297085] i915 :00:02.0: [drm] [00e0]    
    
<6> [333.297087] i915 :00:02.0: [drm] *
<6> [333.297089] i915 :00:02.0: [drm] Idle? no
<6> [333.297090] i915_reset_engine(rcs0:idle): 3000 resets
<3> [333.297092] i915/intel_hangcheck_live_selftests: igt_reset_engines failed 
with error -5
<3> [333.455460] i915 :00:02.0: Failed to idle engines, declaring wedged!
...
<0> [333.491294] i915_sel-49161 333262143us : i915_reset_engine: rcs0 
flags=4
<0> [333.491328] i915_sel-49161 333262143us : execlists_reset_prepare: 
rcs0: depth<-0
<0> [333.491362] i915_sel-49161 333262143us : intel_engine_stop_cs: rcs0
<0> [333.491396] i915_sel-49161d..1 333262144us : process_csb: rcs0 cs-irq 
head=5, tail=5
<0> [333.491424] i915_sel-49161 333262145us : intel_gpu_reset: 
engine_mask=1
<0> [333.491454] kworker/-214 5 333262184us : 
i915_gem_switch_to_kernel_context: awake?=yes
<0> [333.491487] kworker/-214 5 333262192us : i915_request_add: rcs0 
fence 2afac:1522
<0> [333.491520] kworker/-214 5 333262193us : i915_request_add: marking 
(null) as active
<0> [333.491553] i915_sel-49161 333262199us : 
intel_engine_cancel_stop_cs: rcs0
<0> [333.491587] i915_sel-49161 333262199us : execlists_reset_finish: 
rcs0: depth->0

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_gem.h  | 7 ++-
 drivers/gpu/drm/i915/intel_lrc.c | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 74a2ddc1b52f..5c073fe73664 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -82,7 +82,7 @@ void i915_gem_unpark(struct drm_i915_private *i915);
 
 static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 {
-   if (atomic_inc_return(&t->count) == 1)
+   if (!atomic_fetch_inc(&t->count))
tasklet_unlock_wait(t);
 }
 
@@ -91,4 +91,9 @@ static inline bool __tasklet_is_enabled(const struct 
tasklet_struct *t)
 

Re: [Intel-gfx] [PATCH 3/7] drm/i915: Extract ilk_csc_limited_range()

2019-03-13 Thread Ville Syrjälä
On Wed, Mar 13, 2019 at 03:30:43PM +, Shankar, Uma wrote:
> 
> 
> >-Original Message-
> >From: Ville Syrjala [mailto:ville.syrj...@linux.intel.com]
> >Sent: Tuesday, February 19, 2019 1:02 AM
> >To: intel-gfx@lists.freedesktop.org
> >Cc: Shankar, Uma ; Roper, Matthew D
> >
> >Subject: [PATCH 3/7] drm/i915: Extract ilk_csc_limited_range()
> >
> >From: Ville Syrjälä 
> >
> >Extract a helper which determines if we need to use the pipe CSC for limited 
> >range
> >RGB output.
> >
> >Signed-off-by: Ville Syrjälä 
> >---
> > drivers/gpu/drm/i915/intel_color.c | 22 ++
> > 1 file changed, 14 insertions(+), 8 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_color.c 
> >b/drivers/gpu/drm/i915/intel_color.c
> >index 93428d86510a..ddc48c0d45ac 100644
> >--- a/drivers/gpu/drm/i915/intel_color.c
> >+++ b/drivers/gpu/drm/i915/intel_color.c
> >@@ -161,22 +161,28 @@ static void ilk_load_ycbcr_conversion_matrix(struct
> >intel_crtc *crtc)
> > }
> > }
> >
> >+static bool ilk_csc_limited_range(const struct intel_crtc_state
> >+*crtc_state) {
> >+struct drm_i915_private *dev_priv =
> >+to_i915(crtc_state->base.crtc->dev);
> >+
> >+/*
> >+ * FIXME if there's a gamma LUT after the CSC, we should
> >+ * do the range compression using the gamma LUT instead.
> >+ */
> >+return crtc_state->limited_color_range &&
> >+(IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
> >+ IS_GEN_RANGE(dev_priv, 9, 10));
> 
> We should include Gen8 also to this list. Is it intentional to drop that ?

IS_BROADWELL is the gen8 we care about.

> With this fixed or justified reasoning, 
> Reviewed-by: Uma Shankar 
> 
> >+}
> >+
> > static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state)  
> > {
> > struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> >-bool limited_color_range = false;
> >+bool limited_color_range = ilk_csc_limited_range(crtc_state);
> > enum pipe pipe = crtc->pipe;
> > u16 coeffs[9] = {};
> > int i;
> >
> >-/*
> >- * FIXME if there's a gamma LUT after the CSC, we should
> >- * do the range compression using the gamma LUT instead.
> >- */
> >-if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv))
> >-limited_color_range = crtc_state->limited_color_range;
> >-
> > if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ||
> > crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) {
> > ilk_load_ycbcr_conversion_matrix(crtc);
> >--
> >2.19.2
> 

-- 
Ville Syrjälä
Intel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  1   2   >