Re: [Intel-gfx] [PATCH 1/1] drm/i915: gracefully reject mmap of huge tiled objects
On Thu, Jun 30, 2016 at 05:04:42PM -0700, James Xiong wrote: > From: "Xiong, James" > > currently mmap of a tiled object that is larger than mappable > aperture is rejected in fault handler, and causes sigbus error > and application crash. Please note that SIGBUS can be returned at any time. If your application doesn't handle it, please fix that. > This commit rejects it in mmap instead so that the client has > chance to handle the failure. Wrong. Please review the patches to fix this correctly. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC v2] drm/i915/chv: Clip cursor for CHV pipe C HW Cursor pos < 0
On 06/29/2016 06:24 PM, Shobhit Kumar wrote: From: Shobhit Kumar CHV pipe C hits underrun when we get negative crtc_x values of cursor. To avoid this we clip and shift the cursor image by negative crtc_x value. v2: Make a copy of cursor plane state and allocate new gem object and fb for clipped cursor and use that in case of negative cursor position v3: Updated error handling Pin the gem object before use. Need someone to look at this patch. Daniel does this align with your suggestions ? Regards Shobhit Signed-off-by: Akshu Agrawal Signed-off-by: Shobhit Kumar --- drivers/gpu/drm/i915/i915_drv.h | 7 ++ drivers/gpu/drm/i915/intel_display.c | 131 ++- 2 files changed, 137 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 724d34b..1e59c02 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2041,6 +2041,13 @@ struct drm_i915_private { struct intel_encoder *dig_port_map[I915_MAX_PORTS]; /* + * Temporary copy of cursor plane state for CHV PIPE_C + * Will be initialized only when crtc_x < 0 as there is a + * HW bug causing pipe underrun + */ + struct intel_plane_state *cursor_state; + + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. 
*/ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c3b5dc8..e6c103a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14456,6 +14456,132 @@ intel_update_cursor_plane(struct drm_plane *plane, intel_crtc_update_cursor(crtc, state); } +static void +intel_update_chv_pipe_c_cursor_plane(struct drm_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *state) +{ + struct drm_crtc *crtc = crtc_state->base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb); + struct drm_i915_gem_object *cur_obj = NULL, *use_obj = NULL; + uint32_t addr; + struct intel_plane_state *cursor_state = dev_priv->cursor_state; + const struct intel_plane_state *use_state; + char __iomem *src, *dst; + bool pinned = true; + + if (state->visible && state->base.crtc_x < 0) { + int bytes_per_pixel = state->base.fb->bits_per_pixel / 8; + int x = state->base.crtc_x; + int width = state->base.crtc_w; + int height = state->base.crtc_h; + struct drm_mode_fb_cmd2 mode_cmd = { 0 }; + int i; + + if (!cursor_state) { + cursor_state = kzalloc(sizeof(*cursor_state), GFP_KERNEL); + if (!cursor_state) { + use_state = state; + use_obj = obj; + goto update; + } + + memcpy(cursor_state, state, sizeof(*state)); + + /* Allocate new gem object */ + cur_obj = i915_gem_object_create(dev, obj->base.size); + if (IS_ERR(cur_obj)) + goto gem_err; + + mode_cmd.width = cursor_state->base.fb->width; + mode_cmd.height = cursor_state->base.fb->height; + mode_cmd.pitches[0] = cursor_state->base.fb->pitches[0]; + mode_cmd.pixel_format = cursor_state->base.fb->pixel_format; + + cursor_state->base.fb = intel_framebuffer_create(dev, &mode_cmd, cur_obj); + if (IS_ERR(cursor_state->base.fb)) { + 
drm_gem_object_unreference_unlocked(&cur_obj->base); + goto gem_err; + } + + if (i915_gem_obj_ggtt_pin(cur_obj, 0, 0) < 0) { + drm_gem_object_unreference_unlocked(&cur_obj->base); + pinned = false; + goto cleanup; + } + + dev_priv->cursor_state = cursor_state; + } else + cur_obj = intel_fb_obj(cursor_state->base.fb); + + src = ioremap_wc(dev_priv->ggtt.mappable_base + + i915_gem_obj_ggtt_offset(obj), + obj->base.size); + + dst = ioremap_wc(dev_priv->ggtt.mappable_base + + i915_gem_obj_ggtt_offset(cur_obj), + cur_obj->base.size); + +
[Intel-gfx] [PATCH 1/2] drm/i915/ringbuffer: Move all generic engine->dispatch_batchbuffer together
Consolidate the block of default vfuncs for dispatching the batchbuffer. Just a minor tweak on top of Tvrtko's great job of tidying up the vfunc initialisation. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 ++- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4d61ea923154..caebe812d10f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2991,25 +2991,29 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; - if (INTEL_GEN(dev_priv) >= 8) { - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + engine->add_request = i9xx_add_request; + if (INTEL_GEN(dev_priv) >= 6) engine->add_request = gen6_add_request; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { + + if (INTEL_GEN(dev_priv) >= 8) + engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + else if (INTEL_GEN(dev_priv) >= 6) engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; - engine->add_request = gen6_add_request; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else { + else if (INTEL_GEN(dev_priv) >= 4) engine->dispatch_execbuffer = i965_dispatch_execbuffer; - engine->add_request = i9xx_add_request; - } + else if (IS_I830(dev_priv) || IS_845G(dev_priv)) + engine->dispatch_execbuffer = i830_dispatch_execbuffer; + else + engine->dispatch_execbuffer = i915_dispatch_execbuffer; if (INTEL_GEN(dev_priv) >= 8) { engine->irq_get = gen8_ring_get_irq; engine->irq_put = gen8_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 6) { engine->irq_get = gen6_ring_get_irq; engine->irq_put = gen6_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 5) { engine->irq_get = 
gen5_ring_get_irq; engine->irq_put = gen5_ring_put_irq; @@ -3069,10 +3073,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (IS_HASWELL(dev_priv)) engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; - else if (IS_I830(dev_priv) || IS_845G(dev_priv)) - engine->dispatch_execbuffer = i830_dispatch_execbuffer; - else if (INTEL_GEN(dev_priv) <= 3) - engine->dispatch_execbuffer = i915_dispatch_execbuffer; + engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915/ringbuffer: Move all default irq vfuncs init to a separate func
Just plonk all the default irq vfuncs together in one function to keep the initialisers of reasonable size. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 43 ++--- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index caebe812d10f..24cdc920f4b4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2983,6 +2983,29 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, } } +static void intel_ring_init_irq(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + if (INTEL_GEN(dev_priv) >= 8) { + engine->irq_get = gen8_ring_get_irq; + engine->irq_put = gen8_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else if (INTEL_GEN(dev_priv) >= 6) { + engine->irq_get = gen6_ring_get_irq; + engine->irq_put = gen6_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else if (INTEL_GEN(dev_priv) >= 5) { + engine->irq_get = gen5_ring_get_irq; + engine->irq_put = gen5_ring_put_irq; + } else if (INTEL_GEN(dev_priv) >= 3) { + engine->irq_get = i9xx_ring_get_irq; + engine->irq_put = i9xx_ring_put_irq; + } else { + engine->irq_get = i8xx_ring_get_irq; + engine->irq_put = i8xx_ring_put_irq; + } +} + static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { @@ -3006,25 +3029,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, else engine->dispatch_execbuffer = i915_dispatch_execbuffer; - if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { - engine->irq_get = gen6_ring_get_irq; - engine->irq_put = gen6_ring_put_irq; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 5) { - 
engine->irq_get = gen5_ring_get_irq; - engine->irq_put = gen5_ring_put_irq; - } else if (INTEL_GEN(dev_priv) >= 3) { - engine->irq_get = i9xx_ring_get_irq; - engine->irq_put = i9xx_ring_put_irq; - } else { - engine->irq_get = i8xx_ring_get_irq; - engine->irq_put = i8xx_ring_put_irq; - } - + intel_ring_init_irq(dev_priv, engine); intel_ring_init_semaphores(dev_priv, engine); } -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915/ringbuffer: Move all generic engine->dispatch_batchbuffer together
On 01/07/16 09:18, Chris Wilson wrote: Consolidate the block of default vfuncs for dispatching the batchbuffer. Just a minor tweak on top of Tvrtko's great job of tidying up the vfunc initialisation. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 ++- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4d61ea923154..caebe812d10f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2991,25 +2991,29 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; - if (INTEL_GEN(dev_priv) >= 8) { - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + engine->add_request = i9xx_add_request; + if (INTEL_GEN(dev_priv) >= 6) engine->add_request = gen6_add_request; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { + + if (INTEL_GEN(dev_priv) >= 8) + engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + else if (INTEL_GEN(dev_priv) >= 6) engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; - engine->add_request = gen6_add_request; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else { + else if (INTEL_GEN(dev_priv) >= 4) engine->dispatch_execbuffer = i965_dispatch_execbuffer; - engine->add_request = i9xx_add_request; - } + else if (IS_I830(dev_priv) || IS_845G(dev_priv)) + engine->dispatch_execbuffer = i830_dispatch_execbuffer; + else + engine->dispatch_execbuffer = i915_dispatch_execbuffer; if (INTEL_GEN(dev_priv) >= 8) { engine->irq_get = gen8_ring_get_irq; engine->irq_put = gen8_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; } else if (INTEL_GEN(dev_priv) >= 6) { engine->irq_get = gen6_ring_get_irq; engine->irq_put = gen6_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; } else if 
(INTEL_GEN(dev_priv) >= 5) { engine->irq_get = gen5_ring_get_irq; engine->irq_put = gen5_ring_put_irq; @@ -3069,10 +3073,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (IS_HASWELL(dev_priv)) engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; - else if (IS_I830(dev_priv) || IS_845G(dev_priv)) - engine->dispatch_execbuffer = i830_dispatch_execbuffer; - else if (INTEL_GEN(dev_priv) <= 3) - engine->dispatch_execbuffer = i915_dispatch_execbuffer; + engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; I was planning to do this today after your comment from yesterday, which I was agreeing with, but you beat me to it. No complaints about that. :) Looks correct. Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [1/2] drm/i915/ringbuffer: Move all generic engine->dispatch_batchbuffer together
== Series Details == Series: series starting with [1/2] drm/i915/ringbuffer: Move all generic engine->dispatch_batchbuffer together URL : https://patchwork.freedesktop.org/series/9357/ State : failure == Summary == Series 9357v1 Series without cover letter http://patchwork.freedesktop.org/api/1.0/series/9357/revisions/1/mbox Test gem_exec_flush: Subgroup basic-batch-kernel-default-cmd: pass -> FAIL (ro-byt-n2820) Test kms_pipe_crc_basic: Subgroup hang-read-crc-pipe-a: pass -> INCOMPLETE (fi-skl-i5-6260u) Subgroup hang-read-crc-pipe-b: pass -> INCOMPLETE (fi-skl-i7-6700k) Subgroup suspend-read-crc-pipe-a: dmesg-warn -> SKIP (ro-bdw-i5-5250u) Subgroup suspend-read-crc-pipe-b: pass -> INCOMPLETE (fi-hsw-i7-4770k) dmesg-warn -> SKIP (ro-bdw-i5-5250u) fi-hsw-i7-4770k total:197 pass:178 dwarn:0 dfail:0 fail:0 skip:18 fi-kbl-qkkr total:229 pass:160 dwarn:29 dfail:0 fail:0 skip:40 fi-skl-i5-6260u total:192 pass:181 dwarn:0 dfail:0 fail:0 skip:10 fi-skl-i7-6700k total:198 pass:173 dwarn:0 dfail:0 fail:0 skip:24 fi-snb-i7-2600 total:229 pass:176 dwarn:0 dfail:0 fail:0 skip:53 ro-bdw-i5-5250u total:229 pass:204 dwarn:2 dfail:1 fail:0 skip:22 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:176 dwarn:1 dfail:1 fail:2 skip:49 ro-byt-n2820 total:229 pass:180 dwarn:0 dfail:1 fail:3 skip:45 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-ivb2-i7-3770 total:229 pass:192 dwarn:0 dfail:1 fail:0 skip:36 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 ro-hsw-i3-4010u failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1351/ b538380 
drm-intel-nightly: 2016y-06m-30d-16h-21m-05s UTC integration manifest fd4430f8 drm/i915/ringbuffer: Move all default irq vfuncs init to a separate func 8c46988 drm/i915/ringbuffer: Move all generic engine->dispatch_batchbuffer together ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915/ringbuffer: Move all default irq vfuncs init to a separate func
On 01/07/16 09:18, Chris Wilson wrote: Just plonk all the default irq vfuncs together in one function to keep the initialisers of reasonable size. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 43 ++--- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index caebe812d10f..24cdc920f4b4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2983,6 +2983,29 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, } } +static void intel_ring_init_irq(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + if (INTEL_GEN(dev_priv) >= 8) { + engine->irq_get = gen8_ring_get_irq; + engine->irq_put = gen8_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else if (INTEL_GEN(dev_priv) >= 6) { + engine->irq_get = gen6_ring_get_irq; + engine->irq_put = gen6_ring_put_irq; + engine->irq_seqno_barrier = gen6_seqno_barrier; + } else if (INTEL_GEN(dev_priv) >= 5) { + engine->irq_get = gen5_ring_get_irq; + engine->irq_put = gen5_ring_put_irq; + } else if (INTEL_GEN(dev_priv) >= 3) { + engine->irq_get = i9xx_ring_get_irq; + engine->irq_put = i9xx_ring_put_irq; + } else { + engine->irq_get = i8xx_ring_get_irq; + engine->irq_put = i8xx_ring_put_irq; + } +} + static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { @@ -3006,25 +3029,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, else engine->dispatch_execbuffer = i915_dispatch_execbuffer; - if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_get = gen8_ring_get_irq; - engine->irq_put = gen8_ring_put_irq; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { - engine->irq_get = gen6_ring_get_irq; - engine->irq_put = gen6_ring_put_irq; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } 
else if (INTEL_GEN(dev_priv) >= 5) { - engine->irq_get = gen5_ring_get_irq; - engine->irq_put = gen5_ring_put_irq; - } else if (INTEL_GEN(dev_priv) >= 3) { - engine->irq_get = i9xx_ring_get_irq; - engine->irq_put = i9xx_ring_put_irq; - } else { - engine->irq_get = i8xx_ring_get_irq; - engine->irq_put = i8xx_ring_put_irq; - } - + intel_ring_init_irq(dev_priv, engine); intel_ring_init_semaphores(dev_priv, engine); } Yes, that's better. Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [1/2] drm/i915/ringbuffer: Move all generic engine->dispatch_batchbuffer together
On Fri, Jul 01, 2016 at 08:41:03AM -0000, Patchwork wrote: > == Series Details == > > Series: series starting with [1/2] drm/i915/ringbuffer: Move all generic > engine->dispatch_batchbuffer together > URL : https://patchwork.freedesktop.org/series/9357/ > State : failure > > == Summary == > > Series 9357v1 Series without cover letter > http://patchwork.freedesktop.org/api/1.0/series/9357/revisions/1/mbox > > Test gem_exec_flush: > Subgroup basic-batch-kernel-default-cmd: > pass -> FAIL (ro-byt-n2820) > Test kms_pipe_crc_basic: > Subgroup hang-read-crc-pipe-a: > pass -> INCOMPLETE (fi-skl-i5-6260u) > Subgroup hang-read-crc-pipe-b: > pass -> INCOMPLETE (fi-skl-i7-6700k) > Subgroup suspend-read-crc-pipe-a: > dmesg-warn -> SKIP (ro-bdw-i5-5250u) > Subgroup suspend-read-crc-pipe-b: > pass -> INCOMPLETE (fi-hsw-i7-4770k) > dmesg-warn -> SKIP (ro-bdw-i5-5250u) Oh, today's going to be one of those days where kms_pipe_crc_basic randomly explodes. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/11] drm/i915: Support for GuC interrupts
On 01/07/16 07:16, Goel, Akash wrote: [snip] +/* Process all the GuC to Host events in bottom half */ +gen6_disable_pm_irq(dev_priv, +GEN9_GUC_TO_HOST_INT_EVENT); Why is it important to disable the interrupt here? Not for the queue work I think. We want to & can handle one interrupt at a time, unless the queued work item is executed we can't process the next interrupt, so better to keep the interrupt masked. Sorry, this is my understanding. So it is queued in hardware and will get asserted when unmasked? As per my understanding, if the interrupt is masked (IMR), it won't be queued, will be ignored & so will not be asserted on unmasking. If the interrupt wasn't masked, but was disabled (in IER) then it will be asserted (in IIR) when it's enabled. Also, is it safe with regards to potentially losing the interrupt? Particularly for the FLUSH_LOG_BUFFER case, GuC won't send a new flush interrupt unless it gets an acknowledgement (flush signal) of the previous one from Host. Ah so the previous comment is really impossible? I mean the need to mask? Sorry my comments were not fully correct. GuC can send a new flush interrupt, even if the previous one is pending, but that will be for a different log buffer type (3 types of log buffer: ISR, DPC, CRASH). For the same buffer type, GuC won't send a new flush interrupt unless it gets an acknowledgement of the previous one from Host. But as you said the workqueue is ordered and furthermore there is a single instance of work item, so the serialization will be provided implicitly and there is no real need to mask the interrupt. As mentioned above, a new flush interrupt can come while the previous one is being processed on Host but due to a single instance of work item either that new interrupt will not do anything effectively if work item was in a pending state or will re-queue the work item if it was getting executed at that time.
Also the state of all 3 log buffer types is being parsed irrespective of which one the interrupt actually came for, and the whole buffer is being captured (this is how it has been recommended to handle the flush interrupts from Host side). So if a new interrupt comes while the work item was in a pending state, then effectively work of this new interrupt will also be done when work item is executed later. So will remove the masking then? I think so, because if I understood what you wrote, masking can lose us an interrupt. Possibly just put a comment up there explaining that. +queue_work(dev_priv->wq, &dev_priv->guc.events_work); Because dev_priv->wq is a one-at-a-time, in-order wq, so if something else is running on it and taking time, can that also be a cause of dropping an interrupt or being late with sending the flush signal to the guc and so losing some logs? It's the driver's private workqueue and Turbo work item is also queued inside this workqueue which too needs to be executed without much delay. But yes the flush work item can get substantially delayed if there are other work items queued before it, especially the mm.retire_work (but generally executes every ~1 second). Best would be if the log buffer (44KB data) can be sampled in IRQ context (or Tasklet context) itself. I was just trying to understand if you perhaps need a dedicated wq. I don't have a feel at all on how much data guc logging generates per second. If the interrupt is low frequency even with a lot of cmd submission happening it could be fine like it is. Actually with maximum verbosity level, I am seeing flush interrupt every ms, with 'gem_exec_nop' IGT, as there are a lot of submissions being done. But such may not happen in a real-life scenario. I think, if needed, later on we can either have a dedicated high priority work queue for logging work or use the tasklet context to do the processing. Hm, do you need to add some DRM_ERROR or something if wq starts lagging behind the flush interrupts?
How many missed flush interrupts can we afford before the logging buffer starts getting overwritten? Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [1/2] Revert "drm/i915/kbl: drm/i915: Avoid GuC loading for now on Kabylake."
On 01/07/16 06:20, Patchwork wrote: > == Series Details == > > Series: series starting with [1/2] Revert "drm/i915/kbl: drm/i915: Avoid GuC > loading for now on Kabylake." > URL : https://patchwork.freedesktop.org/series/9332/ > State : failure > > == Summary == > > Series 9332v1 Series without cover letter > http://patchwork.freedesktop.org/api/1.0/series/9332/revisions/1/mbox > > Test drv_hangman: > Subgroup error-state-basic: > pass -> FAIL (ro-skl3-i5-6260u) Looks like there is no firmware on the CI machine? [8.232519] [drm:intel_guc_init] GuC firmware pending, path i915/skl_guc_ver6_1.bin [8.232520] [drm:guc_fw_fetch] before requesting firmware: GuC fw fetch status PENDING [8.232889] i915 0000:00:02.0: Direct firmware load for i915/skl_guc_ver6_1.bin failed with error -2 [8.232928] [drm:guc_fw_fetch] GuC fw fetch status FAIL; err -2, fw (null), obj (null) [8.232959] [drm:intel_guc_init [i915]] *ERROR* Failed to fetch GuC firmware from i915/skl_guc_ver6_1.bin (error -2) It used to be there, I thought.
Regards, Tvrtko > Test drv_module_reload_basic: > dmesg-warn -> DMESG-FAIL (ro-skl3-i5-6260u) > Test gem_busy: > Subgroup basic-blt: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-bsd: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-bsd1: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-bsd2: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-parallel-blt: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-parallel-bsd: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-parallel-bsd1: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-parallel-bsd2: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-parallel-render: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-parallel-vebox: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-render: > pass -> SKIP (ro-skl3-i5-6260u) > Subgroup basic-vebox: > pass -> SKIP (ro-skl3-i5-6260u) > Test gem_cpu_reloc: > Subgroup basic: > pass -> FAIL (ro-skl3-i5-6260u) > Test gem_cs_tlb: > Subgroup basic-default: > pass -> FAIL (ro-skl3-i5-6260u) > Test gem_ctx_create: > Subgroup basic-files: > pass -> FAIL (ro-skl3-i5-6260u) > Test gem_ctx_exec: > Subgroup basic: > pass -> FAIL (ro-skl3-i5-6260u) > Test gem_ctx_switch: > Subgroup basic-default: > pass -> SKIP (ro-skl3-i5-6260u) > Test gem_exec_basic: > Subgroup basic-blt: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup basic-bsd: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup basic-bsd1: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup basic-bsd2: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup basic-default: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup basic-render: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup basic-vebox: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup gtt-blt: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup gtt-bsd: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup gtt-bsd1: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup gtt-bsd2: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup gtt-default: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup gtt-render: > pass -> FAIL (ro-skl3-i5-6260u) 
> Subgroup gtt-vebox: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup readonly-blt: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup readonly-bsd: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup readonly-bsd1: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup readonly-bsd2: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup readonly-default: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup readonly-render: > pass -> FAIL (ro-skl3-i5-6260u) > Subgroup readonly-vebox: >
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [1/2] Revert "drm/i915/kbl: drm/i915: Avoid GuC loading for now on Kabylake."
On Fri, Jul 01, 2016 at 09:52:05AM +0100, Tvrtko Ursulin wrote: > > On 01/07/16 06:20, Patchwork wrote: > > == Series Details == > > > > Series: series starting with [1/2] Revert "drm/i915/kbl: drm/i915: Avoid > > GuC loading for now on Kabylake." > > URL : https://patchwork.freedesktop.org/series/9332/ > > State : failure > > > > == Summary == > > > > Series 9332v1 Series without cover letter > > http://patchwork.freedesktop.org/api/1.0/series/9332/revisions/1/mbox > > > > Test drv_hangman: > > Subgroup error-state-basic: > > pass -> FAIL (ro-skl3-i5-6260u) > > Looks like there is no firmware on the CI machine? > > [8.232519] [drm:intel_guc_init] GuC firmware pending, path > i915/skl_guc_ver6_1.bin > [8.232520] [drm:guc_fw_fetch] before requesting firmware: GuC fw fetch > status PENDING > [8.232889] i915 0000:00:02.0: Direct firmware load for > i915/skl_guc_ver6_1.bin failed with error -2 > [8.232928] [drm:guc_fw_fetch] GuC fw fetch status FAIL; err -2, fw >(null), obj (null) > [8.232959] [drm:intel_guc_init [i915]] *ERROR* Failed to fetch GuC > firmware from i915/skl_guc_ver6_1.bin (error -2) > > It used to be there I thought. Also, we still appear to be emitting an *ERROR* for what is handled, and considering the volatility of the firmware, the norm. Does the guc not yet emit the instructions on where to find the firmware [01.org]? -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [1/2] Revert "drm/i915/kbl: drm/i915: Avoid GuC loading for now on Kabylake."
On 01/07/16 09:52, Tvrtko Ursulin wrote: > > On 01/07/16 06:20, Patchwork wrote: >> == Series Details == >> >> Series: series starting with [1/2] Revert "drm/i915/kbl: drm/i915: Avoid GuC >> loading for now on Kabylake." >> URL : https://patchwork.freedesktop.org/series/9332/ >> State : failure >> >> == Summary == >> >> Series 9332v1 Series without cover letter >> http://patchwork.freedesktop.org/api/1.0/series/9332/revisions/1/mbox >> >> Test drv_hangman: >> Subgroup error-state-basic: >> pass -> FAIL (ro-skl3-i5-6260u) > > Looks like there is no firmware on the CI machine? > > [8.232519] [drm:intel_guc_init] GuC firmware pending, path > i915/skl_guc_ver6_1.bin > [8.232520] [drm:guc_fw_fetch] before requesting firmware: GuC fw fetch > status PENDING > [8.232889] i915 0000:00:02.0: Direct firmware load for > i915/skl_guc_ver6_1.bin failed with error -2 > [8.232928] [drm:guc_fw_fetch] GuC fw fetch status FAIL; err -2, fw >(null), obj (null) > [8.232959] [drm:intel_guc_init [i915]] *ERROR* Failed to fetch GuC > firmware from i915/skl_guc_ver6_1.bin (error -2) > > It used to be there I thought. Also because later it goes: [8.235600] [drm:intel_guc_setup [i915]] *ERROR* GuC firmware load failed: -5 [8.235625] [drm:intel_guc_setup [i915]] *ERROR* GuC init failed: -5 [8.235658] [drm:i915_gem_init [i915]] *ERROR* Failed to initialize GPU, declaring it wedged That would mean either i915.enable_guc_loading or i915.enable_guc_submission is set to 2 on that machine. It is not in grub, maybe in module options? Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915/bxt: Fix sanity check for BIOS RC6 setup
On pe, 2016-07-01 at 12:19 +0530, Kamble, Sagar A wrote: > Have seen BIOS having option "RC6" disabled and "GTPM" enabled for cases > where there are RC6 specific issues. It's possible although I haven't seen any based on the specs I have and the tests I ran. In any case the checks I added should catch any such missing setup and if there is something on top of that we need to add those (along with an update to the specification). > GTPM option entails setup for other features as well I guess. Yes, it affects RPS setup too, but my point was that disabling it is what leaves RC6 unconfigured. I guess this doesn't really matter in the end, the main thing is that we check all the RC6 specific registers. > In such cases - Can we output some DRM_INFO log saying BIOS has disabled > RC6 although setup is available. Yes, can add that, but since it's something we'd need for debugging I'd use DRM_DEBUG. > Do we need to also check for other unit level clock gating register > setup done by BIOS like: GEN7_MISCCPCTL, GEN6_UCGCTL1 to > GEN6_UCGCTL4, > GEN8_UCGCTL6 etc. These are subject to change with later HW steppings. In any case their default value is the more conservative scenario with clock gating disabled, which should still allow RC6 functionality. They can be also enabled/disabled separately from the GTPM option in BIOS setup (via sub-options to GTPM), something we haven't checked so far anyway. Are you ok if I add a debug print for these too? --Imre > > Thanks > Sagar > > > On 6/29/2016 9:43 PM, Imre Deak wrote: > > BXT BIOS has two options related to GPU power management: > > "RC6(Render > > Standby)" and "GT PM Support". The assumption so far was that > > disabling > > either of these options would leave RC6 uninitialized. According to > > my > > tests this isn't so: for a proper RC6 setup we only need the "GT PM > > Support" option to be enabled while the "RC6" option only controls > > whether RC6 is left enabled or not by BIOS. 
OTOH we were missing a > > few > > checks to ensure a proper RC6 setup. Add these now and don't fail > > the > > sanity check if RC6 is disabled. This fixes a problem where RC6 > > remains > > disabled after reloading the driver, since we explicitly disable > > RC6 > > during unloading. > > > > CC: Sagar Arun Kamble > > Signed-off-by: Imre Deak > > --- > > drivers/gpu/drm/i915/i915_reg.h | 5 + > > drivers/gpu/drm/i915/intel_pm.c | 19 ++- > > 2 files changed, 19 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h > > b/drivers/gpu/drm/i915/i915_reg.h > > index c6bfbf8..92b4046 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -7085,12 +7085,17 @@ enum { > > #define GEN6_RC6pp_THRESHOLD _MMIO(0xA0C0) > > #define GEN6_PMINTRMSK_MMIO(0xA16 > > 8) > > #define GEN8_PMINTR_REDIRECT_TO_NON_DISP(1<<31) > > +#define GEN8_MISC_CTRL0_MMIO(0xA18 > > 0) > > #define VLV_PWRDWNUPCTL _MMIO(0xA2 > > 94) > > #define GEN9_MEDIA_PG_IDLE_HYSTERESIS _MMIO(0xA0C4 > > ) > > #define GEN9_RENDER_PG_IDLE_HYSTERESIS_MMIO(0xA0C > > 8) > > #define GEN9_PG_ENABLE_MMIO(0xA21 > > 0) > > #define GEN9_RENDER_PG_ENABLE (1<<0) > > #define GEN9_MEDIA_PG_ENABLE (1<<1) > > +#define GEN8_PUSHBUS_CONTROL _MMIO(0xA248) > > +#define GEN8_PUSHBUS_ENABLE_MMIO(0xA250) > > +#define GEN8_PUSHBUS_SHIFT _MMIO(0xA25C) > > + > > > > #define VLV_CHICKEN_3 _MMIO(VLV_DI > > SPLAY_BASE + 0x7040C) > > #define PIXEL_OVERLAP_CNT_MASK (3 << 30) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > > b/drivers/gpu/drm/i915/intel_pm.c > > index 5dce264..fe76991 100644 > > --- a/drivers/gpu/drm/i915/intel_pm.c > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > @@ -5015,11 +5015,20 @@ static bool bxt_check_bios_rc6_setup(struct > > drm_i915_private *dev_priv) > > enable_rc6 = false; > > } > > > > - if (!(I915_READ(GEN6_RC_CONTROL) & (GEN6_RC_CTL_RC6_ENABLE > > | > > - GEN6_RC_CTL_HW_ENABLE) > > ) && > > - ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) > 
> || > > - !(I915_READ(GEN6_RC_STATE) & RC6_STATE))) { > > - DRM_DEBUG_DRIVER("HW/SW RC6 is not enabled by > > BIOS.\n"); > > + if (!I915_READ(GEN8_PUSHBUS_CONTROL) || > > + !I915_READ(GEN8_PUSHBUS_ENABLE) || > > + !I915_READ(GEN8_PUSHBUS_SHIFT)) { > > + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); > > + enable_rc6 = false; > > + } > > + > > + if (!I915_READ(GEN6_GFXPAUSE)) { > > + DRM_DEBUG_DRIVER("GFX pause not setup > > properly.\n"); > > + enable_rc6 = false; > > + } > > + > > + if (!I915_R
[Intel-gfx] [PATCH] drm/i915/guc: Demote some firmware loading messages to debug
From: Tvrtko Ursulin These messages are not errors unless GuC loading or submission is in the mandatory mode and even then the final status will be logged as error in intel_guc_setup. Therefore demote the messages in guc_fw_fetch to DRM_DEBUG_DRIVER. If more detail about the cause of the fail is required users will be asked to dial up the debug level. Signed-off-by: Tvrtko Ursulin Reported-by: Chris Wilson Cc: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 14 ++ 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 4f6311a91a7e..55fefd53305b 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -568,7 +568,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { - DRM_ERROR("Firmware header is missing\n"); + DRM_DEBUG_DRIVER("Firmware header is missing\n"); goto fail; } @@ -580,7 +580,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { - DRM_ERROR("CSS header definition mismatch\n"); + DRM_DEBUG_DRIVER("CSS header definition mismatch\n"); goto fail; } @@ -590,7 +590,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_ERROR("RSA key size is bad\n"); + DRM_DEBUG_DRIVER("RSA key size is bad\n"); goto fail; } guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -599,14 +599,14 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* At least, it should have header, uCode and RSA. Size of all three. 
*/ size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size; if (fw->size < size) { - DRM_ERROR("Missing firmware components\n"); + DRM_DEBUG_DRIVER("Missing firmware components\n"); goto fail; } /* Header and uCode will be loaded to WOPCM. Size of the two. */ size = guc_fw->header_size + guc_fw->ucode_size; if (size > guc_wopcm_size(dev->dev_private)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + DRM_DEBUG_DRIVER("Firmware is too large to fit in WOPCM\n"); goto fail; } @@ -621,7 +621,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) if (guc_fw->guc_fw_major_found != guc_fw->guc_fw_major_wanted || guc_fw->guc_fw_minor_found < guc_fw->guc_fw_minor_wanted) { - DRM_ERROR("GuC firmware version %d.%d, required %d.%d\n", + DRM_DEBUG_DRIVER("GuC firmware version %d.%d, required %d.%d\n", guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found, guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); err = -ENOEXEC; @@ -653,8 +653,6 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) fail: DRM_DEBUG_DRIVER("GuC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, guc_fw->guc_fw_obj); - DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n", - guc_fw->guc_fw_path, err); mutex_lock(&dev->struct_mutex); obj = guc_fw->guc_fw_obj; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/guc: Demote some firmware loading messages to debug
On Fri, Jul 01, 2016 at 10:35:12AM +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > These messages are not errors unless GuC loading or submission is > in the mandatory mode and even then the final status will be > logged as error in intel_guc_setup. > > Therefore demote the messages in guc_fw_fetch to DRM_DEBUG_DRIVER. > > If more detail about the cause of the fail is required users will > be asked to dial up the debug level. > > Signed-off-by: Tvrtko Ursulin > Reported-by: Chris Wilson > Cc: Dave Gordon It removes the error I see and errors from firmware not driver, so Acked-by: Chris Wilson -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/bxt: Export pooled eu info to userspace
Pooled EU is a bxt only feature and kernel changes are already merged. This feature is not yet exposed to userspace as the support was not yet available. Beignet team expressed interest and added patches to use this. Since we now have a user and patches to use them, expose them from the kernel side as well. [1] https://lists.freedesktop.org/archives/beignet/2016-June/007698.html [2] https://lists.freedesktop.org/archives/beignet/2016-June/007699.html Cc: Winiarski, Michal Cc: Zou, Nanhai Cc: Yang, Rong R Cc: Tim Gore Cc: Jeff McGee Signed-off-by: Arun Siluvery --- drivers/gpu/drm/i915/i915_drv.c | 6 ++ include/uapi/drm/i915_drm.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c580e24..8a26740 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -365,6 +365,12 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_HAS_POOLED_EU: + value = HAS_POOLED_EU(dev); + break; + case I915_PARAM_MIN_EU_IN_POOL: + value = INTEL_INFO(dev)->min_eu_in_pool; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c17d63d..905880d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -361,6 +361,8 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_GPU_RESET35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 #define I915_PARAM_HAS_EXEC_SOFTPIN 37 +#define I915_PARAM_HAS_POOLED_EU38 +#define I915_PARAM_HAS_MIN_EU_IN_POOL 39 typedef struct drm_i915_getparam { __s32 param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/11] drm/i915: Support for GuC interrupts
On 7/1/2016 2:17 PM, Tvrtko Ursulin wrote: On 01/07/16 07:16, Goel, Akash wrote: [snip] +/* Process all the GuC to Host events in bottom half */ +gen6_disable_pm_irq(dev_priv, +GEN9_GUC_TO_HOST_INT_EVENT); Why is it important to disable the interrupt here? Not for the queue work I think. We want to & can handle one interrupt at a time, unless the queued work item is executed we can't process the next interrupt, so better to keep the interrupt masked. Sorry, this is my understanding. So it is queued in hardware and will get asserted when unmasked? As per my understanding, if the interrupt is masked (IMR), it won't be queued, will be ignored & so will not be asserted on unmasking. If the interrupt wasn't masked, but was disabled (in IER) then it will be asserted (in IIR) when it's enabled. Also, is it safe with regards to potentially losing the interrupt? Particularly for the FLUSH_LOG_BUFFER case, GuC won't send a new flush interrupt unless it gets an acknowledgement (flush signal) of the previous one from Host. Ah so the previous comment is really impossible? I mean the need to mask? Sorry my comments were not fully correct. GuC can send a new flush interrupt, even if the previous one is pending, but that will be for a different log buffer type (3 types of log buffer ISR, DPC, CRASH). For the same buffer type, GuC won't send a new flush interrupt unless it gets an acknowledgement of the previous one from Host. But as you said the workqueue is ordered and furthermore there is a single instance of work item, so the serialization will be provided implicitly and there is no real need to mask the interrupt. As mentioned above, a new flush interrupt can come while the previous one is being processed on Host but due to a single instance of work item either that new interrupt will not do anything effectively if work item was in a pending state or will re-queue the work item if it was getting executed at that time.
Also the state of all 3 log buffer types is being parsed irrespective of which one the interrupt actually came for, and the whole buffer is being captured (this is how it has been recommended to handle the flush interrupts from Host side). So if a new interrupt comes while the work item was in a pending state, then effectively work of this new interrupt will also be done when work item is executed later. So will remove the masking then? I think so, because if I understood what you wrote, masking can lose us an interrupt. If a new flush interrupt comes while the work item was getting executed then there is a potential of losing an opportunity to sample the log buffer. Will not mask the interrupt. Thanks for persisting on this. Possibly just put a comment up there explaining that. +queue_work(dev_priv->wq, &dev_priv->guc.events_work); Because dev_priv->wq is a one-at-a-time, in-order wq so if something else is running on it and taking time, can that also be a cause of dropping an interrupt or being late with sending the flush signal to the guc and so losing some logs? It's a driver's private workqueue and Turbo work item is also queued inside this workqueue which too needs to be executed without much delay. But yes the flush work item can get substantially delayed in case there are other work items queued before it, especially the mm.retire_work (but generally executes every ~1 second). Best would be if the log buffer (44KB data) can be sampled in IRQ context (or Tasklet context) itself. I was just trying to understand if you perhaps need a dedicated wq. I don't have a feel at all on how much data guc logging generates per second. If the interrupt is low frequency even with a lot of cmd submission happening it could be fine like it is. Actually with maximum verbosity level, I am seeing flush interrupt every ms, with 'gem_exec_nop' IGT, as there are a lot of submissions being done. But such may not happen in a real-life scenario.
I think, if needed, later on we can either have a dedicated high priority work queue for logging work or use the tasklet context to do the processing. Hm, do you need to add some DRM_ERROR or something if wq starts lagging behind the flush interrupts? How many missed flush interrupts can we afford before the logging buffer starts getting overwritten? Actually if GuC is producing logs at such a fast rate then we can't afford to miss even a single interrupt, if we don't want to lose any logs. When the log buffer becomes half full, GuC sends a flush interrupt. GuC firmware expects that while it is writing to 2nd half of the buffer, first half would get consumed by Host and then get a flush completed acknowledgement from Host, so that it does not end up doing any overwrite causing loss of logs. There is a buffer_full_cnt field in the state structure which GuC firmware increments every time it detects a potential log buffer overflow. Probably this can be shown via d
[Intel-gfx] [PATCH v2] drm/i915/guc: Demote some firmware loading messages to debug
From: Tvrtko Ursulin These messages are not errors unless GuC loading or submission is in the mandatory mode and even then the final status will be logged as error in intel_guc_setup. Therefore demote the messages in guc_fw_fetch to DRM_DEBUG_DRIVER. If more detail about the cause of the fail is required users will be asked to dial up the debug level. v2: Demote signature error in guc_ucode_xfer_dma as well. Signed-off-by: Tvrtko Ursulin Reported-by: Chris Wilson Cc: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 4f6311a91a7e..120973ad1c11 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -285,7 +285,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv) I915_READ(DMA_CTRL), status); if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - DRM_ERROR("GuC firmware signature verification failed\n"); + DRM_DEBUG_DRIVER("GuC firmware signature verification failed\n"); ret = -ENOEXEC; } @@ -568,7 +568,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { - DRM_ERROR("Firmware header is missing\n"); + DRM_DEBUG_DRIVER("Firmware header is missing\n"); goto fail; } @@ -580,7 +580,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { - DRM_ERROR("CSS header definition mismatch\n"); + DRM_DEBUG_DRIVER("CSS header definition mismatch\n"); goto fail; } @@ -590,7 +590,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_ERROR("RSA key size is bad\n"); + 
DRM_DEBUG_DRIVER("RSA key size is bad\n"); goto fail; } guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -599,14 +599,14 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* At least, it should have header, uCode and RSA. Size of all three. */ size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size; if (fw->size < size) { - DRM_ERROR("Missing firmware components\n"); + DRM_DEBUG_DRIVER("Missing firmware components\n"); goto fail; } /* Header and uCode will be loaded to WOPCM. Size of the two. */ size = guc_fw->header_size + guc_fw->ucode_size; if (size > guc_wopcm_size(dev->dev_private)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + DRM_DEBUG_DRIVER("Firmware is too large to fit in WOPCM\n"); goto fail; } @@ -621,7 +621,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) if (guc_fw->guc_fw_major_found != guc_fw->guc_fw_major_wanted || guc_fw->guc_fw_minor_found < guc_fw->guc_fw_minor_wanted) { - DRM_ERROR("GuC firmware version %d.%d, required %d.%d\n", + DRM_DEBUG_DRIVER("GuC firmware version %d.%d, required %d.%d\n", guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found, guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); err = -ENOEXEC; @@ -653,8 +653,6 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) fail: DRM_DEBUG_DRIVER("GuC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, guc_fw->guc_fw_obj); - DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n", - guc_fw->guc_fw_path, err); mutex_lock(&dev->struct_mutex); obj = guc_fw->guc_fw_obj; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Ro.CI.BAT: warning for drm/i915/guc: Demote some firmware loading messages to debug
== Series Details == Series: drm/i915/guc: Demote some firmware loading messages to debug URL : https://patchwork.freedesktop.org/series/9366/ State : warning == Summary == Series 9366v1 drm/i915/guc: Demote some firmware loading messages to debug http://patchwork.freedesktop.org/api/1.0/series/9366/revisions/1/mbox Test drv_module_reload_basic: dmesg-warn -> PASS (ro-skl3-i5-6260u) Test kms_pipe_crc_basic: Subgroup nonblocking-crc-pipe-b: skip -> PASS (fi-skl-i5-6260u) Subgroup suspend-read-crc-pipe-a: skip -> DMESG-WARN (ro-bdw-i5-5250u) Subgroup suspend-read-crc-pipe-c: incomplete -> PASS (fi-hsw-i7-4770k) fi-hsw-i7-4770k total:229 pass:196 dwarn:0 dfail:0 fail:0 skip:33 fi-kbl-qkkr total:229 pass:160 dwarn:29 dfail:0 fail:0 skip:40 fi-skl-i5-6260u total:229 pass:204 dwarn:0 dfail:0 fail:0 skip:25 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 fi-snb-i7-2600 total:229 pass:176 dwarn:0 dfail:0 fail:0 skip:53 ro-bdw-i5-5250u total:229 pass:204 dwarn:3 dfail:1 fail:0 skip:21 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:177 dwarn:0 dfail:1 fail:2 skip:49 ro-byt-n2820 total:229 pass:180 dwarn:0 dfail:1 fail:3 skip:45 ro-hsw-i3-4010u total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-ivb2-i7-3770 total:229 pass:192 dwarn:0 dfail:1 fail:0 skip:36 ro-skl3-i5-6260u total:229 pass:209 dwarn:0 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 Results at /archive/results/CI_IGT_test/RO_Patchwork_1353/ ee2057a drm-intel-nightly: 2016y-07m-01d-08h-52m-58s UTC integration manifest 9019573 drm/i915/guc: Demote some firmware loading messages to debug 
___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Ro.CI.BAT: failure for drm/i915/bxt: Export pooled eu info to userspace
== Series Details == Series: drm/i915/bxt: Export pooled eu info to userspace URL : https://patchwork.freedesktop.org/series/9367/ State : failure == Summary == CC drivers/acpi/acpica/uthex.o CC drivers/acpi/acpica/utids.o CC drivers/acpi/acpica/utinit.o CC drivers/acpi/acpica/utlock.o CC drivers/acpi/acpica/utmath.o CC drivers/acpi/acpica/utmutex.o CC drivers/acpi/acpica/utnonansi.o CC drivers/acpi/acpica/utmisc.o CC drivers/acpi/acpica/utobject.o CC drivers/acpi/acpica/utosi.o CC drivers/acpi/acpica/utownerid.o CC drivers/acpi/acpica/utpredef.o CC drivers/acpi/acpica/utresrc.o CC drivers/acpi/acpica/utstate.o CC drivers/acpi/acpica/utstring.o CC drivers/acpi/acpica/utxface.o CC drivers/acpi/acpica/utxferror.o CC drivers/acpi/acpica/utxfinit.o CC drivers/acpi/acpica/utxfmutex.o LD lib/built-in.o LD net/ipv6/ipv6.o AR lib/lib.a LD drivers/acpi/acpica/acpi.o LD net/ipv6/built-in.o LD drivers/acpi/acpica/built-in.o LD net/built-in.o LD drivers/acpi/built-in.o Makefile:985: recipe for target 'drivers' failed make: *** [drivers] Error 2 make: *** Waiting for unfinished jobs ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Ro.CI.BAT: failure for drm/i915/guc: Demote some firmware loading messages to debug (rev2)
== Series Details == Series: drm/i915/guc: Demote some firmware loading messages to debug (rev2) URL : https://patchwork.freedesktop.org/series/9366/ State : failure == Summary == Series 9366v2 drm/i915/guc: Demote some firmware loading messages to debug http://patchwork.freedesktop.org/api/1.0/series/9366/revisions/2/mbox Test drv_module_reload_basic: dmesg-warn -> PASS (ro-skl3-i5-6260u) Test kms_flip: Subgroup basic-flip-vs-wf_vblank: pass -> FAIL (ro-bdw-i5-5250u) Test kms_pipe_crc_basic: Subgroup nonblocking-crc-pipe-b: skip -> PASS (fi-skl-i5-6260u) Subgroup suspend-read-crc-pipe-b: skip -> DMESG-WARN (ro-bdw-i5-5250u) Subgroup suspend-read-crc-pipe-c: incomplete -> PASS (fi-hsw-i7-4770k) Test pm_rpm: Subgroup basic-rte: pass -> SKIP (fi-skl-i5-6260u) fi-hsw-i7-4770k total:229 pass:196 dwarn:0 dfail:0 fail:0 skip:33 fi-kbl-qkkr total:229 pass:160 dwarn:27 dfail:1 fail:1 skip:40 fi-skl-i5-6260u total:229 pass:203 dwarn:0 dfail:0 fail:0 skip:26 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 fi-snb-i7-2600 total:229 pass:176 dwarn:0 dfail:0 fail:0 skip:53 ro-bdw-i5-5250u total:229 pass:203 dwarn:3 dfail:1 fail:1 skip:21 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-byt-n2820 total:229 pass:180 dwarn:0 dfail:1 fail:3 skip:45 ro-hsw-i3-4010u total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-ivb2-i7-3770 total:229 pass:192 dwarn:0 dfail:1 fail:0 skip:36 ro-skl3-i5-6260u total:229 pass:209 dwarn:0 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 ro-bsw-n3050 failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1355/ ee2057a 
drm-intel-nightly: 2016y-07m-01d-08h-52m-58s UTC integration manifest 8e07a01 drm/i915/guc: Demote some firmware loading messages to debug ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/bxt: Export pooled eu info to userspace
Pooled EU is a bxt only feature and kernel changes are already merged. This feature is not yet exposed to userspace as the support was not yet available. Beignet team expressed interest and added patches to use this. Since we now have a user and patches to use them, expose them from the kernel side as well. v2: fix compile error [1] https://lists.freedesktop.org/archives/beignet/2016-June/007698.html [2] https://lists.freedesktop.org/archives/beignet/2016-June/007699.html Cc: Winiarski, Michal Cc: Zou, Nanhai Cc: Yang, Rong R Cc: Tim Gore Cc: Jeff McGee Signed-off-by: Arun Siluvery --- drivers/gpu/drm/i915/i915_drv.c | 6 ++ include/uapi/drm/i915_drm.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c580e24..8a26740 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -365,6 +365,12 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_HAS_POOLED_EU: + value = HAS_POOLED_EU(dev); + break; + case I915_PARAM_MIN_EU_IN_POOL: + value = INTEL_INFO(dev)->min_eu_in_pool; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c17d63d..a642bbc 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -361,6 +361,8 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_GPU_RESET35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 #define I915_PARAM_HAS_EXEC_SOFTPIN 37 +#define I915_PARAM_HAS_POOLED_EU38 +#define I915_PARAM_MIN_EU_IN_POOL 39 typedef struct drm_i915_getparam { __s32 param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915/bxt: Fix sanity check for BIOS RC6 setup
On 7/1/2016 2:45 PM, Imre Deak wrote: On pe, 2016-07-01 at 12:19 +0530, Kamble, Sagar A wrote: Have seen BIOS having option "RC6" disabled and "GTPM" enabled for cases where there are RC6 specific issues. It's possible although I haven't seen any based on the specs I have and the tests I ran. In any case the checks I added should catch any such missing setup and if there is something on top of that we need to add those (along with an update to the specification). GTPM option entails setup for other features as well I guess. Yes, it affects RPS setup too, but my point was that disabling it is what leaves RC6 unconfigured. I guess this doesn't really matter in the end, the main thing is that we check all the RC6 specific registers. In such cases - Can we output some DRM_INFO log saying BIOS has disabled RC6 although setup is available. Yes, can add that, but since it's something we'd need for debugging I'd use DRM_DEBUG. Fine. With this: Reviewed-by: Sagar Arun Kamble Do we need to also check for other unit level clock gating register setup done by BIOS like: GEN7_MISCCPCTL, GEN6_UCGCTL1 to GEN6_UCGCTL4, GEN8_UCGCTL6 etc. These are subject to change with later HW steppings. In any case their default value is the more conservative scenario with clock gating disabled, which should still allow RC6 functionality. They can be also enabled/disabled separately from the GTPM option in BIOS setup (via sub-options to GTPM), something we haven't checked so far anyway. Are you ok if I add a debug print for these too? We can skip given that these are not so much impacting RC6 as you said. --Imre Thanks Sagar On 6/29/2016 9:43 PM, Imre Deak wrote: BXT BIOS has two options related to GPU power management: "RC6(Render Standby)" and "GT PM Support". The assumption so far was that disabling either of these options would leave RC6 uninitialized. 
According to my tests this isn't so: for a proper RC6 setup we only need the "GT PM Support" option to be enabled while the "RC6" option only controls whether RC6 is left enabled or not by BIOS. OTOH we were missing a few checks to ensure a proper RC6 setup. Add these now and don't fail the sanity check if RC6 is disabled. This fixes a problem where RC6 remains disabled after reloading the driver, since we explicitly disable RC6 during unloading. CC: Sagar Arun Kamble Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_reg.h | 5 + drivers/gpu/drm/i915/intel_pm.c | 19 ++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c6bfbf8..92b4046 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7085,12 +7085,17 @@ enum { #define GEN6_RC6pp_THRESHOLD _MMIO(0xA0C0) #define GEN6_PMINTRMSK _MMIO(0xA16 8) #define GEN8_PMINTR_REDIRECT_TO_NON_DISP (1<<31) +#define GEN8_MISC_CTRL0_MMIO(0xA18 0) #define VLV_PWRDWNUPCTL _MMIO(0xA2 94) #define GEN9_MEDIA_PG_IDLE_HYSTERESIS_MMIO(0xA0C4 ) #define GEN9_RENDER_PG_IDLE_HYSTERESIS _MMIO(0xA0C 8) #define GEN9_PG_ENABLE _MMIO(0xA21 0) #define GEN9_RENDER_PG_ENABLE(1<<0) #define GEN9_MEDIA_PG_ENABLE (1<<1) +#define GEN8_PUSHBUS_CONTROL _MMIO(0xA248) +#define GEN8_PUSHBUS_ENABLE_MMIO(0xA250) +#define GEN8_PUSHBUS_SHIFT _MMIO(0xA25C) + #define VLV_CHICKEN_3_MMIO(VLV_DI SPLAY_BASE + 0x7040C) #define PIXEL_OVERLAP_CNT_MASK (3 << 30) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5dce264..fe76991 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5015,11 +5015,20 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) enable_rc6 = false; } - if (!(I915_READ(GEN6_RC_CONTROL) & (GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_HW_ENABLE) ) && - ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) || -!(I915_READ(GEN6_RC_STATE) & RC6_STATE))) { - 
DRM_DEBUG_DRIVER("HW/SW RC6 is not enabled by BIOS.\n"); + if (!I915_READ(GEN8_PUSHBUS_CONTROL) || + !I915_READ(GEN8_PUSHBUS_ENABLE) || + !I915_READ(GEN8_PUSHBUS_SHIFT)) { + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); + enable_rc6 = false; + } + + if (!I915_READ(GEN6_GFXPAUSE)) { + DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + enable_rc6 = false; + } + + if (!I915_READ(GEN8_MISC_CTRL0)) { + DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); enable_rc6 = false; } _
[Intel-gfx] ✓ Ro.CI.BAT: success for drm/i915/bxt: Export pooled eu info to userspace (rev2)
== Series Details == Series: drm/i915/bxt: Export pooled eu info to userspace (rev2) URL : https://patchwork.freedesktop.org/series/9367/ State : success == Summary == Series 9367v2 drm/i915/bxt: Export pooled eu info to userspace http://patchwork.freedesktop.org/api/1.0/series/9367/revisions/2/mbox Test kms_pipe_crc_basic: Subgroup nonblocking-crc-pipe-b: skip -> PASS (fi-skl-i5-6260u) Subgroup suspend-read-crc-pipe-c: incomplete -> PASS (fi-hsw-i7-4770k) fi-hsw-i7-4770k total:229 pass:196 dwarn:0 dfail:0 fail:0 skip:33 fi-kbl-qkkr total:229 pass:160 dwarn:28 dfail:1 fail:0 skip:40 fi-skl-i5-6260u total:229 pass:204 dwarn:0 dfail:0 fail:0 skip:25 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 fi-snb-i7-2600 total:229 pass:176 dwarn:0 dfail:0 fail:0 skip:53 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:177 dwarn:0 dfail:1 fail:2 skip:49 ro-byt-n2820 total:229 pass:180 dwarn:0 dfail:1 fail:3 skip:45 ro-hsw-i3-4010u total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-ivb2-i7-3770 total:229 pass:192 dwarn:0 dfail:1 fail:0 skip:36 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 ro-bdw-i5-5250u failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1357/ ee2057a drm-intel-nightly: 2016y-07m-01d-08h-52m-58s UTC integration manifest 7761c2c drm/i915/bxt: Export pooled eu info to userspace ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] To the gingerbread house!
Since those clamoring for the RC6 hole to be plugged are on holiday and didn't leave a review on the regression fixes, let's push this ahead of their return. Just a small number of patches left without r-b and then after almost 12 months of waiting we can close a critical customer issue. [PATCH 02/20] drm/i915: Delay queuing hangcheck to wait-request [PATCH 05/20] drm/i915: Separate GPU hang waitqueue from advance [PATCH 08/20] drm/i915: Use HWS for seqno tracking everywhere [PATCH 12/20] drm/i915: Add a delay between interrupt and inspecting [PATCH 18/20] drm/i915: Move the get/put irq locking into the caller -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 03/20] drm/i915: Remove the dedicated hangcheck workqueue
The queue only ever contains at most one item and has no special flags. It is just a very simple wrapper around the system-wq - a complication with no benefits. v2: Use the system_long_wq as we may wish to capture the error state after detecting the hang - which may take a bit of time. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 8 drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_irq.c | 7 --- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c580e24095b0..06ab8253e246 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1169,15 +1169,8 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) if (dev_priv->hotplug.dp_wq == NULL) goto out_free_wq; - dev_priv->gpu_error.hangcheck_wq = - alloc_ordered_workqueue("i915-hangcheck", 0); - if (dev_priv->gpu_error.hangcheck_wq == NULL) - goto out_free_dp_wq; - return 0; -out_free_dp_wq: - destroy_workqueue(dev_priv->hotplug.dp_wq); out_free_wq: destroy_workqueue(dev_priv->wq); out_err: @@ -1188,7 +1181,6 @@ out_err: static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { - destroy_workqueue(dev_priv->gpu_error.hangcheck_wq); destroy_workqueue(dev_priv->hotplug.dp_wq); destroy_workqueue(dev_priv->wq); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 485ab1148181..f4aa727e522a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1374,7 +1374,6 @@ struct i915_gpu_error { /* Hang gpu twice in this window and your context gets banned */ #define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000) - struct workqueue_struct *hangcheck_wq; struct delayed_work hangcheck_work; /* For reset and error_state handling. 
*/ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 5614582ca240..3ad4ef9250d8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3249,7 +3249,7 @@ out: void i915_queue_hangcheck(struct drm_i915_private *dev_priv) { - struct i915_gpu_error *e = &dev_priv->gpu_error; + unsigned long delay; if (!i915.enable_hangcheck) return; @@ -3259,8 +3259,9 @@ void i915_queue_hangcheck(struct drm_i915_private *dev_priv) * we will ignore a hung ring if a second ring is kept busy. */ - queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work, - round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES)); + delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); + queue_delayed_work(system_long_wq, + &dev_priv->gpu_error.hangcheck_work, delay); } static void ibx_irq_reset(struct drm_device *dev) -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 02/20] drm/i915: Delay queuing hangcheck to wait-request
We can forgo queuing the hangcheck from the start of every request until we wait upon a request. This reduces the overhead of every request, but may increase the latency of detecting a hang. However, if nothing ever waits upon a hang, did it ever hang? It also improves the robustness of the wait-request by ensuring that the hangchecker is indeed running before we sleep indefinitely (and thereby ensuring that we never actually sleep forever waiting for a dead GPU). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 9 + drivers/gpu/drm/i915/i915_irq.c | 10 -- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1d9878258103..34f724cc40b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1532,6 +1532,9 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } + /* Ensure that even if the GPU hangs, we get woken up. */ + i915_queue_hangcheck(dev_priv); + timer.function = NULL; if (timeout || missed_irq(dev_priv, engine)) { unsigned long expire; @@ -2919,8 +2922,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, /* Not allowed to fail! 
*/ WARN(ret, "emit|add_request failed: %d!\n", ret); - i915_queue_hangcheck(engine->i915); - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); @@ -3264,8 +3265,8 @@ i915_gem_retire_requests(struct drm_i915_private *dev_priv) if (idle) mod_delayed_work(dev_priv->wq, - &dev_priv->mm.idle_work, - msecs_to_jiffies(100)); +&dev_priv->mm.idle_work, +msecs_to_jiffies(100)); return idle; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4378a659d962..5614582ca240 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3135,10 +3135,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine_id(engine, dev_priv, id) { + bool busy = waitqueue_active(&engine->irq_queue); u64 acthd; u32 seqno; unsigned user_interrupts; - bool busy = true; semaphore_clear_deadlocks(dev_priv); @@ -3161,12 +3161,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (engine->hangcheck.seqno == seqno) { if (ring_idle(engine, seqno)) { engine->hangcheck.action = HANGCHECK_IDLE; - if (waitqueue_active(&engine->irq_queue)) { + if (busy) { /* Safeguard against driver failure */ user_interrupts = kick_waiters(engine); engine->hangcheck.score += BUSY; - } else - busy = false; + } } else { /* We always increment the hangcheck score * if the ring is busy and still processing @@ -3240,9 +3239,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) goto out; } + /* Reset timer in case GPU hangs without another request being added */ if (busy_count) - /* Reset timer case chip hangs without another request -* being added */ i915_queue_hangcheck(dev_priv); out: -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 01/20] drm/i915/shrinker: Flush active on objects before counting
As we inspect obj->active to decide how many objects we can shrink (we only shrink idle objects), it helps to flush the active lists first in order to have a more accurate count of available objects. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 886a8797566d..1bf14544d8ad 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -265,6 +265,8 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) if (!i915_gem_shrinker_lock(dev, &unlock)) return 0; + i915_gem_retire_requests(dev_priv); + count = 0; list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) if (can_release_pages(obj)) -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 05/20] drm/i915: Separate GPU hang waitqueue from advance
Currently __i915_wait_request uses a per-engine wait_queue_t for the dual purpose of waking after the GPU advances or for waking after an error. In the future, we may add even more wake sources and require greater separation, but for now we can conceptually simplify wakeups by separating the two sources. In particular, this allows us to use different wait-queues (e.g. one on the engine advancement, a global one for errors and one on each request) without any hassle. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 6 ++ drivers/gpu/drm/i915/i915_gem.c | 5 + drivers/gpu/drm/i915/i915_irq.c | 19 --- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4948c90c9bd4..0d0e4ac4dadb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1410,6 +1410,12 @@ struct i915_gpu_error { #define I915_WEDGED(1 << 31) /** +* Waitqueue to signal when a hang is detected. Used to for waiters +* to release the struct_mutex for the reset to procede. +*/ + wait_queue_head_t wait_queue; + + /** * Waitqueue to signal when the reset has completed. Used by clients * that wait for dev_priv->mm.wedged to settle. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 34f724cc40b8..b607493a8d3a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1455,6 +1455,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, const bool irq_test_in_progress = ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine); int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(reset); DEFINE_WAIT(wait); unsigned long timeout_expire; s64 before = 0; /* Only to silence a compiler warning. 
*/ @@ -1499,6 +1500,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, goto out; } + add_wait_queue(&dev_priv->gpu_error.wait_queue, &reset); for (;;) { struct timer_list timer; @@ -1551,6 +1553,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req, destroy_timer_on_stack(&timer); } } + remove_wait_queue(&dev_priv->gpu_error.wait_queue, &reset); + if (!irq_test_in_progress) engine->irq_put(engine); @@ -5281,6 +5285,7 @@ i915_gem_load_init(struct drm_device *dev) i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->mm.idle_work, i915_gem_idle_work_handler); + init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 83f40baeb1f3..6c17596d75dd 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2488,11 +2488,8 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) return ret; } -static void i915_error_wake_up(struct drm_i915_private *dev_priv, - bool reset_completed) +static void i915_error_wake_up(struct drm_i915_private *dev_priv) { - struct intel_engine_cs *engine; - /* * Notify all waiters for GPU completion events that reset state has * been changed, and that they need to restart their wait after @@ -2501,18 +2498,10 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv, */ /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */ - for_each_engine(engine, dev_priv) - wake_up_all(&engine->irq_queue); + wake_up_all(&dev_priv->gpu_error.wait_queue); /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */ wake_up_all(&dev_priv->pending_flip_queue); - - /* -* Signal tasks blocked in i915_gem_wait_for_error that the pending -* reset state is cleared. 
-*/ - if (reset_completed) - wake_up_all(&dev_priv->gpu_error.reset_queue); } /** @@ -2577,7 +2566,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) * Note: The wake_up also serves as a memory barrier so that * waiters see the update value of the reset counter atomic_t. */ - i915_error_wake_up(dev_priv, true); + wake_up_all(&dev_priv->gpu_error.reset_queue); } } @@ -2714,7 +2703,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, * ensure that the waiters see the updated value of the reset * counter atomic_t. */
[Intel-gfx] [PATCH 06/20] drm/i915: Slaughter the thundering i915_wait_request herd
One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed his wait, we can then wake up the next client and so on. However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqno and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, with each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. 
This appears to be scheduler induced (having discounted adverse effects from having a rbtree walk/erase in the wakeup path), each additional wake_up_process() costs approximately 1us on big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoiding long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter. v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments v9: Break the busy loop when the interrupt is unmasked or has fired. 
v10: Comments, unnecessary churn, better debugging from Tvrtko v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling. v14: RCU commentary, assert_spin_locked v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that first-waiter has the highest task priority (and so avoid priority inversion). v17: Add waiters to post-mortem GPU hang state. v18: Return early for a completed wait after acquiring the spinlock. Avoids adding ourselves to the tree if the wait is already complete, and skips the awkward question of why we don't do completion wakeups for waits earlier than or equal to ourselves. v19: Prepare for init_breadcrumbs to fail. Later patches may want to allocate during init, so be prepared to propagate back the error code. Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson Cc: "Rogozhkin, Dmitry V" Cc: "Gong, Zhi
[Intel-gfx] [PATCH 07/20] drm/i915: Spin after waking up for an interrupt
When waiting for an interrupt (waiting for the engine to complete some work), we know we are the only waiter to be woken on this engine. We also know when the GPU has nearly completed our request (or at least started processing it), so if, after being woken, we detect that the GPU is active and working on our request, we allow the bottom-half (the first waiter who wakes up to handle checking the seqno after the interrupt) to spin for a very short while to reduce client latencies. The impact is minimal, there was an improvement to the realtime-vs-many clients case, but exporting the function proves useful later. However, it is tempting to adjust irq_seqno_barrier to include the spin. The problem is first ensuring that the "start-of-request" seqno is coherent as we use that as our basis for judging when it is ok to spin. If we could, spinning there could dramatically shorten some sleeps, and allow us to make the barriers more conservative to handle missed seqno writes on more platforms (all gen7+ are known to have the occasional issue, at least). 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 26 +++ drivers/gpu/drm/i915/i915_gem.c | 40 +--- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 5 files changed, 45 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8601a1cbc337..33e5540e7229 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -663,7 +663,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, engine->get_seqno(engine), - i915_gem_request_completed(work->flip_queued_req, true)); + i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n", diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1fefa8c495f2..0ea69c5ecc8b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3287,24 +3287,27 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) >= 0; } -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); return i915_seqno_passed(req->engine->get_seqno(req->engine), req->previous_seqno); } -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); return 
i915_seqno_passed(req->engine->get_seqno(req->engine), req->seqno); } +bool __i915_spin_request(const struct drm_i915_gem_request *request, +int state, unsigned long timeout_us); +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, +int state, unsigned long timeout_us) +{ + return (i915_gem_request_started(request) && + __i915_spin_request(request, state, timeout_us)); +} + int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); @@ -3983,6 +3986,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *engine, static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { + struct intel_engine_cs *engine = req->engine; + /* Ensure our read of the seqno is coherent so that we * do not "miss an interrupt" (i.e. if this is the last * request and the seqno write from the GPU is not visible @@ -3994,7 +3999,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. */ - if (i915_gem_request_completed(req, false)) + if (engine->irq_seqno_barri
[Intel-gfx] [PATCH 14/20] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted
If we flag the seqno as potentially stale upon receiving an interrupt, we can use that information to reduce the frequency that we apply the heavyweight coherent seqno read (i.e. if we wake up a chain of waiters). v2: Use cmpxchg to replace READ_ONCE/WRITE_ONCE for more explicit control of the ordering wrt to interrupt generation and interrupt checking in the bottom-half. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 15 ++- drivers/gpu/drm/i915/i915_irq.c | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 16 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ee04bd40a41a..21181a6ec0b0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4005,7 +4005,20 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. */ - if (engine->irq_seqno_barrier) { + if (engine->irq_seqno_barrier && + cmpxchg_relaxed(&engine->irq_posted, 1, 0)) { + /* The ordering of irq_posted versus applying the barrier +* is crucial. The clearing of the current irq_posted must +* be visible before we perform the barrier operation, +* such that if a subsequent interrupt arrives, irq_posted +* is reasserted and our task rewoken (which causes us to +* do another __i915_request_irq_complete() immediately +* and reapply the barrier). Conversely, if the clear +* occurs after the barrier, then an interrupt that arrived +* whilst we waited on the barrier would not trigger a +* barrier on the next pass, and the read may not see the +* seqno update. 
+*/ engine->irq_seqno_barrier(engine); if (i915_gem_request_completed(req)) return true; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index be7f0b9b27e0..7724bae27bcf 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -976,6 +976,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { + smp_store_mb(engine->irq_posted, true); if (intel_engine_wakeup(engine)) { trace_i915_gem_request_notify(engine); engine->user_interrupts++; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f7fa99a00da8..31d3c06912dc 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -43,12 +43,18 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) static void irq_enable(struct intel_engine_cs *engine) { + /* Enabling the IRQ may miss the generation of the interrupt, but +* we still need to force the barrier before reading the seqno, +* just in case. +*/ + engine->irq_posted = true; WARN_ON(!engine->irq_get(engine)); } static void irq_disable(struct intel_engine_cs *engine) { engine->irq_put(engine); + engine->irq_posted = false; } static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) @@ -56,7 +62,6 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; - bool irq_posted = false; assert_spin_locked(&b->lock); if (b->rpm_wakelock) @@ -72,10 +77,8 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) /* No interrupts? Kick the waiter every jiffie! 
*/ if (intel_irqs_enabled(i915)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) irq_enable(engine); - irq_posted = true; - } b->irq_enabled = true; } @@ -83,7 +86,7 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) mod_timer(&b->fake_irq, jiffies + 1); - return irq_posted; + return engine->irq_posted; } static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) @@ -205,7 +208,8 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * in case the seqno passed. */ __intel_brea
[Intel-gfx] [PATCH 13/20] drm/i915: Check the CPU cached value in HWS of seqno after waking the waiter
If we have multiple waiters, we may find that many complete on the same wake up. If we first inspect the seqno from the CPU cache, we may reduce the number of heavyweight coherent seqno reads we require. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5a1e8e056ee5..ee04bd40a41a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3988,6 +3988,12 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + /* Before we do the heavier coherent read of the seqno, +* check the value (hopefully) in the CPU cacheline. +*/ + if (i915_gem_request_completed(req)) + return true; + /* Ensure our read of the seqno is coherent so that we * do not "miss an interrupt" (i.e. if this is the last * request and the seqno write from the GPU is not visible @@ -3999,11 +4005,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. */ - if (engine->irq_seqno_barrier) + if (engine->irq_seqno_barrier) { engine->irq_seqno_barrier(engine); - - if (i915_gem_request_completed(req)) - return true; + if (i915_gem_request_completed(req)) + return true; + } /* We need to check whether any gpu reset happened in between * the request being submitted and now. If a reset has occurred, -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 10/20] drm/i915: Allocate scratch page from stolen
With the last direct CPU access to the scratch page removed, we can now allocate it from our small amount of reserved system pages (stolen memory). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7ccfb1e57d12..cc65c269b82a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -665,7 +665,9 @@ int intel_init_pipe_control(struct intel_engine_cs *engine) WARN_ON(engine->scratch.obj); - obj = i915_gem_object_create(engine->i915->dev, 4096); + obj = i915_gem_object_create_stolen(engine->i915->dev, 4096); + if (obj == NULL) + obj = i915_gem_object_create(engine->i915->dev, 4096); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); ret = PTR_ERR(obj); -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 04/20] drm/i915: Make queueing the hangcheck work inline
Since the function is a small wrapper around schedule_delayed_work(), move it inline to remove the function call overhead for the principal caller. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 18 +- drivers/gpu/drm/i915/i915_irq.c | 17 - 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f4aa727e522a..4948c90c9bd4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2932,7 +2932,23 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); /* i915_irq.c */ -void i915_queue_hangcheck(struct drm_i915_private *dev_priv); +static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) +{ + unsigned long delay; + + if (unlikely(!i915.enable_hangcheck)) + return; + + /* Don't continually defer the hangcheck so that it is always run at +* least once after work has been scheduled on any ring. Otherwise, +* we will ignore a hung ring if a second ring is kept busy. +*/ + + delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); + queue_delayed_work(system_long_wq, + &dev_priv->gpu_error.hangcheck_work, delay); +} + __printf(3, 4) void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3ad4ef9250d8..83f40baeb1f3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3247,23 +3247,6 @@ out: ENABLE_RPM_WAKEREF_ASSERTS(dev_priv); } -void i915_queue_hangcheck(struct drm_i915_private *dev_priv) -{ - unsigned long delay; - - if (!i915.enable_hangcheck) - return; - - /* Don't continually defer the hangcheck so that it is always run at -* least once after work has been scheduled on any ring. Otherwise, -* we will ignore a hung ring if a second ring is kept busy. 
-*/ - - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - queue_delayed_work(system_long_wq, - &dev_priv->gpu_error.hangcheck_work, delay); -} - static void ibx_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 11/20] drm/i915: Refactor scratch object allocation for gen2 w/a buffer
The gen2 w/a buffer is stuffed into the same slot as the gen5+ scratch buffer. If we pass in the size we want to allocate for the scratch buffer, both callers can use the same routine. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 32 drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8411f8bb4ac..f1a01137334c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2056,7 +2056,7 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush_render; engine->emit_request = gen8_emit_request_render; - ret = intel_init_pipe_control(engine); + ret = intel_init_pipe_control(engine, 4096); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cc65c269b82a..f89b1797b465 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -658,16 +658,16 @@ void intel_fini_pipe_control(struct intel_engine_cs *engine) engine->scratch.obj = NULL; } -int intel_init_pipe_control(struct intel_engine_cs *engine) +int intel_init_pipe_control(struct intel_engine_cs *engine, int size) { struct drm_i915_gem_object *obj; int ret; WARN_ON(engine->scratch.obj); - obj = i915_gem_object_create_stolen(engine->i915->dev, 4096); + obj = i915_gem_object_create_stolen(engine->i915->dev, size); if (obj == NULL) - obj = i915_gem_object_create(engine->i915->dev, 4096); + obj = i915_gem_object_create(engine->i915->dev, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); ret = PTR_ERR(obj); @@ -3002,7 +3002,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine = 
&dev_priv->engine[RCS]; - struct drm_i915_gem_object *obj; int ret; engine->name = "render ring"; @@ -3045,31 +3044,16 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - /* Workaround batchbuffer to combat CS tlb bug. */ - if (HAS_BROKEN_CS_TLB(dev_priv)) { - obj = i915_gem_object_create(dev, I830_WA_SIZE); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate batch bo\n"); - return PTR_ERR(obj); - } - - ret = i915_gem_obj_ggtt_pin(obj, 0, 0); - if (ret != 0) { - drm_gem_object_unreference(&obj->base); - DRM_ERROR("Failed to ping batch bo\n"); - return ret; - } - - engine->scratch.obj = obj; - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); - } - ret = intel_init_ring_buffer(dev, engine); if (ret) return ret; if (INTEL_GEN(dev_priv) >= 5) { - ret = intel_init_pipe_control(engine); + ret = intel_init_pipe_control(engine, 4096); + if (ret) + return ret; + } else if (HAS_BROKEN_CS_TLB(dev_priv)) { + ret = intel_init_pipe_control(engine, I830_WA_SIZE); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4a3152993415..e7495a2d6367 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -471,8 +471,8 @@ void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno); int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); +int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); -int intel_init_pipe_control(struct intel_engine_cs *engine); int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 12/20] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk)
On Ironlake, there is no command nor register to ensure that the write from a MI_STORE command is completed (and coherent on the CPU) before the command parser continues. This means that the ordering between the seqno write and the subsequent user interrupt is undefined (like gen6+). So to ensure that the seqno write is completed after the final user interrupt we need to delay the read sufficiently to allow the write to complete. This delay is undefined by the bspec, and empirically requires 75us even though a register read combined with a clflush is less than 500ns. Hence, the delay is due to an on-chip buffer rather than the latency of the write to memory. Note that the render ring controls this by filling the PIPE_CONTROL fifo with stalling commands that force the earliest pipe-control with the seqno to be completed before the command parser continues. Given that we need a barrier operation for BSD, we may as well forgo the extra per-batch latency by using a common per-interrupt barrier. Studying the impact of adding the usleep shows that the effect on both sequences of, and individual synchronous, no-op batches is negligible for the media engine (where the write is now unordered with the interrupt). Converting the render engine over from the current glut of pipe-controls to the per-interrupt delays speeds up both the sequential and individual synchronous no-ops by 20% and 60%, respectively. This speed-up holds even when looking at the throughput of small copies (4KiB->4MiB), both serial and synchronous, by about 20%. This is because despite adding a significant delay to the interrupt, in all likelihood we will see the seqno write without having to apply the barrier (the delay is necessary only in the rare corner cases where the last required write is delayed). 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94307 Testcase: igt/gem_sync #ilk Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 10 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 86 - 2 files changed, 23 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7c379afcff2f..be7f0b9b27e0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1264,8 +1264,7 @@ static void ivybridge_parity_error_irq_handler(struct drm_i915_private *dev_priv static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - if (gt_iir & - (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) + if (gt_iir & GT_RENDER_USER_INTERRUPT) notify_ring(&dev_priv->engine[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) notify_ring(&dev_priv->engine[VCS]); @@ -1274,9 +1273,7 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - - if (gt_iir & - (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) + if (gt_iir & GT_RENDER_USER_INTERRUPT) notify_ring(&dev_priv->engine[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) notify_ring(&dev_priv->engine[VCS]); @@ -3601,8 +3598,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) gt_irqs |= GT_RENDER_USER_INTERRUPT; if (IS_GEN5(dev)) { - gt_irqs |= GT_RENDER_PIPECTL_NOTIFY_INTERRUPT | - ILK_BSD_USER_INTERRUPT; + gt_irqs |= ILK_BSD_USER_INTERRUPT; } else { gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f89b1797b465..d919e72f1328 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1593,67 +1593,22 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, return 0; } -#define PIPE_CONTROL_FLUSH(ring__, addr__) \ -do { \ - intel_ring_emit(ring__, 
GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |\ -PIPE_CONTROL_DEPTH_STALL); \ - intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \ - intel_ring_emit(ring__, 0); \ - intel_ring_emit(ring__, 0); \ -} while (0) - -static int -pc_render_add_request(struct drm_i915_gem_request *req) +static void +gen5_seqno_barrier(struct intel_engine_cs *ring) { - struct intel_engine_cs *engine = req->engine; - u32 addr = engine->status_page.gfx_addr + - (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); -
[Intel-gfx] [PATCH 18/20] drm/i915: Move the get/put irq locking into the caller
With only a single callsite for intel_engine_cs->irq_get and ->irq_put, we can reduce the code size by moving the common preamble into the caller, and we can also eliminate the reference counting. For completeness, as we are no longer doing reference counting on irq, rename the get/put vfunctions to enable/disable respectively and are able to review the use of posting reads. We only require the serialisation with hardware when enabling the interrupt (i.e. so we cannot miss an interrupt by going to sleep before the hardware truly enables it). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 8 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 34 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 237 +-- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +- 5 files changed, 92 insertions(+), 202 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7724bae27bcf..be25b7bdacfe 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -259,12 +259,12 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, dev_priv->gt_irq_mask &= ~interrupt_mask; dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask); I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); } void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) { ilk_update_gt_irq(dev_priv, mask, mask); + POSTING_READ_FW(GTIMR); } void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) @@ -2819,9 +2819,9 @@ ring_idle(struct intel_engine_cs *engine, u32 seqno) } static bool -ipehr_is_semaphore_wait(struct drm_i915_private *dev_priv, u32 ipehr) +ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) { - if (INTEL_GEN(dev_priv) >= 8) { + if (INTEL_GEN(engine->i915) >= 8) { return (ipehr >> 23) == 0x1c; } else { ipehr &= ~MI_SEMAPHORE_SYNC_MASK; @@ -2892,7 +2892,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) 
return NULL; ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - if (!ipehr_is_semaphore_wait(engine->i915, ipehr)) + if (!ipehr_is_semaphore_wait(engine, ipehr)) return NULL; /* diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 3b8313b87ce4..28bc72b601b8 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -50,12 +50,18 @@ static void irq_enable(struct intel_engine_cs *engine) * just in case. */ engine->irq_posted = true; - WARN_ON(!engine->irq_get(engine)); + + spin_lock_irq(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock_irq(&engine->i915->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) { - engine->irq_put(engine); + spin_lock_irq(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock_irq(&engine->i915->irq_lock); + engine->irq_posted = false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f1a01137334c..380175149916 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1617,36 +1617,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return 0; } -static bool gen8_logical_ring_get_irq(struct intel_engine_cs *engine) +static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return false; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) { - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); - POSTING_READ(RING_IMR(engine->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - return true; + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | engine->irq_keep_mask)); + POSTING_READ_FW(RING_IMR(engine->mmio_base)); } -static void gen8_logical_ring_put_irq(struct intel_engine_cs *engine) 
+static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) { - I915_WRITE_IMR(engine, ~engine->irq_keep_mask); - POSTING_READ(RING_IMR(engine->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + I915_WRITE_IMR(engine, ~engine->irq_keep_m
[Intel-gfx] [PATCH 17/20] drm/i915: Embed signaling node into the GEM request
Under the assumption that enabling signaling will be a frequent operation, lets preallocate our attachments for signaling inside the (rather large) request struct (and so benefiting from the slab cache). v2: Convert from void * to more meaningful names and types. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 76 drivers/gpu/drm/i915/intel_ringbuffer.h | 10 - 3 files changed, 46 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ed4116f9d793..0dcc43d2994b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2380,6 +2380,7 @@ struct drm_i915_gem_request { /** On Which ring this request was generated */ struct drm_i915_private *i915; struct intel_engine_cs *engine; + struct intel_signal_node signaling; /** GEM sequence number associated with the previous request, * when the HWS breadcrumb is equal to this the GPU is processing diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 226c3d51c045..3b8313b87ce4 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -354,35 +354,29 @@ out_unlock: spin_unlock(&b->lock); } -struct signal { - struct rb_node node; - struct intel_wait wait; - struct drm_i915_gem_request *request; -}; - -static bool signal_complete(struct signal *signal) +static bool signal_complete(struct drm_i915_gem_request *request) { - if (signal == NULL) + if (request == NULL) return false; /* If another process served as the bottom-half it may have already * signalled that this wait is already completed. */ - if (intel_wait_complete(&signal->wait)) + if (intel_wait_complete(&request->signaling.wait)) return true; /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. 
*/ - if (__i915_request_irq_complete(signal->request)) + if (__i915_request_irq_complete(request)) return true; return false; } -static struct signal *to_signal(struct rb_node *rb) +static struct drm_i915_gem_request *to_signaler(struct rb_node *rb) { - return container_of(rb, struct signal, node); + return container_of(rb, struct drm_i915_gem_request, signaling.node); } static void signaler_set_rtpriority(void) @@ -396,7 +390,7 @@ static int intel_breadcrumbs_signaler(void *arg) { struct intel_engine_cs *engine = arg; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct signal *signal; + struct drm_i915_gem_request *request; /* Install ourselves with high priority to reduce signalling latency */ signaler_set_rtpriority(); @@ -412,14 +406,13 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. */ - signal = READ_ONCE(b->first_signal); - if (signal_complete(signal)) { + request = READ_ONCE(b->first_signal); + if (signal_complete(request)) { /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. */ - intel_engine_remove_wait(engine, &signal->wait); - - i915_gem_request_unreference(signal->request); + intel_engine_remove_wait(engine, +&request->signaling.wait); /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may @@ -428,12 +421,15 @@ static int intel_breadcrumbs_signaler(void *arg) * the oldest before picking the next one. */ spin_lock(&b->lock); - if (signal == b->first_signal) - b->first_signal = rb_next(&signal->node); - rb_erase(&signal->node, &b->signals); + if (request == b->first_signal) { + struct rb_node *rb = + rb_next(&request->signaling.node); + b->first_signal = rb ? to_signaler(rb) : NULL; + } + rb_erase(&request->signaling.node, &b->signals); spin_unlock(&b->lock); - kfree(signal); + i915_gem_request_unreference(request); } else {
[Intel-gfx] [PATCH 20/20] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts
Since the tests can and do explicitly check debugfs/i915_ring_missed_irqs for the handling of a "missed interrupt", adding it to the dmesg at INFO is just noise. When it happens for real, we still class it as an ERROR. Note that I have chosen to remove it entirely because the time at which we detect the "missed interrupt" is irrelevant and the message contains no more information than we glean from looking in debugfs. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_irq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index be25b7bdacfe..53d4c8088f28 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3072,9 +3072,6 @@ static unsigned kick_waiters(struct intel_engine_cs *engine) if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) DRM_ERROR("Hangcheck timer elapsed... %s idle\n", engine->name); - else - DRM_INFO("Fake missed irq on %s\n", -engine->name); intel_engine_enable_fake_irq(engine); } -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 19/20] drm/i915: Simplify enabling user-interrupts with L3-remapping
Borrow the idea from intel_lrc.c to precompute the mask of interrupts we wish to always enable to avoid having lots of conditionals inside the interrupt enabling. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 34 +++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb62f3012aa6..c626cc160b14 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1313,8 +1313,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (HAS_L3_DPF(dev_priv)) - I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv)); + I915_WRITE_IMR(engine, ~engine->irq_keep_mask); return init_workarounds_ring(engine); } @@ -1729,12 +1728,9 @@ gen6_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | -GT_PARITY_ERROR(dev_priv))); - else - I915_WRITE_IMR(engine, ~engine->irq_enable_mask); + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | +engine->irq_keep_mask)); gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); } @@ -1743,10 +1739,7 @@ gen6_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv)); - else - I915_WRITE_IMR(engine, ~0); + I915_WRITE_IMR(engine, ~engine->irq_keep_mask); gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); } @@ -1773,12 +1766,9 @@ gen8_irq_enable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | 
-GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); - else - I915_WRITE_IMR(engine, ~engine->irq_enable_mask); + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | +engine->irq_keep_mask)); POSTING_READ_FW(RING_IMR(engine->mmio_base)); } @@ -1787,11 +1777,7 @@ gen8_irq_disable(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (HAS_L3_DPF(dev_priv) && engine->id == RCS) - I915_WRITE_IMR(engine, - ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); - else - I915_WRITE_IMR(engine, ~0); + I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } static int @@ -2872,6 +2858,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) intel_ring_default_vfuncs(dev_priv, engine); engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + if (HAS_L3_DPF(dev_priv)) + engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8eb96408a3e2..ab0fe208ac0a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -190,7 +190,8 @@ struct intel_engine_cs { struct i915_ctx_workarounds wa_ctx; boolirq_posted; - u32 irq_enable_mask;/* bitmask to enable ring interrupt */ + u32 irq_keep_mask; /* always keep these interrupts */ + u32 irq_enable_mask; /* bitmask to enable ring interrupt */ void(*irq_enable)(struct intel_engine_cs *ring); void(*irq_disable)(struct intel_engine_cs *ring); @@ -287,7 +288,6 @@ struct intel_engine_cs { unsigned int idle_lite_restore_wa; bool disable_lite_restore_wa; u32 ctx_desc_template; - u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct drm_i915_gem_request *request); int (*emit_flush)(struct drm_i915_gem_request *request, u32 invalidate_domains, -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 15/20] drm/i915: Stop setting wraparound seqno on initialisation
We have testcases to ensure that seqno wraparound works fine, so we can forgo forcing everyone to encounter seqno wraparound during early uptime. seqno wraparound incurs a full GPU stall so not forcing it will eliminate one jitter from the early system. Using the testcases, we have very deterministic testing which given how difficult it would be to debug an issue (GPU hang) stemming from a wraparound using pure postmortem analysis I see no value in forcing a wrap during boot. Advancing the global next_seqno after a GPU reset is equally pointless. References? https://bugs.freedesktop.org/show_bug.cgi?id=95023 Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 14 -- 1 file changed, 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f30d2f90d00b..b3a682badd8d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5113,12 +5113,6 @@ i915_gem_init_hw(struct drm_device *dev) if (ret) goto out; - /* -* Increment the next seqno by 0x100 so we have a visible break -* on re-initialisation -*/ - ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); - out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; @@ -5261,14 +5255,6 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - /* -* Set initial sequence number for requests. -* Using this number allows the wraparound to happen early, -* catching any obvious problems. -*/ - dev_priv->next_seqno = ((u32)~0 - 0x1100); - dev_priv->last_seqno = ((u32)~0 - 0x1101); - INIT_LIST_HEAD(&dev_priv->mm.fence_list); init_waitqueue_head(&dev_priv->pending_flip_queue); -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 16/20] drm/i915: Convert trace-irq to the breadcrumb waiter
If we convert the tracing from direct use of ring->irq_get() over to the breadcrumb infrastructure, we only have a single user of the ring->irq_get and so we will be able to simplify the driver routines (eliminating the redundant validation and irq refcounting). Process context is preferred over softirq (or even hardirq) for a couple of reasons: - we already utilize process context to have fast wakeup of a single client (i.e. the client waiting for the GPU inspects the seqno for itself following an interrupt to avoid the overhead of a context switch before it returns to userspace) - engine->irq_seqno() is not suitable for use from a softirq/hardirq context as we may require long waits (100-250us) to ensure the seqno write is posted before we read it from the CPU A signaling framework is a requirement for enabling dma-fences. v2: Move to a signaling framework based upon the waiter. v3: Track the first-signal to avoid having to walk the rbtree every time. v4: Mark the signaler thread as RT priority to reduce latency in the indirect wakeups. v5: Make failure to allocate the thread fatal. 
v6: Rename kthreads to i915/signal:%u Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 8 -- drivers/gpu/drm/i915/i915_gem.c | 9 +- drivers/gpu/drm/i915/i915_trace.h| 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 193 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +- 5 files changed, 202 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 21181a6ec0b0..ed4116f9d793 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3976,14 +3976,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) schedule_timeout_uninterruptible(remaining_jiffies); } } - -static inline void i915_trace_irq_get(struct intel_engine_cs *engine, - struct drm_i915_gem_request *req) -{ - if (engine->trace_irq_req == NULL && engine->irq_get(engine)) - i915_gem_request_assign(&engine->trace_irq_req, req); -} - static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b3a682badd8d..33ead594c6db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2749,7 +2749,8 @@ i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { - while (intel_kick_waiters(dev_priv)) + while (intel_kick_waiters(dev_priv) || + intel_kick_signalers(dev_priv)) yield(); } @@ -3213,12 +3214,6 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) i915_gem_object_retire__read(obj, engine->id); } - if (unlikely(engine->trace_irq_req && -i915_gem_request_completed(engine->trace_irq_req))) { - engine->irq_put(engine); - i915_gem_request_assign(&engine->trace_irq_req, NULL); - } - WARN_ON(i915_verify_lists(engine->dev)); } diff --git 
a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 3d13fde95fdf..f59cf07184ae 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -490,7 +490,7 @@ TRACE_EVENT(i915_gem_ring_dispatch, __entry->ring = req->engine->id; __entry->seqno = req->seqno; __entry->flags = flags; - i915_trace_irq_get(req->engine, req); + intel_engine_enable_signaling(req); ), TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 31d3c06912dc..226c3d51c045 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -22,6 +22,8 @@ * */ +#include + #include "i915_drv.h" static void intel_breadcrumbs_fake_irq(unsigned long data) @@ -255,6 +257,15 @@ static inline bool chain_wakeup(struct rb_node *rb, int priority) return rb && to_wait(rb)->tsk->prio <= priority; } +static inline int wakeup_priority(struct intel_breadcrumbs *b, + struct task_struct *tsk) +{ + if (tsk == b->signaler) + return INT_MIN; + else + return tsk->prio; +} + void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel
[Intel-gfx] [PATCH 09/20] drm/i915: Stop mapping the scratch page into CPU space
After the elimination of using the scratch page for Ironlake's breadcrumb, we no longer need to kmap the object. We therefore can move it into the high unmappable space and do not need to force the object to be coherent (i.e. snooped on !llc platforms). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 40 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 02104fbf9045..7ccfb1e57d12 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -648,58 +648,40 @@ out: return ret; } -void -intel_fini_pipe_control(struct intel_engine_cs *engine) +void intel_fini_pipe_control(struct intel_engine_cs *engine) { if (engine->scratch.obj == NULL) return; - if (INTEL_GEN(engine->i915) >= 5) { - kunmap(sg_page(engine->scratch.obj->pages->sgl)); - i915_gem_object_ggtt_unpin(engine->scratch.obj); - } - + i915_gem_object_ggtt_unpin(engine->scratch.obj); drm_gem_object_unreference(&engine->scratch.obj->base); engine->scratch.obj = NULL; } -int -intel_init_pipe_control(struct intel_engine_cs *engine) +int intel_init_pipe_control(struct intel_engine_cs *engine) { + struct drm_i915_gem_object *obj; int ret; WARN_ON(engine->scratch.obj); - engine->scratch.obj = i915_gem_object_create(engine->i915->dev, 4096); - if (IS_ERR(engine->scratch.obj)) { - DRM_ERROR("Failed to allocate seqno page\n"); - ret = PTR_ERR(engine->scratch.obj); - engine->scratch.obj = NULL; + obj = i915_gem_object_create(engine->i915->dev, 4096); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + ret = PTR_ERR(obj); goto err; } - ret = i915_gem_object_set_cache_level(engine->scratch.obj, - I915_CACHE_LLC); + ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH); if (ret) goto err_unref; - ret = i915_gem_obj_ggtt_pin(engine->scratch.obj, 4096, 0); - if (ret) - 
goto err_unref; - - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(engine->scratch.obj); - engine->scratch.cpu_page = kmap(sg_page(engine->scratch.obj->pages->sgl)); - if (engine->scratch.cpu_page == NULL) { - ret = -ENOMEM; - goto err_unpin; - } - + engine->scratch.obj = obj; + engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", engine->name, engine->scratch.gtt_offset); return 0; -err_unpin: - i915_gem_object_ggtt_unpin(engine->scratch.obj); err_unref: drm_gem_object_unreference(&engine->scratch.obj->base); err: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a2cb9587ca0d..4a3152993415 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -326,7 +326,6 @@ struct intel_engine_cs { struct { struct drm_i915_gem_object *obj; u32 gtt_offset; - volatile u32 *cpu_page; } scratch; bool needs_cmd_parser; -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 08/20] drm/i915: Use HWS for seqno tracking everywhere
By using the same address for storing the HWS on every platform, we can remove the platform specific vfuncs and reduce the get-seqno routine to a single read of a cached memory location. v2: Fix semaphore_passed() to look at the signaling engine (not the waiter's) Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c| 2 +- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_trace.h| 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 26 + drivers/gpu/drm/i915/intel_ringbuffer.c | 65 +--- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++-- 9 files changed, 34 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 33e5540e7229..586cd8c65146 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -662,7 +662,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) engine->name, i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, - engine->get_seqno(engine), + intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); @@ -792,7 +792,7 @@ static void i915_ring_seqno_info(struct seq_file *m, struct rb_node *rb; seq_printf(m, "Current sequence (%s): %x\n", - engine->name, engine->get_seqno(engine)); + engine->name, intel_engine_get_seqno(engine)); seq_printf(m, "Current user interrupts (%s): %x\n", engine->name, READ_ONCE(engine->user_interrupts)); @@ -1420,7 +1420,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) for_each_engine_id(engine, dev_priv, id) { acthd[id] = intel_ring_get_active_head(engine); - seqno[id] = engine->get_seqno(engine); + seqno[id] = intel_engine_get_seqno(engine); } i915_get_extra_instdone(dev_priv, instdone); diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h index 0ea69c5ecc8b..5a1e8e056ee5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3289,13 +3289,13 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - return i915_seqno_passed(req->engine->get_seqno(req->engine), + return i915_seqno_passed(intel_engine_get_seqno(req->engine), req->previous_seqno); } static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - return i915_seqno_passed(req->engine->get_seqno(req->engine), + return i915_seqno_passed(intel_engine_get_seqno(req->engine), req->seqno); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fcc7606fc008..95f8d4d2d6c9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -984,7 +984,7 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, ering->waiting = intel_engine_has_waiter(engine); ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ering->acthd = intel_ring_get_active_head(engine); - ering->seqno = engine->get_seqno(engine); + ering->seqno = intel_engine_get_seqno(engine); ering->last_seqno = engine->last_submitted_seqno; ering->start = I915_READ_START(engine); ering->head = I915_READ_HEAD(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a11ab00cdee0..7c379afcff2f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2952,7 +2952,7 @@ static int semaphore_passed(struct intel_engine_cs *engine) if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) return -1; - if (i915_seqno_passed(signaller->get_seqno(signaller), seqno)) + if (i915_seqno_passed(intel_engine_get_seqno(signaller), seqno)) return 1; /* cursory check for an unkickable deadlock */ @@ -3140,7 +3140,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) 
engine->irq_seqno_barrier(engine); acthd = intel_ring_get_active_head(engine); - seqno = engine->get_seqno(engine); + seqno = intel_en
[Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [01/20] drm/i915/shrinker: Flush active on objects before counting
== Series Details == Series: series starting with [01/20] drm/i915/shrinker: Flush active on objects before counting URL : https://patchwork.freedesktop.org/series/9370/ State : failure == Summary == Series 9370v1 Series without cover letter http://patchwork.freedesktop.org/api/1.0/series/9370/revisions/1/mbox Test gem_exec_flush: Subgroup basic-batch-kernel-default-wb: pass -> DMESG-FAIL (ro-bdw-i7-5557U) Test kms_pipe_crc_basic: Subgroup hang-read-crc-pipe-c: pass -> DMESG-WARN (ro-ivb2-i7-3770) Subgroup nonblocking-crc-pipe-b: skip -> PASS (fi-skl-i5-6260u) fi-kbl-qkkr total:229 pass:160 dwarn:25 dfail:2 fail:2 skip:40 fi-skl-i5-6260u total:229 pass:204 dwarn:0 dfail:0 fail:0 skip:25 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 fi-snb-i7-2600 total:229 pass:176 dwarn:0 dfail:0 fail:0 skip:53 ro-bdw-i5-5250u total:229 pass:204 dwarn:2 dfail:1 fail:0 skip:22 ro-bdw-i7-5557U total:229 pass:203 dwarn:1 dfail:2 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:176 dwarn:0 dfail:1 fail:3 skip:49 ro-byt-n2820 total:229 pass:180 dwarn:0 dfail:1 fail:3 skip:45 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-ivb2-i7-3770 total:229 pass:191 dwarn:1 dfail:1 fail:0 skip:36 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 fi-hsw-i7-4770k failed to connect after reboot ro-hsw-i3-4010u failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1358/ ee2057a drm-intel-nightly: 2016y-07m-01d-08h-52m-58s UTC integration manifest b948c58 drm/i915: Remove debug noise on detecting fault-injection of missed interrupts 4d00695 drm/i915: Simplify enabling user-interrupts with L3-remapping 
47348b5 drm/i915: Move the get/put irq locking into the caller 4eaea22 drm/i915: Embed signaling node into the GEM request 09c45dd drm/i915: Convert trace-irq to the breadcrumb waiter f09494b drm/i915: Stop setting wraparound seqno on initialisation 5e71899 drm/i915: Only apply one barrier after a breadcrumb interrupt is posted 85358c6 drm/i915: Check the CPU cached value in HWS of seqno after waking the waiter 0665c9d drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk) 388b9c2 drm/i915: Refactor scratch object allocation for gen2 w/a buffer e92c46c drm/i915: Allocate scratch page from stolen 76c6dfb drm/i915: Stop mapping the scratch page into CPU space dfe8af3 drm/i915: Use HWS for seqno tracking everywhere 1673284 drm/i915: Spin after waking up for an interrupt aecc827 drm/i915: Slaughter the thundering i915_wait_request herd c96435e drm/i915: Separate GPU hang waitqueue from advance 753123b drm/i915: Make queueing the hangcheck work inline 6629854 drm/i915: Remove the dedicated hangcheck workqueue 4aff738 drm/i915: Delay queuing hangcheck to wait-request 5f78aed drm/i915/shrinker: Flush active on objects before counting ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/bxt: Export pooled eu info to userspace
On Fri, Jul 01, 2016 at 11:43:02AM +0100, Arun Siluvery wrote: > Pooled EU is a bxt only feature and kernel changes are already merged. This > feature is not yet exposed to userspace as the support was not yet > available. Beignet team expressed interest and added patches to use this. > > Since we now have a user and patches to use them, expose them from the > kernel side as well. > > v2: fix compile error > > [1] https://lists.freedesktop.org/archives/beignet/2016-June/007698.html > [2] https://lists.freedesktop.org/archives/beignet/2016-June/007699.html > > Cc: Winiarski, Michal > Cc: Zou, Nanhai > Cc: Yang, Rong R > Cc: Tim Gore > Cc: Jeff McGee > Signed-off-by: Arun Siluvery Acked-by: Chris Wilson -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✓ Ro.CI.BAT: success for series starting with [1/2] drm/i915: Fix log type for RC6 debug messages
On ke, 2016-06-29 at 17:06 +, Patchwork wrote: > == Series Details == > > Series: series starting with [1/2] drm/i915: Fix log type for RC6 > debug messages > URL : https://patchwork.freedesktop.org/series/9285/ > State : success Thanks for the review, I pushed the patches to -dinq. > == Summary == > > Series 9285v1 Series without cover letter > http://patchwork.freedesktop.org/api/1.0/series/9285/revisions/1/mbox > > Test kms_pipe_crc_basic: > Subgroup suspend-read-crc-pipe-c: > dmesg-warn -> SKIP (ro-bdw-i5-5250u) > > fi-skl-i5- > 6260u total:229 pass:202 dwarn:0 dfail:0 fail:2 skip:25 > fi-snb-i7- > 2600 total:229 pass:174 dwarn:0 dfail:0 fail:2 skip:53 > ro-bdw-i5- > 5250u total:229 pass:202 dwarn:2 dfail:1 fail:2 skip:22 > ro-bdw-i7- > 5557U total:229 pass:202 dwarn:1 dfail:1 fail:2 skip:23 > ro-bdw-i7- > 5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 > ro-byt- > n2820 total:229 pass:178 dwarn:0 dfail:1 fail:5 skip:45 > ro-hsw-i3- > 4010u total:229 pass:195 dwarn:0 dfail:1 fail:2 skip:31 > ro-hsw-i7- > 4770r total:229 pass:195 dwarn:0 dfail:1 fail:2 skip:31 > ro-ilk-i7- > 620lm total:229 pass:155 dwarn:0 dfail:1 fail:3 skip:70 > ro-ilk1-i5- > 650 total:224 pass:155 dwarn:0 dfail:1 fail:3 skip:65 > ro-ivb-i7- > 3770 total:229 pass:186 dwarn:0 dfail:1 fail:2 skip:40 > ro-ivb2-i7- > 3770 total:229 pass:190 dwarn:0 dfail:1 fail:2 skip:36 > ro-skl3-i5-6260u > total:229 pass:206 dwarn:1 dfail:1 fail:2 skip:19 > ro-snb-i7- > 2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 > fi-kbl-qkkr failed to connect after reboot > fi-skl-i7-6700k failed to connect after reboot > ro-bsw-n3050 failed to connect after reboot > > Results at /archive/results/CI_IGT_test/RO_Patchwork_1334/ > > 8a6521c drm-intel-nightly: 2016y-06m-29d-16h-08m-16s UTC integration > manifest > 4595c784 drm/i915/bxt: Fix sanity check for BIOS RC6 setup > 52064d0 drm/i915: Fix log type for RC6 debug messages > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/2] drm/i915/opregion: proper handling of DIDL and CADL
Found a problem: After screensaver kicked in and display was turned off the brightness keys stop working. Problem can be reproduced like that: 1. Boot laptop 2. Test brightness keys, they are working 3. open Terminal and issue "xset -display :0 dpms force off" 4. the screen goes blank (like after the screensaver timeout) 5. push a key to bring the screen back 6. test brightness keys again, now they don't work If the system is sent to suspend and woken up everything is fine again. Behaviour happens on the 4.7.0-rc5 kernel from the opregion-didl-v4 branch. Before I compiled the 4.7.0-r4 from the same git repository. On this (v3) everything still works after the screen was blanked. Best regards Rainer Am 30.06.2016 um 11:19 schrieb Rainer Koenig: > Am 29.06.2016 um 17:36 schrieb Jani Nikula: >> This is v4 of [1]. The first three have already been pushed to >> drm-intel-next-queued. The only change here is the atomic commit. >> >> Review and testing would be much appreciated to move this forward. For >> testing, I've pushed this to opregion-didl-v4 branch of my repo at [2]. >> > Tested on a Fujitsu LIFEBOOK E736: Brightness keys are working now. > > Best regards > Rainer > -- Dipl.-Inf. (FH) Rainer Koenig Project Manager Linux Clients FJ EMEIA PR PSO PM&D CCD ENG SW OSS&C Fujitsu Technology Solutions Bürgermeister-Ullrich-Str. 100 86199 Augsburg Germany Telephone: +49-821-804-3321 Telefax: +49-821-804-2131 Mail: mailto:rainer.koe...@ts.fujitsu.com Internet ts.fujtsu.com Company Details ts.fujitsu.com/imprint.html ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t 2/2] overlay/Makefile.am: Use lib path for i915_pciids.h
This is due to commit d308bb082d429eb25 (lib: Start weaning off defunct intel_chipset.h) which ``moved'' i915_pciids.h to lib/ from overlay/. Signed-off-by: Marius Vlad CC: Chris Wilson --- overlay/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/overlay/Makefile.am b/overlay/Makefile.am index c926557..b71e2c5 100644 --- a/overlay/Makefile.am +++ b/overlay/Makefile.am @@ -8,7 +8,7 @@ AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) \ LDADD = $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(OVERLAY_LIBS) intel_gpu_overlay_SOURCES = \ - i915_pciids.h \ + ../lib/i915_pciids.h \ chart.h \ chart.c \ config.c \ -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t 1/2] tests/gvt_basic: Test w/o sub-test requires simple_main
Signed-off-by: Marius Vlad CC: Chris Wilson --- tests/gvt_basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gvt_basic.c b/tests/gvt_basic.c index 9e17f29..056c472 100644 --- a/tests/gvt_basic.c +++ b/tests/gvt_basic.c @@ -26,7 +26,7 @@ IGT_TEST_DESCRIPTION("Basic sanity check of i915 Guest Virtualisation"); -igt_main +igt_simple_main { int fd = -1; -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t 1/2] tests/gvt_basic: Test w/o sub-test requires simple_main
On Fri, Jul 01, 2016 at 03:32:44PM +0300, Marius Vlad wrote: > Signed-off-by: Marius Vlad > CC: Chris Wilson > --- > tests/gvt_basic.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) This is a stub that I expect to be filled with subtests. I want to keep it easy for people to add tests. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t 2/2] overlay/Makefile.am: Use lib path for i915_pciids.h
On Fri, Jul 01, 2016 at 03:32:45PM +0300, Marius Vlad wrote: > This is due to commit d308bb082d429eb25 (lib: Start weaning off defunct > intel_chipset.h) which ``moved'' i915_pciids.h to lib/ from overlay/. > > Signed-off-by: Marius Vlad > CC: Chris Wilson The line can be dropped from sources, it will be packaged up with lib/ -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/bxt: Export pooled eu info to userspace
On 01/07/2016 12:56, Chris Wilson wrote: On Fri, Jul 01, 2016 at 11:43:02AM +0100, Arun Siluvery wrote: Pooled EU is a bxt only feature and kernel changes are already merged. This feature is not yet exposed to userspace as the support was not yet available. Beignet team expressed interest and added patches to use this. Since we now have a user and patches to use them, expose them from the kernel side as well. v2: fix compile error [1] https://lists.freedesktop.org/archives/beignet/2016-June/007698.html [2] https://lists.freedesktop.org/archives/beignet/2016-June/007699.html Cc: Winiarski, Michal Cc: Zou, Nanhai Cc: Yang, Rong R Cc: Tim Gore Cc: Jeff McGee Signed-off-by: Arun Siluvery Acked-by: Chris Wilson -Chris Thanks Chris. Daniel, Tvrtko, Could you please check this and merge? regards Arun ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] i915/guc: Add Kabylake GuC Loading
On 6/30/2016 5:37 PM, Rodrigo Vivi wrote: From: Peter Antoine This patch added the loading of the GuC for Kabylake. It loads a 9.14 firmware. Hello, in case you need a fresh r-b for v3: v2: Fix commit message v3: Fix major/minor var names to match -nightly. (Rodrigo) Cc: Christophe Prigent Signed-off-by: Peter Antoine Signed-off-by: Michel Thierry Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Reviewed-by: Michel Thierry (v3) --- drivers/gpu/drm/i915/intel_guc_loader.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 4f6311a..d80b617 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -65,6 +65,9 @@ MODULE_FIRMWARE(I915_SKL_GUC_UCODE); #define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin" MODULE_FIRMWARE(I915_BXT_GUC_UCODE); +#define I915_KBL_GUC_UCODE "i915/kbl_guc_ver9_14.bin" +MODULE_FIRMWARE(I915_KBL_GUC_UCODE); + /* User-friendly representation of an enum */ const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status) { @@ -698,6 +701,10 @@ void intel_guc_init(struct drm_device *dev) fw_path = I915_BXT_GUC_UCODE; guc_fw->guc_fw_major_wanted = 8; guc_fw->guc_fw_minor_wanted = 7; + } else if (IS_KABYLAKE(dev)) { + fw_path = I915_KBL_GUC_UCODE; + guc_fw->guc_fw_major_wanted = 9; + guc_fw->guc_fw_minor_wanted = 14; } else { fw_path = ""; /* unknown device */ } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t 1/2] tests/gvt_basic: Test w/o sub-test requires simple_main
Right, but bear in mind that it can't be released as is... On Fri, Jul 01, 2016 at 01:33:02PM +0100, Chris Wilson wrote: > On Fri, Jul 01, 2016 at 03:32:44PM +0300, Marius Vlad wrote: > > Signed-off-by: Marius Vlad > > CC: Chris Wilson > > --- > > tests/gvt_basic.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > This is a stub that I expect to be filled with subtests. I want to keep > it easy for people to add tests. > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre signature.asc Description: Digital signature ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t 2/2] overlay/Makefile.am: Use lib path for i915_pciids.h
On Fri, Jul 01, 2016 at 01:33:36PM +0100, Chris Wilson wrote: > On Fri, Jul 01, 2016 at 03:32:45PM +0300, Marius Vlad wrote: > > This is due to commit d308bb082d429eb25 (lib: Start weaning off defunct > > intel_chipset.h) which ``moved'' i915_pciids.h to lib/ from overlay/. > > > > Signed-off-by: Marius Vlad > > CC: Chris Wilson > > The line can be dropped from sources, it will be packaged up with lib/ Hmm indeed, can I push it removed? > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre signature.asc Description: Digital signature ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 6/6] drm/i915/huc: Add BXT HuC Loading Support
This patch adds the HuC Loading for the BXT. Version 1.7 of the HuC firmware. Signed-off-by: Peter Antoine --- drivers/gpu/drm/i915/intel_huc_loader.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_huc_loader.c b/drivers/gpu/drm/i915/intel_huc_loader.c index 98ef8df..23df639 100644 --- a/drivers/gpu/drm/i915/intel_huc_loader.c +++ b/drivers/gpu/drm/i915/intel_huc_loader.c @@ -49,6 +49,9 @@ #define I915_SKL_HUC_UCODE "i915/skl_huc_ver1_7.bin" MODULE_FIRMWARE(I915_SKL_HUC_UCODE); +#define I915_BXT_HUC_UCODE "i915/bxt_huc_ver1_7.bin" +MODULE_FIRMWARE(I915_BXT_HUC_UCODE); + /** * intel_huc_load_ucode() - DMA's the firmware * @dev: the drm device @@ -157,6 +160,10 @@ void intel_huc_init(struct drm_device *dev) fw_path = I915_SKL_HUC_UCODE; huc_fw->major_ver_wanted = 1; huc_fw->minor_ver_wanted = 7; + } else if (IS_BROXTON(dev_priv)) { + fw_path = I915_BXT_HUC_UCODE; + huc_fw->major_ver_wanted = 1; + huc_fw->minor_ver_wanted = 7; } if (fw_path == NULL) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 5/6] drm/i915/huc: Support HuC authentication
The HuC authentication is done by host2guc call. The HuC RSA keys are sent to GuC for authentication. v2: rebased on top of drm-intel-nightly. changed name format and upped version 1.7. Signed-off-by: Alex Dai Signed-off-by: Peter Antoine --- drivers/gpu/drm/i915/i915_guc_submission.c | 65 ++ drivers/gpu/drm/i915/intel_guc_fwif.h | 1 + drivers/gpu/drm/i915/intel_guc_loader.c| 2 + 3 files changed, 68 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2bfa86e..3bb8c25 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -25,6 +25,7 @@ #include #include "i915_drv.h" #include "intel_guc.h" +#include "intel_huc.h" /** * DOC: GuC-based command submission @@ -1076,3 +1077,67 @@ int intel_guc_resume(struct drm_device *dev) return host2guc_action(guc, data, ARRAY_SIZE(data)); } + +/** + * intel_huc_auth() - authenticate ucode + * @dev: the drm device + * + * Triggers a HuC fw authentication request to the GuC via host-2-guc + * interface. + */ +void intel_huc_auth(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + struct intel_huc *huc = &dev_priv->huc; + int ret; + u32 data[2]; + + /* Bypass the case where there is no HuC firmware */ + if (huc->huc_fw.fetch_status == UC_FIRMWARE_NONE || + huc->huc_fw.load_status == UC_FIRMWARE_NONE) + return; + + if (guc->guc_fw.load_status != UC_FIRMWARE_SUCCESS) { + DRM_ERROR("HuC: GuC fw wasn't loaded. Can't authenticate"); + return; + } + + if (huc->huc_fw.load_status != UC_FIRMWARE_SUCCESS) { + DRM_ERROR("HuC: fw wasn't loaded. Nothing to authenticate"); + return; + } + + ret = i915_gem_obj_ggtt_pin(huc->huc_fw.uc_fw_obj, 0, 0); + if (ret) { + DRM_ERROR("HuC: Pin failed"); + return; + } + + /* Invalidate GuC TLB to let GuC take the latest updates to GTT. 
*/ + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + + /* Specify auth action and where public signature is. It's stored +* at the beginning of the gem object, before the fw bits +*/ + data[0] = HOST2GUC_ACTION_AUTHENTICATE_HUC; + data[1] = i915_gem_obj_ggtt_offset(huc->huc_fw.uc_fw_obj) + + huc->huc_fw.rsa_offset; + + ret = host2guc_action(guc, data, ARRAY_SIZE(data)); + if (ret) { + DRM_ERROR("HuC: GuC did not ack Auth request\n"); + goto out; + } + + /* Check authentication status, it should be done by now */ + ret = wait_for_atomic( + (I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED) > 0, 50); + if (ret) { + DRM_ERROR("HuC: Authentication failed\n"); + goto out; + } + +out: + i915_gem_object_ggtt_unpin(huc->huc_fw.uc_fw_obj); +} diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index a69ee36..c5a6227 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -437,6 +437,7 @@ enum host2guc_action { HOST2GUC_ACTION_ENTER_S_STATE = 0x501, HOST2GUC_ACTION_EXIT_S_STATE = 0x502, HOST2GUC_ACTION_SLPC_REQUEST = 0x3003, + HOST2GUC_ACTION_AUTHENTICATE_HUC = 0x4000, HOST2GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index c7206d8..7d085fb 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -492,6 +492,8 @@ int intel_guc_setup(struct drm_device *dev) intel_uc_fw_status_repr(guc_fw->fetch_status), intel_uc_fw_status_repr(guc_fw->load_status)); + intel_huc_auth(dev); + if (i915.enable_guc_submission) { err = i915_guc_submission_enable(dev_priv); if (err) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/6] drm/i915/huc: Unified css_header struct for GuC and HuC
HuC firmware css header has almost exactly same definition as GuC firmware except for the sw_version. Also, add a new member fw_type into intel_uc_fw to indicate what kind of fw it is. So, the loader will pull right sw_version from header. v2: rebased on-top of drn-intel-nightly Signed-off-by: Alex Dai Signed-off-by: Peter Antoine --- drivers/gpu/drm/i915/intel_guc.h| 4 drivers/gpu/drm/i915/intel_guc_fwif.h | 16 ++--- drivers/gpu/drm/i915/intel_guc_loader.c | 40 ++--- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 02adcfc..ebf9c8d 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -97,6 +97,9 @@ enum intel_uc_fw_status { UC_FIRMWARE_SUCCESS }; +#define UC_FW_TYPE_GUC 0 +#define UC_FW_TYPE_HUC 1 + /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the GuC. @@ -114,6 +117,7 @@ struct intel_uc_fw { uint16_t major_ver_found; uint16_t minor_ver_found; + uint32_t fw_type; uint32_t header_size; uint32_t header_offset; uint32_t rsa_size; diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 944786d..a69ee36 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -154,7 +154,7 @@ * The GuC firmware layout looks like this: * * +---+ - * |guc_css_header | + * | uc_css_header | * | contains major/minor version | * +---+ * | uCode | @@ -180,9 +180,16 @@ * 3. Length info of each component can be found in header, in dwords. * 4. Modulus and exponent key are not required by driver. They may not appear * in fw. So driver will load a truncated firmware in this case. + * + * HuC firmware layout is same as GuC firmware. + * + * HuC firmware css header is different. However, the only difference is where + * the version information is saved. The uc_css_header is unified to support + * both. 
Driver should get HuC version from uc_css_header.huc_sw_version, while + * uc_css_header.guc_sw_version for GuC. */ -struct guc_css_header { +struct uc_css_header { uint32_t module_type; /* header_size includes all non-uCode bits, including css_header, rsa * key, modulus key and exponent data. */ @@ -213,7 +220,10 @@ struct guc_css_header { char username[8]; char buildnumber[12]; - uint32_t device_id; + union { + uint32_t device_id; + uint32_t huc_sw_version; + }; uint32_t guc_sw_version; uint32_t prod_preprod_fw; uint32_t reserved[12]; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 1424c42..3e8e17a 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -550,7 +550,7 @@ void intel_uc_fw_fetch(struct drm_device *dev, struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj; const struct firmware *fw; - struct guc_css_header *css; + struct uc_css_header *css; size_t size; int err; @@ -567,19 +567,19 @@ void intel_uc_fw_fetch(struct drm_device *dev, struct intel_uc_fw *uc_fw) uc_fw->uc_fw_path, fw); /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct guc_css_header)) { + if (fw->size < sizeof(struct uc_css_header)) { DRM_ERROR("Firmware header is missing\n"); goto fail; } - css = (struct guc_css_header *)fw->data; + css = (struct uc_css_header *)fw->data; /* Firmware bits always start from header */ uc_fw->header_offset = 0; uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - if (uc_fw->header_size != sizeof(struct guc_css_header)) { + if (uc_fw->header_size != sizeof(struct uc_css_header)) { DRM_ERROR("CSS header definition mismatch\n"); goto fail; } @@ -603,21 +603,35 @@ void intel_uc_fw_fetch(struct drm_device *dev, struct intel_uc_fw *uc_fw) goto fail; } - /* Header and uCode will be loaded to WOPCM. Size of the two. 
*/ - size = uc_fw->header_size + uc_fw->ucode_size; - if (size > guc_wopcm_size(dev->dev_private)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - /* * The GuC firmware image has the version number embedded at a well-known * offset within the firmw
[Intel-gfx] [PATCH v2 3/6] drm/i915/huc: Add HuC fw loading support
The HuC loading process is similar to GuC. The intel_uc_fw_fetch() is used for both cases. HuC loading needs to be before GuC loading. The WOPCM setting must be done early before loading any of them. v2: rebased on-top of drm-intel-nightly. removed if(HAS_GUC()) before the guc call. (D.Gordon) update huc_version number of format. Signed-off-by: Alex Dai Signed-off-by: Peter Antoine --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/i915_guc_reg.h | 3 + drivers/gpu/drm/i915/intel_guc.h| 1 + drivers/gpu/drm/i915/intel_guc_loader.c | 26 ++-- drivers/gpu/drm/i915/intel_huc.h| 44 ++ drivers/gpu/drm/i915/intel_huc_loader.c | 267 8 files changed, 336 insertions(+), 12 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_huc.h create mode 100644 drivers/gpu/drm/i915/intel_huc_loader.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a3d2b78..32af66e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -45,6 +45,7 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_guc_loader.o \ + intel_huc_loader.o \ i915_guc_submission.o # autogenerated null render state diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c580e24..ae5093a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -639,6 +639,7 @@ static int i915_load_modeset_init(struct drm_device *dev) * working irqs for e.g. gmbus and dp aux transfers. 
*/ intel_modeset_init(dev); + intel_huc_init(dev); intel_guc_init(dev); ret = i915_gem_init(dev); @@ -664,6 +665,7 @@ static int i915_load_modeset_init(struct drm_device *dev) cleanup_gem: i915_gem_fini(dev); cleanup_irq: + intel_huc_fini(dev); intel_guc_fini(dev); drm_irq_uninstall(dev); intel_teardown_gmbus(dev); @@ -1714,6 +1716,7 @@ void i915_driver_unload(struct drm_device *dev) /* Flush any outstanding unpin_work. */ flush_workqueue(dev_priv->wq); + intel_huc_fini(dev); intel_guc_fini(dev); i915_gem_fini(dev); intel_fbc_cleanup_cfb(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 485ab11..783934e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -55,6 +55,7 @@ #include "intel_bios.h" #include "intel_dpll_mgr.h" #include "intel_guc.h" +#include "intel_huc.h" #include "intel_lrc.h" #include "intel_ringbuffer.h" @@ -1754,6 +1755,7 @@ struct drm_i915_private { struct intel_gvt gvt; + struct intel_huc huc; struct intel_guc guc; struct intel_csr csr; @@ -2851,6 +2853,7 @@ struct drm_i915_cmd_table { #define HAS_GUC(dev) (IS_GEN9(dev) && !IS_KABYLAKE(dev)) #define HAS_GUC_UCODE(dev) (HAS_GUC(dev)) #define HAS_GUC_SCHED(dev) (HAS_GUC(dev)) +#define HAS_HUC_UCODE(dev) (HAS_GUC(dev)) #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ INTEL_INFO(dev)->gen >= 8) diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h index cf5a65b..51533f1 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -61,9 +61,12 @@ #define DMA_ADDRESS_SPACE_GTT (8 << 16) #define DMA_COPY_SIZE _MMIO(0xc310) #define DMA_CTRL _MMIO(0xc314) +#define HUC_UKERNEL(1<<9) #define UOS_MOVE (1<<4) #define START_DMA (1<<0) #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define HUC_LOADING_AGENT_VCR (0<<1) +#define HUC_LOADING_AGENT_GUC (1<<1) #define GUC_WOPCM_OFFSET_VALUE 0x8 /* 512KB */ #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) diff --git 
a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index ebf9c8d..c7b2745 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -160,6 +160,7 @@ extern const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status); extern int intel_guc_suspend(struct drm_device *dev); extern int intel_guc_resume(struct drm_device *dev); void intel_uc_fw_fetch(struct drm_device *dev, struct intel_uc_fw *uc_fw); +u32 guc_wopcm_size(struct drm_i915_private *dev_priv); /* i915_guc_submission.c */ int i915_guc_submission_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 3e8e17a..c7206d8 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@
[Intel-gfx] [PATCH v2 0/6] HuC Loading Patches
This patch series enables the HuC loading. These patches are a port of the patches that were created by Yu Dai (Alex) and have been ported to work with the new GuC patches. The series includes a patch to enable the HuC on BXT. This is a separate patch as the state of the BXT HuC firmware is still in flux, so the patch can be dropped without delaying the series. v2: rebased onto drm-intel-nightly. Also, patch 1 has been recreated as the upstream version of the function had changed significantly, making the old patch obsolete. Also, due to a code move i915_dma.c -> i915_drv.c some code was being applied to a deleted file. Also HuC file versions and formats changed. Feedback from D.Gordon and R.Vivi (Thanks). Peter Antoine (6): drm/i915/guc: Make the GuC fw loading helper functions general drm/i915/huc: Unified css_header struct for GuC and HuC drm/i915/huc: Add HuC fw loading support drm/i915/huc: Add debugfs for HuC loading status check drm/i915/huc: Support HuC authentication drm/i915/huc: Add BXT HuC Loading Support drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_debugfs.c| 44 - drivers/gpu/drm/i915/i915_drv.c| 3 + drivers/gpu/drm/i915/i915_drv.h| 3 + drivers/gpu/drm/i915/i915_guc_reg.h| 3 + drivers/gpu/drm/i915/i915_guc_submission.c | 69 +++- drivers/gpu/drm/i915/intel_guc.h | 46 ++--- drivers/gpu/drm/i915/intel_guc_fwif.h | 17 +- drivers/gpu/drm/i915/intel_guc_loader.c| 202 +++-- drivers/gpu/drm/i915/intel_huc.h | 44 + drivers/gpu/drm/i915/intel_huc_loader.c| 274 + 11 files changed, 583 insertions(+), 123 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_huc.h create mode 100644 drivers/gpu/drm/i915/intel_huc_loader.c -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/6] drm/i915/guc: Make the GuC fw loading helper functions general
Rename some of the GuC fw loading code to make them more general. We will utilise them for HuC loading as well. s/intel_guc_fw/intel_uc_fw/g s/GUC_FIRMWARE/UC_FIRMWARE/g Struct intel_guc_fw is renamed to intel_uc_fw. Prefix of tts members, such as 'guc' or 'guc_fw' either is renamed to 'uc' or removed for same purpose. v2: rebased on top of nightly. reapplied the search/replace as upstream code as changed. Signed-off-by: Alex Dai Signed-off-by: Peter Antoine --- drivers/gpu/drm/i915/i915_debugfs.c| 12 +-- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_guc.h | 39 drivers/gpu/drm/i915/intel_guc_loader.c| 142 ++--- 4 files changed, 99 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f664884..3883df5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2483,7 +2483,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; struct drm_i915_private *dev_priv = node->minor->dev->dev_private; - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw; u32 tmp, i; if (!HAS_GUC_UCODE(dev_priv)) @@ -2491,15 +2491,15 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "GuC firmware status:\n"); seq_printf(m, "\tpath: %s\n", - guc_fw->guc_fw_path); + guc_fw->uc_fw_path); seq_printf(m, "\tfetch: %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + intel_uc_fw_status_repr(guc_fw->fetch_status)); seq_printf(m, "\tload: %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + intel_uc_fw_status_repr(guc_fw->load_status)); seq_printf(m, "\tversion wanted: %d.%d\n", - guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted); seq_printf(m, "\tversion found: %d.%d\n", - guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found); 
+ guc_fw->major_ver_found, guc_fw->minor_ver_found); seq_printf(m, "\theader: offset is %d; size = %d\n", guc_fw->header_offset, guc_fw->header_size); seq_printf(m, "\tuCode: offset is %d; size = %d\n", diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 355b647..2bfa86e 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1038,7 +1038,7 @@ int intel_guc_suspend(struct drm_device *dev) struct i915_gem_context *ctx; u32 data[3]; - if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) + if (guc->guc_fw.load_status != UC_FIRMWARE_SUCCESS) return 0; ctx = dev_priv->kernel_context; @@ -1064,7 +1064,7 @@ int intel_guc_resume(struct drm_device *dev) struct i915_gem_context *ctx; u32 data[3]; - if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) + if (guc->guc_fw.load_status != UC_FIRMWARE_SUCCESS) return 0; ctx = dev_priv->kernel_context; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743..02adcfc 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -90,29 +90,29 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; -enum intel_guc_fw_status { - GUC_FIRMWARE_FAIL = -1, - GUC_FIRMWARE_NONE = 0, - GUC_FIRMWARE_PENDING, - GUC_FIRMWARE_SUCCESS +enum intel_uc_fw_status { + UC_FIRMWARE_FAIL = -1, + UC_FIRMWARE_NONE = 0, + UC_FIRMWARE_PENDING, + UC_FIRMWARE_SUCCESS }; /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the GuC. 
*/ -struct intel_guc_fw { - struct drm_device * guc_dev; - const char *guc_fw_path; - size_t guc_fw_size; - struct drm_i915_gem_object *guc_fw_obj; - enum intel_guc_fw_statusguc_fw_fetch_status; - enum intel_guc_fw_statusguc_fw_load_status; - - uint16_tguc_fw_major_wanted; - uint16_tguc_fw_minor_wanted; - uint16_tguc_fw_major_found; - uint16_tguc_fw_minor_found; +struct intel_uc_fw { + struct drm_device *uc_dev; + const char *uc_fw_path; + size_t uc_fw_size; + struct drm_i915_gem_object *uc_fw_obj; + enum intel_uc_fw_status fetch_status; + enum inte
[Intel-gfx] [PATCH v2 4/6] drm/i915/huc: Add debugfs for HuC loading status check
Add debugfs entry for HuC loading status check. v2: rebase on-top of drm-intel-nightly. Signed-off-by: Alex Dai Signed-off-by: Peter Antoine --- drivers/gpu/drm/i915/i915_debugfs.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3883df5..c1f92e2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2479,6 +2479,37 @@ static int i915_llc(struct seq_file *m, void *data) return 0; } +static int i915_huc_load_status_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_i915_private *dev_priv = node->minor->dev->dev_private; + struct intel_uc_fw *huc_fw = &dev_priv->huc.huc_fw; + + if (!HAS_HUC_UCODE(dev_priv->dev)) + return 0; + + seq_puts(m, "HuC firmware status:\n"); + seq_printf(m, "\tpath: %s\n", huc_fw->uc_fw_path); + seq_printf(m, "\tfetch: %s\n", + intel_uc_fw_status_repr(huc_fw->fetch_status)); + seq_printf(m, "\tload: %s\n", + intel_uc_fw_status_repr(huc_fw->load_status)); + seq_printf(m, "\tversion wanted: %d.%d\n", + huc_fw->major_ver_wanted, huc_fw->minor_ver_wanted); + seq_printf(m, "\tversion found: %d.%d\n", + huc_fw->major_ver_found, huc_fw->minor_ver_found); + seq_printf(m, "\theader: offset is %d; size = %d\n", + huc_fw->header_offset, huc_fw->header_size); + seq_printf(m, "\tuCode: offset is %d; size = %d\n", + huc_fw->ucode_offset, huc_fw->ucode_size); + seq_printf(m, "\tRSA: offset is %d; size = %d\n", + huc_fw->rsa_offset, huc_fw->rsa_size); + + seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); + + return 0; +} + static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -5433,6 +5464,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, + {"i915_huc_load_status", 
i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_hangcheck_info", i915_hangcheck_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t 2/2] overlay/Makefile.am: Use lib path for i915_pciids.h
On Fri, Jul 01, 2016 at 04:19:31PM +0300, Marius Vlad wrote: > On Fri, Jul 01, 2016 at 01:33:36PM +0100, Chris Wilson wrote: > > On Fri, Jul 01, 2016 at 03:32:45PM +0300, Marius Vlad wrote: > > > This is due to commit d308bb082d429eb25 (lib: Start weaning off defunct > > > intel_chipset.h) which ``moved'' i915_pciids.h to lib/ from overlay/. > > > > > > Signed-off-by: Marius Vlad > > > CC: Chris Wilson > > > > The line can be dropped from sources, it will be packaged up with lib/ > Hmm indeed, can I push it removed? Yes. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t 1/2] tests/gvt_basic: Test w/o sub-test requires simple_main
On Fri, Jul 01, 2016 at 04:18:46PM +0300, Marius Vlad wrote: > Right, but bear in mind that it can't be released as is... Why not? Which bit of infrastructure is broken? Just add an igt_subtest_f("placeholder") ; -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 1/3] drm/i915/gen9: Clean up MOCS table definitions
Use named struct initializers for clarity. Also fix the target cache definition to reflect its role in GEN9 onwards. On GEN8 a TC value of 0 meant ELLC but on GEN9+ it means the TC and LRU controls are taken from the PTE. No functional change, igt/gem_mocs_settings still passing after this change. v2: (Chris) - Add back the hexa literals for the entries. Add note that igt/gem_mocs_settings still passes. CC: Rong R Yang CC: Yakui Zhao CC: Chris Wilson Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_mocs.c | 88 +++ 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 3c1482b..d36e609 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -66,9 +66,10 @@ struct drm_i915_mocs_table { #define L3_WB 3 /* Target cache */ -#define ELLC 0 -#define LLC1 -#define LLC_ELLC 2 +#define LE_TC_PAGETABLE0 +#define LE_TC_LLC 1 +#define LE_TC_LLC_ELLC 2 +#define LE_TC_LLC_ELLC_ALT 3 /* * MOCS tables @@ -96,34 +97,67 @@ struct drm_i915_mocs_table { * end. 
*/ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - /* { 0x0009, 0x0010 } */ - { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) }, - /* { 0x0038, 0x0030 } */ - { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) }, - /* { 0x003b, 0x0030 } */ - { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) } + { /* 0x0009 */ + .control_value = LE_CACHEABILITY(LE_UC) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0010 */ + .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + }, + { + /* 0x0038 */ + .control_value = LE_CACHEABILITY(LE_PAGETABLE) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + /* 0x0030 */ + .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, + { + /* 0x003b */ + .control_value = LE_CACHEABILITY(LE_WB) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + /* 0x0030 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - /* { 0x0009, 0x0010 } */ - { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) }, - /* { 0x0038, 0x0030 } */ - { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) 
| L3_CACHEABILITY(L3_WB)) }, - /* { 0x003b, 0x0030 } */ - { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) } + { + /* 0x0009 */ + .control_value = LE_CACHEABILITY(LE_UC) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0010 */ + .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + }, + { + /* 0x0038 */ + .control_value = LE_CACHEABILITY(LE_PAGETABLE) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0030 */ + .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, + { + /* 0x003b */ + .control_value = LE_CACHEABILITY(LE_WB) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | +
[Intel-gfx] [PATCH v3 0/3] drm/i915/bxt: Fix performance due to bogus MOCS entry
This is v3 of [1] addressing Ville's and Chris' comments. On Daniel's request I also discussed about these changes with Rong R Yang from the Beignet and Yakui Zhao from the Libva team, they are CC'd. Rong, Yakui please add your Acked-by/Tested-by if you are ok with the changes. I suggest merging these patches for 4.7, via drm-intel-fixes. [1] https://lists.freedesktop.org/archives/intel-gfx/2016-April/094056.html CC: Rong R Yang CC: Yakui Zhao CC: Ville Syrjälä CC: Chris Wilson Imre Deak (3): drm/i915/gen9: Clean up MOCS table definitions drm/i915/bxt: Fix inadvertent CPU snooping due to incorrect MOCS config drm/i915: Give proper names to MOCS entries drivers/gpu/drm/i915/intel_mocs.c | 89 +++ include/uapi/drm/i915_drm.h | 24 +++ 2 files changed, 86 insertions(+), 27 deletions(-) -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 2/3] drm/i915/bxt: Fix inadvertent CPU snooping due to incorrect MOCS config
Setting a write-back cache policy in the MOCS entry definition also implies snooping, which has a considerable overhead. This is unexpected for a few reasons: - From user-space's point of view since it didn't want a coherent surface (it didn't set the buffer as such via the set caching IOCTL). - There is a separate MOCS entry field for snooping (which we never set). - This MOCS table is about caching in (e)LLC and there is no (e)LLC on BXT. There is a separate table for L3 cache control. Considering the above the current behavior of snooping looks like an unintentional side-effect of the WB setting. Changing it to be LLC-UC gets rid of the snooping without any ill-effects. For a coherent surface the application would use a separate MOCS entry at index 1 and call the set caching IOCTL to set up the PTE entries for the corresponding buffer to be snooped. In the future we could also add a new MOCS entry for coherent surfaces. This resulted in 70% improvement in synthetic texturing benchmarks. Kudos to Valtteri Rantala, Eero Tamminen and Michael T Frederick and Ville who helped to narrow the source of problem to the kernel and to the snooping behaviour in particular. With a follow-up change to adjust the 3rd entry value igt/gem_mocs_settings is passing after this change. v2: - Rebase on v2 of patch 1/2. v3: - Set the entry as LLC uncached instead of PTE-passthrough. This way we also keep snooping disabled, but we also make the cacheability/ coherency setting independent of the PTE which is managed by the kernel. 
(Chris) CC: Rong R Yang CC: Yakui Zhao CC: Valtteri Rantala CC: Eero Tamminen CC: Michael T Frederick CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_mocs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index d36e609..927825f 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -149,8 +149,8 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, { - /* 0x003b */ - .control_value = LE_CACHEABILITY(LE_WB) | + /* 0x0039 */ + .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0), -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries
The purpose for each MOCS entry isn't well defined atm. Defining these is important to remove any uncertainty about the use of these entries for example in terms of performance and GPU/CPU coherency. Suggested by Ville. CC: Rong R Yang CC: Yakui Zhao CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_mocs.c | 13 +++-- include/uapi/drm/i915_drm.h | 24 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f..86adc11 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. */ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x0009 */ + [I915_MOCS_UNCACHED] = { + /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_AUTO] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + 
[I915_MOCS_AUTO] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c17d63d..a5d116f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* +* Not cached anywhere, coherency between CPU and GPU accesses is +* guaranteed. +*/ + I915_MOCS_UNCACHED, + /* +* Cacheability and coherency controlled by the kernel automatically +* based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current +* usage of the surface (used for display scanout or not). +*/ + I915_MOCS_AUTO, + /* +* Cached in all GPU caches available on the platform. +* Coherency between CPU and GPU accesses to the surface is not +* guaranteed without extra synchronization. +*/ + I915_MOCS_CACHED, +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255/* table size 2k - maximum due to use -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] Runtime: set the sub slice according to kernel pooled EU configure.
On 30/06/2016 09:43, Song, Ruiling wrote: LGTM Ruiling Could you please let me know whether these patches are merged/yet to be merged? I have submitted kernel patch which is ready to be merged but we would like to know if userspace bits are merged or not? https://lists.freedesktop.org/archives/intel-gfx/2016-July/099822.html regards Arun -Original Message- From: Intel-gfx [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of Yang Rong Sent: Wednesday, June 15, 2016 4:20 PM To: beig...@lists.freedesktop.org; intel-gfx@lists.freedesktop.org; arun.siluv...@linux.intel.com Subject: [Intel-gfx] [PATCH] Runtime: set the sub slice according to kernel pooled EU configure. If BXT pooled EU enable, the 3*6 EUs is split into 2 pooled, so change the sub slice to 2. For min no. of eu in pool, only affect fused down 2*6 BXT devices, because beignet don't support these devices now, add assert only. This patch is based on kernel patch: https://patchwork.freedesktop.org/series/8200/ Thanks Arun. 
Signed-off-by: Yang Rong --- CMakeLists.txt | 12 src/CMakeLists.txt | 10 ++ src/intel/intel_driver.c | 15 +++ 3 files changed, 37 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fae3e88..af684ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,6 +153,18 @@ IF(DRM_INTEL_FOUND) ELSE(HAVE_DRM_INTEL_SUBSLICE_TOTAL) MESSAGE(STATUS "Disable subslice total query support") ENDIF(HAVE_DRM_INTEL_SUBSLICE_TOTAL) + CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_pooled_eu" "" HAVE_DRM_INTEL_POOLED_EU) + IF(HAVE_DRM_INTEL_POOLED_EU) +MESSAGE(STATUS "Enable pooled eu query support") + ELSE(HAVE_DRM_INTEL_POOLED_EU) +MESSAGE(STATUS "Disable pooled eu query support") + ENDIF(HAVE_DRM_INTEL_POOLED_EU) + CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_min_eu_in_pool" "" HAVE_DRM_INTEL_MIN_EU_IN_POOL) + IF(HAVE_DRM_INTEL_MIN_EU_IN_POOL) +MESSAGE(STATUS "Enable min eu in pool query support") + ELSE(HAVE_DRM_INTEL_MIN_EU_IN_POOL) +MESSAGE(STATUS "Disable min eu in pool query support") + ENDIF(HAVE_DRM_INTEL_MIN_EU_IN_POOL) ELSE(DRM_INTEL_FOUND) MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found") ENDIF(DRM_INTEL_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98f8423..a002865 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -138,6 +138,16 @@ if (HAVE_DRM_INTEL_SUBSLICE_TOTAL) SET(CMAKE_C_FLAGS "-DHAS_SUBSLICE_TOTAL ${CMAKE_C_FLAGS}") endif (HAVE_DRM_INTEL_SUBSLICE_TOTAL) +if (HAVE_DRM_INTEL_POOLED_EU) + SET(CMAKE_CXX_FLAGS "-DHAS_POOLED_EU ${CMAKE_CXX_FLAGS}") + SET(CMAKE_C_FLAGS "-DHAS_POOLED_EU ${CMAKE_C_FLAGS}") +endif (HAVE_DRM_INTEL_POOLED_EU) + +if (HAVE_DRM_INTEL_MIN_EU_IN_POOL) + SET(CMAKE_CXX_FLAGS "-DHAS_MIN_EU_IN_POOL ${CMAKE_CXX_FLAGS}") + SET(CMAKE_C_FLAGS "-DHAS_MIN_EU_IN_POOL ${CMAKE_C_FLAGS}") +endif (HAVE_DRM_INTEL_MIN_EU_IN_POOL) + set(GIT_SHA1 "git_sha1.h") add_custom_target(${GIT_SHA1} ALL COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh diff --git a/src/intel/intel_driver.c 
b/src/intel/intel_driver.c index 03d9d34..8f2373b 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -882,6 +882,21 @@ intel_update_device_info(cl_device_id device) #endif } #endif + +#ifdef HAS_POOLED_EU + /* BXT pooled eu, 3*6 to 2*9, like sub slice count is 2 */ + unsigned int has_pooled_eu = 0; + if(!drm_intel_get_pooled_eu(driver->fd, &has_pooled_eu) && has_pooled_eu) +device->sub_slice_count = 2; + +#ifdef HAS_MIN_EU_IN_POOL + unsigned int min_eu; + /* for fused down 2x6 devices, beignet don't support. */ + if (has_pooled_eu && !drm_intel_get_min_eu_in_pool(driver->fd, &min_eu)) { +assert(min_eu == 9); //don't support fuse down device. + } +#endif //HAS_MIN_EU_IN_POOL +#endif //HAS_POOLED_EU //We should get the device memory dynamically, but the //mapablce mem size usage is unknown. Just ignore it. size_t total_mem,map_mem; -- 2.1.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries
On Fri, Jul 01, 2016 at 04:40:06PM +0300, Imre Deak wrote: > The purpose for each MOCS entry isn't well defined atm. Defining these > is important to remove any uncertainty about the use of these entries > for example in terms of performance and GPU/CPU coherency. > > Suggested by Ville. > > CC: Rong R Yang > CC: Yakui Zhao > CC: Ville Syrjälä > CC: Chris Wilson > Signed-off-by: Imre Deak > --- > drivers/gpu/drm/i915/intel_mocs.c | 13 +++-- > include/uapi/drm/i915_drm.h | 24 > 2 files changed, 31 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c > b/drivers/gpu/drm/i915/intel_mocs.c > index 927825f..86adc11 100644 > --- a/drivers/gpu/drm/i915/intel_mocs.c > +++ b/drivers/gpu/drm/i915/intel_mocs.c > @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { > * end. > */ > static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > - { /* 0x0009 */ > + [I915_MOCS_UNCACHED] = { > + /* 0x0009 */ > .control_value = LE_CACHEABILITY(LE_UC) | > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry > skylake_mocs_table[] = { > /* 0x0010 */ > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > - { > + [I915_MOCS_AUTO] = { > /* 0x0038 */ > .control_value = LE_CACHEABILITY(LE_PAGETABLE) | > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry > skylake_mocs_table[] = { > /* 0x0030 */ > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > - { > + [I915_MOCS_CACHED] = { > /* 0x003b */ > .control_value = LE_CACHEABILITY(LE_WB) | > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry > skylake_mocs_table[] = { > > /* NOTE: the LE_TGT_CACHE is not used on Broxton */ > static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > - { > + [I915_MOCS_UNCACHED] = { > /* 0x0009 */ > .control_value = LE_CACHEABILITY(LE_UC) | > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > 
@@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry > broxton_mocs_table[] = { > /* 0x0010 */ > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > - { > + [I915_MOCS_AUTO] = { > /* 0x0038 */ > .control_value = LE_CACHEABILITY(LE_PAGETABLE) | > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry > broxton_mocs_table[] = { > /* 0x0030 */ > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > - { > + [I915_MOCS_CACHED] = { > /* 0x0039 */ > .control_value = LE_CACHEABILITY(LE_UC) | > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index c17d63d..a5d116f 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -62,6 +62,30 @@ extern "C" { > #define I915_ERROR_UEVENT"ERROR" > #define I915_RESET_UEVENT"RESET" > > +/* > + * MOCS indexes used for GPU surfaces, defining the cacheability of the > + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. > + */ > +enum i915_mocs_table_index { > + /* > + * Not cached anywhere, coherency between CPU and GPU accesses is > + * guaranteed. > + */ > + I915_MOCS_UNCACHED, > + /* > + * Cacheability and coherency controlled by the kernel automatically > + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current > + * usage of the surface (used for display scanout or not). > + */ > + I915_MOCS_AUTO, So PTE. > + /* > + * Cached in all GPU caches available on the platform. > + * Coherency between CPU and GPU accesses to the surface is not > + * guaranteed without extra synchronization. > + */ > + I915_MOCS_CACHED, So pretty useless for its current usage then. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 0/2] drm/i915/opregion: proper handling of DIDL and CADL
On Fri, 01 Jul 2016, Rainer Koenig wrote: > Found a problem: After screensaver kicked in and display was turned off > the brightness keys stop working. > > Problem can be reproduced like that: > > 1. Boot laptop > 2. Test brightness keys, they are working > 3. open Terminal and issue "xset -display :0 dpms force off" > 4. the screen goes blank (like after the screensaver timeout) > 5. push a key to bring the screen back > 6. test brightness keys again, now they don't work > > If the system is sent to suspend and woken up everything is fine again. > > Behaviour happens on the 4.7.0-rc5 kernel from the opregion-didl-v4 branch. > Before I compiled the 4.7.0-rc4 from the same git repository. On this > (v3) everything still works after the screen was blanked. Maarten, I think the difference is between where and when the calls to cadl update are made. BR, Jani. > > Best regards > Rainer > > Am 30.06.2016 um 11:19 schrieb Rainer Koenig: >> Am 29.06.2016 um 17:36 schrieb Jani Nikula: >>> This is v4 of [1]. The first three have already been pushed to >>> drm-intel-next-queued. The only change here is the atomic commit. >>> >>> Review and testing would be much appreciated to move this forward. For >>> testing, I've pushed this to opregion-didl-v4 branch of my repo at [2]. >>> >> Tested on a Fujitsu LIFEBOOK E736: Brightness keys are working now. >> >> Best regards >> Rainer >> -- Jani Nikula, Intel Open Source Technology Center ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/bxt: Export pooled eu info to userspace
On 01/07/16 13:45, Arun Siluvery wrote: On 01/07/2016 12:56, Chris Wilson wrote: On Fri, Jul 01, 2016 at 11:43:02AM +0100, Arun Siluvery wrote: Pooled EU is a bxt only feature and kernel changes are already merged. This feature is not yet exposed to userspace as the support was not yet available. Beignet team expressed interest and added patches to use this. Since we now have a user and patches to use them, expose them from the kernel side as well. v2: fix compile error [1] https://lists.freedesktop.org/archives/beignet/2016-June/007698.html [2] https://lists.freedesktop.org/archives/beignet/2016-June/007699.html Cc: Winiarski, Michal Cc: Zou, Nanhai Cc: Yang, Rong R Cc: Tim Gore Cc: Jeff McGee Signed-off-by: Arun Siluvery Acked-by: Chris Wilson -Chris Thanks Chris. Daniel, Tvrtko, Could you please check this and merge? Merged to dinq with Jani's ack. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries
On pe, 2016-07-01 at 14:49 +0100, Chris Wilson wrote: > On Fri, Jul 01, 2016 at 04:40:06PM +0300, Imre Deak wrote: > > The purpose for each MOCS entry isn't well defined atm. Defining these > > is important to remove any uncertainty about the use of these entries > > for example in terms of performance and GPU/CPU coherency. > > > > Suggested by Ville. > > > > CC: Rong R Yang > > CC: Yakui Zhao > > CC: Ville Syrjälä > > CC: Chris Wilson > > Signed-off-by: Imre Deak > > --- > > drivers/gpu/drm/i915/intel_mocs.c | 13 +++-- > > include/uapi/drm/i915_drm.h | 24 > > 2 files changed, 31 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c > > b/drivers/gpu/drm/i915/intel_mocs.c > > index 927825f..86adc11 100644 > > --- a/drivers/gpu/drm/i915/intel_mocs.c > > +++ b/drivers/gpu/drm/i915/intel_mocs.c > > @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { > > * end. > > */ > > static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > > - { /* 0x0009 */ > > + [I915_MOCS_UNCACHED] = { > > + /* 0x0009 */ > > .control_value = LE_CACHEABILITY(LE_UC) | > > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > > @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry > > skylake_mocs_table[] = { > > /* 0x0010 */ > > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > > }, > > - { > > + [I915_MOCS_AUTO] = { > > /* 0x0038 */ > > .control_value = LE_CACHEABILITY(LE_PAGETABLE) | > > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > > @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry > > skylake_mocs_table[] = { > > /* 0x0030 */ > > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > > }, > > - { > > + [I915_MOCS_CACHED] = { > > /* 0x003b */ > > .control_value = LE_CACHEABILITY(LE_WB) | > > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > > @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry > > skylake_mocs_table[] = { > > > > /* NOTE: the LE_TGT_CACHE is not used on Broxton */ > > static const struct 
drm_i915_mocs_entry broxton_mocs_table[] = { > > - { > > + [I915_MOCS_UNCACHED] = { > > /* 0x0009 */ > > .control_value = LE_CACHEABILITY(LE_UC) | > > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > > @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry > > broxton_mocs_table[] = { > > /* 0x0010 */ > > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > > }, > > - { > > + [I915_MOCS_AUTO] = { > > /* 0x0038 */ > > .control_value = LE_CACHEABILITY(LE_PAGETABLE) | > > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > > @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry > > broxton_mocs_table[] = { > > /* 0x0030 */ > > .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > > }, > > - { > > + [I915_MOCS_CACHED] = { > > /* 0x0039 */ > > .control_value = LE_CACHEABILITY(LE_UC) | > > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > > index c17d63d..a5d116f 100644 > > --- a/include/uapi/drm/i915_drm.h > > +++ b/include/uapi/drm/i915_drm.h > > @@ -62,6 +62,30 @@ extern "C" { > > #define I915_ERROR_UEVENT "ERROR" > > #define I915_RESET_UEVENT "RESET" > > > > +/* > > + * MOCS indexes used for GPU surfaces, defining the cacheability of the > > + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. > > + */ > > +enum i915_mocs_table_index { > > + /* > > + * Not cached anywhere, coherency between CPU and GPU accesses is > > + * guaranteed. > > + */ > > + I915_MOCS_UNCACHED, > > + /* > > + * Cacheability and coherency controlled by the kernel automatically > > + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current > > + * usage of the surface (used for display scanout or not). > > + */ > > + I915_MOCS_AUTO, > > So PTE. Can change it. > > + /* > > + * Cached in all GPU caches available on the platform. > > + * Coherency between CPU and GPU accesses to the surface is not > > + * guaranteed without extra synchronization. 
> > + */ > > + I915_MOCS_CACHED, > > So pretty useless for its current usage then. This is how it's used in Mesa atm where there is no need for coherency. Beignet and Libva don't use this entry atm. --Imre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Ro.CI.BAT: warning for HuC Loading Patches (rev2)
== Series Details == Series: HuC Loading Patches (rev2) URL : https://patchwork.freedesktop.org/series/9011/ State : warning == Summary == Series 9011v2 HuC Loading Patches http://patchwork.freedesktop.org/api/1.0/series/9011/revisions/2/mbox Test drv_hangman: Subgroup error-state-basic: pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Test drv_module_reload_basic: pass -> DMESG-WARN (ro-byt-n2820) pass -> DMESG-WARN (ro-bdw-i7-5600u) pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Test gem_exec_flush: Subgroup basic-batch-kernel-default-cmd: fail -> PASS (ro-byt-n2820) Test gem_exec_suspend: Subgroup basic-s3: pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Test gem_ringfill: Subgroup basic-default-hang: pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Test kms_pipe_crc_basic: Subgroup hang-read-crc-pipe-a: pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Subgroup hang-read-crc-pipe-b: pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Subgroup hang-read-crc-pipe-c: pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Subgroup suspend-read-crc-pipe-a: dmesg-warn -> SKIP (ro-bdw-i5-5250u) pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Subgroup suspend-read-crc-pipe-b: pass -> SKIP (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) Subgroup suspend-read-crc-pipe-c: pass -> DMESG-WARN (fi-skl-i5-6260u) pass -> DMESG-WARN (fi-skl-i7-6700k) fi-hsw-i7-4770k total:229 pass:196 dwarn:0 dfail:0 fail:0 skip:33 fi-kbl-qkkr total:229 pass:161 dwarn:28 dfail:1 fail:0 skip:39 fi-skl-i5-6260u total:229 pass:194 dwarn:9 dfail:0 fail:0 skip:26 fi-skl-i7-6700k total:229 pass:180 dwarn:10 dfail:0 fail:0 skip:39 fi-snb-i7-2600 total:229 pass:176 dwarn:0 dfail:0 fail:0 skip:53 ro-bdw-i5-5250u total:229 pass:204 dwarn:2 dfail:1 fail:0 skip:22 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 
dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:189 dwarn:1 dfail:1 fail:0 skip:38 ro-byt-n2820 total:229 pass:180 dwarn:1 dfail:1 fail:2 skip:45 ro-hsw-i3-4010u total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 ro-bsw-n3050 failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1360/ 8f8910b drm-intel-nightly: 2016y-07m-01d-11h-59m-32s UTC integration manifest a3e90b6 drm/i915/huc: Add BXT HuC Loading Support 3c8f963 drm/i915/huc: Support HuC authentication b889fbd drm/i915/huc: Add debugfs for HuC loading status check cf95443 drm/i915/huc: Add HuC fw loading support a9dada1 drm/i915/huc: Unified css_header struct for GuC and HuC f0188e1 drm/i915/guc: Make the GuC fw loading helper functions general ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 08/20] drm/i915: Use HWS for seqno tracking everywhere
On 01/07/16 12:22, Chris Wilson wrote: By using the same address for storing the HWS on every platform, we can remove the platform specific vfuncs and reduce the get-seqno routine to a single read of a cached memory location. v2: Fix semaphore_passed() to look at the signaling engine (not the waiter's) Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c| 2 +- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_trace.h| 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 26 + drivers/gpu/drm/i915/intel_ringbuffer.c | 65 +--- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++-- 9 files changed, 34 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 33e5540e7229..586cd8c65146 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -662,7 +662,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) engine->name, i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, - engine->get_seqno(engine), + intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); @@ -792,7 +792,7 @@ static void i915_ring_seqno_info(struct seq_file *m, struct rb_node *rb; seq_printf(m, "Current sequence (%s): %x\n", - engine->name, engine->get_seqno(engine)); + engine->name, intel_engine_get_seqno(engine)); seq_printf(m, "Current user interrupts (%s): %x\n", engine->name, READ_ONCE(engine->user_interrupts)); @@ -1420,7 +1420,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) for_each_engine_id(engine, dev_priv, id) { acthd[id] = intel_ring_get_active_head(engine); - seqno[id] = engine->get_seqno(engine); + seqno[id] = intel_engine_get_seqno(engine); } i915_get_extra_instdone(dev_priv, instdone); diff --git 
a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0ea69c5ecc8b..5a1e8e056ee5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3289,13 +3289,13 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - return i915_seqno_passed(req->engine->get_seqno(req->engine), + return i915_seqno_passed(intel_engine_get_seqno(req->engine), req->previous_seqno); } static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - return i915_seqno_passed(req->engine->get_seqno(req->engine), + return i915_seqno_passed(intel_engine_get_seqno(req->engine), req->seqno); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fcc7606fc008..95f8d4d2d6c9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -984,7 +984,7 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, ering->waiting = intel_engine_has_waiter(engine); ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ering->acthd = intel_ring_get_active_head(engine); - ering->seqno = engine->get_seqno(engine); + ering->seqno = intel_engine_get_seqno(engine); ering->last_seqno = engine->last_submitted_seqno; ering->start = I915_READ_START(engine); ering->head = I915_READ_HEAD(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a11ab00cdee0..7c379afcff2f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2952,7 +2952,7 @@ static int semaphore_passed(struct intel_engine_cs *engine) if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) return -1; - if (i915_seqno_passed(signaller->get_seqno(signaller), seqno)) + if (i915_seqno_passed(intel_engine_get_seqno(signaller), seqno)) return 1; /* cursory check for an unkickable deadlock */ @@ -3140,7 +3140,7 @@ static void 
i915_hangcheck_elapsed(struct work_struct *work) engine->irq_seqno_barrier(engine); acthd = intel_ring_get_active_head(engine); - seqno = engine->get
Re: [Intel-gfx] [PATCH v2 1/4] drm: Helper for lspcon in drm_dp_dual_mode
On Thu, Jun 30, 2016 at 10:58 PM, Sharma, Shashank wrote: > Thanks for the review Rodrigo. My comments inline. > > Regards > Shashank > > > On 7/1/2016 3:46 AM, Rodrigo Vivi wrote: >> >> On Tue, Jun 21, 2016 at 8:00 AM, Shashank Sharma >> wrote: >>> >>> This patch adds lspcon support in dp_dual_mode helper. >>> lspcon is essentially a dp->hdmi dongle with dual personality. >>> >>> LS mode: It works as a passive dongle, by level shifting DP++ >>> signals to HDMI signals, in LS mode. >>> PCON mode: It works as a protocol converter active dongle >>> in pcon mode, by converting DP++ outputs to HDMI 2.0 outputs. >>> >>> This patch adds support for lspcon detection and mode set >>> switch operations, as a dp dual mode dongle. >>> >>> v2: Addressed review comments from Ville >>> - add adaptor id for lspcon devices (0x08), use it to identify lspcon >>> - change function names >>>old: drm_lspcon_get_current_mode/drm_lspcon_change_mode >>>new: drm_lspcon_get_mode/drm_lspcon_set_mode >>> - change drm_lspcon_get_mode type to int, to match >>>drm_dp_dual_mode_get_tmds_output >>> - change 'err' to 'ret' to match the rest of the functions >>> - remove pointless typecasting during call to dual_mode_read >>> - fix the bug while setting value of data, while writing lspcon mode >>> - fix indentation >>> - change mdelay(10) -> msleep(10) >>> - return ETIMEDOUT instead of EFAULT, when lspcon mode change times out >>> - Add an empty line to separate std regs macros and lspcon regs macros >>>Indent bit definition >>> >>> Signed-off-by: Shashank Sharma >>> --- >>> drivers/gpu/drm/drm_dp_dual_mode_helper.c | 103 >>> ++ >>> include/drm/drm_dp_dual_mode_helper.h | 25 >>> 2 files changed, 128 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c >>> b/drivers/gpu/drm/drm_dp_dual_mode_helper.c >>> index a7b2a75..404e715 100644 >>> --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c >>> +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c >>> @@ -148,6 +148,14 @@ static bool 
is_type2_adaptor(uint8_t adaptor_id) >>>DP_DUAL_MODE_REV_TYPE2); >>> } >>> >>> +bool is_lspcon_adaptor(const char hdmi_id[DP_DUAL_MODE_HDMI_ID_LEN], >>> + const uint8_t adaptor_id) >>> +{ >>> + return is_hdmi_adaptor(hdmi_id) && >>> + (adaptor_id == (DP_DUAL_MODE_REV_TYPE2 | >> >> >> DP_DUAL_MODE_REV_TYPE2 = 0 so useless here and confusing. > > This was a review comment from Ville, on the last patch. I think he > suggested for the better readability of the code. >> >> >>> + DP_DUAL_MODE_TYPE_TYPE2 | >>> DP_DUAL_MODE_TYPE_LSPCON)); >> >> >> Also this is confusing and took me a while to uderstand that LSPCON is >> the type2 with DPCD > > I know, due to LSPCON's dual personality, its complicated to understand. I > tried to cover some theory, in the cover letter, can add some here too: > LSPCON is a dp->hdmi adapter which can operate in two modes > Passive dongle mode / LS mode: in this mode, it acts as type2 dp dual mode > adapter (no DPCD readable here) > Active mode / PCON mode: in this mode, it acts as active DP++->HDMI2.0 > protocol convertor dongle, and allows DPCD read/write like a DP++ display. >> >> >> so my suggestion is to define DP_DUAL_MODE_TYPE_HAS_DPCD (1<<3) with a >> comment this is defined by LSPCON docs since this is not part of VESA >> DP Dual Mode that only defines the Type2 = 0xA0. >> >> so you could use >> TYPE2 | HAS_DPCD = LSPCON. >> > Sure, got it. Will change it like this. 
> >>> +} >>> + >>> /** >>>* drm_dp_dual_mode_detect - Identify the DP dual mode adaptor >>>* @adapter: I2C adapter for the DDC bus >>> @@ -203,6 +211,8 @@ enum drm_dp_dual_mode_type >>> drm_dp_dual_mode_detect(struct i2c_adapter *adapter) >>> ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID, >>> &adaptor_id, sizeof(adaptor_id)); >>> if (ret == 0) { >>> + if (is_lspcon_adaptor(hdmi_id, adaptor_id)) >>> + return DRM_DP_DUAL_MODE_LSPCON; >>> if (is_type2_adaptor(adaptor_id)) { >>> if (is_hdmi_adaptor(hdmi_id)) >>> return DRM_DP_DUAL_MODE_TYPE2_HDMI; >>> @@ -364,3 +374,96 @@ const char *drm_dp_get_dual_mode_type_name(enum >>> drm_dp_dual_mode_type type) >>> } >>> } >>> EXPORT_SYMBOL(drm_dp_get_dual_mode_type_name); >>> + >>> +/** >>> + * drm_lspcon_get_current_mode: Get LSPCON's current mode of operation >>> by >>> + * by reading offset (0x80, 0x41) >>> + * @i2c_adapter: I2C-over-aux adapter >>> + * @current_mode: out vaiable, current lspcon mode of operation >>> + * >>> + * Returns: >>> + * 0 on success, sets the current_mode value to appropriate mode >>> + * -error on failure >>> + */ >>> +int drm_lspcon_get_mode(struct i2c_adapter *adapter, >>> + enum drm_lspcon_mode *current_mode) >>> +{ >>> + u8 data; >>
Re: [Intel-gfx] [PATCH 08/20] drm/i915: Use HWS for seqno tracking everywhere
On Fri, Jul 01, 2016 at 03:09:16PM +0100, Tvrtko Ursulin wrote: > Looks OK if Gen5 is happy about it. Happier than it has been for years. Still trying to beat some odd coherency issues that upset igt (not introduced by these patches I hasten to add), but we may just about get it working in time for the last power supply to die. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 12/20] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk)
On 01/07/16 12:22, Chris Wilson wrote: On Ironlake, there is no command nor register to ensure that the write from a MI_STORE command is completed (and coherent on the CPU) before the command parser continues. This means that the ordering between the seqno Command *streamer* I think. (More instances below.) write and the subsequent user interrupt is undefined (like gen6+). So to ensure that the seqno write is completed after the final user interrupt we need to delay the read sufficiently to allow the write to complete. This delay is undefined by the bspec, and empirically requires 75us even though a register read combined with a clflush is less than 500ns. Hence, the delay is due to an on-chip buffer rather than the latency of the write to memory. Note that the render ring controls this by filling the PIPE_CONTROL fifo with stalling commands that force the earliest pipe-control with the seqno to be completed before the command parser continues. Given that we need a barrier operation for BSD, we may as well forgo the extra per-batch latency by using a common per-interrupt barrier. Studying the impact of adding the usleep shows that, for both sequences of and individual synchronous no-op batches, it is negligible for the media engine (where the write now is unordered with the interrupt). Converting the render engine over from the current glut of pipe-controls over to the per-interrupt delays speeds up both the sequential and individual synchronous no-ops by 20% and 60%, respectively. This speed up holds even when looking at the throughput of small copies (4KiB->4MiB), both serial and synchronous, by about 20%. This is because despite adding a significant delay to the interrupt, in all likelihood we will see the seqno write without having to apply the barrier (only in the rare corner cases where the write is delayed on the last required is the delay necessary). 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94307 Testcase: igt/gem_sync #ilk Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 10 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 86 - 2 files changed, 23 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7c379afcff2f..be7f0b9b27e0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1264,8 +1264,7 @@ static void ivybridge_parity_error_irq_handler(struct drm_i915_private *dev_priv static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - if (gt_iir & - (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) + if (gt_iir & GT_RENDER_USER_INTERRUPT) notify_ring(&dev_priv->engine[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) notify_ring(&dev_priv->engine[VCS]); @@ -1274,9 +1273,7 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - - if (gt_iir & - (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) + if (gt_iir & GT_RENDER_USER_INTERRUPT) notify_ring(&dev_priv->engine[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) notify_ring(&dev_priv->engine[VCS]); @@ -3601,8 +3598,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) gt_irqs |= GT_RENDER_USER_INTERRUPT; if (IS_GEN5(dev)) { - gt_irqs |= GT_RENDER_PIPECTL_NOTIFY_INTERRUPT | - ILK_BSD_USER_INTERRUPT; + gt_irqs |= ILK_BSD_USER_INTERRUPT; } else { gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f89b1797b465..d919e72f1328 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1593,67 +1593,22 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, return 0; } -#define PIPE_CONTROL_FLUSH(ring__, addr__) \ -do { \ - intel_ring_emit(ring__, 
GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |\ -PIPE_CONTROL_DEPTH_STALL); \ - intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \ - intel_ring_emit(ring__, 0); \ - intel_ring_emit(ring__, 0); \ -} while (0) - -static int -pc_render_add_request(struct drm_i915_gem_request *req) +static void +gen5_seqno_barrier(struct intel_engine_cs *ring) { - struct intel_engine_cs *engine = req->engine; - u32 addr =
[Intel-gfx] [PATCH v4 3/3] drm/i915: Give proper names to MOCS entries
The purpose for each MOCS entry isn't well defined atm. Defining these is important to remove any uncertainty about the use of these entries for example in terms of performance and GPU/CPU coherency. Suggested by Ville. v4: - Rename I915_MOCS_AUTO to I915_MOCS_PTE. (Chris) CC: Rong R Yang CC: Yakui Zhao CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_mocs.c | 13 +++-- include/uapi/drm/i915_drm.h | 24 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f..2280c32 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. */ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x0009 */ + [I915_MOCS_UNCACHED] = { + /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | 
L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c17d63d..e2a6969 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* +* Not cached anywhere, coherency between CPU and GPU accesses is +* guaranteed. +*/ + I915_MOCS_UNCACHED, + /* +* Cacheability and coherency controlled by the kernel automatically +* based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current +* usage of the surface (used for display scanout or not). +*/ + I915_MOCS_PTE, + /* +* Cached in all GPU caches available on the platform. +* Coherency between CPU and GPU accesses to the surface is not +* guaranteed without extra synchronization. +*/ + I915_MOCS_CACHED, +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255/* table size 2k - maximum due to use -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 12/20] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk)
On Fri, Jul 01, 2016 at 03:27:40PM +0100, Tvrtko Ursulin wrote: > > On 01/07/16 12:22, Chris Wilson wrote: > >On Ironlake, there is no command nor register to ensure that the write > >from a MI_STORE command is completed (and coherent on the CPU) before the > >command parser continues. This means that the ordering between the seqno > > Command *streamer* I think. (More instances below.) Yeah, probably better to avoid confusing people into thinking about our own command parser. I just had in mind that the execution is separate from the streaming (and fetching is yet another phase). > >@@ -3087,9 +3040,10 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) > > } else { > > engine->mmio_base = BSD_RING_BASE; > > engine->flush = bsd_ring_flush; > >-if (IS_GEN5(dev_priv)) > >+if (IS_GEN5(dev_priv)) { > > engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; > >-else > >+engine->irq_seqno_barrier = gen5_seqno_barrier; > > This is already set in common setup AFAICS. Yes, an oversight after sending the tidying patch earlier. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 18/20] drm/i915: Move the get/put irq locking into the caller
On 01/07/16 12:22, Chris Wilson wrote: With only a single callsite for intel_engine_cs->irq_get and ->irq_put, we can reduce the code size by moving the common preamble into the caller, and we can also eliminate the reference counting. For completeness, as we are no longer doing reference counting on irq, rename the get/put vfunctions to enable/disable respectively and are able to review the use of posting reads. We only require the serialisation with hardware when enabling the interrupt (i.e. so we cannot miss an interrupt by going to sleep before the hardware truly enables it). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 8 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 34 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 237 +-- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +- 5 files changed, 92 insertions(+), 202 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7724bae27bcf..be25b7bdacfe 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -259,12 +259,12 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, dev_priv->gt_irq_mask &= ~interrupt_mask; dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask); I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); } void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) { ilk_update_gt_irq(dev_priv, mask, mask); + POSTING_READ_FW(GTIMR); } void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) @@ -2819,9 +2819,9 @@ ring_idle(struct intel_engine_cs *engine, u32 seqno) } static bool -ipehr_is_semaphore_wait(struct drm_i915_private *dev_priv, u32 ipehr) +ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) { - if (INTEL_GEN(dev_priv) >= 8) { + if (INTEL_GEN(engine->i915) >= 8) { return (ipehr >> 23) == 0x1c; } else { ipehr &= ~MI_SEMAPHORE_SYNC_MASK; @@ -2892,7 +2892,7 @@ semaphore_waits_for(struct 
intel_engine_cs *engine, u32 *seqno) return NULL; ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - if (!ipehr_is_semaphore_wait(engine->i915, ipehr)) + if (!ipehr_is_semaphore_wait(engine, ipehr)) return NULL; /* diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 3b8313b87ce4..28bc72b601b8 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -50,12 +50,18 @@ static void irq_enable(struct intel_engine_cs *engine) * just in case. */ engine->irq_posted = true; - WARN_ON(!engine->irq_get(engine)); + + spin_lock_irq(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock_irq(&engine->i915->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) { - engine->irq_put(engine); + spin_lock_irq(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock_irq(&engine->i915->irq_lock); + engine->irq_posted = false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f1a01137334c..380175149916 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1617,36 +1617,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return 0; } -static bool gen8_logical_ring_get_irq(struct intel_engine_cs *engine) +static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return false; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (engine->irq_refcount++ == 0) { - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); - POSTING_READ(RING_IMR(engine->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - return true; + I915_WRITE_IMR(engine, + ~(engine->irq_enable_mask | engine->irq_keep_mask)); + POSTING_READ_FW(RING_IMR(engine->mmio_base)); } -static void 
gen8_logical_ring_put_irq(struct intel_engine_cs *engine) +static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned long flags; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--engine->irq_refcount == 0) { - I915_WRITE_IMR(engine, ~engine->irq_keep_mask); - POSTING_READ(RING_IMR(engine->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, fla
[Intel-gfx] [PATCH] drm/i915/bxt: Remove the preliminary_hw_support flag
Broxton is now part of CI which doesn't indicate any major problems so enable the driver by default. Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index a7f8f4f..949c016 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -331,7 +331,6 @@ static const struct intel_device_info intel_skylake_gt3_info = { }; static const struct intel_device_info intel_broxton_info = { - .is_preliminary = 1, .is_broxton = 1, .gen = 9, .need_gfx_hws = 1, .has_hotplug = 1, -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/bxt: Remove the preliminary_hw_support flag
Reviewed-by: Rodrigo Vivi On Fri, Jul 1, 2016 at 7:40 AM, Imre Deak wrote: > Broxton is now part of CI which doesn't indicate any major problems so > enable the driver by default. > > Signed-off-by: Imre Deak > --- > drivers/gpu/drm/i915/i915_pci.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index a7f8f4f..949c016 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -331,7 +331,6 @@ static const struct intel_device_info > intel_skylake_gt3_info = { > }; > > static const struct intel_device_info intel_broxton_info = { > - .is_preliminary = 1, > .is_broxton = 1, > .gen = 9, > .need_gfx_hws = 1, .has_hotplug = 1, > -- > 2.5.0 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Rodrigo Vivi Blog: http://blog.vivi.eng.br ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Ro.CI.BAT: warning for drm/i915/bxt: Fix performance due to bogus MOCS entry
== Series Details == Series: drm/i915/bxt: Fix performance due to bogus MOCS entry URL : https://patchwork.freedesktop.org/series/9377/ State : warning == Summary == Series 9377v1 drm/i915/bxt: Fix performance due to bogus MOCS entry http://patchwork.freedesktop.org/api/1.0/series/9377/revisions/1/mbox Test gem_exec_flush: Subgroup basic-batch-kernel-default-cmd: fail -> PASS (ro-byt-n2820) Test kms_pipe_crc_basic: Subgroup suspend-read-crc-pipe-a: skip -> DMESG-WARN (ro-bdw-i5-5250u) Subgroup suspend-read-crc-pipe-c: skip -> PASS (fi-skl-i5-6260u) fi-kbl-qkkr total:229 pass:160 dwarn:29 dfail:0 fail:0 skip:40 fi-skl-i5-6260u total:229 pass:204 dwarn:0 dfail:0 fail:0 skip:25 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 ro-bdw-i5-5250u total:229 pass:204 dwarn:4 dfail:1 fail:0 skip:20 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:177 dwarn:0 dfail:1 fail:2 skip:49 ro-byt-n2820 total:229 pass:181 dwarn:0 dfail:1 fail:2 skip:45 ro-hsw-i3-4010u total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 fi-hsw-i7-4770k failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1361/ a755d6c drm-intel-nightly: 2016y-07m-01d-13h-54m-24s UTC integration manifest dedf573 drm/i915: Give proper names to MOCS entries ab3eabf drm/i915/bxt: Fix inadvertent CPU snooping due to incorrect MOCS config c73e12a drm/i915/gen9: Clean up MOCS table definitions ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 05/20] drm/i915: Separate GPU hang waitqueue from advance
On 01/07/16 12:22, Chris Wilson wrote: Currently __i915_wait_request uses a per-engine wait_queue_t for the dual purpose of waking after the GPU advances or for waking after an error. In the future, we may add even more wake sources and require greater separation, but for now we can conceptually simplify wakeups by separating the two sources. In particular, this allows us to use different wait-queues (e.g. one on the engine advancement, a global one for errors and one on each requests) without any hassle. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 6 ++ drivers/gpu/drm/i915/i915_gem.c | 5 + drivers/gpu/drm/i915/i915_irq.c | 19 --- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4948c90c9bd4..0d0e4ac4dadb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1410,6 +1410,12 @@ struct i915_gpu_error { #define I915_WEDGED (1 << 31) /** +* Waitqueue to signal when a hang is detected. Used to for waiters +* to release the struct_mutex for the reset to procede. +*/ + wait_queue_head_t wait_queue; + + /** * Waitqueue to signal when the reset has completed. Used by clients * that wait for dev_priv->mm.wedged to settle. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 34f724cc40b8..b607493a8d3a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1455,6 +1455,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, const bool irq_test_in_progress = ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine); int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(reset); DEFINE_WAIT(wait); unsigned long timeout_expire; s64 before = 0; /* Only to silence a compiler warning. 
*/ @@ -1499,6 +1500,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, goto out; } + add_wait_queue(&dev_priv->gpu_error.wait_queue, &reset); for (;;) { struct timer_list timer; @@ -1551,6 +1553,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req, destroy_timer_on_stack(&timer); } } + remove_wait_queue(&dev_priv->gpu_error.wait_queue, &reset); + if (!irq_test_in_progress) engine->irq_put(engine); @@ -5281,6 +5285,7 @@ i915_gem_load_init(struct drm_device *dev) i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->mm.idle_work, i915_gem_idle_work_handler); + init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 83f40baeb1f3..6c17596d75dd 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2488,11 +2488,8 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) return ret; } -static void i915_error_wake_up(struct drm_i915_private *dev_priv, - bool reset_completed) +static void i915_error_wake_up(struct drm_i915_private *dev_priv) { - struct intel_engine_cs *engine; - /* * Notify all waiters for GPU completion events that reset state has * been changed, and that they need to restart their wait after @@ -2501,18 +2498,10 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv, */ /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */ - for_each_engine(engine, dev_priv) - wake_up_all(&engine->irq_queue); + wake_up_all(&dev_priv->gpu_error.wait_queue); /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */ wake_up_all(&dev_priv->pending_flip_queue); - - /* -* Signal tasks blocked in i915_gem_wait_for_error that the pending -* reset state is cleared. 
-*/ - if (reset_completed) - wake_up_all(&dev_priv->gpu_error.reset_queue); } /** @@ -2577,7 +2566,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) * Note: The wake_up also serves as a memory barrier so that * waiters see the update value of the reset counter atomic_t. */ - i915_error_wake_up(dev_priv, true); + wake_up_all(&dev_priv->gpu_error.reset_queue); } } @@ -2714,7 +2703,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, * ensure that the waiters see the updated value of the reset
[Intel-gfx] ✗ Ro.CI.BAT: warning for drm/i915/bxt: Fix performance due to bogus MOCS entry (rev2)
== Series Details == Series: drm/i915/bxt: Fix performance due to bogus MOCS entry (rev2) URL : https://patchwork.freedesktop.org/series/9377/ State : warning == Summary == Series 9377v2 drm/i915/bxt: Fix performance due to bogus MOCS entry http://patchwork.freedesktop.org/api/1.0/series/9377/revisions/2/mbox Test gem_exec_flush: Subgroup basic-batch-kernel-default-cmd: fail -> PASS (ro-byt-n2820) Test kms_pipe_crc_basic: Subgroup nonblocking-crc-pipe-b: pass -> SKIP (fi-skl-i5-6260u) Subgroup read-crc-pipe-c-frame-sequence: pass -> SKIP (fi-skl-i5-6260u) Subgroup suspend-read-crc-pipe-b: dmesg-warn -> SKIP (ro-bdw-i5-5250u) Subgroup suspend-read-crc-pipe-c: skip -> PASS (fi-skl-i5-6260u) fi-hsw-i7-4770k total:229 pass:196 dwarn:0 dfail:0 fail:0 skip:33 fi-kbl-qkkr total:229 pass:160 dwarn:29 dfail:0 fail:0 skip:40 fi-skl-i5-6260u total:229 pass:202 dwarn:0 dfail:0 fail:0 skip:27 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 ro-bdw-i5-5250u total:229 pass:204 dwarn:2 dfail:1 fail:0 skip:22 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:177 dwarn:0 dfail:1 fail:2 skip:49 ro-byt-n2820 total:229 pass:181 dwarn:0 dfail:1 fail:2 skip:45 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 ro-hsw-i3-4010u failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1362/ a755d6c drm-intel-nightly: 2016y-07m-01d-13h-54m-24s UTC integration manifest 5a7b0a5 drm/i915: Give proper names to MOCS entries 6a55933 drm/i915/bxt: Fix inadvertent CPU snooping due to incorrect MOCS config 
9c5f6a5 drm/i915/gen9: Clean up MOCS table definitions ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 02/20] drm/i915: Delay queuing hangcheck to wait-request
On 01/07/16 12:22, Chris Wilson wrote: We can forgo queuing the hangcheck from the start of every request until we wait upon a request. This reduces the overhead of every request, but may increase the latency of detecting a hang. However, if nothing ever waits upon a hang, did it ever hang? It also improves the robustness of the wait-request by ensuring that the hangchecker is indeed running before we sleep indefinitely (and thereby ensuring that we never actually sleep forever waiting for a dead GPU). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 9 + drivers/gpu/drm/i915/i915_irq.c | 10 -- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1d9878258103..34f724cc40b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1532,6 +1532,9 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } + /* Ensure that even if the GPU hangs, we get woken up. */ + i915_queue_hangcheck(dev_priv); + timer.function = NULL; if (timeout || missed_irq(dev_priv, engine)) { unsigned long expire; @@ -2919,8 +2922,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, /* Not allowed to fail! 
*/ WARN(ret, "emit|add_request failed: %d!\n", ret); - i915_queue_hangcheck(engine->i915); - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); @@ -3264,8 +3265,8 @@ i915_gem_retire_requests(struct drm_i915_private *dev_priv) if (idle) mod_delayed_work(dev_priv->wq, - &dev_priv->mm.idle_work, - msecs_to_jiffies(100)); +&dev_priv->mm.idle_work, +msecs_to_jiffies(100)); return idle; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4378a659d962..5614582ca240 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3135,10 +3135,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine_id(engine, dev_priv, id) { + bool busy = waitqueue_active(&engine->irq_queue); u64 acthd; u32 seqno; unsigned user_interrupts; - bool busy = true; semaphore_clear_deadlocks(dev_priv); @@ -3161,12 +3161,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (engine->hangcheck.seqno == seqno) { if (ring_idle(engine, seqno)) { engine->hangcheck.action = HANGCHECK_IDLE; - if (waitqueue_active(&engine->irq_queue)) {, the + if (busy) { /* Safeguard against driver failure */ user_interrupts = kick_waiters(engine); engine->hangcheck.score += BUSY; - } else - busy = false; + } } else { /* We always increment the hangcheck score * if the ring is busy and still processing @@ -3240,9 +3239,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) goto out; } + /* Reset timer in case GPU hangs without another request being added */ if (busy_count) - /* Reset timer case chip hangs without another request -* being added */ i915_queue_hangcheck(dev_priv); out: I thought I see a problem here but I was just confused. I think it is OK. Just won't re-queue the hangcheck if no one is waiting and no new requests get submitted. It is unlikely that would cause a problem in practice. 
It sounds very unlucky that the last submitted request ever hangs. Balance with the benefit of not running while GPU is processing stuff I think we can give it a go. Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Ro.CI.BAT: warning for drm/i915/bxt: Remove the preliminary_hw_support flag
== Series Details == Series: drm/i915/bxt: Remove the preliminary_hw_support flag URL : https://patchwork.freedesktop.org/series/9381/ State : warning == Summary == Series 9381v1 drm/i915/bxt: Remove the preliminary_hw_support flag http://patchwork.freedesktop.org/api/1.0/series/9381/revisions/1/mbox Test kms_pipe_crc_basic: Subgroup hang-read-crc-pipe-a: pass -> SKIP (fi-skl-i5-6260u) Subgroup suspend-read-crc-pipe-b: dmesg-warn -> SKIP (ro-bdw-i5-5250u) Subgroup suspend-read-crc-pipe-c: dmesg-warn -> SKIP (ro-bdw-i5-5250u) skip -> PASS (fi-skl-i5-6260u) fi-hsw-i7-4770k total:229 pass:196 dwarn:0 dfail:0 fail:0 skip:33 fi-kbl-qkkr total:229 pass:160 dwarn:28 dfail:1 fail:0 skip:40 fi-skl-i5-6260u total:229 pass:203 dwarn:0 dfail:0 fail:0 skip:26 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 ro-bdw-i5-5250u total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:177 dwarn:0 dfail:1 fail:2 skip:49 ro-byt-n2820 total:229 pass:180 dwarn:0 dfail:1 fail:3 skip:45 ro-hsw-i3-4010u total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 Results at /archive/results/CI_IGT_test/RO_Patchwork_1363/ a755d6c drm-intel-nightly: 2016y-07m-01d-13h-54m-24s UTC integration manifest f3c9840 drm/i915/bxt: Remove the preliminary_hw_support flag ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/1] drm/i915: gracefully reject mmap of huge tiled objects
Thanks, James -Original Message- From: Chris Wilson [mailto:chris.ickle.wil...@gmail.com] On Behalf Of Chris Wilson Sent: Friday, July 1, 2016 12:25 AM To: Xiong, James Cc: intel-gfx@lists.freedesktop.org Subject: Re: [Intel-gfx] [PATCH 1/1] drm/i915: gracefully reject mmap of huge tiled objects On Thu, Jun 30, 2016 at 05:04:42PM -0700, James Xiong wrote: > From: "Xiong, James" > > currently mmap of a tiled object that is larger than mappable aperture > is rejected in fault handler, and causes sigbus error and application > crash. Please note that SIGBUS can be returned at any time. If your application doesn't handle it, please fix that. [JX] I agree, the way I put it like it's a bug is wrong, it's okay to return sigbus in i915 fault handler. It's a common practice that an application validates a pointer then accesses it directly, in case of SIGBUS, there is not much signal handler can do other than clearing up before the application aborts (please correct me if I am wrong). I have seen people use longjmp/sigaction to handle sigbus but it has problems: only be able to jump within the current function, reentrancy, etc., which makes it impractical to apply for all accesses. And sometimes the app wants to be able to continue in case of SIGBUS, for example: one test case fails because of the buffer size and SIGBUS, with this change, the app is able to either reduce buffer size, re-run test or continue with the next test. The change helps with these cases. Another thing is that when mmap is called to map a tiled/250M+ obj to user space, i915 knows it doesn't have enough space, shouldn't ENOSPC be returned right there and then? > This commit rejects it in mmap instead so that the client has chance > to handle the failure. Wrong. Please review the patches to fix this correctly. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✓ Ro.CI.BAT: success for drm/i915: Mass convert dev->dev_private to to_i915(dev)
== Series Details == Series: drm/i915: Mass convert dev->dev_private to to_i915(dev) URL : https://patchwork.freedesktop.org/series/9385/ State : success == Summary == Series 9385v1 drm/i915: Mass convert dev->dev_private to to_i915(dev) http://patchwork.freedesktop.org/api/1.0/series/9385/revisions/1/mbox Test kms_pipe_crc_basic: Subgroup suspend-read-crc-pipe-b: dmesg-warn -> SKIP (ro-bdw-i5-5250u) Subgroup suspend-read-crc-pipe-c: dmesg-warn -> SKIP (ro-bdw-i5-5250u) skip -> PASS (fi-skl-i5-6260u) fi-hsw-i7-4770k total:229 pass:196 dwarn:0 dfail:0 fail:0 skip:33 fi-kbl-qkkr total:229 pass:160 dwarn:28 dfail:1 fail:0 skip:40 fi-skl-i5-6260u total:229 pass:204 dwarn:0 dfail:0 fail:0 skip:25 fi-skl-i7-6700k total:229 pass:190 dwarn:0 dfail:0 fail:0 skip:39 ro-bdw-i5-5250u total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5557U total:229 pass:204 dwarn:1 dfail:1 fail:0 skip:23 ro-bdw-i7-5600u total:229 pass:190 dwarn:0 dfail:1 fail:0 skip:38 ro-bsw-n3050 total:229 pass:176 dwarn:0 dfail:1 fail:2 skip:50 ro-byt-n2820 total:229 pass:180 dwarn:0 dfail:1 fail:3 skip:45 ro-hsw-i3-4010u total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-hsw-i7-4770r total:229 pass:197 dwarn:0 dfail:1 fail:0 skip:31 ro-ilk-i7-620lm total:229 pass:157 dwarn:0 dfail:1 fail:1 skip:70 ro-ilk1-i5-650 total:224 pass:157 dwarn:0 dfail:1 fail:1 skip:65 ro-ivb-i7-3770 total:229 pass:188 dwarn:0 dfail:1 fail:0 skip:40 ro-skl3-i5-6260u total:229 pass:208 dwarn:1 dfail:1 fail:0 skip:19 ro-snb-i7-2620M total:229 pass:179 dwarn:0 dfail:1 fail:1 skip:48 Results at /archive/results/CI_IGT_test/RO_Patchwork_1364/ a755d6c drm-intel-nightly: 2016y-07m-01d-13h-54m-24s UTC integration manifest 27039f2 drm/i915: Mass convert dev->dev_private to to_i915(dev) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI 05/20] drm/i915: Separate GPU hang waitqueue from advance
Currently __i915_wait_request uses a per-engine wait_queue_t for the dual purpose of waking after the GPU advances or for waking after an error. In the future, we may add even more wake sources and require greater separation, but for now we can conceptually simplify wakeups by separating the two sources. In particular, this allows us to use different wait-queues (e.g. one on the engine advancement, a global one for errors and one on each requests) without any hassle. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 6 ++ drivers/gpu/drm/i915/i915_gem.c | 5 + drivers/gpu/drm/i915/i915_irq.c | 19 --- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4948c90c9bd4..0d0e4ac4dadb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1410,6 +1410,12 @@ struct i915_gpu_error { #define I915_WEDGED(1 << 31) /** +* Waitqueue to signal when a hang is detected. Used to for waiters +* to release the struct_mutex for the reset to procede. +*/ + wait_queue_head_t wait_queue; + + /** * Waitqueue to signal when the reset has completed. Used by clients * that wait for dev_priv->mm.wedged to settle. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e0b1e286bf87..b5278d117ea0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1455,6 +1455,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, const bool irq_test_in_progress = ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine); int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(reset); DEFINE_WAIT(wait); unsigned long timeout_expire; s64 before = 0; /* Only to silence a compiler warning. 
*/ @@ -1499,6 +1500,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, goto out; } + add_wait_queue(&dev_priv->gpu_error.wait_queue, &reset); for (;;) { struct timer_list timer; @@ -1557,6 +1559,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req, destroy_timer_on_stack(&timer); } } + remove_wait_queue(&dev_priv->gpu_error.wait_queue, &reset); + if (!irq_test_in_progress) engine->irq_put(engine); @@ -5287,6 +5291,7 @@ i915_gem_load_init(struct drm_device *dev) i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->mm.idle_work, i915_gem_idle_work_handler); + init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 83f40baeb1f3..6c17596d75dd 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2488,11 +2488,8 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) return ret; } -static void i915_error_wake_up(struct drm_i915_private *dev_priv, - bool reset_completed) +static void i915_error_wake_up(struct drm_i915_private *dev_priv) { - struct intel_engine_cs *engine; - /* * Notify all waiters for GPU completion events that reset state has * been changed, and that they need to restart their wait after @@ -2501,18 +2498,10 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv, */ /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */ - for_each_engine(engine, dev_priv) - wake_up_all(&engine->irq_queue); + wake_up_all(&dev_priv->gpu_error.wait_queue); /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */ wake_up_all(&dev_priv->pending_flip_queue); - - /* -* Signal tasks blocked in i915_gem_wait_for_error that the pending -* reset state is cleared. 
-*/ - if (reset_completed) - wake_up_all(&dev_priv->gpu_error.reset_queue); } /** @@ -2577,7 +2566,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) * Note: The wake_up also serves as a memory barrier so that * waiters see the update value of the reset counter atomic_t. */ - i915_error_wake_up(dev_priv, true); + wake_up_all(&dev_priv->gpu_error.reset_queue); } } @@ -2714,7 +2703,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, * ensure that the waiters see the updated value of the reset * counter a
[Intel-gfx] [CI 01/20] drm/i915/shrinker: Flush active on objects before counting
As we inspect obj->active to decide how many objects we can shrink (we only shrink idle objects), it helps to flush the active lists first in order to have a more accurate count of available objects. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 886a8797566d..1bf14544d8ad 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -265,6 +265,8 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) if (!i915_gem_shrinker_lock(dev, &unlock)) return 0; + i915_gem_retire_requests(dev_priv); + count = 0; list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) if (can_release_pages(obj)) -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI 10/20] drm/i915: Allocate scratch page from stolen
With the last direct CPU access to the scratch page removed, we can now allocate it from our small amount of reserved system pages (stolen memory). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7ccfb1e57d12..cc65c269b82a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -665,7 +665,9 @@ int intel_init_pipe_control(struct intel_engine_cs *engine) WARN_ON(engine->scratch.obj); - obj = i915_gem_object_create(engine->i915->dev, 4096); + obj = i915_gem_object_create_stolen(engine->i915->dev, 4096); + if (obj == NULL) + obj = i915_gem_object_create(engine->i915->dev, 4096); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); ret = PTR_ERR(obj); -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI 06/20] drm/i915: Slaughter the thundering i915_wait_request herd
One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed his wait, we can then wake up the next client and so on. However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqno and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, with each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. 
This appears to be scheduler induced (having discounted adverse effects from having a rbtree walk/erase in the wakeup path), each additional wake_up_process() costs approximately 1us on big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoiding long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter. v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments v9: Break the busy loop when the interrupt is unmasked or has fired. 
v10: Comments, unnecessary churn, better debugging from Tvrtko v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling. v14: RCU commentary, assert_spin_locked v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that first-waiter has the highest task priority (and so avoid priority inversion). v17: Add waiters to post-mortem GPU hang state. v18: Return early for a completed wait after acquiring the spinlock. Avoids adding ourselves to the tree if the request is already complete, and skips the awkward question of why we don't do completion wakeups for waits earlier than or equal to ourselves. v19: Prepare for init_breadcrumbs to fail. Later patches may want to allocate during init, so be prepared to propagate back the error code. Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson Cc: "Rogozhkin, Dmitry V" Cc: "Gong, Zhi
[Intel-gfx] [CI 04/20] drm/i915: Make queueing the hangcheck work inline
Since the function is a small wrapper around schedule_delayed_work(), move it inline to remove the function call overhead for the principle caller. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 18 +- drivers/gpu/drm/i915/i915_irq.c | 17 - 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f4aa727e522a..4948c90c9bd4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2932,7 +2932,23 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); /* i915_irq.c */ -void i915_queue_hangcheck(struct drm_i915_private *dev_priv); +static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) +{ + unsigned long delay; + + if (unlikely(!i915.enable_hangcheck)) + return; + + /* Don't continually defer the hangcheck so that it is always run at +* least once after work has been scheduled on any ring. Otherwise, +* we will ignore a hung ring if a second ring is kept busy. +*/ + + delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); + queue_delayed_work(system_long_wq, + &dev_priv->gpu_error.hangcheck_work, delay); +} + __printf(3, 4) void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3ad4ef9250d8..83f40baeb1f3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3247,23 +3247,6 @@ out: ENABLE_RPM_WAKEREF_ASSERTS(dev_priv); } -void i915_queue_hangcheck(struct drm_i915_private *dev_priv) -{ - unsigned long delay; - - if (!i915.enable_hangcheck) - return; - - /* Don't continually defer the hangcheck so that it is always run at -* least once after work has been scheduled on any ring. Otherwise, -* we will ignore a hung ring if a second ring is kept busy. 
-*/ - - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - queue_delayed_work(system_long_wq, - &dev_priv->gpu_error.hangcheck_work, delay); -} - static void ibx_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI 11/20] drm/i915: Refactor scratch object allocation for gen2 w/a buffer
The gen2 w/a buffer is stuffed into the same slot as the gen5+ scratch buffer. If we pass in the size we want to allocate for the scratch buffer, both callers can use the same routine. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 32 drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8411f8bb4ac..f1a01137334c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2056,7 +2056,7 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush_render; engine->emit_request = gen8_emit_request_render; - ret = intel_init_pipe_control(engine); + ret = intel_init_pipe_control(engine, 4096); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cc65c269b82a..f89b1797b465 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -658,16 +658,16 @@ void intel_fini_pipe_control(struct intel_engine_cs *engine) engine->scratch.obj = NULL; } -int intel_init_pipe_control(struct intel_engine_cs *engine) +int intel_init_pipe_control(struct intel_engine_cs *engine, int size) { struct drm_i915_gem_object *obj; int ret; WARN_ON(engine->scratch.obj); - obj = i915_gem_object_create_stolen(engine->i915->dev, 4096); + obj = i915_gem_object_create_stolen(engine->i915->dev, size); if (obj == NULL) - obj = i915_gem_object_create(engine->i915->dev, 4096); + obj = i915_gem_object_create(engine->i915->dev, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); ret = PTR_ERR(obj); @@ -3002,7 +3002,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine = 
&dev_priv->engine[RCS]; - struct drm_i915_gem_object *obj; int ret; engine->name = "render ring"; @@ -3045,31 +3044,16 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - /* Workaround batchbuffer to combat CS tlb bug. */ - if (HAS_BROKEN_CS_TLB(dev_priv)) { - obj = i915_gem_object_create(dev, I830_WA_SIZE); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate batch bo\n"); - return PTR_ERR(obj); - } - - ret = i915_gem_obj_ggtt_pin(obj, 0, 0); - if (ret != 0) { - drm_gem_object_unreference(&obj->base); - DRM_ERROR("Failed to ping batch bo\n"); - return ret; - } - - engine->scratch.obj = obj; - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); - } - ret = intel_init_ring_buffer(dev, engine); if (ret) return ret; if (INTEL_GEN(dev_priv) >= 5) { - ret = intel_init_pipe_control(engine); + ret = intel_init_pipe_control(engine, 4096); + if (ret) + return ret; + } else if (HAS_BROKEN_CS_TLB(dev_priv)) { + ret = intel_init_pipe_control(engine, I830_WA_SIZE); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4a3152993415..e7495a2d6367 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -471,8 +471,8 @@ void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno); int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); +int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); -int intel_init_pipe_control(struct intel_engine_cs *engine); int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI 12/20] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk)
On Ironlake, there is neither a command nor a register to ensure that the write from a MI_STORE command is completed (and coherent on the CPU) before the command parser continues. This means that the ordering between the seqno write and the subsequent user interrupt is undefined (like gen6+). So, to ensure that the seqno write has completed after the final user interrupt, we need to delay the read sufficiently to allow the write to complete. This delay is undefined by the bspec, and empirically requires 75us even though a register read combined with a clflush takes less than 500ns. Hence, the delay is due to an on-chip buffer rather than the latency of the write to memory. Note that the render ring controls this by filling the PIPE_CONTROL fifo with stalling commands that force the earliest pipe-control with the seqno to be completed before the command parser continues. Given that we need a barrier operation for BSD, we may as well forgo the extra per-batch latency by using a common per-interrupt barrier. Studying the impact of adding the usleep shows that its effect on both sequences of, and individual synchronous, no-op batches is negligible for the media engine (where the write now is unordered with the interrupt). Converting the render engine over from the current glut of pipe-controls to the per-interrupt delays speeds up both the sequential and individual synchronous no-ops by 20% and 60%, respectively. This speed-up holds even when looking at the throughput of small copies (4KiB->4MiB), both serial and synchronous, with a gain of about 20%. This is because, despite adding a significant delay to the interrupt, in all likelihood we will see the seqno write without having to apply the barrier (the delay is only necessary in the rare corner cases where the write of the last required seqno is itself delayed). 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94307 Testcase: igt/gem_sync #ilk Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_irq.c | 10 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 81 +++-- 2 files changed, 20 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7c379afcff2f..be7f0b9b27e0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1264,8 +1264,7 @@ static void ivybridge_parity_error_irq_handler(struct drm_i915_private *dev_priv static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - if (gt_iir & - (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) + if (gt_iir & GT_RENDER_USER_INTERRUPT) notify_ring(&dev_priv->engine[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) notify_ring(&dev_priv->engine[VCS]); @@ -1274,9 +1273,7 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - - if (gt_iir & - (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) + if (gt_iir & GT_RENDER_USER_INTERRUPT) notify_ring(&dev_priv->engine[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) notify_ring(&dev_priv->engine[VCS]); @@ -3601,8 +3598,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) gt_irqs |= GT_RENDER_USER_INTERRUPT; if (IS_GEN5(dev)) { - gt_irqs |= GT_RENDER_PIPECTL_NOTIFY_INTERRUPT | - ILK_BSD_USER_INTERRUPT; + gt_irqs |= ILK_BSD_USER_INTERRUPT; } else { gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f89b1797b465..e5029425c883 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1593,67 +1593,22 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, return 0; } -#define PIPE_CONTROL_FLUSH(ring__, addr__) \ -do { 
\ - intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |\ -PIPE_CONTROL_DEPTH_STALL); \ - intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \ - intel_ring_emit(ring__, 0); \ - intel_ring_emit(ring__, 0); \ -} while (0) - -static int -pc_render_add_request(struct drm_i915_gem_request *req) +static void +gen5_seqno_barrier(struct intel_engine_cs *ring) { - struct intel_engine_cs *engine = req->engine; - u32 addr = engine->status_page.gfx_addr + - (I915_GEM_HWS_INDEX << MI_STO
[Intel-gfx] [CI 02/20] drm/i915: Delay queuing hangcheck to wait-request
We can forgo queuing the hangcheck at the start of every request, deferring it until we wait upon a request. This reduces the overhead of every request, but may increase the latency of detecting a hang. However, if nothing ever waits upon a hang, did it ever hang? It also improves the robustness of the wait-request by ensuring that the hangchecker is indeed running before we sleep indefinitely (and thereby ensuring that we never actually sleep forever waiting for a dead GPU). As pointed out by Tvrtko, it is possible for a GPU hang to go unnoticed for as long as nobody is waiting for the GPU. Though this is rare, during that time we may be consuming more power than if we had promptly recovered, and in the most extreme case we may exhaust all memory before forcing the hangcheck. Something to be wary of in the future. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 15 +++ drivers/gpu/drm/i915/i915_irq.c | 10 -- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1d9878258103..e0b1e286bf87 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1532,6 +1532,15 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } + /* Ensure that even if the GPU hangs, we get woken up. +* +* However, note that if no one is waiting, we never notice +* a gpu hang. Eventually, we will have to wait for a resource +* held by the GPU and so trigger a hangcheck. In the most +* pathological case, this will be upon memory starvation! +*/ + i915_queue_hangcheck(dev_priv); + timer.function = NULL; if (timeout || missed_irq(dev_priv, engine)) { unsigned long expire; @@ -2919,8 +2928,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, /* Not allowed to fail! 
*/ WARN(ret, "emit|add_request failed: %d!\n", ret); - i915_queue_hangcheck(engine->i915); - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); @@ -3264,8 +3271,8 @@ i915_gem_retire_requests(struct drm_i915_private *dev_priv) if (idle) mod_delayed_work(dev_priv->wq, - &dev_priv->mm.idle_work, - msecs_to_jiffies(100)); +&dev_priv->mm.idle_work, +msecs_to_jiffies(100)); return idle; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4378a659d962..5614582ca240 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3135,10 +3135,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine_id(engine, dev_priv, id) { + bool busy = waitqueue_active(&engine->irq_queue); u64 acthd; u32 seqno; unsigned user_interrupts; - bool busy = true; semaphore_clear_deadlocks(dev_priv); @@ -3161,12 +3161,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (engine->hangcheck.seqno == seqno) { if (ring_idle(engine, seqno)) { engine->hangcheck.action = HANGCHECK_IDLE; - if (waitqueue_active(&engine->irq_queue)) { + if (busy) { /* Safeguard against driver failure */ user_interrupts = kick_waiters(engine); engine->hangcheck.score += BUSY; - } else - busy = false; + } } else { /* We always increment the hangcheck score * if the ring is busy and still processing @@ -3240,9 +3239,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) goto out; } + /* Reset timer in case GPU hangs without another request being added */ if (busy_count) - /* Reset timer case chip hangs without another request -* being added */ i915_queue_hangcheck(dev_priv); out: -- 2.8.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx