[Intel-gfx] [PATCH 1/2] drm/i915: compile-time consistency check on __EXEC_OBJECT flags
Two different sets of flag bits are stored in the 'flags' member of a 'struct drm_i915_gem_exec_object2', and they're defined in two different source files, increasing the risk of an accidental clash. Some flags in this field are supplied by the user; these are defined in i915_drm.h, and they start from the LSB and work up. Other flags are defined in i915_gem_execbuffer, for internal use within that file only; they start from the MSB and work down. So here we add a compile-time check that the two sets of flags do not overlap, which would cause all sorts of confusion. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 include/uapi/drm/i915_drm.h| 11 ++- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7941f1f..608fdc4 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,10 +34,11 @@ #include #include -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) -#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ #define BATCH_OFFSET_BIAS (256*1024) @@ -1007,6 +1008,9 @@ static bool only_mappable_for_reloc(unsigned int flags) unsigned invalid_flags; int i; + /* INTERNAL flags must not overlap with external ones */ + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; if (USES_FULL_PPGTT(dev)) invalid_flags |= EXEC_OBJECT_NEEDS_GTT; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c17d63d..079d274 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -696,12 +696,13 @@ struct 
drm_i915_gem_exec_object2 { */ __u64 offset; -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) +#define EXEC_OBJECT_PINNED (1<<4) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) __u64 flags; __u64 rsvd1; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: refactor eb_get_batch()
Precursor for fix to secure batch execution. We will need to be able to retrieve the batch VMA (as well as the batch itself) from the eb list, so this patch extracts that part of eb_get_batch() into a separate function, and moves both parts to a more logical place in the file, near where the eb list is created. Also, it may not be obvious, but the current execbuffer2 ioctl interface requires that the buffer object containing the batch-to-be-executed be the LAST entry in the exec2_list[] array (I expected it to be the first!). To clarify this, we can replace the rather obscure construct "list_entry(eb->vmas.prev, ...)" in the old version of eb_get_batch() with the equivalent but more explicit "list_last_entry(&eb->vmas,...)" in the new eb_get_batch_vma() and of course add an explanatory comment. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 49 ++ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 608fdc4..eea8b1f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -186,6 +186,35 @@ struct eb_vmas { return ret; } +static inline struct i915_vma * +eb_get_batch_vma(struct eb_vmas *eb) +{ + /* The batch is always the LAST item in the VMA list */ + struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list); + + return vma; +} + +static struct drm_i915_gem_object * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = eb_get_batch_vma(eb); + + /* +* SNA is doing fancy tricks with compressing batch buffers, which leads +* to negative relocation deltas. Usually that works out ok since the +* relocate address is still positive, except when the batch is placed +* very low in the GTT. Ensure this doesn't happen. +* +* Note that actual hangs have only been observed on gen7, but for +* paranoia do it everywhere. 
+*/ + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma->obj; +} + static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) { if (eb->and < 0) { @@ -1341,26 +1370,6 @@ static bool only_mappable_for_reloc(unsigned int flags) return file_priv->bsd_ring; } -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); - - /* -* SNA is doing fancy tricks with compressing batch buffers, which leads -* to negative relocation deltas. Usually that works out ok since the -* relocate address is still positive, except when the batch is placed -* very low in the GTT. Ensure this doesn't happen. -* -* Note that actual hangs have only been observed on gen7, but for -* paranoia do it everywhere. -*/ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; -} - #define I915_USER_RINGS (4) static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: unify first-stage engine struct setup
intel_lrc.c has a table of "logical rings" (meaning engines), while intel_ringbuffer.c has separately open-coded initialisation for each engine. We can deduplicate this somewhat by using the same first-stage engine-setup function for both modes. So here we expose the function that transfers information from the static table of (all) known engines to the dev_priv->engine array of engines available on this device (adjusting the names along the way) and then embed calls to it in both the LRC and the legacy-mode setup. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_lrc.c| 40 + drivers/gpu/drm/i915/intel_ringbuffer.c | 39 +--- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 + 3 files changed, 41 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 62b0dc6..bd6266e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1991,8 +1991,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) } static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift) +logical_ring_default_irqs(struct intel_engine_cs *engine) { + unsigned shift = engine->irq_shift; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; init_waitqueue_head(&engine->irq_queue); @@ -2093,14 +2094,14 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) return ret; } -static const struct logical_ring_info { +static const struct engine_info { const char *name; unsigned exec_id; unsigned guc_id; u32 mmio_base; unsigned irq_shift; int (*init)(struct intel_engine_cs *engine); -} logical_rings[] = { +} intel_engines[] = { [RCS] = { .name = "render ring", .exec_id = I915_EXEC_RENDER, @@ -2143,20 +2144,31 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) }, }; -static struct intel_engine_cs * -logical_ring_setup(struct drm_i915_private *dev_priv, enum 
intel_engine_id id) +struct intel_engine_cs * +intel_engine_setup(struct drm_i915_private *dev_priv, + enum intel_engine_id id) { - const struct logical_ring_info *info = &logical_rings[id]; + const struct engine_info *info = &intel_engines[id]; struct intel_engine_cs *engine = &dev_priv->engine[id]; - enum forcewake_domains fw_domains; engine->id = id; + engine->i915 = dev_priv; engine->name = info->name; engine->exec_id = info->exec_id; - engine->guc_id = info->guc_id; + engine->hw_id = engine->guc_id = info->guc_id; engine->mmio_base = info->mmio_base; + engine->irq_shift = info->irq_shift; - engine->i915 = dev_priv; + return engine; +} + +static struct intel_engine_cs * +logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) +{ + struct intel_engine_cs *engine; + enum forcewake_domains fw_domains; + + engine = intel_engine_setup(dev_priv, id); /* Intentionally left blank. */ engine->buffer = NULL; @@ -2186,7 +2198,7 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine, info->irq_shift); + logical_ring_default_irqs(engine); intel_engine_init_hangcheck(engine); i915_gem_batch_pool_init(dev_priv->dev, &engine->batch_pool); @@ -2215,14 +2227,14 @@ int intel_logical_rings_init(struct drm_device *dev) WARN_ON(INTEL_INFO(dev_priv)->ring_mask & GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); - for (i = 0; i < ARRAY_SIZE(logical_rings); i++) { + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { if (!HAS_ENGINE(dev_priv, i)) continue; - if (!logical_rings[i].init) + if (!intel_engines[i].init) continue; - ret = logical_rings[i].init(logical_ring_setup(dev_priv, i)); + ret = intel_engines[i].init(logical_ring_setup(dev_priv, i)); if (ret) goto cleanup; @@ -2230,7 +2242,7 @@ int intel_logical_rings_init(struct drm_device *dev) } /* -* Catch failures to update logical_rings table when the new engines 
+* Catch failures to update intel_engines table when the new engines * are added to the driver by a warning and disabling the forgotten * engines. */ diff --gi
Re: [Intel-gfx] [PATCH] drm/i915: tidy up request alloc
On 30/06/16 13:49, Tvrtko Ursulin wrote: On 30/06/16 11:22, Chris Wilson wrote: On Thu, Jun 30, 2016 at 09:50:20AM +0100, Tvrtko Ursulin wrote: On 30/06/16 02:35, Hong Liu wrote: Return the allocated request pointer directly to remove the double pointer parameter. Signed-off-by: Hong Liu --- drivers/gpu/drm/i915/i915_gem.c | 25 +++-- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1d98782..9881455 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2988,32 +2988,26 @@ void i915_gem_request_free(struct kref *req_ref) kmem_cache_free(req->i915->requests, req); } -static inline int +static inline struct drm_i915_gem_request * __i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) + struct i915_gem_context *ctx) { struct drm_i915_private *dev_priv = engine->i915; unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); struct drm_i915_gem_request *req; int ret; -if (!req_out) -return -EINVAL; - -*req_out = NULL; - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex * and restart. 
*/ ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); if (ret) -return ret; +return ERR_PTR(ret); req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); if (req == NULL) -return -ENOMEM; +return ERR_PTR(-ENOMEM); ret = i915_gem_get_seqno(engine->i915, &req->seqno); if (ret) @@ -3041,14 +3035,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) goto err_ctx; -*req_out = req; -return 0; +return req; err_ctx: i915_gem_context_unreference(ctx); err: kmem_cache_free(dev_priv->requests, req); -return ret; +return ERR_PTR(ret); } /** @@ -3067,13 +3060,9 @@ struct drm_i915_gem_request * i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { -struct drm_i915_gem_request *req; -int err; - if (ctx == NULL) ctx = engine->i915->kernel_context; -err = __i915_gem_request_alloc(engine, ctx, &req); -return err ? ERR_PTR(err) : req; +return __i915_gem_request_alloc(engine, ctx); } struct drm_i915_gem_request * Looks good to me. And have this feeling I've seen this somewhere before. Several times. This is not the full tidy, nor does it realise the ramifactions of request alloc through the stack. Hm I can't spot that it is doing anything wrong or making anything worse. You don't want to let the small cleanup in? Regards, Tvrtko It ought to make almost no difference, because the *only* place the inner function is called is from the outer one, which passes a pointer to a local for the returned object; and the inner one is then inlined, so the compiler doesn't actually put it on the stack and call to the inner allocator anyway. Strangely, however, with this change the code becomes ~400 bytes bigger! Disassembly reveals that while the code for the externally-callable outer function is indeed almost identical, a second copy of it has also been inlined at the one callsite in this file: __i915_gem_object_sync() ... 
req = i915_gem_request_alloc(to, NULL); I don't think that's a critical path and would rather have 400 bytes smaller codespace. We can get that back by adding /noinline/ to the outer function i915_gem_request_alloc() (not, of course, to the inner one, that definitely *should* be inline). .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/guc: Consolidate firmware major-minor to one place
On 30/06/16 13:46, Tvrtko Ursulin wrote: From: Tvrtko Ursulin Currently to change the firmware one has to update the exported module firmware string and the major-minor versions used for verification after load. Consolidate that to a single place defining correct major and minor versions per platform. Signed-off-by: Tvrtko Ursulin Cc: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 21 +++-- 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 4f6311a91a7e..9b08afafe802 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -59,10 +59,19 @@ * */ -#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin" +#define SKL_FW_MAJOR 6 +#define SKL_FW_MINOR 1 + +#define BXT_FW_MAJOR 8 +#define BXT_FW_MINOR 7 + +#define GUC_FW_PATH(platform, major, minor) \ + "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" + +#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR) MODULE_FIRMWARE(I915_SKL_GUC_UCODE); -#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin" +#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR) MODULE_FIRMWARE(I915_BXT_GUC_UCODE); /* User-friendly representation of an enum */ @@ -692,12 +701,12 @@ void intel_guc_init(struct drm_device *dev) fw_path = NULL; } else if (IS_SKYLAKE(dev)) { fw_path = I915_SKL_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 6; - guc_fw->guc_fw_minor_wanted = 1; + guc_fw->guc_fw_major_wanted = SKL_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = SKL_FW_MINOR; } else if (IS_BROXTON(dev)) { fw_path = I915_BXT_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 8; - guc_fw->guc_fw_minor_wanted = 7; + guc_fw->guc_fw_major_wanted = BXT_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = BXT_FW_MINOR; } else { fw_path = ""; /* unknown device */ } Looks fine. 
Reviewed-by: Dave Gordon ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Mass convert dev->dev_private to to_i915(dev)
On 01/07/16 16:26, Chris Wilson wrote: Since we now subclass struct drm_device, we can save pointer dances by noting the equivalence of struct drm_device and struct drm_i915_private, i.e. by using to_i915(). text data bss dec hex filename 1073824 4562 416 1078802 107612 drivers/gpu/drm/i915/i915.ko 1068976 4562 416 1073954 106322 drivers/gpu/drm/i915/i915.ko Created by the coccinelle script: @@ expression E; identifier p; @@ - struct drm_i915_private *p = E->dev_private; + struct drm_i915_private *p = to_i915(E); Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c| 176 +++ drivers/gpu/drm/i915/i915_drv.c| 36 ++-- drivers/gpu/drm/i915/i915_gem.c| 54 ++--- drivers/gpu/drm/i915/i915_gem_context.c| 10 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/i915_gem_fence.c | 24 +-- drivers/gpu/drm/i915/i915_gem_gtt.c| 20 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 6 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c | 6 +- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- drivers/gpu/drm/i915/i915_irq.c| 92 drivers/gpu/drm/i915/i915_suspend.c| 8 +- drivers/gpu/drm/i915/i915_sysfs.c | 22 +- drivers/gpu/drm/i915/intel_audio.c | 18 +- drivers/gpu/drm/i915/intel_color.c | 18 +- drivers/gpu/drm/i915/intel_crt.c | 24 +-- drivers/gpu/drm/i915/intel_ddi.c | 36 ++-- drivers/gpu/drm/i915/intel_display.c | 286 - drivers/gpu/drm/i915/intel_dp.c| 107 + drivers/gpu/drm/i915/intel_dp_mst.c| 6 +- drivers/gpu/drm/i915/intel_dpio_phy.c | 10 +- drivers/gpu/drm/i915/intel_dpll_mgr.c | 12 +- drivers/gpu/drm/i915/intel_drv.h | 4 +- drivers/gpu/drm/i915/intel_dsi.c | 36 ++-- drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c | 2 +- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 6 +- drivers/gpu/drm/i915/intel_dsi_pll.c | 12 +- drivers/gpu/drm/i915/intel_dvo.c | 18 +- drivers/gpu/drm/i915/intel_fbc.c | 24 +-- drivers/gpu/drm/i915/intel_fbdev.c | 10 +- drivers/gpu/drm/i915/intel_fifo_underrun.c | 18 +- 
drivers/gpu/drm/i915/intel_guc_loader.c| 6 +- drivers/gpu/drm/i915/intel_hdmi.c | 48 ++--- drivers/gpu/drm/i915/intel_i2c.c | 6 +- drivers/gpu/drm/i915/intel_lrc.c | 4 +- drivers/gpu/drm/i915/intel_lvds.c | 16 +- drivers/gpu/drm/i915/intel_modes.c | 4 +- drivers/gpu/drm/i915/intel_overlay.c | 4 +- drivers/gpu/drm/i915/intel_panel.c | 6 +- drivers/gpu/drm/i915/intel_pm.c| 110 +- drivers/gpu/drm/i915/intel_psr.c | 36 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c| 10 +- drivers/gpu/drm/i915/intel_sdvo.c | 22 +- drivers/gpu/drm/i915/intel_sprite.c| 18 +- drivers/gpu/drm/i915/intel_tv.c| 12 +- drivers/gpu/drm/i915/intel_uncore.c| 2 +- 47 files changed, 708 insertions(+), 709 deletions(-) Looks fine :) Reviewed-by: Dave Gordon I found three more to convert, plus one place where we can delete some entirely redundant locals, both 'dev' AND 'dev_priv' :) .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: convert a few more E->dev_private to to_i915(E)
Also remove some redundant dev and dev_priv locals Signed-off-by: Dave Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c| 4 drivers/gpu/drm/i915/intel_guc_loader.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 49f3ea7..1366d4e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4738,7 +4738,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->fence_reg = I915_FENCE_REG_NONE; obj->madv = I915_MADV_WILLNEED; - i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); + i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); } static const struct drm_i915_gem_object_ops i915_gem_object_ops = { @@ -5377,7 +5377,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file) return -ENOMEM; file->driver_priv = file_priv; - file_priv->dev_priv = dev->dev_private; + file_priv->dev_priv = to_i915(dev); file_priv->file = file; INIT_LIST_HEAD(&file_priv->rps.link); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 92152f2..2b60477 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14170,15 +14170,11 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc) skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) { int max_scale; - struct drm_device *dev; - struct drm_i915_private *dev_priv; int crtc_clock, cdclk; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; - dev = intel_crtc->base.dev; - dev_priv = dev->dev_private; crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index cf9b2dc..cdf0fbc 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ 
b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -605,7 +605,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Header and uCode will be loaded to WOPCM. Size of the two. */ size = guc_fw->header_size + guc_fw->ucode_size; - if (size > guc_wopcm_size(dev->dev_private)) { + if (size > guc_wopcm_size(to_i915(dev))) { DRM_ERROR("Firmware is too large to fit in WOPCM\n"); goto fail; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: tidy up request alloc
On 04/07/16 05:08, Liu, Hong wrote: On Fri, 2016-07-01 at 19:34 +0100, Chris Wilson wrote: On Fri, Jul 01, 2016 at 05:58:18PM +0100, Dave Gordon wrote: On 30/06/16 13:49, Tvrtko Ursulin wrote: On 30/06/16 11:22, Chris Wilson wrote: On Thu, Jun 30, 2016 at 09:50:20AM +0100, Tvrtko Ursulin wrote: On 30/06/16 02:35, Hong Liu wrote: Return the allocated request pointer directly to remove the double pointer parameter. Signed-off-by: Hong Liu --- drivers/gpu/drm/i915/i915_gem.c | 25 +++ -- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1d98782..9881455 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2988,32 +2988,26 @@ void i915_gem_request_free(struct kref *req_ref) kmem_cache_free(req->i915->requests, req); } -static inline int +static inline struct drm_i915_gem_request * __i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) + struct i915_gem_context *ctx) { struct drm_i915_private *dev_priv = engine->i915; unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); struct drm_i915_gem_request *req; int ret; -if (!req_out) -return -EINVAL; - -*req_out = NULL; - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex * and restart. 
*/ ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); if (ret) -return ret; +return ERR_PTR(ret); req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); if (req == NULL) -return -ENOMEM; +return ERR_PTR(-ENOMEM); ret = i915_gem_get_seqno(engine->i915, &req->seqno); if (ret) @@ -3041,14 +3035,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) goto err_ctx; -*req_out = req; -return 0; +return req; err_ctx: i915_gem_context_unreference(ctx); err: kmem_cache_free(dev_priv->requests, req); -return ret; +return ERR_PTR(ret); } /** @@ -3067,13 +3060,9 @@ struct drm_i915_gem_request * i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { -struct drm_i915_gem_request *req; -int err; - if (ctx == NULL) ctx = engine->i915->kernel_context; -err = __i915_gem_request_alloc(engine, ctx, &req); -return err ? ERR_PTR(err) : req; +return __i915_gem_request_alloc(engine, ctx); } struct drm_i915_gem_request * Looks good to me. And have this feeling I've seen this somewhere before. Several times. This is not the full tidy, nor does it realise the ramifactions of request alloc through the stack. Hm I can't spot that it is doing anything wrong or making anything worse. You don't want to let the small cleanup in? Regards, Tvrtko It ought to make almost no difference, because the *only* place the inner function is called is from the outer one, which passes a pointer to a local for the returned object; and the inner one is then inlined, so the compiler doesn't actually put it on the stack and call to the inner allocator anyway. Strangely, however, with this change the code becomes ~400 bytes bigger! Disassembly reveals that while the code for the externally-callable outer function is indeed almost identical, a second copy of it has also been inlined at the one callsite in this file: __i915_gem_object_sync() ... 
req = i915_gem_request_alloc(to, NULL); I don't think that's a critical path and would rather have 400 bytes smaller codespace. We can get that back by adding /noinline/ to the outer function i915_gem_request_alloc() (not, of course, to the inner one, that definitely *should* be inline). __i915_gem_object_sync() should not be calling i915_gem_request_alloc(). That's the issue with this patch, your patch and John's patch. So we wrote the i915_gem_request_alloc() this way is to avoid being inlined into callers like __i915_gem_object_sync()? Not specifically, as the description of commit 268270883 says, "... this patch renames the existing i915_gem_request_alloc(), and makes it local (static inline), and replaces it with a wrapper that provides a default if the context is NULL, and also has a nicer calling convention (doesn't require a pointer to an output parameter). Then we change all callers to use the new convention: OLD: err = i915_gem_request_alloc(ring, user_ctx, &req); if (err) ... NEW: req = i915_gem_request_alloc(ring, user_ctx); if (IS_ERR(req)) ... OLD: err = i915_gem_request_alloc(ring, ring->default_cont
Re: [Intel-gfx] [PATCH] drm/i915/guc: Protect against HAS_GUC_* returning true values other than one
On 04/07/16 15:30, Tvrtko Ursulin wrote: From: Tvrtko Ursulin At the moment HAS_GUC_UCODE == HAS_GUC == IS_GEN9 == (INTEL_INFO(dev)->gen_mask & BIT(8)), which is true but not one. And module parameters are integers and not booleans so compiler will not normalize the value for us. Quick and easy fix for the GuC loading code and the whole area can be evaluated afterwards. Signed-off-by: Tvrtko Ursulin Reported-by: Chris Wilson Cc: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index d925e2daeb24..72ea5b97e242 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -687,9 +687,9 @@ void intel_guc_init(struct drm_device *dev) /* A negative value means "use platform default" */ if (i915.enable_guc_loading < 0) - i915.enable_guc_loading = HAS_GUC_UCODE(dev); + i915.enable_guc_loading = !!HAS_GUC_UCODE(dev); if (i915.enable_guc_submission < 0) - i915.enable_guc_submission = HAS_GUC_SCHED(dev); + i915.enable_guc_submission = !!HAS_GUC_SCHED(dev); if (!HAS_GUC_UCODE(dev)) { fw_path = NULL; Or we could just fix the IS_GENx() macros: .Dave. >From 4c82153bd0a520d1d85757ccfc2241776c7634af Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Tue, 5 Jul 2016 12:11:12 +0100 Subject: [PATCH] drm/i915: IS_GENx() must return bool Organization: Intel Corporation (UK) Ltd. - Co. Reg. #1134945 - Pipers Way, Swindon SN3 1RJ Since "ae5702d2 drm/i915: Make IS_GENx macros work on a mask" which optimised the IS_GENx() macros to perform a simple bitmask operation rather than an arithmetic comparison, the values of these macros have been powers-of-2 integers rather than true booleans. This confuses some code that expects them to be specifically 0 or 1 rather than just 0 or nonzero. So here we convert all the individual GENx() macros to use a single underlying common macro, to which we add "!!" 
to convert the result to an actual bool. The compiler knows when this actually makes a difference and doesn't insert any instructions if it only needs a zero/nonzero test, so this patch increases the binary size by only ~40 bytes total, for the cases where we actually want the values 0 or 1. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_drv.h | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f0b1f43..431d862 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2763,14 +2763,15 @@ struct drm_i915_cmd_table { * have their own (e.g. HAS_PCH_SPLIT for ILK+ display, IS_foo for particular * chips, etc.). */ -#define IS_GEN2(dev) (INTEL_INFO(dev)->gen_mask & BIT(1)) -#define IS_GEN3(dev) (INTEL_INFO(dev)->gen_mask & BIT(2)) -#define IS_GEN4(dev) (INTEL_INFO(dev)->gen_mask & BIT(3)) -#define IS_GEN5(dev) (INTEL_INFO(dev)->gen_mask & BIT(4)) -#define IS_GEN6(dev) (INTEL_INFO(dev)->gen_mask & BIT(5)) -#define IS_GEN7(dev) (INTEL_INFO(dev)->gen_mask & BIT(6)) -#define IS_GEN8(dev) (INTEL_INFO(dev)->gen_mask & BIT(7)) -#define IS_GEN9(dev) (INTEL_INFO(dev)->gen_mask & BIT(8)) +#define _IS_GEN(x, dev) (!!(INTEL_INFO(dev)->gen_mask & BIT((x)-1))) +#define IS_GEN2(dev) _IS_GEN(2, dev) +#define IS_GEN3(dev) _IS_GEN(3, dev) +#define IS_GEN4(dev) _IS_GEN(4, dev) +#define IS_GEN5(dev) _IS_GEN(5, dev) +#define IS_GEN6(dev) _IS_GEN(6, dev) +#define IS_GEN7(dev) _IS_GEN(7, dev) +#define IS_GEN8(dev) _IS_GEN(8, dev) +#define IS_GEN9(dev) _IS_GEN(9, dev) #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/guc: Protect against HAS_GUC_* returning true values other than one
On 05/07/16 12:56, Tvrtko Ursulin wrote: On 05/07/16 12:50, Dave Gordon wrote: On 04/07/16 15:30, Tvrtko Ursulin wrote: From: Tvrtko Ursulin At the moment HAS_GUC_UCODE == HAS_GUC == IS_GEN9 == (INTEL_INFO(dev)->gen_mask & BIT(8)), which is true but not one. And module parameters are integers and not booleans so compiler will not normalize the value for us. Quick and easy fix for the GuC loading code and the whole area can be evaluated afterwards. Signed-off-by: Tvrtko Ursulin Reported-by: Chris Wilson Cc: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index d925e2daeb24..72ea5b97e242 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -687,9 +687,9 @@ void intel_guc_init(struct drm_device *dev) /* A negative value means "use platform default" */ if (i915.enable_guc_loading < 0) -i915.enable_guc_loading = HAS_GUC_UCODE(dev); +i915.enable_guc_loading = !!HAS_GUC_UCODE(dev); if (i915.enable_guc_submission < 0) -i915.enable_guc_submission = HAS_GUC_SCHED(dev); +i915.enable_guc_submission = !!HAS_GUC_SCHED(dev); if (!HAS_GUC_UCODE(dev)) { fw_path = NULL; Or we could just fix the IS_GENx() macros: You mean commit af1346a0f38fe5b762729a91ed10c7c7f59b76c9 Author: Tvrtko Ursulin Date: Mon Jul 4 15:50:23 2016 +0100 drm/i915: Explicitly convert some macros to boolean values :D Yeah, I was reading email out-of-order. But I like mine better anyway (refactor into a single underlying macro, and more parentheses). BTW I tried #define IS_GEN2(dev)(IS_GEN(dev, 2, 2)) (because the IS_GEN() macro already has the !! booleanisation) but it increased the codesize by ~4K. Hence the separate _IS_GEN(). Still, I think being explicit when assigning boolean type macros to integer is a good thing to do. Because I thought true is defined as non-zero in C. Unless I am behind the times. 
Regards, Tvrtko The *result* of a comparison or other boolean operation is and always has been 0-or-1 in C (whereas in BCPL TRUE was -1). It's the *inputs* to boolean operations that are tested for zero/nonzero. OTOH maybe I will change the enable_guc_{loading,submission} values to an enum or set of #defines, and then the assignment of the default values will use ?: to pick appropriate values. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Convert dev_priv->dev backpointers to dev_priv->drm
On 05/07/16 09:51, Chris Wilson wrote: Since drm_i915_private is now a subclass of drm_device we do not need to chase the drm_i915_private->dev backpointer and can instead simply access drm_i915_private->drm directly. text data bss dec hex filename 1068757 4565 416 1073738 10624a drivers/gpu/drm/i915/i915.ko 1066949 4565 416 1071930 105b3a drivers/gpu/drm/i915/i915.ko Created by the coccinelle script: @@ struct drm_i915_private *d; identifier i; @@ ( - d->dev->i + d->drm.i | - d->dev + &d->drm ) and for good measure the dev_priv->dev backpointer was removed entirely. Signed-off-by: Chris Wilson How about doing this with a macro, to isolate the users of the conversion from the actual layout of the drm/private pair? The caller need not know how one actually gets from one to the other, whether it's by following a pointer, finding a containing parent, locating a member, or just casting the pointer; the conversion macro encapsulates all that. #define to_drm(dev_priv)(&dev_priv->drm) and thus at the point of use: - struct drm_device *dev = dev_priv->dev; + struct drm_device *dev = to_drm(dev_priv); etc. And the even simpler Cocci: @@ struct drm_i915_private *d; @@ - d->dev + to_drm(d) .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/6] drm/i915/huc: Add HuC fw loading support
On 29/06/16 18:59, Rodrigo Vivi wrote: On Wed, Jun 29, 2016 at 7:31 AM, Dave Gordon wrote: On 29/06/16 00:03, Rodrigo Vivi wrote: I don't believe we need to be that extreme here. Daniel asked a cleaner version, but we don't need to block the huc on a full rework of an unified fw loader. Oh, I agree, we should take this "mostly" as-is and then reunify them after. .Dave. But the merge on huc/guc loading is just the minor thing Daniel asked. The major request is to stop using the fetch_status, but errnos instead. That's not going to happen. It's written as a state machine for good reason, because the various elements (fetch/load/reload) get called at different (and rather arbitrary) points in the driver load sequence, and they need to maintain state from one stage to another, not rely on the caller(s) to interpret errnos to determine what the next callback should be. Unless you (or Daniel) just mean change the details of the encoding i.e. how that state is represented? We could do that, but I don't think it would be useful to reuse unrelated errnos rather than have our own precise and specific enumeration of the state of the loading process. .Dave. so, maybe one extra patch that simplifies this right now before this series would be the ideal so we could speed up the merge and maybe later to the unified firmware loading solution. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 1/6] drm/i915/guc: Make the GuC fw loading helper functions general
On 01/07/16 14:15, Peter Antoine wrote: Rename some of the GuC fw loading code to make them more general. We will utilise them for HuC loading as well. s/intel_guc_fw/intel_uc_fw/g s/GUC_FIRMWARE/UC_FIRMWARE/g Struct intel_guc_fw is renamed to intel_uc_fw. Prefix of its members, such as 'guc' or 'guc_fw' either is renamed to 'uc' or removed for same purpose. v2: rebased on top of nightly. reapplied the search/replace as upstream code has changed. Signed-off-by: Alex Dai Signed-off-by: Peter Antoine This didn't apply cleanly, so it will need rebasing (again), but it's only a simple conflict with recent patches. Apart from that, it's OK now, so: Reviewed-by: Dave Gordon (which you can carry over to the rebased version). --- drivers/gpu/drm/i915/i915_debugfs.c| 12 +-- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_guc.h | 39 drivers/gpu/drm/i915/intel_guc_loader.c| 142 ++--- 4 files changed, 99 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f664884..3883df5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2483,7 +2483,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; struct drm_i915_private *dev_priv = node->minor->dev->dev_private; - struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw; u32 tmp, i; if (!HAS_GUC_UCODE(dev_priv)) @@ -2491,15 +2491,15 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "GuC firmware status:\n"); seq_printf(m, "\tpath: %s\n", - guc_fw->guc_fw_path); + guc_fw->uc_fw_path); seq_printf(m, "\tfetch: %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + intel_uc_fw_status_repr(guc_fw->fetch_status)); seq_printf(m, "\tload: %s\n", - intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + 
intel_uc_fw_status_repr(guc_fw->load_status)); seq_printf(m, "\tversion wanted: %d.%d\n", - guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted); seq_printf(m, "\tversion found: %d.%d\n", - guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found); + guc_fw->major_ver_found, guc_fw->minor_ver_found); seq_printf(m, "\theader: offset is %d; size = %d\n", guc_fw->header_offset, guc_fw->header_size); seq_printf(m, "\tuCode: offset is %d; size = %d\n", diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 355b647..2bfa86e 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1038,7 +1038,7 @@ int intel_guc_suspend(struct drm_device *dev) struct i915_gem_context *ctx; u32 data[3]; - if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) + if (guc->guc_fw.load_status != UC_FIRMWARE_SUCCESS) return 0; ctx = dev_priv->kernel_context; @@ -1064,7 +1064,7 @@ int intel_guc_resume(struct drm_device *dev) struct i915_gem_context *ctx; u32 data[3]; - if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) + if (guc->guc_fw.load_status != UC_FIRMWARE_SUCCESS) return 0; ctx = dev_priv->kernel_context; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743..02adcfc 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -90,29 +90,29 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; -enum intel_guc_fw_status { - GUC_FIRMWARE_FAIL = -1, - GUC_FIRMWARE_NONE = 0, - GUC_FIRMWARE_PENDING, - GUC_FIRMWARE_SUCCESS +enum intel_uc_fw_status { + UC_FIRMWARE_FAIL = -1, + UC_FIRMWARE_NONE = 0, + UC_FIRMWARE_PENDING, + UC_FIRMWARE_SUCCESS }; /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the GuC. 
*/ -struct intel_guc_fw { - struct drm_device * guc_dev; - const char *guc_fw_path; - size_t guc_fw_size; - struct drm_i915_gem_object *guc_fw_obj; - enum intel_guc_fw_statusguc_fw_fetch_status; - enum intel_guc_fw_stat
[Intel-gfx] [PATCH 2/2] drm/i915: refactor eb_get_batch()
Precursor for fix to secure batch execution. We will need to be able to retrieve the batch VMA (as well as the batch itself) from the eb list, so this patch extracts that part of eb_get_batch() into a separate function, and moves both parts to a more logical place in the file, near where the eb list is created. Also, it may not be obvious, but the current execbuffer2 ioctl interface requires that the buffer object containing the batch-to-be-executed be the LAST entry in the exec2_list[] array (I expected it to be the first!). To clarify this, we can replace the rather obscure construct "list_entry(eb->vmas.prev, ...)" in the old version of eb_get_batch() with the equivalent but more explicit "list_last_entry(&eb->vmas,...)" in the new eb_get_batch_vma() and of course add an explanatory comment. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 49 ++ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1bb1f25..f6724ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -186,6 +186,35 @@ struct eb_vmas { return ret; } +static inline struct i915_vma * +eb_get_batch_vma(struct eb_vmas *eb) +{ + /* The batch is always the LAST item in the VMA list */ + struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list); + + return vma; +} + +static struct drm_i915_gem_object * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = eb_get_batch_vma(eb); + + /* +* SNA is doing fancy tricks with compressing batch buffers, which leads +* to negative relocation deltas. Usually that works out ok since the +* relocate address is still positive, except when the batch is placed +* very low in the GTT. Ensure this doesn't happen. +* +* Note that actual hangs have only been observed on gen7, but for +* paranoia do it everywhere. 
+*/ + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma->obj; +} + static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) { if (eb->and < 0) { @@ -1341,26 +1370,6 @@ static bool only_mappable_for_reloc(unsigned int flags) return file_priv->bsd_ring; } -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); - - /* -* SNA is doing fancy tricks with compressing batch buffers, which leads -* to negative relocation deltas. Usually that works out ok since the -* relocate address is still positive, except when the batch is placed -* very low in the GTT. Ensure this doesn't happen. -* -* Note that actual hangs have only been observed on gen7, but for -* paranoia do it everywhere. -*/ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; -} - #define I915_USER_RINGS (4) static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915: compile-time consistency check on __EXEC_OBJECT flags
Two different sets of flag bits are stored in the 'flags' member of a 'struct drm_i915_gem_exec_object2', and they're defined in two different source files, increasing the risk of an accidental clash. Some flags in this field are supplied by the user; these are defined in i915_drm.h, and they start from the LSB and work up. Other flags are defined in i915_gem_execbuffer, for internal use within that file only; they start from the MSB and work down. So here we add a compile-time check that the two sets of flags do not overlap, which would cause all sorts of confusion. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 include/uapi/drm/i915_drm.h| 11 ++- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633..1bb1f25 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,10 +34,11 @@ #include #include -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) -#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ #define BATCH_OFFSET_BIAS (256*1024) @@ -1007,6 +1008,9 @@ static bool only_mappable_for_reloc(unsigned int flags) unsigned invalid_flags; int i; + /* INTERNAL flags must not overlap with external ones */ + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; if (USES_FULL_PPGTT(dev)) invalid_flags |= EXEC_OBJECT_NEEDS_GTT; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d7e81a3..51b9360 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -698,12 +698,13 @@ struct 
drm_i915_gem_exec_object2 { */ __u64 offset; -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) +#define EXEC_OBJECT_PINNED (1<<4) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS(-(EXEC_OBJECT_PINNED<<1)) __u64 flags; __u64 rsvd1; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: avoid wait_for_atomic() in non-atomic host2guc_action()
Rather than using wait_for_atomic() when checking for a response from the GuC, we can get the effect of a hybrid spin/sleep wait by breaking it into two stages. First, spin-wait for up to 10us to minimise latency for "quick" commands; then, if that times out, sleep-wait for up to 10ms (the maximum allowed for a "slow" command). Being able to do this depends on the recent patch 18f4b84 drm/i915: Use atomic waits for short non-atomic ones and is similar to the hybrid approach in 1758b90 drm/i915: Use a hybrid scheme for fast register waits (although we can't use that as-is, because that interface doesn't quite match what we need here). Signed-off-by: Dave Gordon Cc: Tvrtko Ursulin Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index bfc8bf6..2112e02 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -97,8 +97,14 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER); - /* No HOST2GUC command should take longer than 10ms */ - ret = wait_for_atomic(host2guc_action_response(dev_priv, &status), 10); + /* +* Fast commands should complete in less than 10us, so sample quickly +* up to that length of time, then switch to a slower sleep-wait loop. +* No HOST2GUC command should ever take longer than 10ms. +*/ + ret = wait_for_us(host2guc_action_response(dev_priv, &status), 10); + if (ret) + ret = wait_for(host2guc_action_response(dev_priv, &status), 10); if (status != GUC2HOST_STATUS_SUCCESS) { /* * Either the GuC explicitly returned an error (which -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/guc: symbolic names for user load/submission preferences
The existing code that accesses the "enable_guc_loading" and "enable_guc_submission" parameters uses explicit numerical values for the various possibilities, including in some cases relying on boolean 0/1 mapping to specific values (which could be confusing for maintainers). So this patch just provides and uses names for the values representing the DEFAULT, DISABLED, PREFERRED, and MANDATORY options that the user can select (-1, 0, 1, 2 respectively). This should produce identical code to the previous version! Signed-off-by: Dave Gordon Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_guc.h | 15 +++ drivers/gpu/drm/i915/intel_guc_loader.c| 26 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..33c0e0ab 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -971,7 +971,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); i915_guc_submission_disable(dev_priv); - if (!i915.enable_guc_submission) + if (i915.enable_guc_submission == GUC_SUBMISSION_DISABLED) return 0; /* not enabled */ if (guc->ctx_pool_obj) diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743..7ac835c 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -90,6 +90,21 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; +/* These represent user-requested preferences */ +enum { + GUC_SUBMISSION_DEFAULT = -1, + GUC_SUBMISSION_DISABLED = 0, + GUC_SUBMISSION_PREFERRED, + GUC_SUBMISSION_MANDATORY +}; +enum { + FIRMWARE_LOAD_DEFAULT = -1, + FIRMWARE_LOAD_DISABLED = 0, + FIRMWARE_LOAD_PREFERRED, + FIRMWARE_LOAD_MANDATORY +}; + +/* These represent the actual firmware status */ enum 
intel_guc_fw_status { GUC_FIRMWARE_FAIL = -1, GUC_FIRMWARE_NONE = 0, diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..2cd37db 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -189,7 +189,7 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) } /* If GuC submission is enabled, set up additional parameters here */ - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; @@ -424,7 +424,7 @@ int intel_guc_setup(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); /* Loading forbidden, or no firmware to load? */ - if (!i915.enable_guc_loading) { + if (i915.enable_guc_loading == FIRMWARE_LOAD_DISABLED) { err = 0; goto fail; } else if (fw_path == NULL) { @@ -493,7 +493,7 @@ int intel_guc_setup(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { err = i915_guc_submission_enable(dev_priv); if (err) goto fail; @@ -519,9 +519,9 @@ int intel_guc_setup(struct drm_device *dev) * nonfatal error (i.e. it doesn't prevent driver load, but * marks the GPU as wedged until reset). 
*/ - if (i915.enable_guc_loading > 1) { + if (i915.enable_guc_loading >= FIRMWARE_LOAD_MANDATORY) { ret = -EIO; - } else if (i915.enable_guc_submission > 1) { + } else if (i915.enable_guc_submission >= GUC_SUBMISSION_MANDATORY) { ret = -EIO; } else { ret = 0; @@ -536,7 +536,7 @@ int intel_guc_setup(struct drm_device *dev) else DRM_ERROR("GuC firmware load failed: %d\n", err); - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) @@ -544,7 +544,7 @@ int intel_guc_setup(struct drm_device *dev) else DRM_ERROR("GuC init failed: %d\n", ret); } - i915.enable_guc_submission = 0; + i915.enable_
[Intel-gfx] [PATCH 2/3] drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN()
Where we're going to continue regardless of the problem, rather than fail, then the message should be a WARNing rather than an ERROR. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..e299b64 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -114,10 +114,8 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (ret != -ETIMEDOUT) ret = -EIO; - DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " - "status=0x%08X response=0x%08X\n", - data[0], ret, status, - I915_READ(SOFT_SCRATCH(15))); + DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n", +data[0], ret, status, I915_READ(SOFT_SCRATCH(15))); dev_priv->guc.action_fail += 1; dev_priv->guc.action_err = ret; @@ -553,8 +551,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) if (db_ret.db_status == GUC_DOORBELL_DISABLED) break; - DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", - db_cmp.cookie, db_ret.cookie); + DRM_WARN("Cookie mismatch. 
Expected %d, found %d\n", +db_cmp.cookie, db_ret.cookie); /* update the cookie to newly read cookie from GuC */ db_cmp.cookie = db_ret.cookie; @@ -726,8 +724,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) /* Restore to original value */ err = guc_update_doorbell_id(guc, client, db_id); if (err) - DRM_ERROR("Failed to restore doorbell to %d, err %d\n", - db_id, err); + DRM_WARN("Failed to restore doorbell to %d, err %d\n", +db_id, err); for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { i915_reg_t drbreg = GEN8_DRBREGL(i); @@ -819,8 +817,6 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) return client; err: - DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); - guc_client_free(dev_priv, client); return NULL; } @@ -998,7 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { - DRM_ERROR("Failed to create execbuf guc_client\n"); + DRM_ERROR("Failed to create normal GuC client!\n"); return -ENOMEM; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] drm: extra printk() wrapper macros
We had only DRM_INFO() and DRM_ERROR(), whereas the underlying printk() provides several other useful intermediate levels such as NOTICE and WARNING. So this patch fills out the set by providing both regular and once-only macros for each of the levels INFO, NOTICE, and WARNING, using a common underlying macro that does all the token-pasting. DRM_ERROR is unchanged, as it's not just a printk wrapper. Signed-off-by: Dave Gordon --- include/drm/drmP.h | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index cf918e3e..82648b1 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -162,6 +162,26 @@ void drm_err(const char *format, ...); /** \name Macros to make printk easier */ /*@{*/ +#define_DRM_PRINTK(once, level, fmt, ...) \ + do {\ + printk##once(KERN_##level "[" DRM_NAME "] " fmt,\ +##__VA_ARGS__);\ + } while (0) + +#define DRM_INFO(fmt, ...) \ + _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE(fmt, ...) \ + _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN(fmt, ...) \ + _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) + +#define DRM_INFO_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, INFO, fmt, __VA_ARGS__) +#define DRM_NOTE_ONCE(fmt, ...)\ + _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) + /** * Error output. * @@ -187,12 +207,6 @@ void drm_err(const char *format, ...); drm_err(fmt, ##__VA_ARGS__);\ }) -#define DRM_INFO(fmt, ...) \ - printk(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - -#define DRM_INFO_ONCE(fmt, ...)\ - printk_once(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - /** * Debug output. * -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/3] drm/i915/guc: revisit GuC loader message levels
Some downgraded from DRM_ERROR() to DRM_WARN(), some eliminated, and a few upgraded from DRM_INFO() to DRM_NOTE() or DRM_WARN(). Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..fd032eb 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -140,12 +140,14 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { - switch (INTEL_INFO(dev_priv)->gen) { + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: - DRM_ERROR("GUC: unsupported core family\n"); + DRM_WARN("GEN%d does not support GuC operation\n", gen); return GFXCORE_FAMILY_UNKNOWN; } } @@ -433,7 +435,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? 
*/ - DRM_INFO("No GuC firmware known for this platform\n"); + DRM_WARN("No GuC firmware known for this platform\n"); err = -ENODEV; goto fail; } @@ -471,10 +473,8 @@ int intel_guc_setup(struct drm_device *dev) * that the state and timing are fairly predictable */ err = i915_reset_guc(dev_priv); - if (err) { - DRM_ERROR("GuC reset failed: %d\n", err); + if (err) goto fail; - } err = guc_ucode_xfer(dev_priv); if (!err) @@ -532,15 +532,15 @@ int intel_guc_setup(struct drm_device *dev) else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) - DRM_INFO("GuC firmware load failed: %d\n", err); + DRM_NOTE("GuC firmware load failed: %d\n", err); else - DRM_ERROR("GuC firmware load failed: %d\n", err); + DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) - DRM_INFO("Falling back from GuC submission to execlist mode\n"); + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -656,7 +656,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) fail: DRM_DEBUG_DRIVER("GuC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, guc_fw->guc_fw_obj); - DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n", + DRM_WARN("Failed to fetch GuC firmware from %s (error %d)\n", guc_fw->guc_fw_path, err); mutex_lock(&dev->struct_mutex); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/guc: symbolic names for user load/submission preferences
On 11/07/16 20:58, Chris Wilson wrote: On Mon, Jul 11, 2016 at 06:12:40PM +0100, Dave Gordon wrote: The existing code that accesses the "enable_guc_loading" and "enable_guc_submission" parameters uses explicit numerical values for the various possibilities, including in some cases relying on boolean 0/1 mapping to specific values (which could be confusing for maintainers). So this patch just provides and uses names for the values representing the DEFAULT, DISABLED, PREFERRED, and MANDATORY options that the user can select (-1, 0, 1, 2 respectively). When is MANDATORY a good idea? If the hw doesn't support any other mechanism, then it will shut itself down gracefully if setup fails. If the user wants to force guc for testing, they only need to set the module parameter then check the guc is enabled afterwards and fail the test. At what point do we need such a warty user interface to the kernel? -Chris Validation like it, so it's REALLY REALLY OBVIOUS if the system is misconfigured (e.g. wrong firmware version) as driver initialisation will fail rather than quietly continue by falling back to execlists. Remember Daniel originally insisted on NO FALLBACK -- again, so that developers and testers didn't get confused by the system continuing to work despite the presence of a (hardware,firmware,driver) bug -- so that's the option that provides it. Of course it's not what end-users want, and so it's not what end-users get. You only get NO-FALLBACK mode if you specifically ask for it. Note also, all this is already implemented, this patch just provides symbolic names for the code to use instead of literal numbers. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/3] drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN()
On 12/07/16 10:20, Tvrtko Ursulin wrote: On 11/07/16 19:01, Dave Gordon wrote: Where we're going to continue regardless of the problem, rather than fail, then the message should be a WARNing rather than an ERROR. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..e299b64 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -114,10 +114,8 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (ret != -ETIMEDOUT) ret = -EIO; -DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " -"status=0x%08X response=0x%08X\n", -data[0], ret, status, -I915_READ(SOFT_SCRATCH(15))); +DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n", + data[0], ret, status, I915_READ(SOFT_SCRATCH(15))); Hm, this does propagate the error code to the callers some which will act and log the failure. Majority won't though - like suspend/resume etc. In those cases it feels more like an error than a warning. It's definitely something that shouldn't happen, so we need to log it; and it has to be done at this level because we don't pass enough information back to leave it to the caller. But OTOH this layer doesn't have enough information to determine just how serious a failure is. So as a compromise the idea is to log a WARNING here, and then the caller can choose to: 1. pass the failure up (until we reach a layer with more context) 2. quietly disregard the failure and continue anyway 3. report an ERROR and fail/abort the process. That way we should get all the useful information about the root cause of something that ends up as an ERROR, while neither ignoring nor being too verbose about failures from which we may ultimately recover. 
For example, one of the callers (the doorbell h/w initialisation code) considers some failures as interesting but not critical (DEBUG level) but other instances of the exact same operation are fatal (ERROR). dev_priv->guc.action_fail += 1; dev_priv->guc.action_err = ret; @@ -553,8 +551,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) if (db_ret.db_status == GUC_DOORBELL_DISABLED) break; -DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", - db_cmp.cookie, db_ret.cookie); +DRM_WARN("Cookie mismatch. Expected %d, found %d\n", + db_cmp.cookie, db_ret.cookie); This one is interesting, error is propagated out a bit but then ignored in actual command submission. If the above message means command will not be submitted error is probably more appropriate. Or perhaps we cannot tell if the command was submitted or not in this case? Note that this is inside a retry loop. It shouldn't ever happen, but if it does we'll report it and try one more time. If it keeps happening (which would require active interference by some other party) we won't be able to ring the doorbell and the failure will be propagated back out of the GuC submission code. OTOH the caller then ignores it because "submission is not allowed to fail" (!) And yes, it is then undefined as to whether the command has been submitted or not. If it hasn't we'll expect a GPU hang later. .Dave. 
/* update the cookie to newly read cookie from GuC */ db_cmp.cookie = db_ret.cookie; @@ -726,8 +724,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) /* Restore to original value */ err = guc_update_doorbell_id(guc, client, db_id); if (err) -DRM_ERROR("Failed to restore doorbell to %d, err %d\n", -db_id, err); +DRM_WARN("Failed to restore doorbell to %d, err %d\n", + db_id, err); for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { i915_reg_t drbreg = GEN8_DRBREGL(i); @@ -819,8 +817,6 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) return client; err: -DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); - guc_client_free(dev_priv, client); return NULL; } @@ -998,7 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { -DRM_ERROR("Failed to create execbuf guc_client\n"); +DRM_ERROR("Failed to create normal GuC client!\n"); return -ENOMEM; } Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/3] drm: extra printk() wrapper macros
On 12/07/16 10:06, Tvrtko Ursulin wrote: On 11/07/16 19:01, Dave Gordon wrote: We had only DRM_INFO() and DRM_ERROR(), whereas the underlying printk() provides several other useful intermediate levels such as NOTICE and WARNING. So this patch fills out the set by providing both regular and once-only macros for each of the levels INFO, NOTICE, and WARNING, using a common underlying macro that does all the token-pasting. DRM_ERROR is unchanged, as it's not just a printk wrapper. Signed-off-by: Dave Gordon --- include/drm/drmP.h | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index cf918e3e..82648b1 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -162,6 +162,26 @@ void drm_err(const char *format, ...); /** \name Macros to make printk easier */ /*@{*/ +#define_DRM_PRINTK(once, level, fmt, ...)\ +do {\ +printk##once(KERN_##level "[" DRM_NAME "] " fmt,\ + ##__VA_ARGS__);\ +} while (0) + +#define DRM_INFO(fmt, ...)\ +_DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE(fmt, ...)\ +_DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) To me DRM_NOTICE would be better to keep consistent with kernel naming for the equivalent log level. Maybe, but then we'd probably want DRM_WARNING() as well, and the names get cumbersome, especially when you want to tag "_ONCE" on the end as well. I liked the consistency of {INFO,NOTE,WARN} all being four letters ;) Any comments from dri-devel on INFO/NOTE/WARN vs INFO/NOTICE/WARNING? Or any other suggestions? .Dave. +#define DRM_WARN(fmt, ...)\ +_DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) + +#define DRM_INFO_ONCE(fmt, ...)\ +_DRM_PRINTK(_once, INFO, fmt, __VA_ARGS__) +#define DRM_NOTE_ONCE(fmt, ...)\ +_DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN_ONCE(fmt, ...)\ +_DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) + /** * Error output. 
* @@ -187,12 +207,6 @@ void drm_err(const char *format, ...); drm_err(fmt, ##__VA_ARGS__);\ }) -#define DRM_INFO(fmt, ...)\ -printk(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - -#define DRM_INFO_ONCE(fmt, ...)\ -printk_once(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - /** * Debug output. * Otherwise acked by me. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/3] drm: extra printk() wrapper macros
On 12/07/16 15:25, Daniel Vetter wrote: On Mon, Jul 11, 2016 at 07:01:27PM +0100, Dave Gordon wrote: We had only DRM_INFO() and DRM_ERROR(), whereas the underlying printk() provides several other useful intermediate levels such as NOTICE and WARNING. So this patch fills out the set by providing both regular and once-only macros for each of the levels INFO, NOTICE, and WARNING, using a common underlying macro that does all the token-pasting. DRM_ERROR is unchanged, as it's not just a printk wrapper. Signed-off-by: Dave Gordon I'm not sure what exactly the brave new drm debug model should look like (probably some form of pimped dynamic debug printk, to be able to be backwards compatible with the gazillion of blog posts recommending to capture dmesg with drm.debug=0xe). But extending these is probably not what we want ... -Daniel These are not debug of any sort, these messages are intended to be seen by the user (or administrator), and these macros allow us to emit the messages at the most appropriate kernel message level. .Dave. --- include/drm/drmP.h | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index cf918e3e..82648b1 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -162,6 +162,26 @@ void drm_err(const char *format, ...); /** \name Macros to make printk easier */ /*@{*/ +#define_DRM_PRINTK(once, level, fmt, ...) \ + do {\ + printk##once(KERN_##level "[" DRM_NAME "] " fmt,\ +##__VA_ARGS__);\ + } while (0) + +#define DRM_INFO(fmt, ...) \ + _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE(fmt, ...) \ + _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN(fmt, ...) \ + _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) + +#define DRM_INFO_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, INFO, fmt, __VA_ARGS__) +#define DRM_NOTE_ONCE(fmt, ...)\ + _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) + /** * Error output. 
* @@ -187,12 +207,6 @@ void drm_err(const char *format, ...); drm_err(fmt, ##__VA_ARGS__);\ }) -#define DRM_INFO(fmt, ...) \ - printk(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - -#define DRM_INFO_ONCE(fmt, ...)\ - printk_once(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - /** * Debug output. * -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/3] drm/i915/guc: revisit GuC loader message levels
On 12/07/16 10:26, Tvrtko Ursulin wrote: On 11/07/16 19:01, Dave Gordon wrote: Some downgraded from DRM_ERROR() to DRM_WARN(), some eliminated, and a few upgraded from DRM_INFO() to DRM_NOTE() or DRM_WARN(). Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..fd032eb 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -140,12 +140,14 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { -switch (INTEL_INFO(dev_priv)->gen) { +u32 gen = INTEL_GEN(dev_priv); + +switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: -DRM_ERROR("GUC: unsupported core family\n"); +DRM_WARN("GEN%d does not support GuC operation\n", gen); return GFXCORE_FAMILY_UNKNOWN; } } @@ -433,7 +435,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? 
*/ -DRM_INFO("No GuC firmware known for this platform\n"); +DRM_WARN("No GuC firmware known for this platform\n"); err = -ENODEV; goto fail; } @@ -471,10 +473,8 @@ int intel_guc_setup(struct drm_device *dev) * that the state and timing are fairly predictable */ err = i915_reset_guc(dev_priv); -if (err) { -DRM_ERROR("GuC reset failed: %d\n", err); +if (err) goto fail; -} err = guc_ucode_xfer(dev_priv); if (!err) @@ -532,15 +532,15 @@ int intel_guc_setup(struct drm_device *dev) else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) -DRM_INFO("GuC firmware load failed: %d\n", err); +DRM_NOTE("GuC firmware load failed: %d\n", err); else -DRM_ERROR("GuC firmware load failed: %d\n", err); +DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) -DRM_INFO("Falling back from GuC submission to execlist mode\n"); +DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -656,7 +656,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) fail: DRM_DEBUG_DRIVER("GuC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, guc_fw->guc_fw_obj); -DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n", +DRM_WARN("Failed to fetch GuC firmware from %s (error %d)\n", guc_fw->guc_fw_path, err); mutex_lock(&dev->struct_mutex); R-b if you also change all the other DRM_ERRORs in guc_fw_fetch to DRM_DEBUG_DRIVER and merge this last two log lines (DRM_DEBUG_DRIVER + DRM_WARN) to one. :) Regards, Tvrtko No, that wouldn't be appropriate. We want the user to be informed if any of these failures occurs, because it means their system is in some way misconfigured e.g. corrupted firmware file. That's definitely not a DEBUG-only event, and it must be logged even if we're going to try to continue in fallback mode. 
I could change all the earlier ERRORs to NOTEs and leave just the last one as an ERROR i.e. explanation first, consequence after. As for the DEBUG, that's for a different purpose. Whereas the various ERROR/NOTE/INFO messages relate to the existence, format, or content of the required firmware file in the filesystem or ramdisk, the DEBUG is about internal failures such as not being able to allocate memory, over which the user/administrator has no direct control. I might swap them round though (i.e. DEBUG after the ERROR, to explain further than I want to in a user-facing message). .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/3] drm: extra printk() wrapper macros
We had only DRM_INFO() and DRM_ERROR(), whereas the underlying printk() provides several other useful intermediate levels such as NOTICE and WARNING. So this patch fills out the set by providing both regular and once-only macros for each of the levels INFO, NOTICE, and WARNING, using a common underlying macro that does all the token-pasting. DRM_ERROR is unchanged, as it's not just a printk wrapper. v2: Fix whitespace, missing ## (Eric Engestrom) Signed-off-by: Dave Gordon Reviewed-by: Eric Engestrom --- include/drm/drmP.h | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index c2fe2cf..1f53cc2 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -162,6 +162,26 @@ void drm_err(const char *format, ...); /** \name Macros to make printk easier */ /*@{*/ +#define _DRM_PRINTK(once, level, fmt, ...) \ + do {\ + printk##once(KERN_##level "[" DRM_NAME "] " fmt,\ +##__VA_ARGS__);\ + } while (0) + +#define DRM_INFO(fmt, ...) \ + _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE(fmt, ...) \ + _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN(fmt, ...) \ + _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) + +#define DRM_INFO_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) + /** * Error output. * @@ -187,12 +207,6 @@ void drm_err(const char *format, ...); drm_err(fmt, ##__VA_ARGS__);\ }) -#define DRM_INFO(fmt, ...) \ - printk(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - -#define DRM_INFO_ONCE(fmt, ...)\ - printk_once(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - /** * Debug output. * -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/3] drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN()
Where we're going to continue regardless of the problem, rather than fail, then the message should be a WARNing rather than an ERROR. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..e299b64 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -114,10 +114,8 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (ret != -ETIMEDOUT) ret = -EIO; - DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " - "status=0x%08X response=0x%08X\n", - data[0], ret, status, - I915_READ(SOFT_SCRATCH(15))); + DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n", +data[0], ret, status, I915_READ(SOFT_SCRATCH(15))); dev_priv->guc.action_fail += 1; dev_priv->guc.action_err = ret; @@ -553,8 +551,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) if (db_ret.db_status == GUC_DOORBELL_DISABLED) break; - DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", - db_cmp.cookie, db_ret.cookie); + DRM_WARN("Cookie mismatch. 
Expected %d, found %d\n", +db_cmp.cookie, db_ret.cookie); /* update the cookie to newly read cookie from GuC */ db_cmp.cookie = db_ret.cookie; @@ -726,8 +724,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) /* Restore to original value */ err = guc_update_doorbell_id(guc, client, db_id); if (err) - DRM_ERROR("Failed to restore doorbell to %d, err %d\n", - db_id, err); + DRM_WARN("Failed to restore doorbell to %d, err %d\n", +db_id, err); for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { i915_reg_t drbreg = GEN8_DRBREGL(i); @@ -819,8 +817,6 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) return client; err: - DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); - guc_client_free(dev_priv, client); return NULL; } @@ -998,7 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { - DRM_ERROR("Failed to create execbuf guc_client\n"); + DRM_ERROR("Failed to create normal GuC client!\n"); return -ENOMEM; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 3/3] drm/i915/guc: revisit GuC loader message levels
Some downgraded from DRM_ERROR() to DRM_WARN() or DRM_NOTE(), a few upgraded from DRM_INFO() to DRM_NOTE() or DRM_WARN(), and one eliminated completely. A typical failure mode might now look like this in the dmesg log: [drm] Failed to fetch valid GuC firmware from i915/skl_guc_ver6_1.bin (error -2) [drm] GuC firmware load failed: -5 [drm] Falling back from GuC submission to execlist mode which provides sufficient notice that * there is a problem with the firmware binary * and consequently loading the GuC has failed * and so we have selected the fallback (execlist) mode while not cluttering the log with developer-only details. v2: different permutation of levels :) Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 34 - 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..a2f4fa4 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -140,12 +140,14 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { - switch (INTEL_INFO(dev_priv)->gen) { + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: - DRM_ERROR("GUC: unsupported core family\n"); + DRM_WARN("GEN%d does not support GuC operation\n", gen); return GFXCORE_FAMILY_UNKNOWN; } } @@ -433,7 +435,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? 
*/ - DRM_INFO("No GuC firmware known for this platform\n"); + DRM_WARN("No GuC firmware known for this platform\n"); err = -ENODEV; goto fail; } @@ -471,10 +473,8 @@ int intel_guc_setup(struct drm_device *dev) * that the state and timing are fairly predictable */ err = i915_reset_guc(dev_priv); - if (err) { - DRM_ERROR("GuC reset failed: %d\n", err); + if (err) goto fail; - } err = guc_ucode_xfer(dev_priv); if (!err) @@ -532,15 +532,15 @@ int intel_guc_setup(struct drm_device *dev) else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) - DRM_INFO("GuC firmware load failed: %d\n", err); + DRM_NOTE("GuC firmware load failed: %d\n", err); else - DRM_ERROR("GuC firmware load failed: %d\n", err); + DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) - DRM_INFO("Falling back from GuC submission to execlist mode\n"); + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -571,7 +571,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { - DRM_ERROR("Firmware header is missing\n"); + DRM_NOTE("Firmware header is missing\n"); goto fail; } @@ -583,7 +583,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { - DRM_ERROR("CSS header definition mismatch\n"); + DRM_NOTE("CSS header definition mismatch\n"); goto fail; } @@ -593,7 +593,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_ERROR("RSA key size is bad\n"); + DRM_NOTE("RSA key size is bad\n"); goto fail; } 
guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -602,14 +602,14 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* At least, it should have header, uCode and RSA. Size of all three. */
[Intel-gfx] [PATCH] drm/i915/guc: symbolic names for user load/submission preferences
The existing code that accesses the "enable_guc_loading" and "enable_guc_submission" parameters uses explicit numerical values for the various possibilities, including in some cases relying on boolean 0/1 mapping to specific values (which could be confusing for maintainers). So this patch just provides and uses names for the values representing the DEFAULT, DISABLED, PREFERRED, and MANDATORY options that the user can select (-1, 0, 1, 2 respectively). This should produce identical code to the previous version! Signed-off-by: Dave Gordon Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_guc.h | 15 +++ drivers/gpu/drm/i915/intel_guc_loader.c| 26 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..33c0e0ab 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -971,7 +971,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); i915_guc_submission_disable(dev_priv); - if (!i915.enable_guc_submission) + if (i915.enable_guc_submission == GUC_SUBMISSION_DISABLED) return 0; /* not enabled */ if (guc->ctx_pool_obj) diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743..7ac835c 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -90,6 +90,21 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; +/* These represent user-requested preferences */ +enum { + GUC_SUBMISSION_DEFAULT = -1, + GUC_SUBMISSION_DISABLED = 0, + GUC_SUBMISSION_PREFERRED, + GUC_SUBMISSION_MANDATORY +}; +enum { + FIRMWARE_LOAD_DEFAULT = -1, + FIRMWARE_LOAD_DISABLED = 0, + FIRMWARE_LOAD_PREFERRED, + FIRMWARE_LOAD_MANDATORY +}; + +/* These represent the actual firmware status */ enum 
intel_guc_fw_status { GUC_FIRMWARE_FAIL = -1, GUC_FIRMWARE_NONE = 0, diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..2cd37db 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -189,7 +189,7 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) } /* If GuC submission is enabled, set up additional parameters here */ - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; @@ -424,7 +424,7 @@ int intel_guc_setup(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); /* Loading forbidden, or no firmware to load? */ - if (!i915.enable_guc_loading) { + if (i915.enable_guc_loading == FIRMWARE_LOAD_DISABLED) { err = 0; goto fail; } else if (fw_path == NULL) { @@ -493,7 +493,7 @@ int intel_guc_setup(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { err = i915_guc_submission_enable(dev_priv); if (err) goto fail; @@ -519,9 +519,9 @@ int intel_guc_setup(struct drm_device *dev) * nonfatal error (i.e. it doesn't prevent driver load, but * marks the GPU as wedged until reset). 
*/ - if (i915.enable_guc_loading > 1) { + if (i915.enable_guc_loading >= FIRMWARE_LOAD_MANDATORY) { ret = -EIO; - } else if (i915.enable_guc_submission > 1) { + } else if (i915.enable_guc_submission >= GUC_SUBMISSION_MANDATORY) { ret = -EIO; } else { ret = 0; @@ -536,7 +536,7 @@ int intel_guc_setup(struct drm_device *dev) else DRM_ERROR("GuC firmware load failed: %d\n", err); - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) @@ -544,7 +544,7 @@ int intel_guc_setup(struct drm_device *dev) else DRM_ERROR("GuC init failed: %d\n", ret); } - i915.enable_guc_submission = 0; + i915.enable_
Re: [Intel-gfx] [PATCH 1/5] drm/i915: unify first-stage engine struct setup
On 13/07/16 14:16, Tvrtko Ursulin wrote: On 13/07/16 13:23, Daniel Vetter wrote: On Fri, Jul 01, 2016 at 05:47:11PM +0100, Tvrtko Ursulin wrote: From: Dave Gordon [snip] { -const struct logical_ring_info *info = &logical_rings[id]; +const struct engine_info *info = &intel_engines[id]; struct intel_engine_cs *engine = &dev_priv->engine[id]; -enum forcewake_domains fw_domains; engine->id = id; +engine->i915 = dev_priv; engine->name = info->name; engine->exec_id = info->exec_id; -engine->guc_id = info->guc_id; +engine->hw_id = engine->guc_id = info->guc_id; Optional bikeshed: s/info->guc_id/info->hw_id/ makes sense imo in the new context. Or nuking engine->guc_id. Someone said we cannot be sure they will be the same in the future. So maybe just rename to hw_id for now. Regards, Tvrtko The GuC firmware *could* use a completely different enumeration of the engines, but why would it? Since it's closely tied to the hardware, it *ought* to use the same naming scheme as the h/w. So I have no objection to getting rid of guc_id entirely, and changing existing uses to use hw_id instead. OTOH it seems of little benefit and would certainly involve more work to reverse. The firmware team might, after all, decide that they too want to decouple the logical-engine-numbers used for the KMD interface from whatever the hardware team decide is the best way to number engines -- which might after all change between generations or even different SKUs of the same device! So, I think the "best" version of that line is probably: + engine->hw_id = info->hw_id; + + /* Current GuC f/w uses hw_id not driver id */ + engine->guc_id = info->hw_id; and we'll add "info->guc_id" back again if it ever becomes necessary. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915: refactor eb_get_batch()
On 13/07/16 13:44, Chris Wilson wrote: On Wed, Jul 13, 2016 at 02:38:16PM +0200, Daniel Vetter wrote: On Thu, Jun 30, 2016 at 04:12:49PM +0100, Dave Gordon wrote: Precursor for fix to secure batch execution. We will need to be able to retrieve the batch VMA (as well as the batch itself) from the eb list, so this patch extracts that part of eb_get_batch() into a separate function, and moves both parts to a more logical place in the file, near where the eb list is created. Also, it may not be obvious, but the current execbuffer2 ioctl interface requires that the buffer object containing the batch-to-be-executed be the LAST entry in the exec2_list[] array (I expected it to be the first!). To clarify this, we can replace the rather obscure construct "list_entry(eb->vmas.prev, ...)" in the old version of eb_get_batch() with the equivalent but more explicit "list_last_entry(&eb->vmas,...)" in the new eb_get_batch_vma() and of course add an explanatory comment. Signed-off-by: Dave Gordon I have no context on the secure batch fix you're talking about, but this here makes sense as an independent cleanup. It won't help though, so this is just churn for no purpose. -Chris At the very least, it replaces a confusing construct with a comprehensible one annotated with an explanatory comment. Separating finding the VMA for the batch from finding the batch itself also improves clarity and costs nothing (compiler inlines it anyway). Comprehensibility -- and hence maintainability -- is always a worthwhile purpose :) BTW, do the comments in this code from patch d23db88 drm/i915: Prevent negative relocation deltas from wrapping still apply? 'Cos I think it's pretty ugly to be setting a flag on a VMA as a side-effect of a "lookup" type operation :( Surely cleaner to do that sort of thing at the top level i.e. inside i915_gem_do_execbuffer() ? .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: warning for drm/i915/guc: Protect against HAS_GUC_* returning true values other than one (rev4)
On 13/07/16 14:38, Patchwork wrote: == Series Details == Series: drm/i915/guc: Protect against HAS_GUC_* returning true values other than one (rev4) URL : https://patchwork.freedesktop.org/series/9473/ State : warning == Summary == Series 9473v4 drm/i915/guc: Protect against HAS_GUC_* returning true values other than one http://patchwork.freedesktop.org/api/1.0/series/9473/revisions/4/mbox Test gem_exec_suspend: Subgroup basic-s3: pass -> DMESG-WARN (ro-bdw-i7-5557U) Test kms_pipe_crc_basic: Subgroup suspend-read-crc-pipe-a: dmesg-warn -> PASS (ro-skl3-i5-6260u) skip -> DMESG-WARN (ro-bdw-i7-5557U) Both of these look like https://bugs.freedesktop.org/show_bug.cgi?id=96614 [BAT BDW] *ERROR* failed to enable link training/failed to start channel equalization .Dave. fi-kbl-qkkr total:237 pass:174 dwarn:27 dfail:2 fail:7 skip:27 fi-skl-i5-6260u total:237 pass:189 dwarn:27 dfail:2 fail:7 skip:12 fi-skl-i7-6700k total:237 pass:200 dwarn:2 dfail:0 fail:9 skip:26 ro-bdw-i5-5250u total:237 pass:210 dwarn:2 dfail:0 fail:9 skip:16 ro-bdw-i7-5557U total:237 pass:210 dwarn:3 dfail:0 fail:9 skip:15 ro-bsw-n3050 total:217 pass:170 dwarn:0 dfail:0 fail:4 skip:42 ro-skl3-i5-6260u total:237 pass:213 dwarn:3 dfail:0 fail:9 skip:12 fi-snb-i7-2600 failed to connect after reboot ro-bdw-i7-5600u failed to connect after reboot ro-byt-n2820 failed to connect after reboot ro-hsw-i3-4010u failed to connect after reboot ro-hsw-i7-4770r failed to connect after reboot ro-ilk1-i5-650 failed to connect after reboot ro-ilk-i7-620lm failed to connect after reboot ro-ivb-i7-3770 failed to connect after reboot ro-snb-i7-2620M failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1483/ e8b1f0b drm-intel-nightly: 2016y-07m-13d-10h-57m-19s UTC integration manifest 40c374e drm/i915/guc: symbolic names for user load/submission preferences ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI resend 2/2] drm/i915: refactor eb_get_batch()
Precursor for fix to secure batch execution. We will need to be able to retrieve the batch VMA (as well as the batch itself) from the eb list, so this patch extracts that part of eb_get_batch() into a separate function, and moves both parts to a more logical place in the file, near where the eb list is created. Also, it may not be obvious, but the current execbuffer2 ioctl interface requires that the buffer object containing the batch-to-be-executed be the LAST entry in the exec2_list[] array (I expected it to be the first!). To clarify this, we can replace the rather obscure construct "list_entry(eb->vmas.prev, ...)" in the old version of eb_get_batch() with the equivalent but more explicit "list_last_entry(&eb->vmas,...)" in the new eb_get_batch_vma() and of course add an explanatory comment. Signed-off-by: Dave Gordon Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 49 ++ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1bb1f25..f6724ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -186,6 +186,35 @@ struct eb_vmas { return ret; } +static inline struct i915_vma * +eb_get_batch_vma(struct eb_vmas *eb) +{ + /* The batch is always the LAST item in the VMA list */ + struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list); + + return vma; +} + +static struct drm_i915_gem_object * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = eb_get_batch_vma(eb); + + /* +* SNA is doing fancy tricks with compressing batch buffers, which leads +* to negative relocation deltas. Usually that works out ok since the +* relocate address is still positive, except when the batch is placed +* very low in the GTT. Ensure this doesn't happen. +* +* Note that actual hangs have only been observed on gen7, but for +* paranoia do it everywhere. 
+*/ + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma->obj; +} + static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) { if (eb->and < 0) { @@ -1341,26 +1370,6 @@ static bool only_mappable_for_reloc(unsigned int flags) return file_priv->bsd_ring; } -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); - - /* -* SNA is doing fancy tricks with compressing batch buffers, which leads -* to negative relocation deltas. Usually that works out ok since the -* relocate address is still positive, except when the batch is placed -* very low in the GTT. Ensure this doesn't happen. -* -* Note that actual hangs have only been observed on gen7, but for -* paranoia do it everywhere. -*/ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; -} - #define I915_USER_RINGS (4) static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI resend 1/2] drm/i915: compile-time consistency check on __EXEC_OBJECT flags
Two different sets of flag bits are stored in the 'flags' member of a 'struct drm_i915_gem_exec_object2', and they're defined in two different source files, increasing the risk of an accidental clash. Some flags in this field are supplied by the user; these are defined in i915_drm.h, and they start from the LSB and work up. Other flags are defined in i915_gem_execbuffer, for internal use within that file only; they start from the MSB and work down. So here we add a compile-time check that the two sets of flags do not overlap, which would cause all sorts of confusion. Signed-off-by: Dave Gordon Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 include/uapi/drm/i915_drm.h| 11 ++- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633..1bb1f25 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,10 +34,11 @@ #include #include -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) -#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ #define BATCH_OFFSET_BIAS (256*1024) @@ -1007,6 +1008,9 @@ static bool only_mappable_for_reloc(unsigned int flags) unsigned invalid_flags; int i; + /* INTERNAL flags must not overlap with external ones */ + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; if (USES_FULL_PPGTT(dev)) invalid_flags |= EXEC_OBJECT_NEEDS_GTT; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d7e81a3..51b9360 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ 
-698,12 +698,13 @@ struct drm_i915_gem_exec_object2 { */ __u64 offset; -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) +#define EXEC_OBJECT_PINNED (1<<4) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) __u64 flags; __u64 rsvd1; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/6] drm/i915/huc: Add HuC fw loading support
On 13/07/16 13:48, Daniel Vetter wrote: On Thu, Jun 23, 2016 at 02:52:41PM +0100, Peter Antoine wrote: On Thu, 23 Jun 2016, Dave Gordon wrote: On 22/06/16 09:31, Daniel Vetter wrote: No, the *correct* fix is to unify all the firmware loaders we have. There should just be ONE piece of code that can be used to fetch and load ANy firmware into ANY auxiliary microcontroller. NOT one per microcontroller, all different -- that way lies madness. We already had a unified loader for the HuC and GuC a year ago, but IIRC the party line then was "just make it (GuC) specific, then copypaste it for the second uC, and when we've got three versions we'll have learnt how we really want a unified loader to behave." Well. here's the copypaste, and we already have a different loader for the DMC/CSR, so it must be time for (re-)unification. .Dave. Just to add, if you uc_fw_fetch() has an error code you will still have to remember the state of the fetch or at each reset/resume/etc... or you will have to try the firmware load again and that can take a long time. So the state will have to be re-instated. Seeing this code was written with the given goals and were written in the same vane as code that was deemed acceptable, it seems weird at this late stage to change the design goals. Note: this is the third time that these patches have been posted and were only rejected (as far as I know) due to no open-source user. Which there is now, and is why I have reposted these patches. I never liked the guc firmware code, but figure for one copy it's not worth fighting over. Adding more copies (or perpetuating the design by making it generic) isn't what I'm looking for. *You* asked for more copies, back when we proposed a single unified solution last year. We already had a *single* GuC+HuC loader which could also have been extended to support the DMC as well, but at the time you wanted a GuC-specific version -- and by implication, a separate HuC loader -- *in addition to* the DMC loader. 
> Firmware loading shouldn't be that complicated, really. Maybe it shouldn't be, and maybe it isn't -- you may not be seeing how simple this code actually is. Fetch firmware, validate it, save it in a GEM object; later, DMA it to the h/w; at each stage keep track of status so we know what has been done and what is still to do (or redo, during reset). Any complications are because the h/w (e.g. write-once memory) makes them necessary, or artefacts of the GEM object system, or because of the driver's byzantine sequence of operations during load/reset/suspend/resume/unload. The unified firmware loader is called request_firmware. If that's not good enough, pls fix the core function, not paper code over in i915. That's exactly the function we call. Then we have to validate and save the blob. And remember that we've done so. In that regard DMC/CSR is unified, everything else isn't yet. Unified with what? Maybe the "DMC" is unified with the "CSR" -- which AFAIK are the same thing -- and the software just randomly uses both names to maximise confusion? if (HAS_CSR(dev)) { struct intel_csr *csr = &dev_priv->csr; err_printf(m, "DMC loaded: %s\n", yesno(csr->dmc_payload != NULL)); err_printf(m, "DMC fw version: %d.%d\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version)); } ... if (!IS_GEN9(dev_priv)) { DRM_ERROR("No CSR support available for this platform\n"); return; } if (!dev_priv->csr.dmc_payload) { DRM_ERROR("Tried to program CSR with empty payload\n"); return; } And according to the comments in intel_csr.c -- but not the code -- /* * Firmware loading status will be one of the below states: * FW_UNINITIALIZED, FW_LOADED, FW_FAILED. * * Once the firmware is written into the registers status will * be moved from FW_UNINITIALIZED to FW_LOADED and for any * erroneous condition status will be moved to FW_FAILED. 
*/ So I don't think you should hold this code up as a masterpiece of "unified" design -- which in any case you argued against last year, when we presented a unified loader. Specifically, you said, "In my experience trying to extract common code at all costs is harmful way too often." Also, the approach taken in the DMC loader -- which appears to have been copypasted from a /very early/ version of the GuC loader, before I fixed the async-load problems -- just wouldn't work for the HuC/GuC, where the kernel needs to know when the firmware load has been completed so that it can start sending work to the GuC. The DMC loader only works because it doesn
Re: [Intel-gfx] [PATCH 3/6] drm/i915/huc: Add HuC fw loading support
On 14/07/16 15:16, Daniel Vetter wrote: On Wed, Jul 13, 2016 at 03:52:39PM +0100, Peter Antoine wrote: On Wed, 13 Jul 2016, Daniel Vetter wrote: On Thu, Jun 23, 2016 at 02:52:41PM +0100, Peter Antoine wrote: On Thu, 23 Jun 2016, Dave Gordon wrote: On 22/06/16 09:31, Daniel Vetter wrote: No, the *correct* fix is to unify all the firmware loaders we have. There should just be ONE piece of code that can be used to fetch and load ANy firmware into ANY auxiliary microcontroller. NOT one per microcontroller, all different -- that way lies madness. We already had a unified loader for the HuC and GuC a year ago, but IIRC the party line then was "just make it (GuC) specific, then copypaste it for the second uC, and when we've got three versions we'll have learnt how we really want a unified loader to behave." Well. here's the copypaste, and we already have a different loader for the DMC/CSR, so it must be time for (re-)unification. .Dave. Just to add, if you uc_fw_fetch() has an error code you will still have to remember the state of the fetch or at each reset/resume/etc... or you will have to try the firmware load again and that can take a long time. So the state will have to be re-instated. Seeing this code was written with the given goals and were written in the same vane as code that was deemed acceptable, it seems weird at this late stage to change the design goals. Note: this is the third time that these patches have been posted and were only rejected (as far as I know) due to no open-source user. Which there is now, and is why I have reposted these patches. I never liked the guc firmware code, but figure for one copy it's not worth fighting over. Adding more copies (or perpetuating the design by making it generic) isn't what I'm looking for. Firmware loading shouldn't be that complicated, really. The unified firmware loader is called request_firmware. If that's not good enough, pls fix the core function, not paper code over in i915. 
In that regard DMC/CSR is unified, everything else isn't yet. Iirc the big issue is delayed firmware loading for built-in i915 and fw only available later on. This is an open issue in request_firmware() since years, and there's various patches floating around. If the problem is that Greg KH doesn't consider those patches, I can help with that. But not pushing the core fix forward isn't acceptable imo. Once that fix is landed we can treat request_firmware as reliable (it might take a while, hence must be run in an async work like DMC loading), with no need to ever retry anything. If fw loading fails we can just mark the entire render part of the gpu as dead by injecting the equivalent of a non-recoverable hang (async setup) or failing engine init with -EIO (if this is still synchronous, which I don't expect really). If there's another reason for this complexity, please explain since I'd like to understand why we need this. -Daniel I was not involved at the start and I am porting code that others have written, but as far as I understand this, you requested that the code be duplicated. See Dave's comment above as he was involved. The code uses request_firmware() to handle the load of the firmware into memory and the rest of this code manages the loading of that memory into the HuC's SRAM. This needs extra setup that should not really go into the generic firmware loader (one loader for uIA's makes sense as this will futureproof the code). Also the SRAM is write-once memory and needs to be handled correctly. Also, the GuC needs to verify the HuC so this becomes a little bit more fun. Also, again you are ignoring the point that not having firmware is not fatal. We remember the state of the load as this is required to save time when we come out of reset to not waste time trying to reload the firmware, if it has failed already. There are other mechanisms to do this, but they will always need some form of history. 
Also, if request_firmware() is broken why has this not been fixed? If it has been broken for "years" why and how do you expect it to be fixed now? If the "not pushing the core fix is not acceptable" why has that not been done? Apparently because I'm a too nice maintainer and allowed half-solutions to get landed, under the expectation that people would indeed follow up and fix things. And yes, you care, you fix it, is how this works, whether you like it or not. -Daniel AFAIK request_firmware() is *not* broken. It does what it says i.e. fetches a blob from the internal bucket or from the filesystem; and fails if it's not found. So it doesn't need fixing. What's broken (from the Android POV) is i915 needing to do too much too early i.e. before all filesystems are mounted. The answer to *that* is to defer *all* engine initialisation until (much) later; but that's nothing at all to do
Re: [Intel-gfx] [PATCH 3/6] drm/i915/huc: Add HuC fw loading support
On 14/07/16 15:26, Daniel Vetter wrote: On Thu, Jul 14, 2016 at 03:08:41PM +0100, Dave Gordon wrote: On 13/07/16 13:48, Daniel Vetter wrote: On Thu, Jun 23, 2016 at 02:52:41PM +0100, Peter Antoine wrote: On Thu, 23 Jun 2016, Dave Gordon wrote: On 22/06/16 09:31, Daniel Vetter wrote: No, the *correct* fix is to unify all the firmware loaders we have. There should just be ONE piece of code that can be used to fetch and load ANy firmware into ANY auxiliary microcontroller. NOT one per microcontroller, all different -- that way lies madness. We already had a unified loader for the HuC and GuC a year ago, but IIRC the party line then was "just make it (GuC) specific, then copypaste it for the second uC, and when we've got three versions we'll have learnt how we really want a unified loader to behave." Well. here's the copypaste, and we already have a different loader for the DMC/CSR, so it must be time for (re-)unification. .Dave. Just to add, if you uc_fw_fetch() has an error code you will still have to remember the state of the fetch or at each reset/resume/etc... or you will have to try the firmware load again and that can take a long time. So the state will have to be re-instated. Seeing this code was written with the given goals and were written in the same vane as code that was deemed acceptable, it seems weird at this late stage to change the design goals. Note: this is the third time that these patches have been posted and were only rejected (as far as I know) due to no open-source user. Which there is now, and is why I have reposted these patches. I never liked the guc firmware code, but figure for one copy it's not worth fighting over. Adding more copies (or perpetuating the design by making it generic) isn't what I'm looking for. *You* asked for more copies, back when we proposed a single unified solution last year. 
We already had a *single* GuC+HuC loader which could also have been extended to support the DMC as well, but at the time you wanted a GuC-specific version -- and by implication, a separate HuC loader -- *in addition to* the DMC loader. Firmware loading shouldn't be that complicated, really. Maybe it shouldn't be, and maybe it isn't -- you may not be seeing how simple this code actually is. Fetch firmware, validate it, save it in a GEM object; later, DMA it to the h/w; at each stage keep track of status so we know what has been done and what is still to do (or redo, during reset). Any complications are because the h/w (e.g. write-once memory) makes them necessary, or artefacts of the GEM object system, or because of the driver's byzantine sequence of operations during load/reset/suspend/resume/unload. The unified firmware loader is called request_firmware. If that's not good enough, pls fix the core function, not paper code over in i915. That's exactly the function we call. Then we have to validate and save the blob. And remember that we've done so. In that regard DMC/CSR is unified, everything else isn't yet. Unified with what? Maybe the "DMC" is unified with the "CSR" -- which AFAIK are the same thing -- and the software just randomly uses both names to maximise confusion? if (HAS_CSR(dev)) { struct intel_csr *csr = &dev_priv->csr; err_printf(m, "DMC loaded: %s\n", yesno(csr->dmc_payload != NULL)); err_printf(m, "DMC fw version: %d.%d\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version)); } ... if (!IS_GEN9(dev_priv)) { DRM_ERROR("No CSR support available for this platform\n"); return; } if (!dev_priv->csr.dmc_payload) { DRM_ERROR("Tried to program CSR with empty payload\n"); return; } And according to the comments in intel_csr.c -- but not the code -- /* * Firmware loading status will be one of the below states: * FW_UNINITIALIZED, FW_LOADED, FW_FAILED. 
* * Once the firmware is written into the registers status will * be moved from FW_UNINITIALIZED to FW_LOADED and for any * erroneous condition status will be moved to FW_FAILED. */ So I don't think you should hold this code up as a masterpiece of "unified" design -- which in any case you argued against last year, when we presented a unified loader. Specifically, you said, "In my experience trying to extract common code at all costs is harmful way too often." Also, the approach taken in the DMC loader -- which appears to have been copypasted from a /very early/ version of the GuC loader, before I fixed the async-load problems -- just wouldn't work for the HuC/GuC, where the kernel needs to know when the firmware load has been completed so that it
Re: [Intel-gfx] [PATCH 2/2] drm/i915: refactor eb_get_batch()
On 14/07/16 15:03, Chris Wilson wrote: On Thu, Jul 14, 2016 at 02:12:55PM +0100, Dave Gordon wrote: On 13/07/16 13:44, Chris Wilson wrote: On Wed, Jul 13, 2016 at 02:38:16PM +0200, Daniel Vetter wrote: On Thu, Jun 30, 2016 at 04:12:49PM +0100, Dave Gordon wrote: Precursor for fix to secure batch execution. We will need to be able to retrieve the batch VMA (as well as the batch itself) from the eb list, so this patch extracts that part of eb_get_batch() into a separate function, and moves both parts to a more logical place in the file, near where the eb list is created. Also, it may not be obvious, but the current execbuffer2 ioctl interface requires that the buffer object containing the batch-to-be-executed be the LAST entry in the exec2_list[] array (I expected it to be the first!). To clarify this, we can replace the rather obscure construct "list_entry(eb->vmas.prev, ...)" in the old version of eb_get_batch() with the equivalent but more explicit "list_last_entry(&eb->vmas,...)" in the new eb_get_batch_vma() and of course add an explanatory comment. Signed-off-by: Dave Gordon I have no context on the secure batch fix you're talking about, but this here makes sense as an independent cleanup. It won't help though, so this is just churn for no purpose. -Chris At the very least, it replaces a confusing construct with a comprehensible one annotated with an explanatory comment. No. It deepens a confusion in the code that I've been trying to get removed over the last couple of years. ? I was referring to the list_{last_}entry() change. That's definitely a clarification as to how things work now. Of course, if you're planning to make the batch the first object rather than the last, I won't object. But whichever it is, let's use the most-appropriately-named of the available list functions when we pick an item from a list. And comment why or what it's doing. 
Separating finding the VMA for the batch from finding the batch itself also improves clarity and costs nothing (compiler inlines it anyway). No. That's the confusion you have here. The object is irrelevant. Ah, so we have a function to return an irrelevant object. Let's just delete it then ;) Do you think we /should/ just get rid of eb_get_batch()? Maybe just have eb_get_batch_vma() return the VMA to the [single] caller i915_gem_do_execbuffer() instead, and then have /that/ do both the flag-setting ugliness and the indirection to the object (which evidently is not irrelevant to it) ? Comprehensibility -- and hence maintainability -- is always a worthwhile purpose :) s/comprehensibility/greater confusion/ Spoken like a true Discordian ;) > BTW, do the comments in this code from patch d23db88 drm/i915: Prevent negative relocation deltas from wrapping still apply? 'Cos I think it's pretty ugly to be setting a flag on a VMA as a side-effect of a "lookup" type operation :( Surely cleaner to do that sort of thing at the top level i.e. inside i915_gem_do_execbuffer() ? The comment is wrong since the practice is more widespread and it is a particular hw bug on Ivybridge. -Chris Another reason to move it out to the caller and update the comments in the process! .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [CI,resend,1/2] drm/i915: compile-time consistency check on __EXEC_OBJECT flags
On 15/07/16 09:15, Patchwork wrote: == Series Details == Series: series starting with [CI,resend,1/2] drm/i915: compile-time consistency check on __EXEC_OBJECT flags URL : https://patchwork.freedesktop.org/series/9876/ State : failure == Summary == Series 9876v1 Series without cover letter http://patchwork.freedesktop.org/api/1.0/series/9876/revisions/1/mbox Test drv_module_reload_basic: skip -> PASS (ro-ivb-i7-3770) Test kms_cursor_legacy: Subgroup basic-flip-vs-cursor: dmesg-warn -> PASS (ro-byt-n2820) Test kms_pipe_crc_basic: Subgroup nonblocking-crc-pipe-b: skip -> PASS (fi-skl-i5-6260u) Subgroup read-crc-pipe-c: pass -> SKIP (fi-skl-i5-6260u) "No connector found for pipe 2" See https://bugs.freedesktop.org/show_bug.cgi?id=93769 .Dave. Subgroup suspend-read-crc-pipe-a: pass -> INCOMPLETE (fi-skl-i7-6700k) Test vgem_basic: Subgroup debugfs: incomplete -> PASS (ro-snb-i7-2620M) fi-kbl-qkkr total:241 pass:174 dwarn:29 dfail:1 fail:6 skip:31 fi-skl-i5-6260u total:241 pass:217 dwarn:0 dfail:0 fail:7 skip:17 fi-skl-i7-6700k total:195 pass:170 dwarn:0 dfail:0 fail:0 skip:24 fi-snb-i7-2600 total:241 pass:190 dwarn:0 dfail:0 fail:7 skip:44 ro-bdw-i5-5250u total:241 pass:213 dwarn:4 dfail:0 fail:7 skip:17 ro-bdw-i7-5557U total:241 pass:213 dwarn:1 dfail:0 fail:7 skip:20 ro-bdw-i7-5600u total:241 pass:199 dwarn:0 dfail:0 fail:7 skip:35 ro-byt-n2820 total:241 pass:191 dwarn:0 dfail:0 fail:8 skip:42 ro-hsw-i3-4010u total:241 pass:206 dwarn:0 dfail:0 fail:7 skip:28 ro-hsw-i7-4770r total:241 pass:206 dwarn:0 dfail:0 fail:7 skip:28 ro-ilk-i7-620lm total:241 pass:166 dwarn:0 dfail:0 fail:8 skip:67 ro-ilk1-i5-650 total:236 pass:166 dwarn:0 dfail:0 fail:8 skip:62 ro-ivb-i7-3770 total:241 pass:197 dwarn:0 dfail:0 fail:7 skip:37 ro-skl3-i5-6260u total:241 pass:217 dwarn:1 dfail:0 fail:7 skip:16 ro-snb-i7-2620M total:241 pass:188 dwarn:0 dfail:0 fail:8 skip:45 Results at /archive/results/CI_IGT_test/RO_Patchwork_1493/ c01b445 drm-intel-nightly: 2016y-07m-15d-07h-02m-07s UTC 
integration manifest ce44917 drm/i915: refactor eb_get_batch() 46acffb drm/i915: compile-time consistency check on __EXEC_OBJECT flags ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Use SSE4.1 movntdqa to accelerate reads from WC memory
On 18/07/16 12:35, Chris Wilson wrote: On Mon, Jul 18, 2016 at 12:15:32PM +0100, Tvrtko Ursulin wrote: I am not sure about this, but looking at the raid6 for example, it has a lot more annotations in cases like this. It seems to be telling the compiler which memory ranges does each instruction access, and also uses "asm volatile" - whether or not that is really needed I don't know. For example: asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); And: asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); Each one is telling the compiler the instruction is either reading or writing respectively from a certain memory address. You don't have any of that, and don't even specify nothing as an output parameter so I am not sure if your code is safe. The asm is correct. We do not modify either of the two pointers which we pass in via register inputs, but the memory behind them - hence the memory clobber. This is a choice of how much we let the compiler decide about addressing, and how much we tell it about what the asm code really does. The examples above get the compiler to generate *any* suitable addressing mode for each specific location involved in the transfers, so the compiler knows a lot about what's happening and can track where each datum comes from and goes to. OTOH Chris' code +asm("movntdqa (%0), %%xmm0\n" +"movntdqa 16(%0), %%xmm1\n" +"movntdqa 32(%0), %%xmm2\n" +"movntdqa 48(%0), %%xmm3\n" +"movaps %%xmm0, (%1)\n" +"movaps %%xmm1, 16(%1)\n" +"movaps %%xmm2, 32(%1)\n" +"movaps %%xmm3, 48(%1)\n" +:: "r" (src), "r" (dst) : "memory"); - doesn't need "volatile" because asm statements that have no output operands are implicitly volatile. - makes the compiler give us the source and destination *addresses* in a register each; beyond that, it doesn't know what we're doing with them, so the third ("clobbers") parameter has to say "memory" i.e. treat *all* memory contents as unknown after this. 
[[From GCC docs: The "memory" clobber tells the compiler that the assembly code performs memory reads or writes to items other than those listed in the input and output operands (for example, accessing the memory pointed to by one of the input parameters). To ensure memory contains correct values, GCC may need to flush specific register values to memory before executing the asm. Further, the compiler does not assume that any values read from memory before an asm remain unchanged after that asm; it reloads them as needed. Using the "memory" clobber effectively forms a read/write memory barrier for the compiler.]] BTW, should we not tell it we've *also* clobbered %xmm[0-3]? So they're both correct, just taking different approaches. I don't know which would give the best performance for this specific case. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for drm/i915: Treat eDP as always connected, again
On 18/07/16 13:29, Daniel Vetter wrote: On Mon, Jul 18, 2016 at 11:43:08AM -, Patchwork wrote: == Series Details == Series: drm/i915: Treat eDP as always connected, again URL : https://patchwork.freedesktop.org/series/9977/ State : failure == Summary == Series 9977v1 drm/i915: Treat eDP as always connected, again http://patchwork.freedesktop.org/api/1.0/series/9977/revisions/1/mbox Test gem_exec_suspend: Subgroup basic-s3: pass -> DMESG-WARN (fi-skl-i5-6260u) Test gem_sync: Subgroup basic-store-each: pass -> DMESG-FAIL (ro-bdw-i7-5600u) I tried to find the bug reports for these, and we don't track them. And at least the 2nd one looks like it's a one-off, so who knows what's going on. But while trying to figure out what's going on I stumbled over about 5 other sporadic CI issues in those boxes which aren't even tracked either. /me cries So normally not good enough for CI, but regressions win even against our shitty CI. Hence applied and will cherry-pick over to -fixes. -Daniel Looks like the failure in basic-store-each has been happening since at least 2016-07-10, but only on one specific BDW machine (ro-bdw-i7-5600u). Failure is: > Failed assertion: intel_detect_and_clear_missed_interrupts(fd) == 0 Intermittent, though, as Chris's recent results show: Test gem_sync: Subgroup basic-store-each: dmesg-fail -> PASS (ro-bdw-i7-5600u) making it very difficult to determine whether it's a real regression or some problem with that specific machine -- it doesn't happen on the other BDWs, but they're all slightly different. .Dave. 
fi-hsw-i7-4770k total:242 pass:210 dwarn:0 dfail:0 fail:12 skip:20 fi-kbl-qkkr total:242 pass:176 dwarn:27 dfail:0 fail:12 skip:27 fi-skl-i5-6260u total:242 pass:218 dwarn:1 dfail:0 fail:11 skip:12 fi-skl-i7-6700k total:107 pass:84 dwarn:0 dfail:0 fail:0 skip:22 fi-snb-i7-2600 total:242 pass:190 dwarn:0 dfail:0 fail:12 skip:40 ro-bdw-i5-5250u total:243 pass:214 dwarn:4 dfail:0 fail:12 skip:13 ro-bdw-i7-5557U total:243 pass:215 dwarn:0 dfail:0 fail:12 skip:16 ro-bdw-i7-5600u total:243 pass:199 dwarn:0 dfail:1 fail:11 skip:32 ro-bsw-n3050 total:218 pass:173 dwarn:0 dfail:0 fail:2 skip:42 ro-byt-n2820 total:243 pass:191 dwarn:0 dfail:0 fail:14 skip:38 ro-hsw-i3-4010u total:243 pass:206 dwarn:0 dfail:0 fail:13 skip:24 ro-hsw-i7-4770r total:243 pass:206 dwarn:0 dfail:0 fail:13 skip:24 ro-ilk-i7-620lm total:243 pass:166 dwarn:0 dfail:0 fail:14 skip:63 ro-ilk1-i5-650 total:238 pass:166 dwarn:0 dfail:0 fail:14 skip:58 ro-ivb-i7-3770 total:243 pass:197 dwarn:0 dfail:0 fail:13 skip:33 ro-skl3-i5-6260u total:243 pass:218 dwarn:1 dfail:0 fail:12 skip:12 ro-snb-i7-2620M total:243 pass:188 dwarn:0 dfail:0 fail:13 skip:42 fi-bsw-n3050 failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1517/ 74bd981 drm-intel-nightly: 2016y-07m-18d-10h-05m-42s UTC integration manifest c3022da drm/i915: Treat eDP as always connected, again ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for drm/i915: Treat eDP as always connected, again
On 18/07/16 13:50, Dave Gordon wrote: On 18/07/16 13:29, Daniel Vetter wrote: On Mon, Jul 18, 2016 at 11:43:08AM -, Patchwork wrote: == Series Details == Series: drm/i915: Treat eDP as always connected, again URL : https://patchwork.freedesktop.org/series/9977/ State : failure == Summary == Series 9977v1 drm/i915: Treat eDP as always connected, again http://patchwork.freedesktop.org/api/1.0/series/9977/revisions/1/mbox Test gem_exec_suspend: Subgroup basic-s3: pass -> DMESG-WARN (fi-skl-i5-6260u) Test gem_sync: Subgroup basic-store-each: pass -> DMESG-FAIL (ro-bdw-i7-5600u) I tried to find the bug reports for these, and we don't track them. And at least the 2nd one looks like it's a one-off, so who knows what's going on. But while trying to figure out what's going on I stumbled over about 5 other sporadic CI issues in those boxes which aren't even tracked either. /me cries So normally not good enough for CI, but regressions win even against our shitty CI. Hence applied and will cherry-pick over to -fixes. -Daniel Looks like the failure in basic-store-each has been happening since at least 2016-07-10, but only on one specific BDW machine (ro-bdw-i7-5600u). Failure is: > Failed assertion: intel_detect_and_clear_missed_interrupts(fd) == 0 Intermittent, though, as Chris's recent results show: Test gem_sync: Subgroup basic-store-each: dmesg-fail -> PASS (ro-bdw-i7-5600u) making it very difficult to determine whether it's a real regression or some problem with that specific machine -- it doesn't happen on the other BDWs, but they're all slightly different. .Dave. Bug filed: Bug 96975 - [BAT BDW] basic-store-each fails, intel_detect_and_clear_missed_interrupts(fd) == 0 .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Use SSE4.1 movntdqa to accelerate reads from WC memory
On 18/07/16 13:56, Tvrtko Ursulin wrote: On 18/07/16 12:57, Dave Gordon wrote: On 18/07/16 12:35, Chris Wilson wrote: On Mon, Jul 18, 2016 at 12:15:32PM +0100, Tvrtko Ursulin wrote: I am not sure about this, but looking at the raid6 for example, it has a lot more annotations in cases like this. It seems to be telling the compiler which memory ranges does each instruction access, and also uses "asm volatile" - whether or not that is really needed I don't know. For example: asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); And: asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); Each one is telling the compiler the instruction is either reading or writing respectively from a certain memory address. You don't have any of that, and don't even specify nothing as an output parameter so I am not sure if your code is safe. The asm is correct. We do not modify either of the two pointers which we pass in via register inputs, but the memory behind them - hence the memory clobber. This is a choice of how much we let the compiler decide about addressing, and how much we tell it about what the asm code really does. The examples above get the compiler to generate *any* suitable addressing mode for each specific location involved in the transfers, so the compiler knows a lot about what's happening and can track where each datum comes from and goes to. OTOH Chris' code +asm("movntdqa (%0), %%xmm0\n" +"movntdqa 16(%0), %%xmm1\n" +"movntdqa 32(%0), %%xmm2\n" +"movntdqa 48(%0), %%xmm3\n" +"movaps %%xmm0, (%1)\n" +"movaps %%xmm1, 16(%1)\n" +"movaps %%xmm2, 32(%1)\n" +"movaps %%xmm3, 48(%1)\n" +:: "r" (src), "r" (dst) : "memory"); - doesn't need "volatile" because asm statements that have no output operands are implicitly volatile. - makes the compiler give us the source and destination *addresses* in a register each; beyond that, it doesn't know what we're doing with them, so the third ("clobbers") parameter has to say "memory" i.e. treat *all* memory contents as unknown after this. 
[[From GCC docs: The "memory" clobber tells the compiler that the assembly code performs memory reads or writes to items other than those listed in the input and output operands (for example, accessing the memory pointed to by one of the input parameters). To ensure memory contains correct values, GCC may need to flush specific register values to memory before executing the asm. Further, the compiler does not assume that any values read from memory before an asm remain unchanged after that asm; it reloads them as needed. Using the "memory" clobber effectively forms a read/write memory barrier for the compiler.]] BTW, should we not tell it we've *also* clobbered %xmm[0-3]? So they're both correct, just taking different approaches. I don't know which would give the best performance for this specific case. Cool, learn something new every day. :) I've tried writing it as: struct qw2 { u64 q[2]; } __attribute__((packed)); static void __memcpy_ntdqa(struct qw2 *dst, const struct qw2 *src, unsigned long len) { kernel_fpu_begin(); len >>= 4; while (len >= 4) { asm("movntdqa (%0), %%xmm0" :: "r" (src), "m" (src[0])); asm("movntdqa 16(%0), %%xmm1" :: "r" (src), "m" (src[1])); Couldn't this be just: asm("movntdqa %1, %%xmm1" :: "r" (src), "m" (src[1])); thus letting the compiler supply the offset? Does the compiler know about %xmm* registers? If so you could maybe get it to choose which to use in each instruction, or should at least tell it which ones are being clobbered. asm("movntdqa 32(%0), %%xmm2" :: "r" (src), "m" (src[2])); asm("movntdqa 48(%0), %%xmm3" :: "r" (src), "m" (src[3])); asm("movaps %%xmm0, (%1)" : "=m" (dst[0]) : "r" (dst)); asm("movaps %%xmm1, 16(%1)" : "=m" (dst[1]) : "r" (dst)); asm("movaps %%xmm2, 32(%1)" : "=m" (dst[2]) : "r" (dst)); asm("movaps %%xmm3, 48(%1)" : "=m" (dst[3]) : "r" (dst)); src += 4; dst += 4; len -= 4; } while (len--) { asm("movntdqa (%0), %%xmm0" :: "r" (src), "m" (src[0])); asm("mo
Re: [Intel-gfx] [PATCH] drm/i915: Use SSE4.1 movntdqa to accelerate reads from WC memory
On 18/07/16 16:06, Tvrtko Ursulin wrote: On 18/07/16 14:46, Tvrtko Ursulin wrote: [snip] This version generates the smallest code: static void __memcpy_ntdqa(struct qw2 *dst, const struct qw2 *src, unsigned long len) { unsigned long l4; kernel_fpu_begin(); l4 = len / 4; while (l4) { asm("movntdqa (%0), %%xmm0" :: "r" (src), "m" (src[0])); asm("movntdqa 16(%0), %%xmm1" :: "r" (src), "m" (src[1])); asm("movntdqa 32(%0), %%xmm2" :: "r" (src), "m" (src[2])); asm("movntdqa 48(%0), %%xmm3" :: "r" (src), "m" (src[3])); asm("movaps %%xmm0, (%1)" : "=m" (dst[0]) : "r" (dst)); asm("movaps %%xmm1, 16(%1)" : "=m" (dst[1]) : "r" (dst)); asm("movaps %%xmm2, 32(%1)" : "=m" (dst[2]) : "r" (dst)); asm("movaps %%xmm3, 48(%1)" : "=m" (dst[3]) : "r" (dst)); src += 4; dst += 4; l4--; } len %= 4; while (len) { asm("movntdqa (%0), %%xmm0" :: "r" (src), "m" (src[0])); asm("movaps %%xmm0, (%1)" : "=m" (dst[0]) : "r" (dst)); src++; dst++; len--; } kernel_fpu_end(); } Although I still haven't figured out a way to convince it to use the same registers for src and dest between the two loops. I remembered one famous interview question, along the lines of, "what is the code below doing". Translated to this example: static void __memcpy_ntdqa(struct qw2 *dst, const struct qw2 *src, unsigned long len) { unsigned long n; kernel_fpu_begin(); n = (len + 3) / 4; switch (len % 4) { case 0: do { asm("movntdqa %1, %%xmm0\n" "movaps %%xmm0, %0\n" : "=m" (*dst): "m" (*src)); src++; dst++; case 3: asm("movntdqa %1, %%xmm1\n" "movaps %%xmm1, %0\n" : "=m" (*dst): "m" (*src)); src++; dst++; case 2: asm("movntdqa %1, %%xmm2\n" "movaps %%xmm2, %0\n" : "=m" (*dst): "m" (*src)); src++; dst++; case 1: asm("movntdqa %1, %%xmm3\n" "movaps %%xmm3, %0\n" : "=m" (*dst): "m" (*src)); src++; dst++; } while (--n > 0); } kernel_fpu_end(); } :D No idea if loads/stores can run async in this case. 
Regards, Tvrtko Here's yet another variant, just to document other ways of writing it: #include "asm/fpu/api.h" /* This is the datatype of an xmm register */ typedef double xmmd_t __attribute__ ((vector_size (16))); __attribute__((target("sse4.1"))) void __memcpy_ntdqa(xmmd_t *dst, const xmmd_t *src, unsigned long len) { xmmd_t tmp0, tmp1, tmp2, tmp3; unsigned long l64; kernel_fpu_begin(); /* Whole 64-byte blocks as 4*16 bytes */ for (l64 = len/64; l64--; ) { asm("movntdqa %1, %0" : "=x" (tmp0) : "m" (*src++)); asm("movntdqa %1, %0" : "=x" (tmp1) : "m" (*src++)); asm("movntdqa %1, %0" : "=x" (tmp2) : "m" (*src++)); asm("movntdqa %1, %0" : "=x" (tmp3) : "m" (*src++)); asm("movaps %1, %0" : "=m" (*dst++) : "x" (tmp0)); asm("movaps %1, %0" : "=m" (*dst++) : "x" (tmp1)); asm("movaps %1, %0" : "=m" (*dst++) : "x" (tmp2)); asm("movaps %1, %0" : "=m" (*dst++) : "x" (tmp3)); } /* Remaining up-to-3 16-byte chunks */ for (len &= 63, len >>= 4; len--; ) { asm("movntdqa %1, %0" : "=x" (tmp0) : "m" (*src++)); asm("movaps %1, %0" : "=m" (*dst++) : "x" (tmp0)); } kernel_fpu_end(); } I wondered whether we could get GCC to unroll the loops automatically i.e. just write the one loop and say we wanted it unrolled four times, leaving the compiler to deal with the remainder; but I didn't find a way to specify "unroll 4 times" as opposed to just "unroll this some". .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI-RESEND 1/3] drm: extra printk() wrapper macros
We had only DRM_INFO() and DRM_ERROR(), whereas the underlying printk() provides several other useful intermediate levels such as NOTICE and WARNING. So this patch fills out the set by providing both regular and once-only macros for each of the levels INFO, NOTICE, and WARNING, using a common underlying macro that does all the token-pasting. DRM_ERROR is unchanged, as it's not just a printk wrapper. v2: Fix whitespace, missing ## (Eric Engestrom) Signed-off-by: Dave Gordon Reviewed-by: Eric Engestrom --- include/drm/drmP.h | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d377865..3669cdd 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -162,6 +162,26 @@ void drm_err(const char *format, ...); /** \name Macros to make printk easier */ /*@{*/ +#define _DRM_PRINTK(once, level, fmt, ...) \ + do {\ + printk##once(KERN_##level "[" DRM_NAME "] " fmt,\ +##__VA_ARGS__);\ + } while (0) + +#define DRM_INFO(fmt, ...) \ + _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE(fmt, ...) \ + _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN(fmt, ...) \ + _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) + +#define DRM_INFO_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) + /** * Error output. * @@ -187,12 +207,6 @@ void drm_err(const char *format, ...); drm_err(fmt, ##__VA_ARGS__);\ }) -#define DRM_INFO(fmt, ...) \ - printk(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - -#define DRM_INFO_ONCE(fmt, ...)\ - printk_once(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - /** * Debug output. * -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [CI-RESEND 3/3] drm/i915/guc: revisit GuC loader message levels
Some downgraded from DRM_ERROR() to DRM_WARN() or DRM_NOTE(), a few upgraded from DRM_INFO() to DRM_NOTE() or DRM_WARN(), and one eliminated completely. v2: different permutation of levels :) Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 34 - 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..a2f4fa4 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -140,12 +140,14 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { - switch (INTEL_INFO(dev_priv)->gen) { + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: - DRM_ERROR("GUC: unsupported core family\n"); + DRM_WARN("GEN%d does not support GuC operation\n", gen); return GFXCORE_FAMILY_UNKNOWN; } } @@ -433,7 +435,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? 
*/ - DRM_INFO("No GuC firmware known for this platform\n"); + DRM_WARN("No GuC firmware known for this platform\n"); err = -ENODEV; goto fail; } @@ -471,10 +473,8 @@ int intel_guc_setup(struct drm_device *dev) * that the state and timing are fairly predictable */ err = i915_reset_guc(dev_priv); - if (err) { - DRM_ERROR("GuC reset failed: %d\n", err); + if (err) goto fail; - } err = guc_ucode_xfer(dev_priv); if (!err) @@ -532,15 +532,15 @@ int intel_guc_setup(struct drm_device *dev) else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) - DRM_INFO("GuC firmware load failed: %d\n", err); + DRM_NOTE("GuC firmware load failed: %d\n", err); else - DRM_ERROR("GuC firmware load failed: %d\n", err); + DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) - DRM_INFO("Falling back from GuC submission to execlist mode\n"); + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -571,7 +571,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { - DRM_ERROR("Firmware header is missing\n"); + DRM_NOTE("Firmware header is missing\n"); goto fail; } @@ -583,7 +583,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { - DRM_ERROR("CSS header definition mismatch\n"); + DRM_NOTE("CSS header definition mismatch\n"); goto fail; } @@ -593,7 +593,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_ERROR("RSA key size is bad\n"); + DRM_NOTE("RSA key size is bad\n"); goto fail; } 
guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -602,14 +602,14 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* At least, it should have header, uCode and RSA. Size of all three. */ size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size; if (fw->size < size) { - DRM_ERROR("Missing firmware components\n"); + DRM_NOTE("Missing firmware components\n"); goto fail; } /* Header and uCode will be loaded to WOPCM. Size of the two. */ size = guc_fw->header_size + guc_fw->ucode_size; if (size > guc_wopcm_size(to_
[Intel-gfx] [CI-RESEND 2/3] drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN()
Where we're going to continue regardless of the problem, rather than fail, then the message should be a WARNing rather than an ERROR. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..e299b64 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -114,10 +114,8 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (ret != -ETIMEDOUT) ret = -EIO; - DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " - "status=0x%08X response=0x%08X\n", - data[0], ret, status, - I915_READ(SOFT_SCRATCH(15))); + DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n", +data[0], ret, status, I915_READ(SOFT_SCRATCH(15))); dev_priv->guc.action_fail += 1; dev_priv->guc.action_err = ret; @@ -553,8 +551,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) if (db_ret.db_status == GUC_DOORBELL_DISABLED) break; - DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", - db_cmp.cookie, db_ret.cookie); + DRM_WARN("Cookie mismatch. 
Expected %d, found %d\n", +db_cmp.cookie, db_ret.cookie); /* update the cookie to newly read cookie from GuC */ db_cmp.cookie = db_ret.cookie; @@ -726,8 +724,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) /* Restore to original value */ err = guc_update_doorbell_id(guc, client, db_id); if (err) - DRM_ERROR("Failed to restore doorbell to %d, err %d\n", - db_id, err); + DRM_WARN("Failed to restore doorbell to %d, err %d\n", +db_id, err); for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { i915_reg_t drbreg = GEN8_DRBREGL(i); @@ -819,8 +817,6 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) return client; err: - DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); - guc_client_free(dev_priv, client); return NULL; } @@ -998,7 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { - DRM_ERROR("Failed to create execbuf guc_client\n"); + DRM_ERROR("Failed to create normal GuC client!\n"); return -ENOMEM; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/5] drm/i915/guc: doorbell reset should avoid used doorbells
guc_init_doorbell_hw() borrows the (currently single) GuC client to use in reinitialising ALL the doorbell registers (as the hardware doesn't reset them when the GuC is reset). As a prerequisite for accommodating multiple clients, it should only reset doorbells that are supposed to be disabled, avoiding those that are marked as in use by any client. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..d8402e4 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -699,7 +699,7 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) } /* - * Borrow the first client to set up & tear down every doorbell + * Borrow the first client to set up & tear down each unused doorbell * in turn, to ensure that all doorbell h/w is (re)initialised. */ static void guc_init_doorbell_hw(struct intel_guc *guc) @@ -715,6 +715,9 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) i915_reg_t drbreg = GEN8_DRBREGL(i); u32 value = I915_READ(drbreg); + if (test_bit(i, guc->doorbell_bitmap)) + continue; + err = guc_update_doorbell_id(guc, client, i); /* Report update failure or unexpectedly active doorbell */ @@ -733,6 +736,9 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) i915_reg_t drbreg = GEN8_DRBREGL(i); u32 value = I915_READ(drbreg); + if (test_bit(i, guc->doorbell_bitmap)) + continue; + if (i != db_id && (value & GUC_DOORBELL_ENABLED)) DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n", i, drbreg.reg, value); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/5] drm/i915/guc: use a separate GuC client for each engine
When using a single GuC client for multiple engines, the i915 driver has to merge all work items into a single work queue, which the GuC firmware then demultiplexes into separate submission queues per engine. In theory, this could lead to the single queue becoming a bottleneck in which an excess of outstanding work for one or more engines might prevent work for an idle engine reaching the hardware. To reduce this risk, we can create one GuC client per engine. Each will have its own workqueue, to be used only for work targeting a single engine, so there will be no cross-engine contention for workqueue slots. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_debugfs.c| 25 - drivers/gpu/drm/i915/i915_guc_submission.c | 35 +++--- drivers/gpu/drm/i915/intel_guc.h | 2 +- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 90aef45..5cbb8ef 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2570,20 +2570,26 @@ static int i915_guc_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_guc guc; - struct i915_guc_client client = {}; + struct i915_guc_client *clients; struct intel_engine_cs *engine; + enum intel_engine_id id; u64 total = 0; if (!HAS_GUC_SCHED(dev_priv)) return 0; + clients = kcalloc(I915_NUM_ENGINES, sizeof(*clients), GFP_KERNEL); + if (clients == NULL) + return -ENOMEM; + if (mutex_lock_interruptible(&dev->struct_mutex)) - return 0; + goto done; /* Take a local copy of the GuC data, so we can dump it at leisure */ guc = dev_priv->guc; - if (guc.execbuf_client) - client = *guc.execbuf_client; + for_each_engine_id(engine, dev_priv, id) + if (guc.exec_clients[id]) + clients[id] = *guc.exec_clients[id]; mutex_unlock(&dev->struct_mutex); @@ -2606,11 +2612,18 @@ static int i915_guc_info(struct seq_file *m, void *data) } seq_printf(m, 
"\t%s: %llu\n", "Total", total); - seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client); - i915_guc_client_info(m, dev_priv, &client); + for_each_engine_id(engine, dev_priv, id) { + seq_printf(m, "\nGuC exec_client[%d] @ %p:\n", + id, guc.exec_clients[id]); + if (guc.exec_clients[id]) + i915_guc_client_info(m, dev_priv, &clients[id]); + } /* Add more as required ... */ +done: + kfree(clients); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index dc5f485..b0f9945 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -434,7 +434,9 @@ static void guc_fini_ctx_desc(struct intel_guc *guc, int i915_guc_wq_check_space(struct drm_i915_gem_request *request) { const size_t wqi_size = sizeof(struct guc_wq_item); - struct i915_guc_client *gc = request->i915->guc.execbuf_client; + enum intel_engine_id engine_id = request->engine->id; + struct intel_guc *guc = &request->i915->guc; + struct i915_guc_client *gc = guc->exec_clients[engine_id]; struct guc_process_desc *desc; u32 freespace; @@ -589,7 +591,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) { unsigned int engine_id = rq->engine->id; struct intel_guc *guc = &rq->i915->guc; - struct i915_guc_client *client = guc->execbuf_client; + struct i915_guc_client *client = guc->exec_clients[engine_id]; int b_ret; guc_add_workqueue_item(client, rq); @@ -723,7 +725,7 @@ static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) */ static void guc_init_doorbell_hw(struct intel_guc *guc) { - struct i915_guc_client *client = guc->execbuf_client; + struct i915_guc_client *client = guc->exec_clients[RCS]; uint16_t db_id; int i, err; @@ -1004,17 +1006,21 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; struct i915_guc_client *client; + struct intel_engine_cs *engine; - /* client for execbuf submission */ - client = 
guc_client_alloc(dev_priv, - GUC_CTX_PRIORITY_KMD_NORMAL, - dev_priv->kernel_context); - if (!client) { - DRM_ERROR("Failed to create execbuf g
[Intel-gfx] [PATCH 4/5] drm/i915/guc: add engine mask to GuC client & pass to GuC
The Context Descriptor passed by the kernel to the GuC contains a field specifying which engine(s) the context will use. Historically, this was always set to "all of them", but now that we have one client per engine, we can be more precise, and set only the single bit for the engine that the client is associated with. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++- drivers/gpu/drm/i915/intel_guc.h | 3 ++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index b0f9945..4daba77 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -340,7 +340,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.priority = client->priority; desc.db_id = client->doorbell_id; - for_each_engine(engine, dev_priv) { + for_each_engine_masked(engine, dev_priv, client->engines) { struct intel_context *ce = &ctx->engine[engine->id]; struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; struct drm_i915_gem_object *obj; @@ -374,6 +374,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.engines_used |= (1 << engine->guc_id); } + DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", + client->engines, desc.engines_used); WARN_ON(desc.engines_used == 0); /* @@ -768,6 +770,7 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) */ static struct i915_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, +uint32_t engines, uint32_t priority, struct i915_gem_context *ctx) { @@ -780,10 +783,11 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) if (!client) return NULL; - client->doorbell_id = GUC_INVALID_DOORBELL_ID; - client->priority = priority; client->owner = ctx; client->guc = guc; + client->engines = engines; + client->priority = priority; + client->doorbell_id = GUC_INVALID_DOORBELL_ID; client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 
0, GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); @@ -825,8 +829,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) if (guc_init_doorbell(guc, client, db_id)) goto err; - DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n", - priority, client, client->ctx_index); + DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", + priority, client, client->engines, client->ctx_index); DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", client->doorbell_id, client->doorbell_offset); @@ -1011,6 +1015,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv) { /* client for execbuf submission */ client = guc_client_alloc(dev_priv, + intel_engine_flag(engine), GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 7b4cc4d..53d41b5 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -67,6 +67,8 @@ struct i915_guc_client { void *client_base; /* first page (only) of above */ struct i915_gem_context *owner; struct intel_guc *guc; + + uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; uint32_t ctx_index; @@ -79,7 +81,6 @@ struct i915_guc_client { uint32_t wq_offset; uint32_t wq_size; uint32_t wq_tail; - uint32_t unused;/* Was 'wq_head'*/ uint32_t no_wq_space; uint32_t q_fail;/* No longer used */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/5] drm/i915/guc: refactor guc_init_doorbell_hw()
Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 54 +- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index d8402e4..dc5f485 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -698,32 +698,47 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) kfree(client); } +/* Check that a doorbell register is in the expected state */ +static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + i915_reg_t drbreg = GEN8_DRBREGL(db_id); + uint32_t value = I915_READ(drbreg); + bool enabled = (value & GUC_DOORBELL_ENABLED) != 0; + bool expected = test_bit(db_id, guc->doorbell_bitmap); + + if (enabled == expected) + return true; + + DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n", +db_id, drbreg.reg, value, +expected ? "active" : "inactive"); + + return false; +} + /* * Borrow the first client to set up & tear down each unused doorbell * in turn, to ensure that all doorbell h/w is (re)initialised. 
*/ static void guc_init_doorbell_hw(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); struct i915_guc_client *client = guc->execbuf_client; - uint16_t db_id, i; - int err; + uint16_t db_id; + int i, err; + /* Save client's original doorbell selection */ db_id = client->doorbell_id; for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); - - if (test_bit(i, guc->doorbell_bitmap)) + /* Skip if doorbell is OK */ + if (guc_doorbell_check(guc, i)) continue; err = guc_update_doorbell_id(guc, client, i); - - /* Report update failure or unexpectedly active doorbell */ - if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED))) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n", - i, drbreg.reg, value, err); + if (err) + DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n", + i, err); } /* Restore to original value */ @@ -732,18 +747,9 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) DRM_ERROR("Failed to restore doorbell to %d, err %d\n", db_id, err); - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); - - if (test_bit(i, guc->doorbell_bitmap)) - continue; - - if (i != db_id && (value & GUC_DOORBELL_ENABLED)) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n", - i, drbreg.reg, value); - - } + /* Read back & verify all doorbell registers */ + for (i = 0; i < GUC_MAX_DOORBELLS; ++i) + (void)guc_doorbell_check(guc, i); } /** -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/5] drm/i915/guc: use for_each_engine_id() where appropriate
Now that host structures are indexed by host engine-id rather than guc_id, we can usefully convert some for_each_engine() loops to use for_each_engine_id() and avoid multiple dereferences of engine->id. Also a few related tweaks to cache structure members locally wherever they're used more than once or twice, hopefully eliminating memory references. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_debugfs.c| 17 + drivers/gpu/drm/i915/i915_guc_submission.c | 22 +- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5cbb8ef..76918ab 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2541,6 +2541,7 @@ static void i915_guc_client_info(struct seq_file *m, struct i915_guc_client *client) { struct intel_engine_cs *engine; + enum intel_engine_id id; uint64_t tot = 0; seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", @@ -2555,11 +2556,11 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = client->submissions[id]; + tot += submissions; seq_printf(m, "\tSubmissions: %llu %s\n", - client->submissions[engine->id], - engine->name); - tot += client->submissions[engine->id]; + submissions, engine->name); } seq_printf(m, "\tTotal: %llu\n", tot); } @@ -2604,11 +2605,11 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "GuC last action error code: %d\n", guc.action_err); seq_printf(m, "\nGuC submissions:\n"); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = guc.submissions[id]; + total += submissions; seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", - engine->name, guc.submissions[engine->id], - 
guc.last_seqno[engine->id]); - total += guc.submissions[engine->id]; + engine->name, submissions, guc.last_seqno[id]); } seq_printf(m, "\t%s: %llu\n", "Total", total); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 4daba77..5beed1b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, for_each_engine_masked(engine, dev_priv, client->engines) { struct intel_context *ce = &ctx->engine[engine->id]; - struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; + uint32_t guc_engine_id = engine->guc_id; + struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; struct drm_i915_gem_object *obj; /* TODO: We have a design issue to be solved here. Only when we @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, gfx_addr = i915_gem_obj_ggtt_offset(ce->state); lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | - (engine->guc_id << GUC_ELC_ENGINE_OFFSET); + (guc_engine_id << GUC_ELC_ENGINE_OFFSET); obj = ce->ringbuf->obj; gfx_addr = i915_gem_obj_ggtt_offset(obj); @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->ring_next_free_location = gfx_addr; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << engine->guc_id); + desc.engines_used |= (1 << guc_engine_id); } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; + struct intel_engine_cs *engine = rq->engine; struct guc_process_desc *desc; struct guc_wq_item
Re: [Intel-gfx] [PATCH v2] i915: fix build error with -Werror
On 19/07/16 08:05, Daniel Vetter wrote: On Mon, Jul 04, 2016 at 11:30:06AM -0400, Jeff Mahoney wrote: This fixes the following build error with -Werror and gcc 6.1: drivers/gpu/drm/i915/i915_debugfs.c:2103:6: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=parentheses] Signed-off-by: Jeff Mahoney This doesn't apply on -next any more ... Is this still an issue on latest kernels? -Daniel --- drivers/gpu/drm/i915/i915_debugfs.c |7 --- 1 file changed, 4 insertions(+), 3 deletions(-) --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2100,9 +2100,10 @@ static int i915_dump_lrc(struct seq_file return ret; list_for_each_entry(ctx, &dev_priv->context_list, link) - if (ctx != dev_priv->kernel_context) + if (ctx != dev_priv->kernel_context) { for_each_engine(engine, dev_priv) i915_dump_lrc_obj(m, ctx, engine); + } mutex_unlock(&dev->struct_mutex); That's a curious warning. Ever since commit 373701b1fc7d7c0013ae4fffd8103615c150751e drm: fix potential dangling else problems in for_each_ macros Author: Jani Nikula Date: Tue Nov 24 21:21:55 2015 +0200 Link: http://patchwork.freedesktop.org/patch/msgid/1448392916-2281-1-git-send-email-jani.nik...@intel.com we've avoided leaving a dangling else; the code should expand as for ( /* each entry */ ) if (ctx != dev_priv->kernel_context) for ( /* each engine */ ) if (!intel_engine_initialized(engine)) {} else i915_dump_lrc_obj(m, ctx, engine); ... so that the (hidden) else is clearly matched with the (hidden) if() generated by the macro expansion. Surely the compiler can't think that an else inside a for-loop could be mistakenly paired with one outside the loop? Of course we did *have* a proposal for an alternative iterator strategy that didn't expose any if/else at all, but some people didn't like it :L Oh well, it just shows that using macros to rewrite C syntax is still an abomination, Stephen Bourne notwithstanding. If you want iterators and blocks, use Ruby ;) .Dave. 
___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✓ Ro.CI.BAT: success for series starting with [CI-RESEND,1/3] drm: extra printk() wrapper macros
On 19/07/16 14:53, Patchwork wrote: == Series Details == Series: series starting with [CI-RESEND,1/3] drm: extra printk() wrapper macros URL : https://patchwork.freedesktop.org/series/10036/ State : success == Summary == Series 10036v1 Series without cover letter http://patchwork.freedesktop.org/api/1.0/series/10036/revisions/1/mbox Test drv_module_reload_basic: dmesg-warn -> PASS (ro-skl3-i5-6260u) Test gem_sync: Subgroup basic-store-each: fail -> DMESG-FAIL (ro-bdw-i7-5600u) This is (now) Bug 96974 https://bugs.freedesktop.org/show_bug.cgi?id=96974 [BAT BDW] gem_sync / basic-store-each fails sporadically I filed this too, but Villa beat me by a couple of minutes, so my bug 96975 is now a dup of 96974. Maybe we should get another identical machine and see whether the results are consistent, 'cos the other BDWs aren't showing this at all. .Dave. fi-hsw-i7-4770k total:243 pass:213 dwarn:0 dfail:0 fail:10 skip:20 fi-kbl-qkkr total:243 pass:178 dwarn:27 dfail:1 fail:10 skip:27 fi-skl-i5-6260u total:243 pass:222 dwarn:0 dfail:0 fail:9 skip:12 fi-skl-i7-6700k total:243 pass:208 dwarn:0 dfail:0 fail:9 skip:26 fi-snb-i7-2600 total:243 pass:193 dwarn:0 dfail:0 fail:10 skip:40 ro-bdw-i5-5250u total:244 pass:217 dwarn:4 dfail:0 fail:10 skip:13 ro-bdw-i7-5557U total:244 pass:219 dwarn:1 dfail:0 fail:9 skip:15 ro-bdw-i7-5600u total:244 pass:201 dwarn:0 dfail:1 fail:10 skip:32 ro-bsw-n3050 total:218 pass:173 dwarn:0 dfail:0 fail:2 skip:42 ro-byt-n2820 total:244 pass:194 dwarn:0 dfail:0 fail:12 skip:38 ro-hsw-i3-4010u total:244 pass:209 dwarn:0 dfail:0 fail:11 skip:24 ro-hsw-i7-4770r total:244 pass:209 dwarn:0 dfail:0 fail:11 skip:24 ro-ilk-i7-620lm total:244 pass:169 dwarn:0 dfail:0 fail:12 skip:63 ro-ilk1-i5-650 total:239 pass:170 dwarn:0 dfail:0 fail:11 skip:58 ro-ivb-i7-3770 total:244 pass:200 dwarn:0 dfail:0 fail:11 skip:33 ro-skl3-i5-6260u total:244 pass:222 dwarn:0 dfail:0 fail:10 skip:12 ro-snb-i7-2620M total:244 pass:190 dwarn:0 dfail:0 fail:12 skip:42 Results at 
/archive/results/CI_IGT_test/RO_Patchwork_1531/ 63a drm-intel-nightly: 2016y-07m-19d-13h-02m-39s UTC integration manifest b23d573 drm/i915/guc: revisit GuC loader message levels 950c1a4 drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN() 5e8e33b drm: extra printk() wrapper macros ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [1/5] drm/i915/guc: doorbell reset should avoid used doorbells
On 19/07/16 15:16, Patchwork wrote: == Series Details == Series: series starting with [1/5] drm/i915/guc: doorbell reset should avoid used doorbells URL : https://patchwork.freedesktop.org/series/10040/ State : failure == Summary == Series 10040v1 Series without cover letter http://patchwork.freedesktop.org/api/1.0/series/10040/revisions/1/mbox Test drv_module_reload_basic: pass -> SKIP (ro-hsw-i3-4010u) Test gem_sync: Subgroup basic-store-each: fail -> PASS (ro-bdw-i7-5600u) Test kms_cursor_legacy: Subgroup basic-cursor-vs-flip: pass -> FAIL (ro-ilk1-i5-650) Wibble? The test log for this says: + Results for igt@kms_cursor_legacy@basic-flip-vs-cursor + Result: pass + + IGT-Version: 1.15-g4d03467 (x86_64) (Linux: 4.7.0-rc7-gfxbench-RO_Patchwork_1532+ x86_64) + Test requirement not met in function __real_main427, file kms_cursor_legacy.c:448: + Test requirement: !(n >= data.resources->count_crtcs) + Subtest basic-flip-vs-cursor: SUCCESS (1.156s) + + Command /opt/igt/tests/kms_cursor_legacy --run-subtest basic-flip-vs-cursor So whatever that unfulfilled test requirement is, the result should be PASS. Test kms_pipe_crc_basic: Subgroup read-crc-pipe-c-frame-sequence: pass -> DMESG-WARN (fi-hsw-i7-4770k) [ 436.687908] [drm:drm_edid_block_valid] *ERROR* EDID checksum is invalid, remainder is 122 [ 436.688345] [drm:drm_edid_block_valid] *ERROR* EDID checksum is invalid, remainder is 122 which looks like https://bugzilla.kernel.org/show_bug.cgi?id=85951 [hsw] [drm:drm_edid_block_valid] *ERROR* EDID checksum is invalid, remainder is 46 (which was closed as unreproducible) .Dave. 
Subgroup suspend-read-crc-pipe-c: pass -> SKIP (fi-hsw-i7-4770k) Test prime_vgem: Subgroup basic-fence-read: fail -> PASS (ro-byt-n2820) fi-hsw-i7-4770k total:243 pass:211 dwarn:1 dfail:0 fail:10 skip:21 fi-kbl-qkkr total:243 pass:177 dwarn:26 dfail:1 fail:10 skip:29 fi-skl-i7-6700k total:243 pass:208 dwarn:0 dfail:0 fail:9 skip:26 fi-snb-i7-2600 total:243 pass:193 dwarn:0 dfail:0 fail:10 skip:40 ro-bdw-i5-5250u total:244 pass:217 dwarn:4 dfail:0 fail:10 skip:13 ro-bdw-i7-5557U total:244 pass:219 dwarn:1 dfail:0 fail:9 skip:15 ro-bdw-i7-5600u total:244 pass:202 dwarn:0 dfail:0 fail:10 skip:32 ro-bsw-n3050 total:218 pass:173 dwarn:0 dfail:0 fail:2 skip:42 ro-byt-n2820 total:244 pass:195 dwarn:0 dfail:0 fail:11 skip:38 ro-hsw-i3-4010u total:244 pass:208 dwarn:0 dfail:0 fail:11 skip:25 ro-hsw-i7-4770r total:244 pass:209 dwarn:0 dfail:0 fail:11 skip:24 ro-ilk1-i5-650 total:239 pass:169 dwarn:0 dfail:0 fail:12 skip:58 ro-ivb-i7-3770 total:244 pass:200 dwarn:0 dfail:0 fail:11 skip:33 ro-skl3-i5-6260u total:244 pass:221 dwarn:1 dfail:0 fail:10 skip:12 ro-snb-i7-2620M total:244 pass:190 dwarn:0 dfail:0 fail:12 skip:42 fi-skl-i5-6260u failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1532/ 63a drm-intel-nightly: 2016y-07m-19d-13h-02m-39s UTC integration manifest 207ec9d drm/i915/guc: use for_each_engine_id() where appropriate 5982c52 drm/i915/guc: add engine mask to GuC client & pass to GuC b1f5991 drm/i915/guc: use a separate GuC client for each engine 89623f4 drm/i915/guc: refactor guc_init_doorbell_hw() b9c0a41 drm/i915/guc: doorbell reset should avoid used doorbells ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/5] drm/i915/guc: use a separate GuC client for each engine
On 19/07/16 16:02, Tvrtko Ursulin wrote: On 19/07/16 13:59, Dave Gordon wrote: When using a single GuC client for multiple engines, the i915 driver has to merge all work items into a single work queue, which the GuC firmware then demultiplexes into separate submission queues per engine. In theory, this could lead to the single queue becoming a bottleneck in which an excess of outstanding work for one or more engines might prevent work for an idle engine reaching the hardware. To reduce this risk, we can create one GuC client per engine. Each will have its own workqueue, to be used only for work targeting a single engine, so there will be no cross-engine contention for workqueue slots. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_debugfs.c| 25 - drivers/gpu/drm/i915/i915_guc_submission.c | 35 +++--- drivers/gpu/drm/i915/intel_guc.h | 2 +- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 90aef45..5cbb8ef 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2570,20 +2570,26 @@ static int i915_guc_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_guc guc; -struct i915_guc_client client = {}; +struct i915_guc_client *clients; struct intel_engine_cs *engine; +enum intel_engine_id id; u64 total = 0; if (!HAS_GUC_SCHED(dev_priv)) return 0; +clients = kcalloc(I915_NUM_ENGINES, sizeof(*clients), GFP_KERNEL); +if (clients == NULL) +return -ENOMEM; + if (mutex_lock_interruptible(&dev->struct_mutex)) -return 0; +goto done; /* Take a local copy of the GuC data, so we can dump it at leisure */ guc = dev_priv->guc; -if (guc.execbuf_client) -client = *guc.execbuf_client; +for_each_engine_id(engine, dev_priv, id) +if (guc.exec_clients[id]) +clients[id] = *guc.exec_clients[id]; mutex_unlock(&dev->struct_mutex); @@ -2606,11 +2612,18 @@ static int 
i915_guc_info(struct seq_file *m, void *data) } seq_printf(m, "\t%s: %llu\n", "Total", total); -seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client); -i915_guc_client_info(m, dev_priv, &client); +for_each_engine_id(engine, dev_priv, id) { +seq_printf(m, "\nGuC exec_client[%d] @ %p:\n", +id, guc.exec_clients[id]); Minor and not a blocker for this patch, but I would potentially re-consider if printing out the client pointer is useful. It's really only useful to know whether it's NULL or not; but printing the pointer itself is simpler than printing a message saying that. This is a debugfs interface, so the content is pretty much ad-hoc. .Dave. +if (guc.exec_clients[id]) +i915_guc_client_info(m, dev_priv, &clients[id]); +} /* Add more as required ... */ +done: +kfree(clients); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index dc5f485..b0f9945 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -434,7 +434,9 @@ static void guc_fini_ctx_desc(struct intel_guc *guc, int i915_guc_wq_check_space(struct drm_i915_gem_request *request) { const size_t wqi_size = sizeof(struct guc_wq_item); -struct i915_guc_client *gc = request->i915->guc.execbuf_client; +enum intel_engine_id engine_id = request->engine->id; +struct intel_guc *guc = &request->i915->guc; +struct i915_guc_client *gc = guc->exec_clients[engine_id]; struct guc_process_desc *desc; u32 freespace; @@ -589,7 +591,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) { unsigned int engine_id = rq->engine->id; struct intel_guc *guc = &rq->i915->guc; -struct i915_guc_client *client = guc->execbuf_client; +struct i915_guc_client *client = guc->exec_clients[engine_id]; int b_ret; guc_add_workqueue_item(client, rq); @@ -723,7 +725,7 @@ static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) */ static void guc_init_doorbell_hw(struct intel_guc *guc) { -struct 
i915_guc_client *client = guc->execbuf_client; +struct i915_guc_client *client = guc->exec_clients[RCS]; uint16_t db_id; int i, err; @@ -1004,17 +1006,21 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; struct i915_guc_client *client; +struct intel_engine_cs *engine; -/* client for execbuf submission */ -client = guc_client_alloc(dev_priv,
Re: [Intel-gfx] [CI-RESEND 3/3] drm/i915/guc: revisit GuC loader message levels
On 19/07/16 15:26, Tvrtko Ursulin wrote: On 19/07/16 13:20, Dave Gordon wrote: Some downgraded from DRM_ERROR() to DRM_WARN() or DRM_NOTE(), a few upgraded from DRM_INFO() to DRM_NOTE() or DRM_WARN(), and one eliminated completely. v2: different permutation of levels :) Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_guc_loader.c | 34 - 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..a2f4fa4 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -140,12 +140,14 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { -switch (INTEL_INFO(dev_priv)->gen) { +u32 gen = INTEL_GEN(dev_priv); + +switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: -DRM_ERROR("GUC: unsupported core family\n"); +DRM_WARN("GEN%d does not support GuC operation\n", gen); This looks more like a WARN_ON condition to me, something developers need to notice extremely easily in development and will never happen in deployment. OK; the check in the caller below should have prevented us reaching this code if the hardware ID isn't known to the driver. Changed to WARN(1, ...). return GFXCORE_FAMILY_UNKNOWN; } } @@ -433,7 +435,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? */ -DRM_INFO("No GuC firmware known for this platform\n"); +DRM_WARN("No GuC firmware known for this platform\n"); This looks the same to me (WARN_ON), it can only happen if someone messes up things in development. Maybe. This is the outer check that protects the code above, and as such it's the first place we report unrecognised hardware. But we don't want to prevent the driver from working (via fallback to execlists) so developers can at least boot new hardware and not just get a blank screen. 
So a WARNING of some type is right, but does the stack trace from WARN() add any value? If not -- and I think not -- DRM_WARN() is what we want. err = -ENODEV; goto fail; } @@ -471,10 +473,8 @@ int intel_guc_setup(struct drm_device *dev) * that the state and timing are fairly predictable */ err = i915_reset_guc(dev_priv); -if (err) { -DRM_ERROR("GuC reset failed: %d\n", err); +if (err) goto fail; -} err = guc_ucode_xfer(dev_priv); if (!err) @@ -532,15 +532,15 @@ int intel_guc_setup(struct drm_device *dev) else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) -DRM_INFO("GuC firmware load failed: %d\n", err); +DRM_NOTE("GuC firmware load failed: %d\n", err); else -DRM_ERROR("GuC firmware load failed: %d\n", err); +DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) -DRM_INFO("Falling back from GuC submission to execlist mode\n"); +DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -571,7 +571,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { -DRM_ERROR("Firmware header is missing\n"); +DRM_NOTE("Firmware header is missing\n"); goto fail; } @@ -583,7 +583,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { -DRM_ERROR("CSS header definition mismatch\n"); +DRM_NOTE("CSS header definition mismatch\n"); goto fail; } @@ -593,7 +593,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { -DRM_ERROR("RSA key size is bad\n"); +DRM_NOTE("RSA key size is bad\n"); goto fail; } 
guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -602,14 +602,14 @@ static void guc_fw_fetch(struct drm_device *dev,
[Intel-gfx] [PATCH v3 3/3] drm/i915/guc: revisit GuC loader message levels
Some downgraded from DRM_ERROR() to DRM_WARN() or DRM_NOTE(), a few upgraded from DRM_INFO() to DRM_NOTE() or DRM_WARN(), and one eliminated completely. v2: different permutation of levels :) v3: convert a couple of "this shouldn't happen" messages to WARN() [Tvrtko Ursulin]. Signed-off-by: Dave Gordon Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_guc_loader.c | 34 - 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..a7901a6 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -140,12 +140,14 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { - switch (INTEL_INFO(dev_priv)->gen) { + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: - DRM_ERROR("GUC: unsupported core family\n"); + WARN(1, "GEN%d does not support GuC operation!\n", gen); return GFXCORE_FAMILY_UNKNOWN; } } @@ -433,7 +435,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? 
*/ - DRM_INFO("No GuC firmware known for this platform\n"); + WARN(1, "No GuC firmware known for this platform!\n"); err = -ENODEV; goto fail; } @@ -471,10 +473,8 @@ int intel_guc_setup(struct drm_device *dev) * that the state and timing are fairly predictable */ err = i915_reset_guc(dev_priv); - if (err) { - DRM_ERROR("GuC reset failed: %d\n", err); + if (err) goto fail; - } err = guc_ucode_xfer(dev_priv); if (!err) @@ -532,15 +532,15 @@ int intel_guc_setup(struct drm_device *dev) else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) - DRM_INFO("GuC firmware load failed: %d\n", err); + DRM_NOTE("GuC firmware load failed: %d\n", err); else - DRM_ERROR("GuC firmware load failed: %d\n", err); + DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) - DRM_INFO("Falling back from GuC submission to execlist mode\n"); + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -571,7 +571,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { - DRM_ERROR("Firmware header is missing\n"); + DRM_NOTE("Firmware header is missing\n"); goto fail; } @@ -583,7 +583,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { - DRM_ERROR("CSS header definition mismatch\n"); + DRM_NOTE("CSS header definition mismatch\n"); goto fail; } @@ -593,7 +593,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_ERROR("RSA key size is bad\n"); + DRM_NOTE("RSA key size is bad\n"); goto fail; } 
guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -602,14 +602,14 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* At least, it should have header, uCode and RSA. Size of all three. */ size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size; if (fw->size < size) { - DRM_ERROR("Missing firmware components\n"); + DRM_NOTE("Missing firmware components\n"); goto fail; } /* Header and uCode will be loaded to
[Intel-gfx] [PATCH v3 1/3] drm: extra printk() wrapper macros
We had only DRM_INFO() and DRM_ERROR(), whereas the underlying printk() provides several other useful intermediate levels such as NOTICE and WARNING. So this patch fills out the set by providing both regular and once-only macros for each of the levels INFO, NOTICE, and WARNING, using a common underlying macro that does all the token-pasting. DRM_ERROR is unchanged, as it's not just a printk wrapper. v2: Fix whitespace, missing ## (Eric Engestrom) Signed-off-by: Dave Gordon Reviewed-by: Eric Engestrom Cc: dri-de...@lists.freedesktop.org --- include/drm/drmP.h | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d377865..3669cdd 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -162,6 +162,26 @@ void drm_err(const char *format, ...); /** \name Macros to make printk easier */ /*@{*/ +#define _DRM_PRINTK(once, level, fmt, ...) \ + do {\ + printk##once(KERN_##level "[" DRM_NAME "] " fmt,\ +##__VA_ARGS__);\ + } while (0) + +#define DRM_INFO(fmt, ...) \ + _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE(fmt, ...) \ + _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN(fmt, ...) \ + _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) + +#define DRM_INFO_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) + /** * Error output. * @@ -187,12 +207,6 @@ void drm_err(const char *format, ...); drm_err(fmt, ##__VA_ARGS__);\ }) -#define DRM_INFO(fmt, ...) \ - printk(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - -#define DRM_INFO_ONCE(fmt, ...)\ - printk_once(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - /** * Debug output. * -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 2/3] drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN()
Where we're going to continue regardless of the problem, rather than fail, then the message should be a WARNing rather than an ERROR. Signed-off-by: Dave Gordon Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..e299b64 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -114,10 +114,8 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (ret != -ETIMEDOUT) ret = -EIO; - DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " - "status=0x%08X response=0x%08X\n", - data[0], ret, status, - I915_READ(SOFT_SCRATCH(15))); + DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n", +data[0], ret, status, I915_READ(SOFT_SCRATCH(15))); dev_priv->guc.action_fail += 1; dev_priv->guc.action_err = ret; @@ -553,8 +551,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) if (db_ret.db_status == GUC_DOORBELL_DISABLED) break; - DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", - db_cmp.cookie, db_ret.cookie); + DRM_WARN("Cookie mismatch. 
Expected %d, found %d\n", +db_cmp.cookie, db_ret.cookie); /* update the cookie to newly read cookie from GuC */ db_cmp.cookie = db_ret.cookie; @@ -726,8 +724,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) /* Restore to original value */ err = guc_update_doorbell_id(guc, client, db_id); if (err) - DRM_ERROR("Failed to restore doorbell to %d, err %d\n", - db_id, err); + DRM_WARN("Failed to restore doorbell to %d, err %d\n", +db_id, err); for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { i915_reg_t drbreg = GEN8_DRBREGL(i); @@ -819,8 +817,6 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) return client; err: - DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); - guc_client_free(dev_priv, client); return NULL; } @@ -998,7 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { - DRM_ERROR("Failed to create execbuf guc_client\n"); + DRM_ERROR("Failed to create normal GuC client!\n"); return -ENOMEM; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 12/13] drm/i915: Consolidate legacy semaphore initialization
On 15/07/16 14:13, Tvrtko Ursulin wrote: On 29/06/16 17:00, Chris Wilson wrote: On Wed, Jun 29, 2016 at 04:41:58PM +0100, Tvrtko Ursulin wrote: On 29/06/16 16:34, Chris Wilson wrote: On Wed, Jun 29, 2016 at 04:09:31PM +0100, Tvrtko Ursulin wrote: From: Tvrtko Ursulin Replace per-engine initialization with a common half-programmatic, half-data driven code for ease of maintenance and compactness. Signed-off-by: Tvrtko Ursulin This is the biggest pill to swallow (since our 5x5 table is only sparsely populated), but it looks correct, and more importantly easier to read. Yeah I was out of ideas on how to improve it. Fresh mind needed to try and spot a pattern in how MI_SEMAPHORE_SYNC_* and GEN6_*SYNC map to bits and registers respectively, and write it as a function. It's actually a very simple cyclic function based on register offset = base + (signaler hw_id - waiter hw_id - 1) % num_rings. (The only real challenge is picking the direction.) commit c8c99b0f0dea1ced5d0e10cdb9143356cc16b484 Author: Ben Widawsky Date: Wed Sep 14 20:32:47 2011 -0700 drm/i915: Dumb down the semaphore logic While I think the previous code is correct, it was hard to follow and hard to debug. Since we already have a ring abstraction, might as well use it to handle the semaphore updates and compares. Doesn't seem to fit, or I just can't figure it out. Needs two functions to get rid of the table: f1(0, 1) = 2 f1(0, 2) = 0 f1(0, 3) = 2 f1(1, 0) = 0 f1(1, 2) = 2 f1(1, 3) = 1 f1(2, 0) = 2 f1(2, 1) = 0 f1(2, 3) = 0 f1(3, 0) = 1 f1(3, 1) = 1 f1(3, 2) = 1 and: f2(0, 1) = 1 f2(0, 2) = 0 f2(0, 3) = 1 f2(1, 0) = 0 f2(1, 2) = 1 f2(1, 3) = 2 f2(2, 0) = 1 f2(2, 1) = 0 f2(2, 3) = 0 f2(3, 0) = 2 f2(3, 1) = 2 f2(3, 2) = 2 A weekend math puzzle for someone? 
:) Regards, Tvrtko Here's the APL expression for (the transpose of) f2, with -1's filled in along the leading diagonal (you need ⎕io←0 so the ⍳-vectors are in origin 0) {¯1+(⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵))1+⍳3}¨⍳4 ┌┬┬┬┐ │¯1 0 1 2│1 ¯1 0 2│0 1 ¯1 2│1 2 0 ¯1│ └┴┴┴┘ or transposed back so that the first argument is the row index and the second is the column index: ⍉↑{¯1+(⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵))1+⍳3}¨⍳4 ¯1 1 0 1 0 ¯1 1 2 1 0 ¯1 0 2 2 2 ¯1 http://tryapl.org/?a=%u2349%u2191%7B%AF1+%28%u2375%u2260%u23734%29%u2340%282%7C%u2375%29%u233D%28%u233D%u2363%281%3D%u2375%29%291+%u23733%7D%A8%u23734&run f1 is trivially derived from this by the observation that f1 is just f2 with the 1's and 2's interchanged. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Remove misleading CSR firmware loading docs
On 14/07/16 15:15, Daniel Vetter wrote: I forgot to remove these when reworking the firmware loading sequence last year. The new sequence is that we load firmware, and if it's not there we entirely (and permanently) fail dmc setup. Reported-by: Dave Gordon Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_csr.c | 7 --- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index c3b33a10c15c..1ea0e1f43397 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -32,13 +32,6 @@ * onwards to drive newly added DMC (Display microcontroller) in display * engine to save and restore the state of display engine when it enter into * low-power state and comes back to normal. - * - * Firmware loading status will be one of the below states: FW_UNINITIALIZED, - * FW_LOADED, FW_FAILED. - * - * Once the firmware is written into the registers status will be moved from - * FW_UNINITIALIZED to FW_LOADED and for any erroneous condition status will - * be moved to FW_FAILED. */ #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" LGTM. Reviewed-by: Dave Gordon ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/9] drm/i915: Wrap drm_gem_object_lookup in i915_gem_object_lookup
e *dev, struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; - obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0])); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); + if (!obj) return ERR_PTR(-ENOENT); fb = intel_framebuffer_create(dev, &mode_cmd, obj); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 3212d8806b5a..5ca797b01ccb 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1122,9 +1122,8 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, } crtc = to_intel_crtc(drmmode_crtc); - new_bo = to_intel_bo(drm_gem_object_lookup(file_priv, - put_image_rec->bo_handle)); - if (&new_bo->base == NULL) { + new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle); + if (!new_bo) { ret = -ENOENT; goto out_free; } LGTM. Reviewed-by: Dave Gordon ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 4/9] drm/i915: Wrap drm_gem_object_reference in i915_gem_object_get
ex 6f10b421487b..3aa76d3608ea 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -190,7 +190,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); /* For the unbound phase, this should be a no-op! */ list_for_each_entry_safe(vma, v, diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index ba16e044fac6..c41bf74f926e 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -622,8 +622,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, obj->userptr.work = &work->work; obj->userptr.workers++; - work->obj = obj; - drm_gem_object_reference(&obj->base); + work->obj = i915_gem_object_get(obj); work->task = current; get_task_struct(work->task); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 77d320584478..84904a2d3fcd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11649,7 +11649,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, /* Reference the objects for the scheduled work. */ drm_framebuffer_reference(work->old_fb); - drm_gem_object_reference(&obj->base); crtc->primary->fb = fb; update_state_fb(crtc->primary); @@ -11657,7 +11656,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_fbc_pre_update(intel_crtc, intel_crtc->config, to_intel_plane_state(primary->state)); - work->pending_flip_obj = obj; + work->pending_flip_obj = i915_gem_object_get(obj); ret = i915_mutex_lock_interruptible(dev); if (ret) This last one moves the point at which the reference is taken past several function calls; is that intentional, or at least harmless? 
With that answered, then Reviewed-by: Dave Gordon ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 5/9] drm/i915: Rename drm_gem_object_unreference in preparation for lockless free
On 20/07/16 12:43, Joonas Lahtinen wrote: On ke, 2016-07-20 at 09:59 +0100, Chris Wilson wrote: diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c69658d2c..75a1496ceb6f 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -737,13 +737,15 @@ void intel_guc_fini(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + if (!guc_fw->guc_fw_obj) + return; + This is definitely not a mechanical change, should be split to separate patch (maybe outside of this series too). With that split out, Reviewed-by: Joonas Lahtinen Regards, Joonas mutex_lock(&dev->struct_mutex); direct_interrupts_to_host(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); - if (guc_fw->guc_fw_obj) - drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); + i915_gem_object_put(guc_fw->guc_fw_obj); guc_fw->guc_fw_obj = NULL; mutex_unlock(&dev->struct_mutex); In my version of this set of transformations, I allowed the reference-releasing function (equivalent to i915_gem_object_put() here) to be called with NULL as a parameter; drm_gem_object_unreference() can already handle NULL, and since we now guarantee gem-obj == NULL <=> i915-obj == NULL, it's safe to pass it through. That allows quite a few simplifications in various bits of cleanup code. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 8/9] drm/i915: Rename ring->virtual_start as ring->vaddr
On 20/07/16 10:00, Chris Wilson wrote: Just a different colour to better match virtual addresses elsewhere. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +-- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) Just mechanical transformations with no issues that I can see, so Reviewed-by: Dave Gordon diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 38d9ae90b613..0fef5cda4aca 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2899,7 +2899,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) */ head = I915_READ_HEAD(engine) & HEAD_ADDR; backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4; - vaddr = (void __iomem *) engine->buffer->virtual_start; + vaddr = (void __iomem *) engine->buffer->vaddr; for (i = backwards; i; --i) { /* diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index afed24abb596..b9638e19d304 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1972,14 +1972,14 @@ static int init_phys_status_page(struct intel_engine_cs *engine) void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { - GEM_BUG_ON(ringbuf->vma == NULL); - GEM_BUG_ON(ringbuf->virtual_start == NULL); + GEM_BUG_ON(!ringbuf->vma); + GEM_BUG_ON(!ringbuf->vaddr); if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen) i915_gem_object_unpin_map(ringbuf->obj); else i915_vma_unpin_iomap(ringbuf->vma); - ringbuf->virtual_start = NULL; + ringbuf->vaddr = NULL; i915_gem_object_ggtt_unpin(ringbuf->obj); ringbuf->vma = NULL; @@ -2029,7 +2029,7 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, } } - ringbuf->virtual_start = addr; + ringbuf->vaddr = addr; ringbuf->vma = i915_gem_obj_to_ggtt(obj); return 0; @@ -2391,8 +2391,7 @@ int intel_ring_begin(struct 
drm_i915_gem_request *req, int num_dwords) GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size); /* Fill the tail with MI_NOOP */ - memset(ringbuf->virtual_start + ringbuf->tail, - 0, remain_actual); + memset(ringbuf->vaddr + ringbuf->tail, 0, remain_actual); ringbuf->tail = 0; ringbuf->space -= remain_actual; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index d1b2d9bd89eb..05bab8bda63d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -84,7 +84,7 @@ struct intel_ring_hangcheck { struct intel_ringbuffer { struct drm_i915_gem_object *obj; - void *virtual_start; + void *vaddr; struct i915_vma *vma; struct intel_engine_cs *engine; @@ -457,7 +457,7 @@ int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); static inline void __intel_ringbuffer_emit(struct intel_ringbuffer *rb, u32 data) { - *(uint32_t *)(rb->virtual_start + rb->tail) = data; + *(uint32_t *)(rb->vaddr + rb->tail) = data; rb->tail += 4; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 12/13] drm/i915: Consolidate legacy semaphore initialization
On 20/07/16 10:54, Tvrtko Ursulin wrote: On 19/07/16 19:38, Dave Gordon wrote: On 15/07/16 14:13, Tvrtko Ursulin wrote: On 29/06/16 17:00, Chris Wilson wrote: On Wed, Jun 29, 2016 at 04:41:58PM +0100, Tvrtko Ursulin wrote: On 29/06/16 16:34, Chris Wilson wrote: On Wed, Jun 29, 2016 at 04:09:31PM +0100, Tvrtko Ursulin wrote: From: Tvrtko Ursulin Replace per-engine initialization with a common half-programatic, half-data driven code for ease of maintenance and compactness. Signed-off-by: Tvrtko Ursulin This is the biggest pill to swallow (since our 5x5 table is only sparsely populated), but it looks correct, and more importantly easier to read. Yeah I was out of ideas on how to improve it. Fresh mind needed to try and spot a pattern in how MI_SEMAPHORE_SYNC_* and GEN6_*SYNC map to bits and registers respectively, and write it as a function. It's actually a very simple cyclic function based on register offset = base + (signaler hw_id - waiter hw_id - 1) % num_rings. (The only real challenge is picking the direction.) commit c8c99b0f0dea1ced5d0e10cdb9143356cc16b484 Author: Ben Widawsky Date: Wed Sep 14 20:32:47 2011 -0700 drm/i915: Dumb down the semaphore logic While I think the previous code is correct, it was hard to follow and hard to debug. Since we already have a ring abstraction, might as well use it to handle the semaphore updates and compares. Doesn't seem to fit, or I just can't figure it out. Needs two functions to get rid of the table: f1(0, 1) = 2 f1(0, 2) = 0 f1(0, 3) = 2 f1(1, 0) = 0 f1(1, 2) = 2 f1(1, 3) = 1 f1(2, 0) = 2 f1(2, 1) = 0 f1(2, 3) = 0 f1(3, 0) = 1 f1(3, 1) = 1 f1(3, 2) = 1 and: f2(0, 1) = 1 f2(0, 2) = 0 f2(0, 3) = 1 f2(1, 0) = 0 f2(1, 2) = 1 f2(1, 3) = 2 f2(2, 0) = 1 f2(2, 1) = 0 f2(2, 3) = 0 f2(3, 0) = 2 f2(3, 1) = 2 f2(3, 2) = 2 A weekend math puzzle for someone? 
:) Regards, Tvrtko Here's the APL expression for (the transpose of) f2, with -1's filled in along the leading diagonal (you need ⎕io←0 so the ⍳-vectors are in origin 0) {¯1+(⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵))1+⍳3}¨⍳4 ┌┬┬┬┐ │¯1 0 1 2│1 ¯1 0 2│0 1 ¯1 2│1 2 0 ¯1│ └┴┴┴┘ or transposed back so that the first argument is the row index and the second is the column index: ⍉↑{¯1+(⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵))1+⍳3}¨⍳4 ¯1 1 0 1 0 ¯1 1 2 1 0 ¯1 0 2 2 2 ¯1 http://tryapl.org/?a=%u2349%u2191%7B%AF1+%28%u2375%u2260%u23734%29%u2340%282%7C%u2375%29%u233D%28%u233D%u2363%281%3D%u2375%29%291+%u23733%7D%A8%u23734&run :-C ! How to convert that to C ? :) f1 is trivially derived from this by the observation that f1 is just f2 with the 1's and 2's interchanged. Ah yes, nicely spotted. Regards, Tvrtko Assuming you don't care about the leading diagonal (x == y), then (⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵)) translates into: int f2(unsigned int x, unsigned int y) { x -= x >= y; if (y == 1) x = 3 - x; x += y & 1; return x % 3; } y:x 0 1 2 3 0: 0 0 1 2 1: 1 1 0 2 2: 0 1 1 2 3: 1 2 0 0 Each line of C corresponds quite closely to one operation in the APL :) Although, in APL we tend to leave the data unchanged while shuffling it around into new shapes, whereas the C below does the equivalent things by changing the data (noting that it's all modulo-3 arithmetic). (⍵≠⍳4)⍀ inserts the leading diagonal, corresponding to the subtraction of x >= y (which removes the leading diagonal). ⌽⍣(1=⍵) reverses the sequence if y==1; in C, that's the 3-x (2|⍵)⌽ rotates the sequence by 1 if y is odd; that's the += and the final % ensures that the result is 0-2. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [1/2] drm/i915/gen9: Add WaInPlaceDecompressionHang
On 20/07/16 13:50, Chris Wilson wrote: On Wed, Jul 20, 2016 at 03:49:00PM +0300, Joonas Lahtinen wrote: On ke, 2016-07-20 at 12:05 +, Patchwork wrote: == Series Details == Series: series starting with [1/2] drm/i915/gen9: Add WaInPlaceDecompressionHang URL : https://patchwork.freedesktop.org/series/10088/ State : failure == Summary == Series 10088v1 Series without cover letter http://patchwork.freedesktop.org/api/1.0/series/10088/revisions/1/mbox Test gem_exec_suspend: Subgroup basic-s3: pass -> INCOMPLETE (fi-hsw-i7-4770k) Test gem_sync: Subgroup basic-store-each: pass -> FAIL (ro-bdw-i7-5600u) Arun, do you mind giving these a look, Mika left for a vacation. Neither related to this pair of patches if that's what you mean. -Chris The latter is Bug 96974 - "[BAT BDW] gem_sync / basic-store-each fails sporadically", frequently seen on that specific machine, but not on any other BDW. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 03/18] drm/i915: Rename backpointer from intel_ringbuffer to intel_engine_cs
On 20/07/16 14:11, Chris Wilson wrote: Having ringbuf->ring point to an engine is confusing, so rename it once again to ring->engine. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) Doesn't do what it says in the commit message (which sounded like a good idea). This patch actually just renames the function intel_init_ring_buffer() to intel_init_engine(). However /most/ of the code in that function is to do with initialising a ringbuffer! .Dave. diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ac51e4885046..3cfbfe40f6e8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2171,7 +2171,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, i915_gem_context_put(ctx); } -static int intel_init_ring_buffer(struct intel_engine_cs *engine) +static int intel_init_engine(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ringbuffer *ringbuf; @@ -2868,7 +2868,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - ret = intel_init_ring_buffer(engine); + ret = intel_init_engine(engine); if (ret) return ret; @@ -2907,7 +2907,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; } - return intel_init_ring_buffer(engine); + return intel_init_engine(engine); } /** @@ -2921,7 +2921,7 @@ int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) engine->flush = gen6_bsd_ring_flush; - return intel_init_ring_buffer(engine); + return intel_init_engine(engine); } int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) @@ -2934,7 +2934,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; 
- return intel_init_ring_buffer(engine); + return intel_init_engine(engine); } int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) @@ -2951,7 +2951,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_disable = hsw_vebox_irq_disable; } - return intel_init_ring_buffer(engine); + return intel_init_engine(engine); } int ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 02/18] drm/i915: Rename request->ringbuf to request->ring
On 20/07/16 14:11, Chris Wilson wrote: Now that we have disambuigated ring and engine, we can use the clearer and more consistent name for the intel_ringbuffer pointer in the request. Signed-off-by: Chris Wilson You missed a few instances of 'ring' meaning engine: i915_gem_execbuffer.c: struct intel_engine_cs **ring) intel_mocs.h:int intel_mocs_init_engine(struct intel_engine_cs *ring); intel_ringbuffer.c:gen5_seqno_barrier(struct intel_engine_cs *ring) intel_ringbuffer.h: void(*irq_enable)(struct intel_engine_cs *ring); intel_ringbuffer.h: void(*irq_disable)(struct intel_engine_cs *ring); intel_ringbuffer.h: int (*init_hw)(struct intel_engine_cs *ring); intel_ringbuffer.h: void (*irq_seqno_barrier)(struct intel_engine_cs *ring); intel_ringbuffer.h: void(*cleanup)(struct intel_engine_cs *ring); I think we have to purge every last trace of this usage before using 'ring' as shorthand for 'ringbuf[fer]'. .Dave. --- drivers/gpu/drm/i915/i915_gem_context.c| 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/i915_gem_gtt.c| 6 +- drivers/gpu/drm/i915/i915_gem_request.c| 16 +++--- drivers/gpu/drm/i915/i915_gem_request.h| 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 20 +++ drivers/gpu/drm/i915/intel_display.c | 10 ++-- drivers/gpu/drm/i915/intel_lrc.c | 57 +- drivers/gpu/drm/i915/intel_mocs.c | 36 ++-- drivers/gpu/drm/i915/intel_overlay.c | 8 +-- drivers/gpu/drm/i915/intel_ringbuffer.c| 92 +++--- 11 files changed, 126 insertions(+), 129 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b6d10bd763a0..16138c4ff7db 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -552,7 +552,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 flags = hw_flags | MI_MM_SPACE_GTT; 
const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ @@ -654,7 +654,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) static int remap_l3(struct drm_i915_gem_request *req, int slice) { u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int i, ret; if (!remap_info) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e2c4d99a1e7f..501a1751d432 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1173,7 +1173,7 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret, i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { @@ -1303,7 +1303,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { - struct intel_ringbuffer *ring = params->request->ringbuf; + struct intel_ringbuffer *ring = params->request->ring; ret = intel_ring_begin(params->request, 4); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index abc439be2049..a48329baf432 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -669,7 +669,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; BUG_ON(entry >= 4); @@ -1660,7 +1660,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; 
+ struct intel_ringbuffer *ring = req->ring; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1688,7 +1688,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring
Re: [Intel-gfx] [PATCH 02/18] drm/i915: Rename request->ringbuf to request->ring
On 20/07/16 15:12, Dave Gordon wrote: On 20/07/16 14:11, Chris Wilson wrote: Now that we have disambiguated ring and engine, we can use the clearer and more consistent name for the intel_ringbuffer pointer in the request. Signed-off-by: Chris Wilson You missed a few instances of 'ring' meaning engine: i915_gem_execbuffer.c: struct intel_engine_cs **ring) intel_mocs.h:int intel_mocs_init_engine(struct intel_engine_cs *ring); intel_ringbuffer.c:gen5_seqno_barrier(struct intel_engine_cs *ring) intel_ringbuffer.h:void(*irq_enable)(struct intel_engine_cs *ring); intel_ringbuffer.h:void(*irq_disable)(struct intel_engine_cs *ring); intel_ringbuffer.h:int(*init_hw)(struct intel_engine_cs *ring); intel_ringbuffer.h:void(*irq_seqno_barrier)(struct intel_engine_cs *ring); intel_ringbuffer.h:void(*cleanup)(struct intel_engine_cs *ring); I think we have to purge every last trace of this usage before using 'ring' as shorthand for 'ringbuf[fer]'. .Dave. Oh yes, also there are lots of other things called 'ring' which aren't ringbuffers, such as an engine: #define RING_ELSP(ring) _MMIO((ring)->mmio_base + 0x230) or an engine id: static i915_reg_t mocs_register(enum intel_engine_id ring, int index) i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) int ring = req->engine->id; or a different structure entirely: struct drm_i915_error_ring *ring = &error->ring[ring_idx]; I could probably write some Cocci to find-and-rename all the things called 'ring' that weren't ringbuffers, but it would be easier not to overload the identifier with a host of different meanings in the first place. So I think adding any more instances of things called 'ring' should wait until the name has no other meanings, if ringbuffers are the thing you want it to unambiguously identify. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for series starting with [CI,1/9] drm/i915: Rename request reference/unreference to get/put
On 20/07/16 14:02, Patchwork wrote: == Series Details == Series: series starting with [CI,1/9] drm/i915: Rename request reference/unreference to get/put URL : https://patchwork.freedesktop.org/series/10089/ State : failure == Summary == Series 10089v1 Series without cover letter http://patchwork.freedesktop.org/api/1.0/series/10089/revisions/1/mbox Test gem_sync: Subgroup basic-store-each: pass -> FAIL (ro-bdw-i7-5600u) That's Bug 96974 - [BAT BDW] gem_sync / basic-store-each fails sporadically It claims that some interrupts were missed during the test but it doesn't happen on any other BDW (or any other machine at all). Perhaps we need another one "exactly the same" to see whether it's at all reproducible anywhere but that one system? .Dave. fi-hsw-i7-4770k total:244 pass:216 dwarn:0 dfail:0 fail:8 skip:20 fi-kbl-qkkr total:244 pass:180 dwarn:28 dfail:1 fail:8 skip:27 fi-skl-i5-6260u total:244 pass:224 dwarn:0 dfail:0 fail:8 skip:12 fi-skl-i7-6700k total:244 pass:210 dwarn:0 dfail:0 fail:8 skip:26 fi-snb-i7-2600 total:244 pass:196 dwarn:0 dfail:0 fail:8 skip:40 ro-bdw-i5-5250u total:244 pass:219 dwarn:4 dfail:0 fail:8 skip:13 ro-bdw-i7-5600u total:244 pass:203 dwarn:0 dfail:0 fail:9 skip:32 ro-bsw-n3050 total:218 pass:173 dwarn:0 dfail:0 fail:2 skip:42 ro-byt-n2820 total:244 pass:197 dwarn:0 dfail:0 fail:9 skip:38 ro-hsw-i3-4010u total:244 pass:212 dwarn:0 dfail:0 fail:8 skip:24 ro-hsw-i7-4770r total:244 pass:212 dwarn:0 dfail:0 fail:8 skip:24 ro-ilk-i7-620lm total:244 pass:172 dwarn:0 dfail:0 fail:9 skip:63 ro-ilk1-i5-650 total:239 pass:172 dwarn:0 dfail:0 fail:9 skip:58 ro-ivb-i7-3770 total:244 pass:203 dwarn:0 dfail:0 fail:8 skip:33 ro-skl3-i5-6260u total:244 pass:224 dwarn:0 dfail:0 fail:8 skip:12 ro-snb-i7-2620M total:244 pass:193 dwarn:0 dfail:0 fail:9 skip:42 ro-bdw-i7-5557U failed to connect after reboot Results at /archive/results/CI_IGT_test/RO_Patchwork_1543/ cafe8a2 drm-intel-nightly: 2016y-07m-20d-08h-45m-03s UTC integration manifest 1f14a4a drm/i915: 
Convert i915_semaphores_is_enabled over to early sanitize b2c39d7 drm/i915: Rename ring->virtual_start as ring->vaddr 170ddd1 drm/i915: Treat ringbuffer writes as write to normal memory 5118b49 drm/i915: Rename drm_gem_object_unreference_unlocked in preparation for lockless free 9e842dc drm/i915: Rename drm_gem_object_unreference in preparation for lockless free cc3c3c5 drm/i915: Wrap drm_gem_object_reference in i915_gem_object_get dd5b89e drm/i915: Wrap drm_gem_object_lookup in i915_gem_object_lookup 9794f2d drm/i915: Rename i915_gem_context_reference/unreference() e08379e drm/i915: Rename request reference/unreference to get/put ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 12/13] drm/i915: Consolidate legacy semaphore initialization
On 20/07/16 17:07, Tvrtko Ursulin wrote: On 20/07/16 13:50, Dave Gordon wrote: On 20/07/16 10:54, Tvrtko Ursulin wrote: On 19/07/16 19:38, Dave Gordon wrote: On 15/07/16 14:13, Tvrtko Ursulin wrote: On 29/06/16 17:00, Chris Wilson wrote: On Wed, Jun 29, 2016 at 04:41:58PM +0100, Tvrtko Ursulin wrote: On 29/06/16 16:34, Chris Wilson wrote: On Wed, Jun 29, 2016 at 04:09:31PM +0100, Tvrtko Ursulin wrote: From: Tvrtko Ursulin Replace per-engine initialization with a common half-programatic, half-data driven code for ease of maintenance and compactness. Signed-off-by: Tvrtko Ursulin This is the biggest pill to swallow (since our 5x5 table is only sparsely populated), but it looks correct, and more importantly easier to read. Yeah I was out of ideas on how to improve it. Fresh mind needed to try and spot a pattern in how MI_SEMAPHORE_SYNC_* and GEN6_*SYNC map to bits and registers respectively, and write it as a function. It's actually a very simple cyclic function based on register offset = base + (signaler hw_id - waiter hw_id - 1) % num_rings. (The only real challenge is picking the direction.) commit c8c99b0f0dea1ced5d0e10cdb9143356cc16b484 Author: Ben Widawsky Date: Wed Sep 14 20:32:47 2011 -0700 drm/i915: Dumb down the semaphore logic While I think the previous code is correct, it was hard to follow and hard to debug. Since we already have a ring abstraction, might as well use it to handle the semaphore updates and compares. Doesn't seem to fit, or I just can't figure it out. Needs two functions to get rid of the table: f1(0, 1) = 2 f1(0, 2) = 0 f1(0, 3) = 2 f1(1, 0) = 0 f1(1, 2) = 2 f1(1, 3) = 1 f1(2, 0) = 2 f1(2, 1) = 0 f1(2, 3) = 0 f1(3, 0) = 1 f1(3, 1) = 1 f1(3, 2) = 1 and: f2(0, 1) = 1 f2(0, 2) = 0 f2(0, 3) = 1 f2(1, 0) = 0 f2(1, 2) = 1 f2(1, 3) = 2 f2(2, 0) = 1 f2(2, 1) = 0 f2(2, 3) = 0 f2(3, 0) = 2 f2(3, 1) = 2 f2(3, 2) = 2 A weekend math puzzle for someone? 
:) Regards, Tvrtko Here's the APL expression for (the transpose of) f2, with -1's filled in along the leading diagonal (you need ⎕io←0 so the ⍳-vectors are in origin 0) {¯1+(⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵))1+⍳3}¨⍳4 ┌┬┬┬┐ │¯1 0 1 2│1 ¯1 0 2│0 1 ¯1 2│1 2 0 ¯1│ └┴┴┴┘ or transposed back so that the first argument is the row index and the second is the column index: ⍉↑{¯1+(⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵))1+⍳3}¨⍳4 ¯1 1 0 1 0 ¯1 1 2 1 0 ¯1 0 2 2 2 ¯1 http://tryapl.org/?a=%u2349%u2191%7B%AF1+%28%u2375%u2260%u23734%29%u2340%282%7C%u2375%29%u233D%28%u233D%u2363%281%3D%u2375%29%291+%u23733%7D%A8%u23734&run :-C ! How to convert that to C ? :) f1 is trivially derived from this by the observation that f1 is just f2 with the 1's and 2's interchanged. Ah yes, nicely spotted. Regards, Tvrtko Assuming you don't care about the leading diagonal (x == y), then (⍵≠⍳4)⍀(2|⍵)⌽(⌽⍣(1=⍵)) translates into: int f2(unsigned int x, unsigned int y) { x -= x >= y; if (y == 1) x = 3 - x; x += y & 1; return x % 3; } y:x 0 1 2 3 0: 0 0 1 2 1: 1 1 0 2 2: 0 1 1 2 3: 1 2 0 0 Each line of C corresponds quite closely to one operation in the APL :) Although, in APL we tend to leave the data unchanged while shuffling it around into new shapes, whereas the C below does the equivalent things by changing the data (noting that it's all modulo-3 arithmetic). (⍵≠⍳4)⍀ inserts the leading diagonal, corresponding to the subtraction of x >= y (which removes the leading diagonal). ⌽⍣(1=⍵) reverses the sequence if y==1; in C, that's the 3-x (2|⍵)⌽ rotates the sequence by 1 if y is odd; that's the += and the final % ensures that the result is 0-2. I was hoping for a solution which does not include conditionals, someone led me to believe it is possible! :) But thanks, your transformation really works. I've sent a patch implementing it to trybot for now. Regards, Tvrtko You can write it like this if you don't want any visible conditionals :) unsigned int f2(unsigned int x, unsigned int y) { x -= x >= y; x += y & 1; x ^= y & x >> y; /* WTF? 
*/ return x % 3; } But I think that's even more obscure. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] Reduce usage of the name 'ring' for engines et al
Chris Wilson is trying to convert 'ringbuffer' to 'ring', but at present there's rather too much legacy code using 'ring' for various other things, usually engines or engine-ids. This patchset converts some of them (but not as yet the gpu_error or trace code). Chris: what is your preferred name for a local holding an engine id? 'engine_id' is obvious, but seems overly long and clunky. Anything better? ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] drm/i915: rename macro parameter(ring) to (engine)
'ring' is an old deprecated term for a GPU engine. Here we make the terminology more consistent by renaming the 'ring' parameter of lots of macros that calculate addresses within the MMIO space of an engine. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_reg.h | 14 +++--- drivers/gpu/drm/i915/intel_lrc.h| 16 drivers/gpu/drm/i915/intel_ringbuffer.h | 24 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8bfde75..559c9d7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,13 +186,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_GRDOM_GUC(1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) -#define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228) -#define RING_PP_DIR_BASE_READ(ring)_MMIO((ring)->mmio_base+0x518) -#define RING_PP_DIR_DCLV(ring) _MMIO((ring)->mmio_base+0x220) +#define RING_PP_DIR_BASE(engine) _MMIO((engine)->mmio_base+0x228) +#define RING_PP_DIR_BASE_READ(engine) _MMIO((engine)->mmio_base+0x518) +#define RING_PP_DIR_DCLV(engine) _MMIO((engine)->mmio_base+0x220) #define PP_DIR_DCLV_2G 0x -#define GEN8_RING_PDP_UDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8 + 4) -#define GEN8_RING_PDP_LDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8) +#define GEN8_RING_PDP_UDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8 + 4) +#define GEN8_RING_PDP_LDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8) #define GEN8_R_PWR_CLK_STATE _MMIO(0x20C8) #define GEN8_RPCS_ENABLE (1 << 31) @@ -1647,7 +1647,7 @@ enum skl_disp_power_wells { #define ARB_MODE_BWGTLB_DISABLE (1<<9) #define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7_MMIO(0x04080) -#define RING_FAULT_REG(ring) _MMIO(0x4094 + 0x100*(ring)->id) +#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->id) #define RING_FAULT_GTTSEL_MASK (1<<11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) 
& 0x3) @@ -1842,7 +1842,7 @@ enum skl_disp_power_wells { #define GFX_MODE _MMIO(0x2520) #define GFX_MODE_GEN7 _MMIO(0x229c) -#define RING_MODE_GEN7(ring) _MMIO((ring)->mmio_base+0x29c) +#define RING_MODE_GEN7(engine) _MMIO((engine)->mmio_base+0x29c) #define GFX_RUN_LIST_ENABLE (1<<15) #define GFX_INTERRUPT_STEERING (1<<14) #define GFX_TLB_INVALIDATE_EXPLICIT (1<<13) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index aa3ac02..3828730 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,17 +29,17 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 /* Execlists regs */ -#define RING_ELSP(ring)_MMIO((ring)->mmio_base + 0x230) -#define RING_EXECLIST_STATUS_LO(ring) _MMIO((ring)->mmio_base + 0x234) -#define RING_EXECLIST_STATUS_HI(ring) _MMIO((ring)->mmio_base + 0x234 + 4) -#define RING_CONTEXT_CONTROL(ring) _MMIO((ring)->mmio_base + 0x244) +#define RING_ELSP(engine) _MMIO((engine)->mmio_base + 0x230) +#define RING_EXECLIST_STATUS_LO(engine) _MMIO((engine)->mmio_base + 0x234) +#define RING_EXECLIST_STATUS_HI(engine) _MMIO((engine)->mmio_base + 0x234 + 4) +#define RING_CONTEXT_CONTROL(engine) _MMIO((engine)->mmio_base + 0x244) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE(1 << 1) -#define RING_CONTEXT_STATUS_BUF_BASE(ring) _MMIO((ring)->mmio_base + 0x370) -#define RING_CONTEXT_STATUS_BUF_LO(ring, i)_MMIO((ring)->mmio_base + 0x370 + (i) * 8) -#define RING_CONTEXT_STATUS_BUF_HI(ring, i)_MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4) -#define RING_CONTEXT_STATUS_PTR(ring) _MMIO((ring)->mmio_base + 0x3a0) +#define RING_CONTEXT_STATUS_BUF_BASE(engine) _MMIO((engine)->mmio_base + 0x370) +#define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) +#define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) +#define RING_CONTEXT_STATUS_PTR(engine) 
_MMIO((engine)->mmio_base + 0x3a0) /* The docs specify that the write pointer wraps around after 5h, "After status * is written out to the last available status QW at offset 5h, this pointer diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 05bab8b.
[Intel-gfx] [PATCH 3/3] drm/i915: rename & update eb_select_ring()
'ring' is an old deprecated term for a GPU engine, so we're trying to phase out all such terminology. eb_select_ring() not only has 'ring' (meaning engine) in its name, but it has an ugly calling convention whereby it returns an errno and stores a pointer-to-engine indirectly through an output parameter. As there is only one error it ever returns (-EINVAL), we can make it return the pointer directly, and have the caller pass back the error code -EINVAL if the pointer result is NULL. Thus we can replace - ret = eb_select_ring(dev_priv, file, args, &engine); - if (ret) - return ret; with + engine = eb_select_engine(dev_priv, file, args); + if (!engine) + return -EINVAL; for increased clarity and maybe save a few cycles too. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 32 +++--- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6482ec2..f8d8ae3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1380,24 +1380,24 @@ static bool only_mappable_for_reloc(unsigned int flags) [I915_EXEC_VEBOX] = VECS }; -static int -eb_select_ring(struct drm_i915_private *dev_priv, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args, - struct intel_engine_cs **ring) +static struct intel_engine_cs * +eb_select_engine(struct drm_i915_private *dev_priv, +struct drm_file *file, +struct drm_i915_gem_execbuffer2 *args) { unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + struct intel_engine_cs *engine; if (user_ring_id > I915_USER_RINGS) { DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } if ((user_ring_id != I915_EXEC_BSD) && ((args->flags & I915_EXEC_BSD_MASK) != 0)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return -EINVAL; + return NULL; } if (user_ring_id == 
I915_EXEC_BSD && HAS_BSD2(dev_priv)) { @@ -1412,20 +1412,20 @@ static bool only_mappable_for_reloc(unsigned int flags) } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return -EINVAL; + return NULL; } - *ring = &dev_priv->engine[_VCS(bsd_idx)]; + engine = &dev_priv->engine[_VCS(bsd_idx)]; } else { - *ring = &dev_priv->engine[user_ring_map[user_ring_id]]; + engine = &dev_priv->engine[user_ring_map[user_ring_id]]; } - if (!intel_engine_initialized(*ring)) { + if (!intel_engine_initialized(engine)) { DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } - return 0; + return engine; } static int @@ -1467,9 +1467,9 @@ static bool only_mappable_for_reloc(unsigned int flags) if (args->flags & I915_EXEC_IS_PINNED) dispatch_flags |= I915_DISPATCH_PINNED; - ret = eb_select_ring(dev_priv, file, args, &engine); - if (ret) - return ret; + engine = eb_select_engine(dev_priv, file, args); + if (!engine) + return -EINVAL; if (args->buffer_count < 1) { DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/3] drm/i915: rename 'ring' where it refers to an engine or engine_id
'ring' is an old deprecated term for a GPU engine. Chris Wilson wants to use the name for what is currently known as an intel_ringbuffer, but it will be dreadfully confusing if some rings are ringbuffers but other rings are still engines. So this patch changes the names of a bunch of parameters called 'ring' to either 'engine' or 'engine_id' according to what they actually are. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_mocs.c | 6 +++--- drivers/gpu/drm/i915/intel_mocs.h | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 2280c32..bd46968 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -204,9 +204,9 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, return result; } -static i915_reg_t mocs_register(enum intel_engine_id ring, int index) +static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) { - switch (ring) { + switch (engine_id) { case RCS: return GEN9_GFX_MOCS(index); case VCS: @@ -218,7 +218,7 @@ static i915_reg_t mocs_register(enum intel_engine_id ring, int index) case VCS2: return GEN9_MFX1_MOCS(index); default: - MISSING_CASE(ring); + MISSING_CASE(engine_id); return INVALID_MMIO_REG; } } diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h index 4640299..a8bd9f7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/intel_mocs.h @@ -54,6 +54,6 @@ int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); void intel_mocs_init_l3cc_table(struct drm_device *dev); -int intel_mocs_init_engine(struct intel_engine_cs *ring); +int intel_mocs_init_engine(struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b844e69..a6f7db2 100644 --- 
a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1581,7 +1581,7 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, } static void -gen5_seqno_barrier(struct intel_engine_cs *ring) +gen5_seqno_barrier(struct intel_engine_cs *engine) { /* MI_STORE are internally buffered by the GPU and not flushed * either by MI_FLUSH or SyncFlush or any other combination of diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4671fb8..0f80194 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -197,14 +197,14 @@ struct intel_engine_cs { u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ - void(*irq_enable)(struct intel_engine_cs *ring); - void(*irq_disable)(struct intel_engine_cs *ring); + void(*irq_enable)(struct intel_engine_cs *engine); + void(*irq_disable)(struct intel_engine_cs *engine); - int (*init_hw)(struct intel_engine_cs *ring); + int (*init_hw)(struct intel_engine_cs *engine); int (*init_context)(struct drm_i915_gem_request *req); - void(*write_tail)(struct intel_engine_cs *ring, + void(*write_tail)(struct intel_engine_cs *engine, u32 value); int __must_check (*flush)(struct drm_i915_gem_request *req, u32 invalidate_domains, @@ -216,14 +216,14 @@ struct intel_engine_cs { * seen value is good enough. Note that the seqno will always be * monotonic, even if not coherent. */ - void(*irq_seqno_barrier)(struct intel_engine_cs *ring); + void(*irq_seqno_barrier)(struct intel_engine_cs *engine); int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 #define I915_DISPATCH_RS 0x4 - void(*cleanup)(struct intel_engine_cs *ring); + void(*cleanup)(struct intel_engine_cs *engine); /* GEN8 signal/wait table - never trust comments! 
*signal to signal tosignal to signal to signal to -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Replace gen6 semaphore signal table with code
On 21/07/16 10:31, Tvrtko Ursulin wrote: From: Tvrtko Ursulin Static table wastes space for invalid combinations and engines which are not supported by Gen6 (legacy semaphores). Replace it with a function devised by Dave Gordon. I have verified that it generates the same mappings between mbox selectors and signalling registers. Signed-off-by: Tvrtko Ursulin Cc: Dave Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 7 ++--- drivers/gpu/drm/i915/intel_engine_cs.c | 48 + drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 4 files changed, 57 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8bfde75789f6..28aa876e2d87 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells { #define RING_HEAD(base) _MMIO((base)+0x34) #define RING_START(base) _MMIO((base)+0x38) #define RING_CTL(base)_MMIO((base)+0x3c) -#define RING_SYNC_0(base) _MMIO((base)+0x40) -#define RING_SYNC_1(base) _MMIO((base)+0x44) -#define RING_SYNC_2(base) _MMIO((base)+0x48) +#define RING_SYNC(base, n) _MMIO((base) + 0x40 + (n) * 4) +#define RING_SYNC_0(base) RING_SYNC(base, 0) +#define RING_SYNC_1(base) RING_SYNC(base, 1) +#define RING_SYNC_2(base) RING_SYNC(base, 2) #define GEN6_RVSYNC (RING_SYNC_0(RENDER_RING_BASE)) #define GEN6_RBSYNC (RING_SYNC_1(RENDER_RING_BASE)) #define GEN6_RVESYNC (RING_SYNC_2(RENDER_RING_BASE)) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f4a35ec78481..9837fddae259 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -209,3 +209,51 @@ int intel_engine_init_common(struct intel_engine_cs *engine) return i915_cmd_parser_init_ring(engine); } + +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4) + +static int gen6_sem_f(unsigned int x, unsigned int y) +{ + if (x == y) + return -1; + + x = 
intel_engines[x].guc_id; + y = intel_engines[y].guc_id; You could have the caller pass two engine pointers rather than converting passing indexes that aren't actually the values needed. Or you could have the caller pass the 'hw_id' (probably better than 'guc_id') directly. + + if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES || + y >= I915_NUM_GEN6_SEMAPHORE_ENGINES) + return -1; And maybe move all the error checking out, so this function *just* contains the tricksy calculation below? + + x -= x >= y; + if (y == 1) + x = 3 - x; + x += y & 1; + return x % 3; +} + +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y) +{ + int r; + + r = gen6_sem_f(x, y); + if (r < 0) + return MI_SEMAPHORE_SYNC_INVALID; + + if (r == 1) + r = 2; + else if (r == 2) + r = 1; BTW this is ((-r) % 3). Since gen6_sem_f() already does a "% 3" at the end you might want to pass it a flag and let it do the negation when required. int gen6_sem_f2(unsigned int hw_x, unsigned int hw_y, bool wait) { hw_x -= hw_x >= hw_y; hw_x += hw_y & 1; hw_x ^= hw_y & hw_x >> hw_y; /* WTF? */ return (wait ? -hw_x : hw_x) % 3; } .Dave. + + return r << 16; +} + +i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y) +{ + int r; + + r = gen6_sem_f(x, y); + if (r < 0) + return GEN6_NOSYNC; + + return RING_SYNC(intel_engines[y].mmio_base, r); +} diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b844e6984ae7..049527d381de 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2731,44 +2731,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, * sema between VCS2 and RCS later. 
*/ for (i = 0; i < I915_NUM_ENGINES; i++) { - static const struct { - u32 wait_mbox; - i915_reg_t mbox_reg; - } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = { - [RCS] = { - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, - }, - [VCS] = { -
[Intel-gfx] [PATCH] drm/i915: use i915_gem_object_put_unlocked() after releasing mutex
The exit path in intel_overlay_put_image_ioctl() first unlocks the struct_mutex, then drops its reference to 'new_bo' by calling i915_gem_object_put(). As it isn't holding the mutex at this point, this should be i915_gem_object_put_unlocked(). This was previously correct but got splatted in the recent s/drm_gem_object_unreference/i915_gem_object_put/ where the _unlocked suffix was lost in this one case. Also fixes a whitespace glitch introduced in the same commit. Fixes: f8c417cd drm/i915: Rename drm_gem_object_unreference in preparation ... Signed-off-by: Dave Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_evict.c | 3 +-- drivers/gpu/drm/i915/intel_overlay.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 81f7b43..6e09e11 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -196,8 +196,7 @@ vma = list_first_entry(&eviction_list, struct i915_vma, exec_list); - - obj = vma->obj; + obj = vma->obj; list_del_init(&vma->exec_list); if (ret == 0) ret = i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 8654a32..c10ce36 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1219,7 +1219,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); - i915_gem_object_put(new_bo); + i915_gem_object_put_unlocked(new_bo); out_free: kfree(params); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 07/23] drm/i915: Move HAS_GUC_UCODE definition to platform definition
On 21/07/16 11:38, Tvrtko Ursulin wrote: On 20/07/16 22:07, Rodrigo Vivi wrote: please kill this _ucode variation that is just an alias to guc instead Not sure, it was added with a particular goal. Cc Dave in case he wants to comment. Regards, Tvrtko The comment is already in the source code, just above the lines that this patch changes. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915: use i915_gem_object_put_unlocked() after releasing mutex
The exit path in intel_overlay_put_image_ioctl() first unlocks the struct_mutex, then drops its reference to 'new_bo' by calling i915_gem_object_put(). As it isn't holding the mutex at this point, this should be i915_gem_object_put_unlocked(). This was previously correct but got splatted in the recent s/drm_gem_object_unreference/i915_gem_object_put/ where the _unlocked suffix was lost in this one case. v2: don't bother fixing whitespace glitch [Chris Wilson] Chris can do it next time he touches gem_evict.c ;) Fixes: f8c417cd drm/i915: Rename drm_gem_object_unreference in preparation ... Signed-off-by: Dave Gordon Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_overlay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 8654a32..c10ce36 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1219,7 +1219,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); - i915_gem_object_put(new_bo); + i915_gem_object_put_unlocked(new_bo); out_free: kfree(params); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 0/6] drm/i915/guc: use one GuC client per GPU engine
When using a single GuC client for multiple engines, the i915 driver has to merge all work items into a single work queue, which the GuC firmware then demultiplexes into separate submission queues per engine. In theory, this could lead to the single queue becoming a bottleneck in which an excess of outstanding work for one or more engines might prevent work for an idle engine reaching the hardware. To reduce this risk, we can create one GuC client per engine. Each will have its own workqueue, to be used only for work targeting a single engine, so there will be no cross-engine contention for workqueue slots. Dave Gordon (6): drm/i915/guc: doorbell reset should avoid used doorbells drm/i915/guc: refactor guc_init_doorbell_hw() drm/i915/guc: use a separate GuC client for each engine drm/i915/guc: add engine mask to GuC client & pass to GuC drm/i915/guc: use for_each_engine_id() where appropriate drm/i915/guc: re-optimise i915_guc_client layout drivers/gpu/drm/i915/i915_debugfs.c| 43 +++ drivers/gpu/drm/i915/i915_guc_submission.c | 118 ++--- drivers/gpu/drm/i915/intel_guc.h | 11 ++- 3 files changed, 107 insertions(+), 65 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/6] drm/i915/guc: doorbell reset should avoid used doorbells
guc_init_doorbell_hw() borrows the (currently single) GuC client to use in reinitialising ALL the doorbell registers (as the hardware doesn't reset them when the GuC is reset). As a prerequisite for accommodating multiple clients, it should only reset doorbells that are supposed to be disabled, avoiding those that are marked as in use by any client. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 01c1c16..9c5b81b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -699,7 +699,7 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) } /* - * Borrow the first client to set up & tear down every doorbell + * Borrow the first client to set up & tear down each unused doorbell * in turn, to ensure that all doorbell h/w is (re)initialised. */ static void guc_init_doorbell_hw(struct intel_guc *guc) @@ -715,6 +715,9 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) i915_reg_t drbreg = GEN8_DRBREGL(i); u32 value = I915_READ(drbreg); + if (test_bit(i, guc->doorbell_bitmap)) + continue; + err = guc_update_doorbell_id(guc, client, i); /* Report update failure or unexpectedly active doorbell */ @@ -733,6 +736,9 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) i915_reg_t drbreg = GEN8_DRBREGL(i); u32 value = I915_READ(drbreg); + if (test_bit(i, guc->doorbell_bitmap)) + continue; + if (i != db_id && (value & GUC_DOORBELL_ENABLED)) DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n", i, drbreg.reg, value); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 3/6] drm/i915/guc: use a separate GuC client for each engine
When using a single GuC client for multiple engines, the i915 driver has to merge all work items into a single work queue, which the GuC firmware then demultiplexes into separate submission queues per engine. In theory, this could lead to the single queue becoming a bottleneck in which an excess of outstanding work for one or more engines might prevent work for an idle engine reaching the hardware. To reduce this risk, we can create one GuC client per engine. Each will have its own workqueue, to be used only for work targeting a single engine, so there will be no cross-engine contention for workqueue slots. Signed-off-by: Dave Gordon Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 25 - drivers/gpu/drm/i915/i915_guc_submission.c | 35 +++--- drivers/gpu/drm/i915/intel_guc.h | 2 +- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9aa62c5..793b1d9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2564,20 +2564,26 @@ static int i915_guc_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_guc guc; - struct i915_guc_client client = {}; + struct i915_guc_client *clients; struct intel_engine_cs *engine; + enum intel_engine_id id; u64 total = 0; if (!HAS_GUC_SCHED(dev_priv)) return 0; + clients = kcalloc(I915_NUM_ENGINES, sizeof(*clients), GFP_KERNEL); + if (clients == NULL) + return -ENOMEM; + if (mutex_lock_interruptible(&dev->struct_mutex)) - return 0; + goto done; /* Take a local copy of the GuC data, so we can dump it at leisure */ guc = dev_priv->guc; - if (guc.execbuf_client) - client = *guc.execbuf_client; + for_each_engine_id(engine, dev_priv, id) + if (guc.exec_clients[id]) + clients[id] = *guc.exec_clients[id]; mutex_unlock(&dev->struct_mutex); @@ -2600,11 +2606,18 @@ static int i915_guc_info(struct seq_file *m, void 
*data) } seq_printf(m, "\t%s: %llu\n", "Total", total); - seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client); - i915_guc_client_info(m, dev_priv, &client); + for_each_engine_id(engine, dev_priv, id) { + seq_printf(m, "\nGuC exec_client[%d] @ %p:\n", + id, guc.exec_clients[id]); + if (guc.exec_clients[id]) + i915_guc_client_info(m, dev_priv, &clients[id]); + } /* Add more as required ... */ +done: + kfree(clients); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 816bdca..797 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -434,7 +434,9 @@ static void guc_fini_ctx_desc(struct intel_guc *guc, int i915_guc_wq_check_space(struct drm_i915_gem_request *request) { const size_t wqi_size = sizeof(struct guc_wq_item); - struct i915_guc_client *gc = request->i915->guc.execbuf_client; + enum intel_engine_id engine_id = request->engine->id; + struct intel_guc *guc = &request->i915->guc; + struct i915_guc_client *gc = guc->exec_clients[engine_id]; struct guc_process_desc *desc; u32 freespace; @@ -589,7 +591,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) { unsigned int engine_id = rq->engine->id; struct intel_guc *guc = &rq->i915->guc; - struct i915_guc_client *client = guc->execbuf_client; + struct i915_guc_client *client = guc->exec_clients[engine_id]; int b_ret; guc_add_workqueue_item(client, rq); @@ -723,7 +725,7 @@ static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) */ static void guc_init_doorbell_hw(struct intel_guc *guc) { - struct i915_guc_client *client = guc->execbuf_client; + struct i915_guc_client *client = guc->exec_clients[RCS]; uint16_t db_id; int i, err; @@ -1004,17 +1006,21 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; struct i915_guc_client *client; + struct intel_engine_cs *engine; - /* client for execbuf submission */ - 
client = guc_client_alloc(dev_priv, - GUC_CTX_PRIORITY_KMD_NORMAL, - dev_priv->kernel_context); - if (!client) { - DRM_ERROR("Failed
[Intel-gfx] [PATCH v2 2/6] drm/i915/guc: refactor guc_init_doorbell_hw()
We have essentially the same code in each of two different loops, so we can refactor it into a little helper function. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_guc_submission.c | 54 +- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 9c5b81b..816bdca 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -698,32 +698,47 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) kfree(client); } +/* Check that a doorbell register is in the expected state */ +static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + i915_reg_t drbreg = GEN8_DRBREGL(db_id); + uint32_t value = I915_READ(drbreg); + bool enabled = (value & GUC_DOORBELL_ENABLED) != 0; + bool expected = test_bit(db_id, guc->doorbell_bitmap); + + if (enabled == expected) + return true; + + DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n", +db_id, drbreg.reg, value, +expected ? "active" : "inactive"); + + return false; +} + /* * Borrow the first client to set up & tear down each unused doorbell * in turn, to ensure that all doorbell h/w is (re)initialised. 
*/ static void guc_init_doorbell_hw(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); struct i915_guc_client *client = guc->execbuf_client; - uint16_t db_id, i; - int err; + uint16_t db_id; + int i, err; + /* Save client's original doorbell selection */ db_id = client->doorbell_id; for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); - - if (test_bit(i, guc->doorbell_bitmap)) + /* Skip if doorbell is OK */ + if (guc_doorbell_check(guc, i)) continue; err = guc_update_doorbell_id(guc, client, i); - - /* Report update failure or unexpectedly active doorbell */ - if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED))) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n", - i, drbreg.reg, value, err); + if (err) + DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n", + i, err); } /* Restore to original value */ @@ -732,18 +747,9 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) DRM_ERROR("Failed to restore doorbell to %d, err %d\n", db_id, err); - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); - - if (test_bit(i, guc->doorbell_bitmap)) - continue; - - if (i != db_id && (value & GUC_DOORBELL_ENABLED)) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n", - i, drbreg.reg, value); - - } + /* Read back & verify all doorbell registers */ + for (i = 0; i < GUC_MAX_DOORBELLS; ++i) + (void)guc_doorbell_check(guc, i); } /** -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 4/6] drm/i915/guc: add engine mask to GuC client & pass to GuC
The Context Descriptor passed by the kernel to the GuC contains a field specifying which engine(s) the context will use. Historically, this was always set to "all of them", but now that we have one client per engine, we can be more precise, and set only the single bit for the engine that the client is associated with. Signed-off-by: Dave Gordon Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++- drivers/gpu/drm/i915/intel_guc.h | 3 ++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 797..6756db0 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -340,7 +340,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.priority = client->priority; desc.db_id = client->doorbell_id; - for_each_engine(engine, dev_priv) { + for_each_engine_masked(engine, dev_priv, client->engines) { struct intel_context *ce = &ctx->engine[engine->id]; struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; struct drm_i915_gem_object *obj; @@ -374,6 +374,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.engines_used |= (1 << engine->guc_id); } + DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", + client->engines, desc.engines_used); WARN_ON(desc.engines_used == 0); /* @@ -768,6 +770,7 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) */ static struct i915_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, +uint32_t engines, uint32_t priority, struct i915_gem_context *ctx) { @@ -780,10 +783,11 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) if (!client) return NULL; - client->doorbell_id = GUC_INVALID_DOORBELL_ID; - client->priority = priority; client->owner = ctx; client->guc = guc; + client->engines = engines; + client->priority = priority; + client->doorbell_id = GUC_INVALID_DOORBELL_ID; client->ctx_index = 
(uint32_t)ida_simple_get(&guc->ctx_ids, 0, GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); @@ -825,8 +829,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) if (guc_init_doorbell(guc, client, db_id)) goto err; - DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n", - priority, client, client->ctx_index); + DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", + priority, client, client->engines, client->ctx_index); DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", client->doorbell_id, client->doorbell_offset); @@ -1011,6 +1015,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv) { /* client for execbuf submission */ client = guc_client_alloc(dev_priv, + intel_engine_flag(engine), GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 7b4cc4d..53d41b5 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -67,6 +67,8 @@ struct i915_guc_client { void *client_base; /* first page (only) of above */ struct i915_gem_context *owner; struct intel_guc *guc; + + uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; uint32_t ctx_index; @@ -79,7 +81,6 @@ struct i915_guc_client { uint32_t wq_offset; uint32_t wq_size; uint32_t wq_tail; - uint32_t unused;/* Was 'wq_head'*/ uint32_t no_wq_space; uint32_t q_fail;/* No longer used */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 5/6] drm/i915/guc: use for_each_engine_id() where appropriate
Now that host structures are indexed by host engine-id rather than guc_id, we can usefully convert some for_each_engine() loops to use for_each_engine_id() and avoid multiple dereferences of engine->id. Also a few related tweaks to cache structure members locally wherever they're used more than once or twice, hopefully eliminating memory references. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_debugfs.c| 17 + drivers/gpu/drm/i915/i915_guc_submission.c | 22 +- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 793b1d9..2106766 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file *m, struct i915_guc_client *client) { struct intel_engine_cs *engine; + enum intel_engine_id id; uint64_t tot = 0; seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", @@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = client->submissions[id]; + tot += submissions; seq_printf(m, "\tSubmissions: %llu %s\n", - client->submissions[engine->id], - engine->name); - tot += client->submissions[engine->id]; + submissions, engine->name); } seq_printf(m, "\tTotal: %llu\n", tot); } @@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "GuC last action error code: %d\n", guc.action_err); seq_printf(m, "\nGuC submissions:\n"); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = guc.submissions[id]; + total += submissions; seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", - engine->name, guc.submissions[engine->id], - 
guc.last_seqno[engine->id]); - total += guc.submissions[engine->id]; + engine->name, submissions, guc.last_seqno[id]); } seq_printf(m, "\t%s: %llu\n", "Total", total); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 6756db0..ece3479 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, for_each_engine_masked(engine, dev_priv, client->engines) { struct intel_context *ce = &ctx->engine[engine->id]; - struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; + uint32_t guc_engine_id = engine->guc_id; + struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; struct drm_i915_gem_object *obj; /* TODO: We have a design issue to be solved here. Only when we @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, gfx_addr = i915_gem_obj_ggtt_offset(ce->state); lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | - (engine->guc_id << GUC_ELC_ENGINE_OFFSET); + (guc_engine_id << GUC_ELC_ENGINE_OFFSET); obj = ce->ringbuf->obj; gfx_addr = i915_gem_obj_ggtt_offset(obj); @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->ring_next_free_location = gfx_addr; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << engine->guc_id); + desc.engines_used |= (1 << guc_engine_id); } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; + struct intel_engine_cs *engine = rq->engine; struct guc_process_desc *desc; struct guc_wq_item
[Intel-gfx] [PATCH v2 6/6] drm/i915/guc: re-optimise i915_guc_client layout
As we're tweaking the GuC-related code in debugfs, we can drop the now-unused 'q_fail' and repack the structure. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/intel_guc.h| 6 ++ 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2106766..096d212 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2546,7 +2546,6 @@ static void i915_guc_client_info(struct seq_file *m, client->wq_size, client->wq_offset, client->wq_tail); seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); - seq_printf(m, "\tFailed to queue: %u\n", client->q_fail); seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 53d41b5..a74b128 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -71,19 +71,17 @@ struct i915_guc_client { uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; uint32_t ctx_index; - uint32_t proc_desc_offset; + uint32_t doorbell_offset; uint32_t cookie; uint16_t doorbell_id; - uint16_t padding; /* Maintain alignment */ + uint16_t padding[3];/* Maintain alignment */ uint32_t wq_offset; uint32_t wq_size; uint32_t wq_tail; - uint32_t no_wq_space; - uint32_t q_fail;/* No longer used */ uint32_t b_fail; int retcode; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 0/3] drm/i915/guc: emit (drm) messages at the most appropriate level
A few adjustments to the messages emitted from the driver, promoting or demoting them to the level most suited to the target audience as well as the impact of the thing being reported. Dave Gordon (3): drm: extra printk() wrapper macros drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN() drm/i915/guc: revisit GuC loader message levels drivers/gpu/drm/i915/i915_guc_submission.c | 18 ++-- drivers/gpu/drm/i915/intel_guc_loader.c| 34 +++--- include/drm/drmP.h | 26 +-- 3 files changed, 44 insertions(+), 34 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 2/3] drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN()
Where we're going to continue regardless of the problem, rather than fail, then the message should be a WARNing rather than an ERROR. Signed-off-by: Dave Gordon Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 01c1c16..ee4d346 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -114,10 +114,8 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (ret != -ETIMEDOUT) ret = -EIO; - DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " - "status=0x%08X response=0x%08X\n", - data[0], ret, status, - I915_READ(SOFT_SCRATCH(15))); + DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n", +data[0], ret, status, I915_READ(SOFT_SCRATCH(15))); dev_priv->guc.action_fail += 1; dev_priv->guc.action_err = ret; @@ -553,8 +551,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) if (db_ret.db_status == GUC_DOORBELL_DISABLED) break; - DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", - db_cmp.cookie, db_ret.cookie); + DRM_WARN("Cookie mismatch. 
Expected %d, found %d\n", +db_cmp.cookie, db_ret.cookie); /* update the cookie to newly read cookie from GuC */ db_cmp.cookie = db_ret.cookie; @@ -726,8 +724,8 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) /* Restore to original value */ err = guc_update_doorbell_id(guc, client, db_id); if (err) - DRM_ERROR("Failed to restore doorbell to %d, err %d\n", - db_id, err); + DRM_WARN("Failed to restore doorbell to %d, err %d\n", +db_id, err); for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { i915_reg_t drbreg = GEN8_DRBREGL(i); @@ -819,8 +817,6 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) return client; err: - DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); - guc_client_free(dev_priv, client); return NULL; } @@ -998,7 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { - DRM_ERROR("Failed to create execbuf guc_client\n"); + DRM_ERROR("Failed to create normal GuC client!\n"); return -ENOMEM; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 1/3] drm: extra printk() wrapper macros
We had only DRM_INFO() and DRM_ERROR(), whereas the underlying printk() provides several other useful intermediate levels such as NOTICE and WARNING. So this patch fills out the set by providing both regular and once-only macros for each of the levels INFO, NOTICE, and WARNING, using a common underlying macro that does all the token-pasting. DRM_ERROR is unchanged, as it's not just a printk wrapper. v2: Fix whitespace, missing ## (Eric Engestrom) Signed-off-by: Dave Gordon Reviewed-by: Eric Engestrom Cc: dri-de...@lists.freedesktop.org --- include/drm/drmP.h | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d377865..3669cdd 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -162,6 +162,26 @@ void drm_err(const char *format, ...); /** \name Macros to make printk easier */ /*@{*/ +#define _DRM_PRINTK(once, level, fmt, ...) \ + do {\ + printk##once(KERN_##level "[" DRM_NAME "] " fmt,\ +##__VA_ARGS__);\ + } while (0) + +#define DRM_INFO(fmt, ...) \ + _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE(fmt, ...) \ + _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN(fmt, ...) \ + _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) + +#define DRM_INFO_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__) +#define DRM_NOTE_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#define DRM_WARN_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) + /** * Error output. * @@ -187,12 +207,6 @@ void drm_err(const char *format, ...); drm_err(fmt, ##__VA_ARGS__);\ }) -#define DRM_INFO(fmt, ...) \ - printk(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - -#define DRM_INFO_ONCE(fmt, ...)\ - printk_once(KERN_INFO "[" DRM_NAME "] " fmt, ##__VA_ARGS__) - /** * Debug output. * -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 3/3] drm/i915/guc: revisit GuC loader message levels
Some downgraded from DRM_ERROR() to DRM_WARN() or DRM_NOTE(), a few upgraded from DRM_INFO() to DRM_NOTE() or DRM_WARN(), and one eliminated completely. v2: different permutation of levels :) v3: convert a couple of "this shouldn't happen" messages to WARN() Signed-off-by: Dave Gordon Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_guc_loader.c | 34 - 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index b883efd..61bbf20 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -140,12 +140,14 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { - switch (INTEL_INFO(dev_priv)->gen) { + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: - DRM_ERROR("GUC: unsupported core family\n"); + WARN(1, "GEN%d does not support GuC operation!\n", gen); return GFXCORE_FAMILY_UNKNOWN; } } @@ -435,7 +437,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? 
*/ - DRM_INFO("No GuC firmware known for this platform\n"); + WARN(1, "No GuC firmware known for this platform!\n"); err = -ENODEV; goto fail; } @@ -473,10 +475,8 @@ int intel_guc_setup(struct drm_device *dev) * that the state and timing are fairly predictable */ err = i915_reset_guc(dev_priv); - if (err) { - DRM_ERROR("GuC reset failed: %d\n", err); + if (err) goto fail; - } err = guc_ucode_xfer(dev_priv); if (!err) @@ -534,15 +534,15 @@ int intel_guc_setup(struct drm_device *dev) else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) - DRM_INFO("GuC firmware load failed: %d\n", err); + DRM_NOTE("GuC firmware load failed: %d\n", err); else - DRM_ERROR("GuC firmware load failed: %d\n", err); + DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) - DRM_INFO("Falling back from GuC submission to execlist mode\n"); + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -573,7 +573,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { - DRM_ERROR("Firmware header is missing\n"); + DRM_NOTE("Firmware header is missing\n"); goto fail; } @@ -585,7 +585,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { - DRM_ERROR("CSS header definition mismatch\n"); + DRM_NOTE("CSS header definition mismatch\n"); goto fail; } @@ -595,7 +595,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_ERROR("RSA key size is bad\n"); + DRM_NOTE("RSA key size is bad\n"); goto fail; } 
guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -604,14 +604,14 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* At least, it should have header, uCode and RSA. Size of all three. */ size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size; if (fw->size < size) { - DRM_ERROR("Missing firmware components\n"); + DRM_NOTE("Missing firmware components\n"); goto fail; } /* Header and uCode will be loaded to WOPCM. Size of the two. */
Re: [Intel-gfx] [PATCH v2 3/6] drm/i915/guc: use a separate GuC client for each engine
On 21/07/16 19:30, Chris Wilson wrote: On Thu, Jul 21, 2016 at 07:15:39PM +0100, Dave Gordon wrote: When using a single GuC client for multiple engines, the i915 driver has to merge all work items into a single work queue, which the GuC firmware then demultiplexes into separate submission queues per engine. In theory, this could lead to the single queue becoming a bottleneck in which an excess of outstanding work for one or more engines might prevent work for an idle engine reaching the hardware. To reduce this risk, we can create one GuC client per engine. Each will have its own workqueue, to be used only for work targeting a single engine, so there will be no cross-engine contention for workqueue slots. Signed-off-by: Dave Gordon Reviewed-by: Tvrtko Ursulin Does guc_context_desc.engines_used have any effect? -Chris Yes, some of the firmware code uses it to optimise which queues it scans at certain times. If it knows that a certain queue *doesn't* contain work for a given engine, it can skip scanning that queue entirely. Does this patchset change the results in the parallel-submission nop test? .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 5/6] drm/i915/guc: use for_each_engine_id() where appropriate
On 22/07/16 11:04, Tvrtko Ursulin wrote: On 21/07/16 19:15, Dave Gordon wrote: Now that host structures are indexed by host engine-id rather than guc_id, we can usefully convert some for_each_engine() loops to use for_each_engine_id() and avoid multiple dereferences of engine->id. Also a few related tweaks to cache structure members locally wherever they're used more than once or twice, hopefully eliminating memory references. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_debugfs.c| 17 + drivers/gpu/drm/i915/i915_guc_submission.c | 22 +- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 793b1d9..2106766 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file *m, struct i915_guc_client *client) { struct intel_engine_cs *engine; +enum intel_engine_id id; uint64_t tot = 0; seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", @@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); -for_each_engine(engine, dev_priv) { +for_each_engine_id(engine, dev_priv, id) { +u64 submissions = client->submissions[id]; +tot += submissions; seq_printf(m, "\tSubmissions: %llu %s\n", -client->submissions[engine->id], -engine->name); -tot += client->submissions[engine->id]; +submissions, engine->name); } seq_printf(m, "\tTotal: %llu\n", tot); } @@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "GuC last action error code: %d\n", guc.action_err); seq_printf(m, "\nGuC submissions:\n"); -for_each_engine(engine, dev_priv) { +for_each_engine_id(engine, dev_priv, id) { +u64 submissions = guc.submissions[id]; +total += submissions; seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", 
-engine->name, guc.submissions[engine->id], -guc.last_seqno[engine->id]); -total += guc.submissions[engine->id]; +engine->name, submissions, guc.last_seqno[id]); } seq_printf(m, "\t%s: %llu\n", "Total", total); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 6756db0..ece3479 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, for_each_engine_masked(engine, dev_priv, client->engines) { struct intel_context *ce = &ctx->engine[engine->id]; -struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; +uint32_t guc_engine_id = engine->guc_id; +struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; struct drm_i915_gem_object *obj; /* TODO: We have a design issue to be solved here. Only when we @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, gfx_addr = i915_gem_obj_ggtt_offset(ce->state); lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | -(engine->guc_id << GUC_ELC_ENGINE_OFFSET); +(guc_engine_id << GUC_ELC_ENGINE_OFFSET); obj = ce->ringbuf->obj; gfx_addr = i915_gem_obj_ggtt_offset(obj); @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->ring_next_free_location = gfx_addr; lrc->ring_current_tail_pointer_value = 0; -desc.engines_used |= (1 << engine->guc_id); +desc.engines_used |= (1 << guc_engine_id); } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; +struct intel_engine_cs *engine = rq->engine; struct guc_process_desc *desc; struct guc_wq_item *wqi; void *base; @@ -502,12 +504,11 @@ static void 
guc_add_workqueue_item(struct i915_guc_client *gc, /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | (wqi_len <<
Re: [Intel-gfx] [I-G-T] igt/gem_mocs_settings: improve RC6 testings
On 22/07/16 10:40, Antoine, Peter wrote: -Original Message- From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] Sent: Friday, July 22, 2016 10:38 AM To: Antoine, Peter Cc: intel-gfx@lists.freedesktop.org Subject: Re: [I-G-T] igt/gem_mocs_settings: improve RC6 testings On Thu, Jul 21, 2016 at 09:49:51PM +, Antoine, Peter wrote: -Original Message- From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] Sent: Thursday, July 21, 2016 9:40 PM To: Antoine, Peter Cc: intel-gfx@lists.freedesktop.org Subject: Re: [I-G-T] igt/gem_mocs_settings: improve RC6 testings On Tue, Jul 19, 2016 at 11:25:29AM +0100, Peter Antoine wrote: On some platforms the MOCS values are not always saved and restored on RC6 enter/exit. The rationale is that the context will restore these values. On these platforms the test will fail as it tests the values by directly reading the MOCS registers. But there's nothing wrong with the existing tests per-se? You just want to add a new one that explicitly tests rc6 save/restore, and in doing so just need to limit the forcewake. For that you just want to limit intel_register_access to the critical sections. (You don't need to find the pci_dev afresh every time.) On some platforms (BXT) it does not restore L3CC registers. So that on these platforms the direct read will fail unless you hold forcewake for the whole test. It also implies that the registers are valid for the whole power lifecycle, so if you are using the tests as API documentation then it implies that the registers are always valid. This is not always true on all platforms. Right. If those registers are only valid when read from within an active context, please send the patch to remove the invalid tests first. That means all the direct register access is undefined, right? Ok, will do that. Peter. So the direct memory reads are removed to give the correct API usage for the MOCS. That is that the L3CC registers are only valid while in the RCS context. Ok. 
+static void context_rc6_test(void) +{ + int fd = drm_open_driver(DRIVER_INTEL); + int res_ms; + uint32_t ctx_id = gem_context_create(fd); + + igt_debug("RC6 Context Test\n"); + check_control_registers(fd, I915_EXEC_RENDER, ctx_id, false); + check_l3cc_registers(fd, I915_EXEC_RENDER, ctx_id, false); + + res_ms = read_rc6_residency(); + sleep(3); And you could spin here until rc6 residency increased. timeout = 3000 / 2; while (read_rc6_residency() == initial_res && --timeout) usleep(2000); Ok. Decided against that as 3 seconds (I know magic value) should have been enough. I can change to do the spin with a timeout. Peter, your email client's quoting is broken in the thread above; it makes it quite difficult to determine who said what. Different authors should have different numbers of >> before their contributions, or some other way of identifying who said what. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Ro.CI.BAT: failure for drm/i915/guc: emit (drm) messages at the most appropriate level
On 22/07/16 07:25, Patchwork wrote: == Series Details == Series: drm/i915/guc: emit (drm) messages at the most appropriate level URL : https://patchwork.freedesktop.org/series/10150/ State : failure == Summary == Series 10150v1 drm/i915/guc: emit (drm) messages at the most appropriate level http://patchwork.freedesktop.org/api/1.0/series/10150/revisions/1/mbox Test gem_sync: Subgroup basic-store-each: pass -> DMESG-FAIL (ro-bdw-i7-5600u) The now-familiar https://bugs.freedesktop.org/show_bug.cgi?id=96974 Bug 96974 - [BAT BDW] gem_sync / basic-store-each fails sporadically Though it looks like Chris may have a fix for this :) .Dave. fi-hsw-i7-4770k total:244 pass:216 dwarn:0 dfail:0 fail:8 skip:20 fi-kbl-qkkr total:244 pass:180 dwarn:27 dfail:1 fail:9 skip:27 fi-skl-i5-6260u total:244 pass:224 dwarn:0 dfail:0 fail:8 skip:12 fi-skl-i7-6700k total:244 pass:210 dwarn:0 dfail:0 fail:8 skip:26 fi-snb-i7-2600 total:244 pass:196 dwarn:0 dfail:0 fail:8 skip:40 ro-bdw-i5-5250u total:244 pass:219 dwarn:4 dfail:0 fail:8 skip:13 ro-bdw-i7-5557U total:244 pass:221 dwarn:1 dfail:0 fail:8 skip:14 ro-bdw-i7-5600u total:244 pass:203 dwarn:0 dfail:1 fail:8 skip:32 ro-bsw-n3050 total:218 pass:173 dwarn:0 dfail:0 fail:2 skip:42 ro-byt-n2820 total:244 pass:197 dwarn:0 dfail:0 fail:9 skip:38 ro-hsw-i3-4010u total:244 pass:212 dwarn:0 dfail:0 fail:8 skip:24 ro-hsw-i7-4770r total:244 pass:212 dwarn:0 dfail:0 fail:8 skip:24 ro-ilk-i7-620lm total:244 pass:172 dwarn:0 dfail:0 fail:9 skip:63 ro-ilk1-i5-650 total:239 pass:172 dwarn:0 dfail:0 fail:9 skip:58 ro-ivb-i7-3770 total:244 pass:203 dwarn:0 dfail:0 fail:8 skip:33 ro-skl3-i5-6260u total:244 pass:224 dwarn:0 dfail:0 fail:8 skip:12 ro-snb-i7-2620M total:244 pass:193 dwarn:0 dfail:0 fail:9 skip:42 Results at /archive/results/CI_IGT_test/RO_Patchwork_1566/ cf82f46 drm-intel-nightly: 2016y-07m-21d-20h-43m-36s UTC integration manifest 5c44b9b drm/i915/guc: revisit GuC loader message levels 50ffd43 drm/i915/guc: downgrade some DRM_ERROR() 
messages to DRM_WARN() 900ee7b drm: extra printk() wrapper macros ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
On 21/07/16 14:46, Tvrtko Ursulin wrote: On 21/07/16 14:31, Chris Wilson wrote: On Thu, Jul 21, 2016 at 02:16:22PM +0100, Tvrtko Ursulin wrote: On 21/07/16 13:59, Chris Wilson wrote: On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote: From: Tvrtko Ursulin Static table wastes space for invalid combinations and engines which are not supported by Gen6 (legacy semaphores). Replace it with a function devised by Dave Gordon. I have verified that it generates the same mappings between mbox selectors and signalling registers. So just how big was that table? How big are the functions replacing it? With I915_NUM_ENGINES of 5 table is 5 * 5 * (2 * 4) = 200 bytes. With the patch .text grows by 144 bytes here and .rodata shrinks by 256. So a net gain of 112 bytes with my config. Conclusion is that as long as we got five engines it is not that interesting to get rid of the table. Since the semaphore matrix is only relevant to a specific gen, you could remove it from the multi-generational engine-list and instead just have it in the gen-specific code that needs it. That way it won't continue to grow as new engines are added. The one gen that needs it is fixed at 4x4, so it could just be a 16-byte lookup table, or 32 bits (0b11001001_10110001_00101101_10010011) if you really want to save space ;-) v2: Add a comment describing what gen6_sem_f does. v3: This time with git add. I like having the table a lot... Even if we don't find the function convincing we should add that comment. 
Signed-off-by: Tvrtko Ursulin Cc: Dave Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 7 +-- drivers/gpu/drm/i915/intel_engine_cs.c | 93 + drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 4 files changed, 102 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9397ddec26b9..c2fe718582c8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells { #define RING_HEAD(base)_MMIO((base)+0x34) #define RING_START(base)_MMIO((base)+0x38) #define RING_CTL(base)_MMIO((base)+0x3c) -#define RING_SYNC_0(base)_MMIO((base)+0x40) -#define RING_SYNC_1(base)_MMIO((base)+0x44) -#define RING_SYNC_2(base)_MMIO((base)+0x48) +#define RING_SYNC(base, n)_MMIO((base) + 0x40 + (n) * 4) +#define RING_SYNC_0(base)RING_SYNC(base, 0) +#define RING_SYNC_1(base)RING_SYNC(base, 1) +#define RING_SYNC_2(base)RING_SYNC(base, 2) #define GEN6_RVSYNC(RING_SYNC_0(RENDER_RING_BASE)) #define GEN6_RBSYNC(RING_SYNC_1(RENDER_RING_BASE)) #define GEN6_RVESYNC(RING_SYNC_2(RENDER_RING_BASE)) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f4a35ec78481..19455b20b322 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -209,3 +209,96 @@ int intel_engine_init_common(struct intel_engine_cs *engine) return i915_cmd_parser_init_ring(engine); } + +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4) + +/* + * For Gen6 semaphores where the driver issues MI_SEMAPHORE_MBOX commands + * with register selects so that a specific engine can wake up another engine + * waiting on a matching register, the matrix of required register selects + * looks like this: + * + * |RCS| VCS | BCS | VECS + * -+---+---+---+--- + * RCS | MI_SEMAPHORE_SYNC_INVALID |MI_SEMAPHORE_SYNC_VR |MI_SEMAPHORE_SYNC_BR |MI_SEMAPHORE_SYNC_VER + * VCS |MI_SEMAPHORE_SYNC_RV | 
MI_SEMAPHORE_SYNC_INVALID |MI_SEMAPHORE_SYNC_BV |MI_SEMAPHORE_SYNC_VEV + * BCS |MI_SEMAPHORE_SYNC_RB |MI_SEMAPHORE_SYNC_VB | MI_SEMAPHORE_SYNC_INVALID |MI_SEMAPHORE_SYNC_VEB + * VECS |MI_SEMAPHORE_SYNC_RVE |MI_SEMAPHORE_SYNC_VVE |MI_SEMAPHORE_SYNC_BVE | MI_SEMAPHORE_SYNC_INVALID + * + * This distilled to integers looks like this: + * + * | 0 | 1 | 2 | 3 + * --+-+-+-+- + * 0 | -1 | 0 | 2 | 1 + * 1 | 2 | -1 | 0 | 1 + * 2 | 0 | 2 | -1 | 1 + * 3 | 2 | 1 | 0 | -1 Actually (and conveniently) MI_SEMAPHORE_SYNC_INVALID is 3 (<<16) so we don't really need to return -1 and then map it to INVALID, we can just use 0-3 directly. The binary string I wrote above represents this table; then to get the result we want it just has to be shifted. + * + * In the opposite direction, the same table showing register addresses is: + * + * | RCS | VCS | BCS |VECS + * -+--+--+--+-- + * RCS | GEN6_NOSYNC | GEN6_RVSYNC | GEN6_R
[Intel-gfx] [PATCH v3 4/4] drm/i915/guc: use symbolic names in setting defaults for module parameters
Of course, this also re-enables GuC loading and submission by default on suitable platforms, since it's Intel's Plan of Record that GuC submission shall be used where available. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_params.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index b6e404c..16ad975 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -54,9 +54,9 @@ struct i915_params i915 __read_mostly = { .verbose_state_checks = 1, .nuclear_pageflip = 0, .edp_vswing = 0, - .enable_guc_loading = 0, - .enable_guc_submission = 0, - .guc_log_level = -1, + .enable_guc_loading = FIRMWARE_LOAD_DEFAULT, + .enable_guc_submission = GUC_SUBMISSION_DEFAULT, + .guc_log_level = GUC_LOG_VERBOSITY_NONE, .enable_dp_mst = true, .inject_load_failure = 0, .enable_dpcd_backlight = false, -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 1/4] drm/i915/guc: symbolic names for GuC submission preferences
The existing code that accesses the "enable_guc_submission" parameter uses explicit numerical values for the various possibilities, including in one case relying on boolean 0/1 mapping to specific values (which could be confusing for maintainers). So this patch just provides and uses names for the values representing the DEFAULT, DISABLED, PREFERRED, and MANDATORY submission options that the user can select (-1, 0, 1, 2 respectively). This should produce identical code to the previous version! Signed-off-by: Dave Gordon Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_guc.h | 6 ++ drivers/gpu/drm/i915/intel_guc_loader.c| 15 --- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 01c1c16..e564c976 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -971,7 +971,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); i915_guc_submission_disable(dev_priv); - if (!i915.enable_guc_submission) + if (i915.enable_guc_submission == GUC_SUBMISSION_DISABLED) return 0; /* not enabled */ if (guc->ctx_pool_obj) diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743..52ecbba 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -90,6 +90,12 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; +enum { + GUC_SUBMISSION_DEFAULT = -1, + GUC_SUBMISSION_DISABLED = 0, + GUC_SUBMISSION_PREFERRED, + GUC_SUBMISSION_MANDATORY +}; enum intel_guc_fw_status { GUC_FIRMWARE_FAIL = -1, GUC_FIRMWARE_NONE = 0, diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index b883efd..d8bd4cb 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ 
b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -189,7 +189,7 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) } /* If GuC submission is enabled, set up additional parameters here */ - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; @@ -495,7 +495,7 @@ int intel_guc_setup(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { err = i915_guc_submission_enable(dev_priv); if (err) goto fail; @@ -523,7 +523,7 @@ int intel_guc_setup(struct drm_device *dev) */ if (i915.enable_guc_loading > 1) { ret = -EIO; - } else if (i915.enable_guc_submission > 1) { + } else if (i915.enable_guc_submission >= GUC_SUBMISSION_MANDATORY) { ret = -EIO; } else { ret = 0; @@ -538,7 +538,7 @@ int intel_guc_setup(struct drm_device *dev) else DRM_ERROR("GuC firmware load failed: %d\n", err); - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission != GUC_SUBMISSION_DISABLED) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) @@ -546,7 +546,7 @@ int intel_guc_setup(struct drm_device *dev) else DRM_ERROR("GuC init failed: %d\n", ret); } - i915.enable_guc_submission = 0; + i915.enable_guc_submission = GUC_SUBMISSION_DISABLED; return ret; } @@ -690,8 +690,9 @@ void intel_guc_init(struct drm_device *dev) /* A negative value means "use platform default" */ if (i915.enable_guc_loading < 0) i915.enable_guc_loading = HAS_GUC_UCODE(dev); - if (i915.enable_guc_submission < 0) - i915.enable_guc_submission = HAS_GUC_SCHED(dev); + if (i915.enable_guc_submission <= GUC_SUBMISSION_DEFAULT) + i915.enable_guc_submission = HAS_GUC_SCHED(dev) ? 
+ GUC_SUBMISSION_PREFERRED : GUC_SUBMISSION_DISABLED; if (!HAS_GUC_UCODE(dev)) { fw_path = NULL; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index daf1279..960e676 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915
[Intel-gfx] [PATCH v3 0/4] drm/i915/guc: use symbolic names for module parameter values
There are various literal constants used in the GuC module-parameter processing code; this sequence of patches replaces them with symbolic names for greater clarity. And then it re-enables GuC submission by default :) v3: Original patch broken into two (1/4 + 2/4) Name for GuC log level NONE added (3/4) Re-enable GuC loading & submission (4/4) Added cover letter :) Dave Gordon (4): drm/i915/guc: symbolic names for GuC submission preferences drm/i915/guc: symbolic names for GuC firmware loading preferences drm/i915/guc: symbolic name for GuC log-level none drm/i915/guc: use symbolic names in setting defaults for module parameters drivers/gpu/drm/i915/i915_guc_submission.c | 4 ++-- drivers/gpu/drm/i915/i915_params.c | 6 +++--- drivers/gpu/drm/i915/intel_guc.h | 20 drivers/gpu/drm/i915/intel_guc_fwif.h | 1 + drivers/gpu/drm/i915/intel_guc_loader.c| 30 -- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 6 files changed, 45 insertions(+), 22 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx