[Intel-gfx] [PATCH v3] drm/i915/mst: Hookup DRM DP MST late_register/early_unregister callbacks
i915 can enable aux device nodes for DP MST by calling drm_dp_mst_connector_late_register()/drm_dp_mst_connector_early_unregister(), so let's hook that up. Changes since v1: * Call intel_connector_register/unregister() from intel_dp_mst_connector_late_register/unregister() so we don't lose error injection - Ville Syrjälä Changes since v2: * Don't forget to clean up if intel_connector_register() fails - Ville Cc: Ville Syrjälä Cc: Manasi Navare Cc: "Lee, Shawn C" Signed-off-by: Lyude Paul --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 33 +++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index d53978ed3c12..e08caca658c6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -548,12 +548,41 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) return ret; } +static int +intel_dp_mst_connector_late_register(struct drm_connector *connector) +{ + struct intel_connector *intel_connector = to_intel_connector(connector); + int ret; + + ret = drm_dp_mst_connector_late_register(connector, +intel_connector->port); + if (ret < 0) + return ret; + + ret = intel_connector_register(connector); + if (ret < 0) + drm_dp_mst_connector_early_unregister(connector, + intel_connector->port); + + return ret; +} + +static void +intel_dp_mst_connector_early_unregister(struct drm_connector *connector) +{ + struct intel_connector *intel_connector = to_intel_connector(connector); + + intel_connector_unregister(connector); + drm_dp_mst_connector_early_unregister(connector, + intel_connector->port); +} + static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, - .late_register = intel_connector_register, - 
.early_unregister = intel_connector_unregister, + .late_register = intel_dp_mst_connector_late_register, + .early_unregister = intel_dp_mst_connector_early_unregister, .destroy = intel_connector_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = intel_digital_connector_duplicate_state, -- 2.24.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 02/12] drm/i915: Update client name on context create
On 10/03/2020 18:11, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-09 18:31:19) @@ -92,8 +107,8 @@ __i915_drm_client_register(struct i915_drm_client *client, static void __i915_drm_client_unregister(struct i915_drm_client *client) { - put_pid(fetch_and_zero(&client->pid)); - kfree(fetch_and_zero(&client->name)); + put_pid(rcu_replace_pointer(client->pid, NULL, true)); + kfree(rcu_replace_pointer(client->name, NULL, true)); client_unregister is not after an RCU grace period, so what's the protection here? Against concurrent access via sysfs? Hm.. I think kobject_put needs to go first and clearing of name and pid last. Will fix this. Accesses via GEM contexts always have a reference so that should be fine. RCU business on pid and name is basically only so the two can be asynchronously replaced if need to be updated on context create. So anyone accessing them sees either old or new, but always valid data. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for list: Prevent compiler reloads inside 'safe' list iteration
== Series Details == Series: list: Prevent compiler reloads inside 'safe' list iteration URL : https://patchwork.freedesktop.org/series/74495/ State : warning == Summary == $ dim checkpatch origin/drm-tip d40a755ea0b3 list: Prevent compiler reloads inside 'safe' list iteration -:37: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects? #37: FILE: include/linux/list.h:547: +#define list_next_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.next), typeof(*(pos)), member) -:37: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects? #37: FILE: include/linux/list.h:547: +#define list_next_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.next), typeof(*(pos)), member) -:37: CHECK:MACRO_ARG_PRECEDENCE: Macro argument 'member' may be better as '(member)' to avoid precedence issues #37: FILE: include/linux/list.h:547: +#define list_next_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.next), typeof(*(pos)), member) -:55: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects? #55: FILE: include/linux/list.h:566: +#define list_prev_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.prev), typeof(*(pos)), member) -:55: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects? #55: FILE: include/linux/list.h:566: +#define list_prev_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.prev), typeof(*(pos)), member) -:55: CHECK:MACRO_ARG_PRECEDENCE: Macro argument 'member' may be better as '(member)' to avoid precedence issues #55: FILE: include/linux/list.h:566: +#define list_prev_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.prev), typeof(*(pos)), member) -:82: WARNING:SPACE_BEFORE_TAB: please, no space before tabs #82: FILE: include/linux/list.h:725: +#define list_for_each_entry_safe_continue(pos, n, head, member) ^I\$ -:82: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects? 
#82: FILE: include/linux/list.h:725: +#define list_for_each_entry_safe_continue(pos, n, head, member)\ + for (pos = list_next_entry(pos, member),\ + n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) -:82: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'n' - possible side-effects? #82: FILE: include/linux/list.h:725: +#define list_for_each_entry_safe_continue(pos, n, head, member)\ + for (pos = list_next_entry(pos, member),\ + n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) -:82: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects? #82: FILE: include/linux/list.h:725: +#define list_for_each_entry_safe_continue(pos, n, head, member)\ + for (pos = list_next_entry(pos, member),\ + n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) -:83: WARNING:SPACE_BEFORE_TAB: please, no space before tabs #83: FILE: include/linux/list.h:726: +^Ifor (pos = list_next_entry(pos, member), ^I^I^I\$ -:98: WARNING:SPACE_BEFORE_TAB: please, no space before tabs #98: FILE: include/linux/list.h:741: +#define list_for_each_entry_safe_from(pos, n, head, member) ^I^I\$ -:98: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects? #98: FILE: include/linux/list.h:741: +#define list_for_each_entry_safe_from(pos, n, head, member)\ + for (n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) -:98: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'n' - possible side-effects? #98: FILE: include/linux/list.h:741: +#define list_for_each_entry_safe_from(pos, n, head, member)\ + for (n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) -:98: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects? 
#98: FILE: include/linux/list.h:741: +#define list_for_each_entry_safe_from(pos, n, head, member)\ + for (n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) total: 0 errors, 3 warnings, 12 checks, 94 lines checked __
Re: [Intel-gfx] [RFC 05/12] drm/i915: Track runtime spent in unreachable intel_contexts
On 10/03/2020 18:25, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-09 18:31:22) From: Tvrtko Ursulin As contexts are abandoned we want to remember how much GPU time they used (per class) so later we can used it for smarter purposes. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 - drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 5 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index abc3a3e2fcf1..5f6861a36655 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -257,7 +257,19 @@ static void free_engines_rcu(struct rcu_head *rcu) { struct i915_gem_engines *engines = container_of(rcu, struct i915_gem_engines, rcu); + struct i915_gem_context *ctx = engines->ctx; + struct i915_gem_engines_iter it; + struct intel_context *ce; + + /* Transfer accumulated runtime to the parent GEM context. */ + for_each_gem_engine(ce, engines, it) { + unsigned int class = ce->engine->uabi_class; + GEM_BUG_ON(class >= ARRAY_SIZE(ctx->past_runtime)); + atomic64_add(ce->runtime.total, &ctx->past_runtime[class]); + } -> give this its own routine. Ack. + + i915_gem_context_put(ctx); i915_sw_fence_fini(&engines->fence); free_engines(engines); } @@ -540,7 +552,6 @@ static int engines_notify(struct i915_sw_fence *fence, list_del(&engines->link); spin_unlock_irqrestore(&ctx->stale.lock, flags); } - i915_gem_context_put(engines->ctx); Or accumulate here? Here we know the engines are idle and released, albeit there is the delay in accumulating after the swap. I'm not going to worry about that, live replacement of engines I don't expect anyone to notice the busy stats being off for a bit. Worst case is that they see a sudden jump; but typical practice will be to setup engines up before they being activity. We only have to worry about is if the transient misleading stats can be exploited. 
It was even here initially but then I started fearing it may not be the last unpin of intel_context, pending context save/complete so sounded safer to make it really really last. But I guess you are right in saying that small error when replacing engines should not be large concern. If I move the accumulation back here I don't need the intel_context->closed patch any more so that's a plus. Unless it can be a large error if context ran for quite some time. Hm.. I think I still prefer to be safe and accumulate latest as possible. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 06/12] drm/i915: Track runtime spent in closed GEM contexts
On 10/03/2020 18:28, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-09 18:31:23) diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index 7825df32798d..10752107e8c7 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -16,6 +16,8 @@ #include #include +#include "gt/intel_engine_types.h" + struct i915_drm_clients { struct mutex lock; struct xarray xarray; @@ -43,6 +45,11 @@ struct i915_drm_client { struct device_attribute pid; struct device_attribute name; } attr; + + /** +* @past_runtime: Accumulation of pphwsp runtimes from closed contexts. +*/ + atomic64_t past_runtime[MAX_ENGINE_CLASS + 1]; Just to plant a seed: i915_drm_client_stats.[ch] ? Let it grow a bit first? :) Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 08/12] drm/i915: Expose per-engine client busyness
On 10/03/2020 18:32, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-09 18:31:25) +static ssize_t +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf) +{ + struct i915_engine_busy_attribute *i915_attr = + container_of(attr, typeof(*i915_attr), attr); + unsigned int class = i915_attr->engine_class; + struct i915_drm_client *client = i915_attr->client; + u64 total = atomic64_read(&client->past_runtime[class]); + struct list_head *list = &client->ctx_list; + struct i915_gem_context *ctx; + + rcu_read_lock(); + list_for_each_entry_rcu(ctx, list, client_link) { + total += atomic64_read(&ctx->past_runtime[class]); + total += pphwsp_busy_add(ctx, class); + } + rcu_read_unlock(); + + total *= RUNTIME_INFO(i915_attr->i915)->cs_timestamp_period_ns; Planning early retirement? In 600 years, they'll have forgotten how to email ;) Shruggety shrug. :) I am guessing you would prefer both internal representations (sw and pphwsp runtimes) to be consistently in nanoseconds? I thought why multiply at various places when once at the readout time is enough. And I should mention again how I am not sure at the moment how to meld the two stats into one more "perfect" output. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 6/7] drm/i915/perf: add interrupt enabling parameter
On Tue, Mar 03, 2020 at 02:19:04PM -0800, Umesh Nerlige Ramappa wrote: From: Lionel Landwerlin This let's the application choose to be driven by the interrupt mechanism of the HW. In conjuction with long periods for checks for the availability of data on the CPU, this can reduce the CPU load when doing capture of OA data. v2: Version the new parameter (Joonas) v3: Rebase (Umesh) Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa --- drivers/gpu/drm/i915/i915_perf.c | 58 +++- include/uapi/drm/i915_drm.h | 10 ++ 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 502961da840d..ab41cba85b40 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -252,7 +252,7 @@ * oa_buffer_check(). * * Most of the implementation details for this workaround are in - * oa_buffer_check_unlocked() and _append_oa_reports() + * oa_buffer_check() and _append_oa_reports() * * Note for posterity: previously the driver used to define an effective tail * pointer that lagged the real pointer by a 'tail margin' measured in bytes @@ -447,8 +447,9 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) } /** - * oa_buffer_check_unlocked - check for data and update tail ptr state + * oa_buffer_check - check for data and update tail ptr state * @stream: i915 stream instance + * @lock: whether to take the oa_buffer spin lock * * This is either called via fops (for blocking reads in user ctx) or the poll * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check @@ -470,8 +471,9 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) * * Returns: %true if the OA buffer contains data, else %false */ -static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) +static bool oa_buffer_check(struct i915_perf_stream *stream, bool lock) Hi Lionel, All callers seem to set the lock to true when calling oa_buffer_check(). 
Do you recall why the parameter was introduced? If not, we probably want to remove this change. Thanks, Umesh ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3] drm/i915/mst: Hookup DRM DP MST late_register/early_unregister callbacks
On Tue, Mar 10, 2020 at 03:51:21PM -0400, Lyude Paul wrote: > i915 can enable aux device nodes for DP MST by calling > drm_dp_mst_connector_late_register()/drm_dp_mst_connector_early_unregister(), > so let's hook that up. > > Changes since v1: > * Call intel_connector_register/unregister() from > intel_dp_mst_connector_late_register/unregister() so we don't lose > error injection - Ville Syrjälä > Changes since v2: > * Don't forget to clean up if intel_connector_register() fails - Ville > > Cc: Ville Syrjälä > Cc: Manasi Navare > Cc: "Lee, Shawn C" > Signed-off-by: Lyude Paul Reviewed-by: Ville Syrjälä > --- > drivers/gpu/drm/i915/display/intel_dp_mst.c | 33 +++-- > 1 file changed, 31 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c > b/drivers/gpu/drm/i915/display/intel_dp_mst.c > index d53978ed3c12..e08caca658c6 100644 > --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c > +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c > @@ -548,12 +548,41 @@ static int intel_dp_mst_get_ddc_modes(struct > drm_connector *connector) > return ret; > } > > +static int > +intel_dp_mst_connector_late_register(struct drm_connector *connector) > +{ > + struct intel_connector *intel_connector = to_intel_connector(connector); > + int ret; > + > + ret = drm_dp_mst_connector_late_register(connector, > + intel_connector->port); > + if (ret < 0) > + return ret; > + > + ret = intel_connector_register(connector); > + if (ret < 0) > + drm_dp_mst_connector_early_unregister(connector, > + intel_connector->port); > + > + return ret; > +} > + > +static void > +intel_dp_mst_connector_early_unregister(struct drm_connector *connector) > +{ > + struct intel_connector *intel_connector = to_intel_connector(connector); > + > + intel_connector_unregister(connector); > + drm_dp_mst_connector_early_unregister(connector, > + intel_connector->port); > +} > + > static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { > .fill_modes = 
drm_helper_probe_single_connector_modes, > .atomic_get_property = intel_digital_connector_atomic_get_property, > .atomic_set_property = intel_digital_connector_atomic_set_property, > - .late_register = intel_connector_register, > - .early_unregister = intel_connector_unregister, > + .late_register = intel_dp_mst_connector_late_register, > + .early_unregister = intel_dp_mst_connector_early_unregister, > .destroy = intel_connector_destroy, > .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, > .atomic_duplicate_state = intel_digital_connector_duplicate_state, > -- > 2.24.1 -- Ville Syrjälä Intel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/perf: Invalidate OA TLB when closing perf stream
== Series Details == Series: drm/i915/perf: Invalidate OA TLB on when closing perf stream URL : https://patchwork.freedesktop.org/series/74469/ State : success == Summary == CI Bug Log - changes from CI_DRM_8106_full -> Patchwork_16897_full Summary --- **SUCCESS** No regressions found. Known issues Here are the changes found in Patchwork_16897_full that come from known issues: ### IGT changes ### Issues hit * igt@gem_ctx_persistence@legacy-engines-mixed-process@bsd: - shard-kbl: [PASS][1] -> [FAIL][2] ([i915#679]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-kbl3/igt@gem_ctx_persistence@legacy-engines-mixed-proc...@bsd.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-kbl3/igt@gem_ctx_persistence@legacy-engines-mixed-proc...@bsd.html * igt@gem_ctx_persistence@legacy-engines-mixed-process@bsd1: - shard-kbl: [PASS][3] -> [INCOMPLETE][4] ([i915#1239]) [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-kbl3/igt@gem_ctx_persistence@legacy-engines-mixed-proc...@bsd1.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-kbl3/igt@gem_ctx_persistence@legacy-engines-mixed-proc...@bsd1.html * igt@gem_exec_balancer@smoke: - shard-iclb: [PASS][5] -> [SKIP][6] ([fdo#110854]) [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb1/igt@gem_exec_balan...@smoke.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-iclb6/igt@gem_exec_balan...@smoke.html * igt@gem_exec_parallel@vcs1-fds: - shard-iclb: [PASS][7] -> [SKIP][8] ([fdo#112080]) +7 similar issues [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb4/igt@gem_exec_paral...@vcs1-fds.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-iclb3/igt@gem_exec_paral...@vcs1-fds.html * igt@gem_exec_schedule@implicit-write-read-bsd2: - shard-iclb: [PASS][9] -> [SKIP][10] ([fdo#109276] / [i915#677]) [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb1/igt@gem_exec_sched...@implicit-write-read-bsd2.html [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-iclb6/igt@gem_exec_sched...@implicit-write-read-bsd2.html * igt@gem_exec_schedule@in-order-bsd: - shard-iclb: [PASS][11] -> [SKIP][12] ([fdo#112146]) +3 similar issues [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb7/igt@gem_exec_sched...@in-order-bsd.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-iclb4/igt@gem_exec_sched...@in-order-bsd.html * igt@gem_exec_schedule@pi-common-bsd: - shard-iclb: [PASS][13] -> [SKIP][14] ([i915#677]) +1 similar issue [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb5/igt@gem_exec_sched...@pi-common-bsd.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-iclb1/igt@gem_exec_sched...@pi-common-bsd.html * igt@gem_exec_schedule@preempt-queue-bsd1: - shard-iclb: [PASS][15] -> [SKIP][16] ([fdo#109276]) +14 similar issues [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb1/igt@gem_exec_sched...@preempt-queue-bsd1.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-iclb7/igt@gem_exec_sched...@preempt-queue-bsd1.html * igt@gem_exec_whisper@basic-contexts-forked: - shard-tglb: [PASS][17] -> [INCOMPLETE][18] ([i915#1318]) [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-tglb6/igt@gem_exec_whis...@basic-contexts-forked.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-tglb6/igt@gem_exec_whis...@basic-contexts-forked.html * igt@gem_exec_whisper@basic-fds-priority: - shard-glk: [PASS][19] -> [DMESG-WARN][20] ([i915#118] / [i915#95]) [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk8/igt@gem_exec_whis...@basic-fds-priority.html [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-glk7/igt@gem_exec_whis...@basic-fds-priority.html * igt@gen9_exec_parse@allowed-all: 
- shard-glk: [PASS][21] -> [DMESG-WARN][22] ([i915#716]) [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk5/igt@gen9_exec_pa...@allowed-all.html [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-glk9/igt@gen9_exec_pa...@allowed-all.html * igt@kms_draw_crc@draw-method-rgb565-pwrite-xtiled: - shard-glk: [PASS][23] -> [FAIL][24] ([i915#52] / [i915#54]) [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk9/igt@kms_draw_...@draw-method-rgb565-pwrite-xtiled.html [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16897/shard-glk6/igt@kms_draw_...@draw-method-rgb565-pwrite-xtiled.html * igt@kms_flip@flip-vs-expired-vblank: - shard-skl:
Re: [Intel-gfx] [RFC 08/12] drm/i915: Expose per-engine client busyness
Quoting Tvrtko Ursulin (2020-03-10 20:04:23) > > On 10/03/2020 18:32, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2020-03-09 18:31:25) > >> +static ssize_t > >> +show_client_busy(struct device *kdev, struct device_attribute *attr, char > >> *buf) > >> +{ > >> + struct i915_engine_busy_attribute *i915_attr = > >> + container_of(attr, typeof(*i915_attr), attr); > >> + unsigned int class = i915_attr->engine_class; > >> + struct i915_drm_client *client = i915_attr->client; > >> + u64 total = atomic64_read(&client->past_runtime[class]); > >> + struct list_head *list = &client->ctx_list; > >> + struct i915_gem_context *ctx; > >> + > >> + rcu_read_lock(); > >> + list_for_each_entry_rcu(ctx, list, client_link) { > >> + total += atomic64_read(&ctx->past_runtime[class]); > >> + total += pphwsp_busy_add(ctx, class); > >> + } > >> + rcu_read_unlock(); > >> + > >> + total *= RUNTIME_INFO(i915_attr->i915)->cs_timestamp_period_ns; > > > > Planning early retirement? In 600 years, they'll have forgotten how to > > email ;) > > Shruggety shrug. :) I am guessing you would prefer both internal > representations (sw and pphwsp runtimes) to be consistently in > nanoseconds? I thought why multiply at various places when once at the > readout time is enough. It's fine. I was just double checking overflow, and then remembered the end result is 64b nanoseconds. Keep the internal representation convenient for accumulation, and the conversion at the boundary. > And I should mention again how I am not sure at the moment how to meld > the two stats into one more "perfect" output. One of the things that crossed my mind was wondering if it was possible to throw in a pulse before reading the stats (if active etc). Usual dilemma with non-preemptible contexts, so probably not worth it as those hogs will remain hogs. And I worry about the disparity between sw busy and hw runtime. 
-Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Get rid of silly void* from MST code
From: Ville Syrjälä Not sure why this thing is trying to avoid declaring the proper type for these pointers. But since these are used only once let's just get rid of the local variable entirely. Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index e08caca658c6..883ea11b9773 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -49,7 +49,6 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - void *port = connector->port; bool constant_n = drm_dp_has_quirk(&intel_dp->desc, 0, DP_DPCD_QUIRK_CONSTANT_N); int bpp, slots = -EINVAL; @@ -65,7 +64,8 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, false); slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, - port, crtc_state->pbn, 0); + connector->port, + crtc_state->pbn, 0); if (slots == -EDEADLK) return slots; if (slots >= 0) @@ -147,7 +147,6 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, to_intel_digital_connector_state(conn_state); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - void *port = connector->port; struct link_config_limits limits; int ret; @@ -159,7 +158,8 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) pipe_config->has_audio = - drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, port); + drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, + connector->port); else pipe_config->has_audio = intel_conn_state->force_audio == HDMI_AUDIO_ON; -- 2.24.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v19 1/8] drm/i915: Start passing latency as parameter
On Tue, Mar 10, 2020 at 02:54:12PM +, Lisovskiy, Stanislav wrote: > On Tue, 2020-03-10 at 16:32 +0200, Ville Syrjälä wrote: > > On Mon, Mar 09, 2020 at 06:11:57PM +0200, Stanislav Lisovskiy wrote: > > > We need to start passing memory latency as a > > > parameter when calculating plane wm levels, > > > as latency can get changed in different > > > circumstances(for example with or without SAGV). > > > So we need to be more flexible on that matter. > > > > > > Reviewed-by: Ville Syrjälä > > > Signed-off-by: Stanislav Lisovskiy > > > --- > > > drivers/gpu/drm/i915/intel_pm.c | 12 > > > 1 file changed, 8 insertions(+), 4 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > > > b/drivers/gpu/drm/i915/intel_pm.c > > > index 8375054ba27d..c7928c870b0a 100644 > > > --- a/drivers/gpu/drm/i915/intel_pm.c > > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > > @@ -4016,6 +4016,7 @@ static int skl_compute_wm_params(const struct > > > intel_crtc_state *crtc_state, > > >int color_plane); > > > static void skl_compute_plane_wm(const struct intel_crtc_state > > > *crtc_state, > > >int level, > > > + u32 latency, > > > > So you didn't change the types? > > Yes, I saw your comment there - and looked into this, however I just > wondered, does it make any sense do to that. The reason is because > skl_latency is anyway defined as u16 in i915_drv.h, just as pri/spr/cur > latencies, so wonder how this "unsigned int" going to fit into this. > Should I maybe then change it to u16 - at least that would somehow > comply with the current declarations. It's u16 in the struct to not waste space. In the code it's just a number so a sized type doesn't make all that much sense. And I think most of the code uses int/unsigned int for it anyway. -- Ville Syrjälä Intel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [v2,1/2] drm/i915/display: Deactivate FBC in fastsets when disabled by parameter (rev2)
On Tue, 2020-03-10 at 18:13 +, Patchwork wrote: > == Series Details == > > Series: series starting with [v2,1/2] drm/i915/display: Deactive FBC > in fastsets when disabled by parameter (rev2) > URL : https://patchwork.freedesktop.org/series/74401/ > State : success > > == Summary == > > CI Bug Log - changes from CI_DRM_8106_full -> Patchwork_16894_full > > > Summary > --- > > **SUCCESS** > > No regressions found. Pushed to dinq, thanks for the reviews Ville > > > > Known issues > > > Here are the changes found in Patchwork_16894_full that come from > known issues: > > ### IGT changes ### > > Issues hit > > * igt@gem_busy@close-race: > - shard-tglb: [PASS][1] -> [INCOMPLETE][2] ([i915#977]) >[1]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-tglb7/igt@gem_b...@close-race.html >[2]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-tglb1/igt@gem_b...@close-race.html > > * igt@gem_ctx_persistence@close-replace-race: > - shard-tglb: [PASS][3] -> [INCOMPLETE][4] ([i915#1402]) >[3]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-tglb8/igt@gem_ctx_persiste...@close-replace-race.html >[4]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-tglb2/igt@gem_ctx_persiste...@close-replace-race.html > > * igt@gem_exec_schedule@pi-common-bsd: > - shard-iclb: [PASS][5] -> [SKIP][6] ([i915#677]) +1 > similar issue >[5]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb5/igt@gem_exec_sched...@pi-common-bsd.html >[6]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-iclb1/igt@gem_exec_sched...@pi-common-bsd.html > > * igt@gem_exec_schedule@preempt-other-bsd: > - shard-iclb: [PASS][7] -> [SKIP][8] ([fdo#112146]) +2 > similar issues >[7]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb8/igt@gem_exec_sched...@preempt-other-bsd.html >[8]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-iclb4/igt@gem_exec_sched...@preempt-other-bsd.html > > * 
igt@gem_exec_schedule@preempt-queue-bsd1: > - shard-iclb: [PASS][9] -> [SKIP][10] ([fdo#109276]) +13 > similar issues >[9]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb1/igt@gem_exec_sched...@preempt-queue-bsd1.html >[10]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-iclb7/igt@gem_exec_sched...@preempt-queue-bsd1.html > > * igt@gen9_exec_parse@allowed-all: > - shard-glk: [PASS][11] -> [DMESG-WARN][12] ([i915#716]) >[11]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk5/igt@gen9_exec_pa...@allowed-all.html >[12]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-glk7/igt@gen9_exec_pa...@allowed-all.html > > * igt@i915_pm_dc@dc5-dpms: > - shard-iclb: [PASS][13] -> [FAIL][14] ([i915#447]) >[13]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb8/igt@i915_pm...@dc5-dpms.html >[14]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-iclb3/igt@i915_pm...@dc5-dpms.html > > * igt@i915_pm_dc@dc6-psr: > - shard-iclb: [PASS][15] -> [FAIL][16] ([i915#454]) >[15]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb8/igt@i915_pm...@dc6-psr.html >[16]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-iclb6/igt@i915_pm...@dc6-psr.html > > * igt@kms_cursor_crc@pipe-a-cursor-suspend: > - shard-kbl: [PASS][17] -> [DMESG-WARN][18] ([i915#180]) > +1 similar issue >[17]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-kbl7/igt@kms_cursor_...@pipe-a-cursor-suspend.html >[18]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-kbl1/igt@kms_cursor_...@pipe-a-cursor-suspend.html > > * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy: > - shard-glk: [PASS][19] -> [FAIL][20] ([i915#72]) >[19]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk7/igt@kms_cursor_leg...@2x-long-flip-vs-cursor-legacy.html >[20]: > 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-glk8/igt@kms_cursor_leg...@2x-long-flip-vs-cursor-legacy.html > > * igt@kms_flip@plain-flip-ts-check: > - shard-glk: [PASS][21] -> [FAIL][22] ([i915#34]) >[21]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk1/igt@kms_f...@plain-flip-ts-check.html >[22]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16894/shard-glk7/igt@kms_f...@plain-flip-ts-check.html > > * igt@kms_flip_tiling@flip-to-yf-tiled: > - shard-skl: [PASS][23] -> [FAIL][24] ([fdo#107931] / > [i915#167]) >[23]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-skl6/igt@kms_flip_til...@flip-to-yf-tiled.html >[24]: > https://intel-gfx-ci.01.org/tree/drm-tip/P
Re: [Intel-gfx] [PATCH 7/7] drm/i915/perf: add flushing ioctl
On 09/03/2020 21:51, Umesh Nerlige Ramappa wrote: On Wed, Mar 04, 2020 at 09:56:28PM -0800, Dixit, Ashutosh wrote: On Wed, 04 Mar 2020 00:52:34 -0800, Lionel Landwerlin wrote: On 04/03/2020 07:48, Dixit, Ashutosh wrote: > On Tue, 03 Mar 2020 14:19:05 -0800, Umesh Nerlige Ramappa wrote: >> From: Lionel Landwerlin >> >> With the currently available parameters for the i915-perf stream, >> there are still situations that are not well covered : >> >> If an application opens the stream with polling disable or at very low >> frequency and OA interrupt enabled, no data will be available even >> though somewhere between nothing and half of the OA buffer worth of >> data might have landed in memory. >> >> To solve this issue we have a new flush ioctl on the perf stream that >> forces the i915-perf driver to look at the state of the buffer when >> called and makes any data available through both poll() & read() type >> syscalls. >> >> v2: Version the ioctl (Joonas) >> v3: Rebase (Umesh) >> >> Signed-off-by: Lionel Landwerlin >> Signed-off-by: Umesh Nerlige Ramappa > [snip] > >> +/** >> + * i915_perf_flush_data - handle `I915_PERF_IOCTL_FLUSH_DATA` ioctl >> + * @stream: An enabled i915 perf stream >> + * >> + * The intention is to flush all the data available for reading from the OA >> + * buffer >> + */ >> +static void i915_perf_flush_data(struct i915_perf_stream *stream) >> +{ >> + stream->pollin = oa_buffer_check(stream, true); >> +} > Since this function doesn't actually wake up any thread (which anyway can > be done by sending a signal to the blocked thread), is the only purpose of > this function to update OA buffer head/tail? But in that it is not clear > why a separate ioctl should be created for this, can't the read() call > itself call oa_buffer_check() to update the OA buffer head/tail? > > Again just trying to minimize uapi changes if possible. Most applications will call read() after being notified by poll()/select() that some data is available. 
Correct this is the standard non blocking read behavior. Changing that behavior will break some of the existing perf tests . I am not suggesting changing that (that standard non blocking read behavior). If any data is available, this new ioctl will wake up existing waiters on poll()/select(). The issue is we are not calling wake_up() in the above function to wake up any blocked waiters. The ioctl will just update the OA buffer head/tail so that (a) a subsequent blocking read will not block, or (b) a subsequent non blocking read will return valid data (not -EAGAIN), or (c) a poll/select will not block but return immediately saying data is available. That is why it seems to me the ioctl is not required, updating the OA buffer head/tail can be done as part of the read() (and the poll/select) calls themselves. We will investigate if this can be done and update the patches in the next revision accordingly. Thanks! In this case, where we are trying to determine if there is any data in the oa buffer before the next interrupt has fired, user could call poll with a reasonable timeout to determine if data is available or not. That would eliminate the need for the flush ioctl. Thoughts? Thanks, Umesh I almost forgot why this would cause problem. Checking the state of the buffer every time you call poll() will pretty much guarantee you have at least one report to read every time. So that would lead to lot more wakeups :( The whole system has to stay "unidirectional" with either interrupts or timeout driving the wakeups. This additional ioctl is the only solution I could find to add one more input to the wakeup mechanism. -Lionel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 6/7] drm/i915/perf: add interrupt enabling parameter
On 10/03/2020 22:08, Umesh Nerlige Ramappa wrote: On Tue, Mar 03, 2020 at 02:19:04PM -0800, Umesh Nerlige Ramappa wrote: From: Lionel Landwerlin This let's the application choose to be driven by the interrupt mechanism of the HW. In conjuction with long periods for checks for the availability of data on the CPU, this can reduce the CPU load when doing capture of OA data. v2: Version the new parameter (Joonas) v3: Rebase (Umesh) Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa --- drivers/gpu/drm/i915/i915_perf.c | 58 +++- include/uapi/drm/i915_drm.h | 10 ++ 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 502961da840d..ab41cba85b40 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -252,7 +252,7 @@ * oa_buffer_check(). * * Most of the implementation details for this workaround are in - * oa_buffer_check_unlocked() and _append_oa_reports() + * oa_buffer_check() and _append_oa_reports() * * Note for posterity: previously the driver used to define an effective tail * pointer that lagged the real pointer by a 'tail margin' measured in bytes @@ -447,8 +447,9 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) } /** - * oa_buffer_check_unlocked - check for data and update tail ptr state + * oa_buffer_check - check for data and update tail ptr state * @stream: i915 stream instance + * @lock: whether to take the oa_buffer spin lock * * This is either called via fops (for blocking reads in user ctx) or the poll * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check @@ -470,8 +471,9 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) * * Returns: %true if the OA buffer contains data, else %false */ -static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) +static bool oa_buffer_check(struct i915_perf_stream *stream, bool lock) Hi Lionel, All callers seem to set the 
lock to true when calling oa_buffer_check(). Do you recall why the parameter was introduced? If not, we probably want to remove this change. Thanks, Umesh Err... Sorry, I don't remember. It's probably a leftover the initial iteration where I was trying to get the OA head/tail register from the interrupt. I guess you can drop that param and leave the function with the _unlocked prefix. Thanks, -Lionel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [v5,1/2] drm/edid: Name the detailed monitor range flags
== Series Details == Series: series starting with [v5,1/2] drm/edid: Name the detailed monitor range flags URL : https://patchwork.freedesktop.org/series/74471/ State : success == Summary == CI Bug Log - changes from CI_DRM_8106_full -> Patchwork_16898_full Summary --- **SUCCESS** No regressions found. Known issues Here are the changes found in Patchwork_16898_full that come from known issues: ### IGT changes ### Issues hit * igt@gem_ctx_isolation@rcs0-s3: - shard-skl: [PASS][1] -> [INCOMPLETE][2] ([i915#69]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-skl1/igt@gem_ctx_isolat...@rcs0-s3.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-skl5/igt@gem_ctx_isolat...@rcs0-s3.html * igt@gem_ctx_shared@exec-single-timeline-bsd: - shard-iclb: [PASS][3] -> [SKIP][4] ([fdo#110841]) [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb8/igt@gem_ctx_sha...@exec-single-timeline-bsd.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-iclb2/igt@gem_ctx_sha...@exec-single-timeline-bsd.html * igt@gem_exec_async@concurrent-writes-bsd: - shard-iclb: [PASS][5] -> [SKIP][6] ([fdo#112146]) +2 similar issues [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb8/igt@gem_exec_as...@concurrent-writes-bsd.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-iclb2/igt@gem_exec_as...@concurrent-writes-bsd.html * igt@gem_exec_create@madvise: - shard-glk: [PASS][7] -> [DMESG-WARN][8] ([i915#118] / [i915#95]) [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk6/igt@gem_exec_cre...@madvise.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-glk2/igt@gem_exec_cre...@madvise.html * igt@gem_exec_schedule@pi-userfault-bsd: - shard-iclb: [PASS][9] -> [SKIP][10] ([i915#677]) [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb8/igt@gem_exec_sched...@pi-userfault-bsd.html [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-iclb2/igt@gem_exec_sched...@pi-userfault-bsd.html * igt@gem_exec_schedule@preempt-queue-bsd1: - shard-iclb: [PASS][11] -> [SKIP][12] ([fdo#109276]) +13 similar issues [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb1/igt@gem_exec_sched...@preempt-queue-bsd1.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-iclb8/igt@gem_exec_sched...@preempt-queue-bsd1.html * igt@gem_ppgtt@flink-and-close-vma-leak: - shard-kbl: [PASS][13] -> [FAIL][14] ([i915#644]) [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-kbl2/igt@gem_pp...@flink-and-close-vma-leak.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-kbl7/igt@gem_pp...@flink-and-close-vma-leak.html * igt@gem_workarounds@suspend-resume-context: - shard-apl: [PASS][15] -> [DMESG-WARN][16] ([i915#180]) +2 similar issues [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-apl2/igt@gem_workarou...@suspend-resume-context.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-apl1/igt@gem_workarou...@suspend-resume-context.html * igt@kms_flip@flip-vs-suspend: - shard-snb: [PASS][17] -> [DMESG-WARN][18] ([i915#42]) [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-snb6/igt@kms_f...@flip-vs-suspend.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-snb4/igt@kms_f...@flip-vs-suspend.html * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-pwrite: - shard-glk: [PASS][19] -> [FAIL][20] ([i915#49]) [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk8/igt@kms_frontbuffer_track...@fbc-2p-primscrn-spr-indfb-draw-pwrite.html [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-glk6/igt@kms_frontbuffer_track...@fbc-2p-primscrn-spr-indfb-draw-pwrite.html * igt@kms_frontbuffer_tracking@fbc-suspend: - shard-kbl: [PASS][21] -> [DMESG-WARN][22] ([i915#180]) +8 similar issues [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-kbl4/igt@kms_frontbuffer_track...@fbc-suspend.html [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-kbl6/igt@kms_frontbuffer_track...@fbc-suspend.html * igt@kms_plane_lowres@pipe-a-tiling-y: - shard-glk: [PASS][23] -> [FAIL][24] ([i915#899]) [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk9/igt@kms_plane_low...@pipe-a-tiling-y.html [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16898/shard-glk3/igt@kms_plane_low...@pipe-a-tiling-y.html * igt@kms_psr2_su@frontbuffer: - shard-iclb: [PASS][25] -> [SKIP][26] ([fdo#109642] / [fdo#111068]) [25]: https://i
[Intel-gfx] ✗ Fi.CI.IGT: failure for drm: Add support for integrated privacy screen
== Series Details == Series: drm: Add support for integrated privacy screen URL : https://patchwork.freedesktop.org/series/74473/ State : failure == Summary == CI Bug Log - changes from CI_DRM_8106_full -> Patchwork_16899_full Summary --- **FAILURE** Serious unknown changes coming with Patchwork_16899_full absolutely need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in Patchwork_16899_full, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. Possible new issues --- Here are the unknown changes that may have been introduced in Patchwork_16899_full: ### IGT changes ### Possible regressions * igt@runner@aborted: - shard-tglb: NOTRUN -> ([FAIL][1], [FAIL][2], [FAIL][3], [FAIL][4], [FAIL][5], [FAIL][6], [FAIL][7], [FAIL][8], [FAIL][9], [FAIL][10], [FAIL][11], [FAIL][12], [FAIL][13], [FAIL][14], [FAIL][15], [FAIL][16], [FAIL][17], [FAIL][18], [FAIL][19], [FAIL][20], [FAIL][21], [FAIL][22], [FAIL][23], [FAIL][24], [FAIL][25]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb3/igt@run...@aborted.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb5/igt@run...@aborted.html [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb8/igt@run...@aborted.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb6/igt@run...@aborted.html [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb3/igt@run...@aborted.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb5/igt@run...@aborted.html [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb5/igt@run...@aborted.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb6/igt@run...@aborted.html [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb5/igt@run...@aborted.html [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb6/igt@run...@aborted.html [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb5/igt@run...@aborted.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb5/igt@run...@aborted.html [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb3/igt@run...@aborted.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb7/igt@run...@aborted.html [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb3/igt@run...@aborted.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb7/igt@run...@aborted.html [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb3/igt@run...@aborted.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb8/igt@run...@aborted.html [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb1/igt@run...@aborted.html [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb2/igt@run...@aborted.html [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb1/igt@run...@aborted.html [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb1/igt@run...@aborted.html [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb1/igt@run...@aborted.html [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb2/igt@run...@aborted.html [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-tglb2/igt@run...@aborted.html Known issues Here are the changes found in Patchwork_16899_full that come from known issues: ### IGT changes ### Issues hit * igt@gem_exec_whisper@basic-fds: - shard-glk: [PASS][26] -> [DMESG-WARN][27] ([i915#118] / [i915#95]) +2 similar issues [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk6/igt@gem_exec_whis...@basic-fds.html [27]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-glk8/igt@gem_exec_whis...@basic-fds.html * igt@gen9_exec_parse@allowed-all: - shard-glk: [PASS][28] -> [INCOMPLETE][29] ([i915#58] / [k.org#198133]) [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk5/igt@gen9_exec_pa...@allowed-all.html [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-glk4/igt@gen9_exec_pa...@allowed-all.html * igt@kms_fbcon_fbt@fbc-suspend: - shard-apl: [PASS][30] -> [DMESG-WARN][31] ([i915#180]) +2 similar issues [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-apl1/igt@kms_fbcon_...@fbc-suspend.html [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16899/shard-apl6/igt@kms_fbcon_...@fbc-suspend.html * igt@kms_frontbu
Re: [Intel-gfx] [PATCH] drm/i915: Get rid of silly void* from MST code
On Tue, 2020-03-10 at 22:27 +0200, Ville Syrjala wrote: > From: Ville Syrjälä > > Not sure why this thing is trying to avoid declaring the proper > type for these pointers. But since these are used only once let's > just get rid of the local variable entirely. Reviewed-by: José Roberto de Souza > > Signed-off-by: Ville Syrjälä > --- > drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c > b/drivers/gpu/drm/i915/display/intel_dp_mst.c > index e08caca658c6..883ea11b9773 100644 > --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c > +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c > @@ -49,7 +49,6 @@ static int intel_dp_mst_compute_link_config(struct > intel_encoder *encoder, > to_intel_connector(conn_state->connector); > const struct drm_display_mode *adjusted_mode = > &crtc_state->hw.adjusted_mode; > - void *port = connector->port; > bool constant_n = drm_dp_has_quirk(&intel_dp->desc, 0, > DP_DPCD_QUIRK_CONSTANT_N); > int bpp, slots = -EINVAL; > @@ -65,7 +64,8 @@ static int intel_dp_mst_compute_link_config(struct > intel_encoder *encoder, > false); > > slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp- > >mst_mgr, > - port, crtc_state- > >pbn, 0); > + connector->port, > + crtc_state->pbn, > 0); > if (slots == -EDEADLK) > return slots; > if (slots >= 0) > @@ -147,7 +147,6 @@ static int intel_dp_mst_compute_config(struct > intel_encoder *encoder, > to_intel_digital_connector_state(conn_state); > const struct drm_display_mode *adjusted_mode = > &pipe_config->hw.adjusted_mode; > - void *port = connector->port; > struct link_config_limits limits; > int ret; > > @@ -159,7 +158,8 @@ static int intel_dp_mst_compute_config(struct > intel_encoder *encoder, > > if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) > pipe_config->has_audio = > - drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, > port); > + drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, > + connector->port); 
> else > pipe_config->has_audio = > intel_conn_state->force_audio == HDMI_AUDIO_ON; ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC] GPU-bound energy efficiency improvements for the intel_pstate driver (v2).
This is my second take on improving the energy efficiency of the intel_pstate driver under IO-bound conditions. The problem and approach to solve it are roughly the same as in my previous series [1] at a high level: In IO-bound scenarios (by definition) the throughput of the system doesn't improve with increasing CPU frequency beyond the threshold value at which the IO device becomes the bottleneck, however with the current governors (whether HWP is in use or not) the CPU frequency tends to oscillate with the load, often with an amplitude far into the turbo range, leading to severely reduced energy efficiency, which is particularly problematic when a limited TDP budget is shared among a number of cores running some multithreaded workload, or among a CPU core and an integrated GPU. Improving the energy efficiency of the CPU improves the throughput of the system in such TDP-limited conditions. See [4] for some preliminary benchmark results from a Razer Blade Stealth 13 Late 2019/LY320 laptop with an Intel ICL processor and integrated graphics, including throughput results that range up to a ~15% improvement and performance-per-watt results up to a ~43% improvement (estimated via RAPL). Particularly the throughput results may vary substantially from one platform to another depending on the TDP budget and the balance of load between CPU and GPU. One of the main differences relative to my previous version is that the trade-off between energy efficiency and frequency ramp-up latency is now exposed to device drivers through a new PM QoS class [It would make sense to expose it to userspace too eventually but that's beyond the purpose of this series]. The new PM QoS class provides a latency target to CPUFREQ governors which gives them permission to filter out CPU frequency oscillations with a period significantly shorter than the specified target, whenever doing so leads to improved energy efficiency. 
This series takes advantage of the new PM QoS class from the i915 driver whenever the driver determines that the GPU has become a bottleneck for an extended period of time. At that point it places a PM QoS ramp-up latency target which causes CPUFREQ to limit the CPU to a reasonably energy-efficient frequency able to at least achieve the required amount of work in a time window approximately equal to the ramp-up latency target (since any longer-term energy efficiency optimization would potentially violate the latency target). This seems more effective than clamping the CPU frequency to a fixed value directly from various subsystems, since the CPU is a shared resource, so the frequency bound needs to consider the load and latency requirements of all independent workloads running on the same CPU core in order to avoid performance degradation in a multitasking, possibly virtualized environment. The main limitation of this PM QoS approach is that whenever multiple clients request different ramp-up latency targets, only the strictest (lowest latency) one will apply system-wide, potentially leading to suboptimal energy efficiency for the less latency-sensitive clients, (though it won't artificially limit the CPU throughput of the most latency-sensitive clients as a result of the PM QoS requests placed by less latency-sensitive ones). In order to address this limitation I'm working on a more complicated solution which integrates with the task scheduler in order to provide response latency control with process granularity (pretty much in the spirit of PELT). One of the alternatives Rafael and I were discussing was to expose that through a third cgroup clamp on top of the MIN and MAX utilization clamps, but I'm open to any other possibilities regarding what the interface should look like. Either way the current (scheduling-unaware) PM QoS-based interface should provide most of the benefit except in heavily multitasking environments. 
A branch with this series in testable form can be found here [2], based on linux-next from a few days ago. Another important difference with respect to my previous revision is that the present one targets HWP systems (though for the moment it's only enabled by default on ICL, even though that can be overridden through the kernel command line). I have WIP code that uses the same governor in order to provide a similar benefit on non-HWP systems (like my previous revision), which can be found in this branch for reference [3] -- I'm planning to finish that up and send it as follow-up to this series assuming people are happy with the overall approach. Thanks in advance for any review feed-back and test reports. [PATCH 01/10] PM: QoS: Add CPU_RESPONSE_FREQUENCY global PM QoS limit. [PATCH 02/10] drm/i915: Adjust PM QoS response frequency based on GPU load. [PATCH 03/10] OPTIONAL: drm/i915: Expose PM QoS control parameters via debugfs. [PATCH 04/10] Revert "cpufreq: intel_pstate: Drop ->update_util from pstate_funcs" [PATCH 05/10] cpufreq: intel_pstate: Implement VLP controller statisti
[Intel-gfx] [PATCH 08/10] cpufreq: intel_pstate: Enable VLP controller based on ACPI FADT profile and CPUID.
For the moment the VLP controller is only enabled on ICL platforms other than server FADT profiles in order to reduce the validation effort of the initial submission. It should work on any other processors that support HWP though (and soon enough on non-HWP too): In order to override the default behavior (e.g. to test on other platforms) the VLP controller can be forcefully enabled or disabled by passing "intel_pstate=vlp" or "intel_pstate=no_vlp" respectively in the kernel command line. v2: Handle HWP VLP controller. Signed-off-by: Francisco Jerez --- .../admin-guide/kernel-parameters.txt | 5 Documentation/admin-guide/pm/intel_pstate.rst | 7 ++ drivers/cpufreq/intel_pstate.c| 25 +-- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0c9894247015..9bc55fc2752e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1828,6 +1828,11 @@ per_cpu_perf_limits Allow per-logical-CPU P-State performance control limits using cpufreq sysfs interface + vlp + Force use of VLP P-state controller. Overrides selection + derived from ACPI FADT profile. + no_vlp + Prevent use of VLP P-state controller (see "vlp" parameter). intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 67e414e34f37..da6b64812848 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -669,6 +669,13 @@ of them have to be prepended with the ``intel_pstate=`` prefix. Use per-logical-CPU P-State limits (see `Coordination of P-state Limits`_ for details). +``vlp`` + Force use of VLP P-state controller. Overrides selection derived + from ACPI FADT profile. + +``no_vlp`` + Prevent use of VLP P-state controller (see "vlp" parameter). 
+ Diagnostics and Tuning == diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a01eed40d897..050cc8f03c26 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3029,6 +3029,7 @@ static int intel_pstate_update_status(const char *buf, size_t size) static int no_load __initdata; static int no_hwp __initdata; +static int vlp __initdata = -1; static int hwp_only __initdata; static unsigned int force_load __initdata; @@ -3193,6 +3194,7 @@ static inline void intel_pstate_request_control_from_smm(void) {} #endif /* CONFIG_ACPI */ #define INTEL_PSTATE_HWP_BROADWELL 0x01 +#define INTEL_PSTATE_HWP_VLP 0x02 #define ICPU_HWP(model, hwp_mode) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode } @@ -3200,12 +3202,15 @@ static inline void intel_pstate_request_control_from_smm(void) {} static const struct x86_cpu_id hwp_support_ids[] __initconst = { ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), ICPU_HWP(INTEL_FAM6_BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL), + ICPU_HWP(INTEL_FAM6_ICELAKE, INTEL_PSTATE_HWP_VLP), + ICPU_HWP(INTEL_FAM6_ICELAKE_L, INTEL_PSTATE_HWP_VLP), ICPU_HWP(X86_MODEL_ANY, 0), {} }; static int __init intel_pstate_init(void) { + bool use_vlp = vlp == 1; const struct x86_cpu_id *id; int rc; @@ -3222,8 +3227,19 @@ static int __init intel_pstate_init(void) pstate_funcs.update_util = intel_pstate_update_util; } else { hwp_active++; - pstate_funcs.update_util = intel_pstate_update_util_hwp; - hwp_mode_bdw = id->driver_data; + + if (vlp < 0 && !intel_pstate_acpi_pm_profile_server() && + (id->driver_data & INTEL_PSTATE_HWP_VLP)) { + /* Enable VLP controller by default. */ + use_vlp = true; + } + + pstate_funcs.update_util = use_vlp ? 
+ intel_pstate_update_util_hwp_vlp : + intel_pstate_update_util_hwp; + + hwp_mode_bdw = (id->driver_data & + INTEL_PSTATE_HWP_BROADWELL); intel_pstate.attr = hwp_cpufreq_attrs; goto hwp_cpu_matched; } @@ -3301,6 +3317,11 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "per_cpu_perf_limits")) per_cpu_limits = true;
[Intel-gfx] [PATCH 10/10] OPTIONAL: cpufreq: intel_pstate: Expose VLP controller parameters via debugfs.
This is not required for the controller to work but has proven very useful for debugging and testing of alternative heuristic parameters, which may offer a better trade-off between energy efficiency and latency. A warning is printed out which should taint the kernel for the non-standard calibration of the heuristic to be obvious in bug reports. v2: Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE for debugfs files (Julia). Add realtime statistic threshold and averaging frequency parameters. Signed-off-by: Francisco Jerez Signed-off-by: Fengguang Wu Signed-off-by: Julia Lawall --- drivers/cpufreq/intel_pstate.c | 92 ++ 1 file changed, 92 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c4558a131660..ab893a211746 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1030,6 +1030,94 @@ static void intel_pstate_update_limits(unsigned int cpu) mutex_unlock(&intel_pstate_driver_lock); } +/** debugfs begin / +static void intel_pstate_reset_vlp(struct cpudata *cpu); + +static int vlp_param_set(void *data, u64 val) +{ + unsigned int cpu; + + *(u32 *)data = val; + for_each_possible_cpu(cpu) { + if (all_cpu_data[cpu]) + intel_pstate_reset_vlp(all_cpu_data[cpu]); + } + + WARN_ONCE(1, "Unsupported P-state VLP parameter update via debugging interface"); + + return 0; +} + +static int vlp_param_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_vlp_param, vlp_param_get, vlp_param_set, +"%llu\n"); + +static struct dentry *debugfs_parent; + +struct vlp_param { + char *name; + void *value; + struct dentry *dentry; +}; + +static struct vlp_param vlp_files[] = { + {"vlp_sample_interval_ms", &vlp_params.sample_interval_ms, }, + {"vlp_setpoint_0_pml", &vlp_params.setpoint_0_pml, }, + {"vlp_setpoint_aggr_pml", &vlp_params.setpoint_aggr_pml, }, + {"vlp_avg_hz", &vlp_params.avg_hz, }, + {"vlp_realtime_gain_pml", &vlp_params.realtime_gain_pml, }, + 
{"vlp_debug", &vlp_params.debug, }, + {NULL, NULL, } +}; + +static void intel_pstate_update_util_hwp_vlp(struct update_util_data *data, +u64 time, unsigned int flags); + +static void intel_pstate_debug_expose_params(void) +{ + int i; + + if (pstate_funcs.update_util != intel_pstate_update_util_hwp_vlp) + return; + + debugfs_parent = debugfs_create_dir("pstate_snb", NULL); + if (IS_ERR_OR_NULL(debugfs_parent)) + return; + + for (i = 0; vlp_files[i].name; i++) { + struct dentry *dentry; + + dentry = debugfs_create_file_unsafe(vlp_files[i].name, 0660, + debugfs_parent, + vlp_files[i].value, + &fops_vlp_param); + if (!IS_ERR(dentry)) + vlp_files[i].dentry = dentry; + } +} + +static void intel_pstate_debug_hide_params(void) +{ + int i; + + if (IS_ERR_OR_NULL(debugfs_parent)) + return; + + for (i = 0; vlp_files[i].name; i++) { + debugfs_remove(vlp_files[i].dentry); + vlp_files[i].dentry = NULL; + } + + debugfs_remove(debugfs_parent); + debugfs_parent = NULL; +} + +/** debugfs end / + /** sysfs begin / #define show_one(file_name, object)\ static ssize_t show_##file_name \ @@ -2970,6 +3058,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) global.min_perf_pct = min_perf_pct_min(); + intel_pstate_debug_expose_params(); + return 0; } @@ -2978,6 +3068,8 @@ static int intel_pstate_unregister_driver(void) if (hwp_active) return -EBUSY; + intel_pstate_debug_hide_params(); + cpufreq_unregister_driver(intel_pstate_driver); intel_pstate_driver_cleanup(); -- 2.22.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 03/10] OPTIONAL: drm/i915: Expose PM QoS control parameters via debugfs.
Signed-off-by: Francisco Jerez --- drivers/gpu/drm/i915/i915_debugfs.c | 69 + 1 file changed, 69 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8f2525e4ce0f..e5c27b9302d9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1745,6 +1745,72 @@ static const struct file_operations i915_guc_log_relay_fops = { .release = i915_guc_log_relay_release, }; +static int +i915_rf_qos_delay_max_ns_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + + WRITE_ONCE(dev_priv->gt.rf_qos.delay_max_ns, val); + return 0; +} + +static int +i915_rf_qos_delay_max_ns_get(void *data, u64 *val) +{ + struct drm_i915_private *dev_priv = data; + + *val = READ_ONCE(dev_priv->gt.rf_qos.delay_max_ns); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_rf_qos_delay_max_ns_fops, + i915_rf_qos_delay_max_ns_get, + i915_rf_qos_delay_max_ns_set, "%llu\n"); + +static int +i915_rf_qos_delay_slope_shift_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + + WRITE_ONCE(dev_priv->gt.rf_qos.delay_slope_shift, val); + return 0; +} + +static int +i915_rf_qos_delay_slope_shift_get(void *data, u64 *val) +{ + struct drm_i915_private *dev_priv = data; + + *val = READ_ONCE(dev_priv->gt.rf_qos.delay_slope_shift); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_rf_qos_delay_slope_shift_fops, + i915_rf_qos_delay_slope_shift_get, + i915_rf_qos_delay_slope_shift_set, "%llu\n"); + +static int +i915_rf_qos_target_hz_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + + WRITE_ONCE(dev_priv->gt.rf_qos.target_hz, val); + return 0; +} + +static int +i915_rf_qos_target_hz_get(void *data, u64 *val) +{ + struct drm_i915_private *dev_priv = data; + + *val = READ_ONCE(dev_priv->gt.rf_qos.target_hz); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_rf_qos_target_hz_fops, + i915_rf_qos_target_hz_get, + i915_rf_qos_target_hz_set, "%llu\n"); + static int 
i915_runtime_pm_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2390,6 +2456,9 @@ static const struct i915_debugfs_files { #endif {"i915_guc_log_level", &i915_guc_log_level_fops}, {"i915_guc_log_relay", &i915_guc_log_relay_fops}, + {"i915_rf_qos_delay_max_ns", &i915_rf_qos_delay_max_ns_fops}, + {"i915_rf_qos_delay_slope_shift", &i915_rf_qos_delay_slope_shift_fops}, + {"i915_rf_qos_target_hz", &i915_rf_qos_target_hz_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) -- 2.22.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts.
This implements a simple variably low-pass-filtering governor in control of the HWP MIN/MAX PERF range based on the previously introduced get_vlp_target_range(). See "cpufreq: intel_pstate: Implement VLP controller target P-state range estimation." for the rationale. Signed-off-by: Francisco Jerez --- drivers/cpufreq/intel_pstate.c | 79 +- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cecadfec8bc1..a01eed40d897 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1905,6 +1905,20 @@ static void intel_pstate_reset_vlp(struct cpudata *cpu) vlp->gain = max(1, div_fp(1000, vlp_params.setpoint_0_pml)); vlp->target.p_base = 0; vlp->stats.last_response_frequency_hz = vlp_params.avg_hz; + + if (hwp_active) { + const uint32_t p0 = max(cpu->pstate.min_pstate, + cpu->min_perf_ratio); + const uint32_t p1 = max_t(uint32_t, p0, cpu->max_perf_ratio); + const uint64_t hwp_req = (READ_ONCE(cpu->hwp_req_cached) & + ~(HWP_MAX_PERF(~0L) | + HWP_MIN_PERF(~0L) | + HWP_DESIRED_PERF(~0L))) | +HWP_MIN_PERF(p0) | HWP_MAX_PERF(p1); + + wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, hwp_req); + cpu->hwp_req_cached = hwp_req; + } } /** @@ -,6 +2236,46 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu) fp_toint(cpu->iowait_boost * 100)); } +static void intel_pstate_adjust_pstate_range(struct cpudata *cpu, +const unsigned int range[]) +{ + const int from = cpu->hwp_req_cached; + unsigned int p0, p1, p_min, p_max; + struct sample *sample; + uint64_t hwp_req; + + update_turbo_state(); + + p0 = max(cpu->pstate.min_pstate, cpu->min_perf_ratio); + p1 = max_t(unsigned int, p0, cpu->max_perf_ratio); + p_min = clamp_t(unsigned int, range[0], p0, p1); + p_max = clamp_t(unsigned int, range[1], p0, p1); + + trace_cpu_frequency(p_max * cpu->pstate.scaling, cpu->cpu); + + hwp_req = (READ_ONCE(cpu->hwp_req_cached) & + ~(HWP_MAX_PERF(~0L) | HWP_MIN_PERF(~0L) | +HWP_DESIRED_PERF(~0L))) | + 
HWP_MIN_PERF(vlp_params.debug & 2 ? p0 : p_min) | + HWP_MAX_PERF(vlp_params.debug & 4 ? p1 : p_max); + + if (hwp_req != cpu->hwp_req_cached) { + wrmsrl(MSR_HWP_REQUEST, hwp_req); + cpu->hwp_req_cached = hwp_req; + } + + sample = &cpu->sample; + trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf), + fp_toint(sample->busy_scaled), + from, + hwp_req, + sample->mperf, + sample->aperf, + sample->tsc, + get_avg_frequency(cpu), + fp_toint(cpu->iowait_boost * 100)); +} + static void intel_pstate_update_util(struct update_util_data *data, u64 time, unsigned int flags) { @@ -2260,6 +2314,22 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, intel_pstate_adjust_pstate(cpu); } +/** + * Implementation of the cpufreq update_util hook based on the VLP + * controller (see get_vlp_target_range()). + */ +static void intel_pstate_update_util_hwp_vlp(struct update_util_data *data, +u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + + if (update_vlp_sample(cpu, time, flags)) { + const struct vlp_target_range *target = + get_vlp_target_range(cpu); + intel_pstate_adjust_pstate_range(cpu, target->value); + } +} + static struct pstate_funcs core_funcs = { .get_max = core_get_max_pstate, .get_max_physical = core_get_max_pstate_physical, @@ -2389,6 +2459,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_get_cpu_pstates(cpu); + if (pstate_funcs.update_util == intel_pstate_update_util_hwp_vlp) + intel_pstate_reset_vlp(cpu); + pr_debug("controlling: cpu %d\n", cpunum); return 0; @@ -2398,7 +2471,8 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; - if (hwp_active && !hwp_boost) + if (hwp_active && !hwp_boost && + pstate_funcs.update_util != intel_pstate_update_util_hwp_vlp) return; if (cpu->update_util_set
[Intel-gfx] [PATCH 01/10] PM: QoS: Add CPU_RESPONSE_FREQUENCY global PM QoS limit.
The purpose of this PM QoS limit is to give device drivers additional control over the latency/energy efficiency trade-off made by the PM subsystem (particularly the CPUFREQ governor). It allows device drivers to set a lower bound on the response latency of PM (defined as the time it takes from wake-up to the CPU reaching a certain steady-state level of performance [e.g. the nominal frequency] in response to a step-function load). It reports to PM the minimum ramp-up latency considered of use to the application, and explicitly requests PM to filter out oscillations faster than the specified frequency. It is somewhat complementary to the current CPU_DMA_LATENCY PM QoS class which can be understood as specifying an upper latency bound on the CPU wake-up time, instead of a lower bound on the CPU frequency ramp-up time. Note that even though this provides a latency constraint it's represented as its reciprocal in Hz units for computational efficiency (since it would take a 64-bit division to compute the number of cycles elapsed from a time increment in nanoseconds and a time bound, while a frequency can simply be multiplied with the time increment). This implements a MAX constraint so that the strictest (highest response frequency) request is honored. This means that PM won't provide any guarantee that frequencies greater than the specified bound will be filtered, since that might be incompatible with the constraints specified by another more latency-sensitive application (A more fine-grained result could be achieved with a scheduling-based interface). The default value needs to be equal to zero (best effort) for it to behave as identity of the MAX operation. 
Signed-off-by: Francisco Jerez --- include/linux/pm_qos.h | 9 +++ include/trace/events/power.h | 33 kernel/power/qos.c | 141 ++- 3 files changed, 165 insertions(+), 18 deletions(-) diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 4a69d4af3ff8..b522e2194c05 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -28,6 +28,7 @@ enum pm_qos_flags_status { #define PM_QOS_LATENCY_ANY_NS ((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC) #define PM_QOS_CPU_LATENCY_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_CPU_RESPONSE_FREQUENCY_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS @@ -162,6 +163,14 @@ static inline void cpu_latency_qos_update_request(struct pm_qos_request *req, static inline void cpu_latency_qos_remove_request(struct pm_qos_request *req) {} #endif +s32 cpu_response_frequency_qos_limit(void); +bool cpu_response_frequency_qos_request_active(struct pm_qos_request *req); +void cpu_response_frequency_qos_add_request(struct pm_qos_request *req, + s32 value); +void cpu_response_frequency_qos_update_request(struct pm_qos_request *req, + s32 new_value); +void cpu_response_frequency_qos_remove_request(struct pm_qos_request *req); + #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index af5018aa9517..7e4b52e8ca3a 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -359,45 +359,48 @@ DEFINE_EVENT(power_domain, power_domain_target, ); /* - * CPU latency QoS events used for global CPU latency QoS list updates + * CPU latency/response frequency QoS events used for global CPU PM + * QoS list updates. 
*/ -DECLARE_EVENT_CLASS(cpu_latency_qos_request, +DECLARE_EVENT_CLASS(pm_qos_request, - TP_PROTO(s32 value), + TP_PROTO(const char *name, s32 value), - TP_ARGS(value), + TP_ARGS(name, value), TP_STRUCT__entry( + __string(name, name ) __field( s32,value ) ), TP_fast_assign( + __assign_str(name, name); __entry->value = value; ), - TP_printk("CPU_DMA_LATENCY value=%d", - __entry->value) + TP_printk("pm_qos_class=%s value=%d", + __get_str(name), __entry->value) ); -DEFINE_EVENT(cpu_latency_qos_request, pm_qos_add_request, +DEFINE_EVENT(pm_qos_request, pm_qos_add_request, - TP_PROTO(s32 value), + TP_PROTO(const char *name, s32 value), - TP_ARGS(value) + TP_ARGS(name, value) ); -DEFINE_EVENT(cpu_latency_qos_request, pm_qos_update_request, +DEFINE_EVENT(pm_qos_request, pm_qos_update_request, - TP_PROTO(s32 value), + TP_PROTO(const char *name, s32 value), - TP_ARGS(value) + TP_ARGS(name, value) );
[Intel-gfx] [PATCH 02/10] drm/i915: Adjust PM QoS response frequency based on GPU load.
This allows CPUFREQ governors to realize when the system becomes non-CPU-bound due to GPU rendering activity, and cause them to respond more conservatively to the workload by limiting their response frequency: CPU energy usage will be reduced when there isn't a good chance for system performance to scale with CPU frequency due to the GPU bottleneck. This leaves additional TDP budget available for the GPU to reach higher frequencies, which is translated into an improvement in graphics performance to the extent that the workload remains TDP-limited (Most non-trivial graphics benchmarks out there improve significantly in the TDP-constrained platforms where this is currently enabled, see the cover letter for some numbers). If the workload isn't (anymore) TDP-limited performance should stay roughly constant, but energy usage will be divided by a similar factor. Signed-off-by: Francisco Jerez --- drivers/gpu/drm/i915/gt/intel_engine_cs.c| 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 ++ drivers/gpu/drm/i915/gt/intel_gt_pm.c| 107 +++ drivers/gpu/drm/i915/gt/intel_gt_pm.h| 3 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 12 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 14 +++ 6 files changed, 144 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 53ac3f00909a..16ebdfa1dfc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -504,6 +504,7 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; + atomic_set(&execlists->overload, 0); } static void cleanup_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 80cdde712842..1b17b2f0c7a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -266,6 +266,13 @@ struct 
intel_engine_execlists { */ u8 csb_head; + /** +* @overload: whether at least two execlist ports are +* currently submitted to the hardware, indicating that CPU +* latency isn't critical in order to maintain the GPU busy. +*/ + atomic_t overload; + I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) }; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 8b653c0f5e5f..f1f859e89a8f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -107,6 +107,102 @@ void intel_gt_pm_init_early(struct intel_gt *gt) intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops); } +/** + * Time increment until the most immediate PM QoS response frequency + * update. + * + * May be in the future (return value > 0) if the GPU is currently + * active but we haven't updated the PM QoS request to reflect a + * bottleneck yet. May be in the past (return value < 0) if the GPU + * isn't fully utilized and we've already reset the PM QoS request to + * the default value. May be zero if a PM QoS request update is due. + * + * The time increment returned by this function decreases linearly + * with time until it reaches either zero or a configurable limit. + */ +static int32_t time_to_rf_qos_update_ns(struct intel_gt *gt) +{ + const uint64_t t1 = ktime_get_ns(); + const uint64_t dt1 = gt->rf_qos.delay_max_ns; + + if (atomic_read_acquire(&gt->rf_qos.active_count)) { + const uint64_t t0 = atomic64_read(&gt->rf_qos.time_set_ns); + + return min(dt1, t0 <= t1 ? 0 : t0 - t1); + } else { + const uint64_t t0 = atomic64_read(&gt->rf_qos.time_clear_ns); + const unsigned int shift = gt->rf_qos.delay_slope_shift; + + return -(int32_t)(t1 <= t0 ? 1 : + min(dt1, (t1 - t0) << shift)); + } +} + +/** + * Perform a delayed PM QoS response frequency update. 
+ */ +static void intel_gt_rf_qos_update(struct intel_gt *gt) +{ + const uint32_t dt = max(0, time_to_rf_qos_update_ns(gt)); + + timer_reduce(&gt->rf_qos.timer, jiffies + nsecs_to_jiffies(dt)); +} + +/** + * Timer that fires once the delay used to switch the PM QoS response + * frequency request has elapsed. + */ +static void intel_gt_rf_qos_timeout(struct timer_list *timer) +{ + struct intel_gt *gt = container_of(timer, struct intel_gt, + rf_qos.timer); + const int32_t dt = time_to_rf_qos_update_ns(gt); + + if (dt == 0) + cpu_response_frequency_qos_update_request( + &gt->rf_qos.req, gt->rf_qos.target_hz); + else + cpu_response_frequency_qos_update_request( + &gt->rf_qos.req, PM_QOS_DEFAULT_VALUE); + +
[Intel-gfx] [PATCH 04/10] Revert "cpufreq: intel_pstate: Drop ->update_util from pstate_funcs"
This reverts commit c4f3f70cacba2fa19545389a12d09b606d2ad1cf. A future commit will introduce a new update_util implementation, so the pstate_funcs table entry is going to be useful. Signed-off-by: Francisco Jerez --- drivers/cpufreq/intel_pstate.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7fa869004cf0..8cb5bf419b40 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -277,6 +277,7 @@ static struct cpudata **all_cpu_data; * @get_scaling: Callback to get frequency scaling factor * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms + * @update_util: Active mode utilization update callback. * * Core and Atom CPU models have different way to get P State limits. This * structure is used to store those callbacks. @@ -290,6 +291,8 @@ struct pstate_funcs { int (*get_aperf_mperf_shift)(void); u64 (*get_val)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); + void (*update_util)(struct update_util_data *data, u64 time, + unsigned int flags); }; static struct pstate_funcs pstate_funcs __read_mostly; @@ -1877,6 +1880,7 @@ static struct pstate_funcs core_funcs = { .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, .get_val = core_get_val, + .update_util = intel_pstate_update_util, }; static const struct pstate_funcs silvermont_funcs = { @@ -1887,6 +1891,7 @@ static const struct pstate_funcs silvermont_funcs = { .get_val = atom_get_val, .get_scaling = silvermont_get_scaling, .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, }; static const struct pstate_funcs airmont_funcs = { @@ -1897,6 +1902,7 @@ static const struct pstate_funcs airmont_funcs = { .get_val = atom_get_val, .get_scaling = airmont_get_scaling, .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, }; static const struct pstate_funcs knl_funcs = { @@ -1907,6 
+1913,7 @@ static const struct pstate_funcs knl_funcs = { .get_aperf_mperf_shift = knl_get_aperf_mperf_shift, .get_scaling = core_get_scaling, .get_val = core_get_val, + .update_util = intel_pstate_update_util, }; #define ICPU(model, policy) \ @@ -2013,9 +2020,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, -(hwp_active ? - intel_pstate_update_util_hwp : - intel_pstate_update_util)); +pstate_funcs.update_util); cpu->update_util_set = true; } @@ -2584,6 +2589,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs) pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.get_val = funcs->get_val; pstate_funcs.get_vid = funcs->get_vid; + pstate_funcs.update_util = funcs->update_util; pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift; } @@ -2750,8 +2756,11 @@ static int __init intel_pstate_init(void) id = x86_match_cpu(hwp_support_ids); if (id) { copy_cpu_funcs(&core_funcs); - if (!no_hwp) { + if (no_hwp) { + pstate_funcs.update_util = intel_pstate_update_util; + } else { hwp_active++; + pstate_funcs.update_util = intel_pstate_update_util_hwp; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; goto hwp_cpu_matched; -- 2.22.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 09/10] OPTIONAL: cpufreq: intel_pstate: Add tracing of VLP controller status.
Signed-off-by: Francisco Jerez --- drivers/cpufreq/intel_pstate.c | 9 ++--- include/trace/events/power.h | 13 + 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 050cc8f03c26..c4558a131660 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2233,7 +2233,8 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu) sample->aperf, sample->tsc, get_avg_frequency(cpu), - fp_toint(cpu->iowait_boost * 100)); + fp_toint(cpu->iowait_boost * 100), + cpu->vlp.status.value); } static void intel_pstate_adjust_pstate_range(struct cpudata *cpu, @@ -2273,7 +2274,8 @@ static void intel_pstate_adjust_pstate_range(struct cpudata *cpu, sample->aperf, sample->tsc, get_avg_frequency(cpu), - fp_toint(cpu->iowait_boost * 100)); + fp_toint(cpu->iowait_boost * 100), + cpu->vlp.status.value); } static void intel_pstate_update_util(struct update_util_data *data, u64 time, @@ -2782,7 +2784,8 @@ static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, in sample->aperf, sample->tsc, get_avg_frequency(cpu), - fp_toint(cpu->iowait_boost * 100)); + fp_toint(cpu->iowait_boost * 100), + 0); } static int intel_cpufreq_target(struct cpufreq_policy *policy, diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 7e4b52e8ca3a..e94d5e618175 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -72,7 +72,8 @@ TRACE_EVENT(pstate_sample, u64 aperf, u64 tsc, u32 freq, - u32 io_boost + u32 io_boost, + u32 vlp_status ), TP_ARGS(core_busy, @@ -83,7 +84,8 @@ TRACE_EVENT(pstate_sample, aperf, tsc, freq, - io_boost + io_boost, + vlp_status ), TP_STRUCT__entry( @@ -96,6 +98,7 @@ TRACE_EVENT(pstate_sample, __field(u64, tsc) __field(u32, freq) __field(u32, io_boost) + __field(u32, vlp_status) ), TP_fast_assign( @@ -108,9 +111,10 @@ TRACE_EVENT(pstate_sample, __entry->tsc = tsc; __entry->freq = freq; __entry->io_boost = io_boost; + 
__entry->vlp_status = vlp_status; ), - TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu", + TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu vlp=%lu", (unsigned long)__entry->core_busy, (unsigned long)__entry->scaled_busy, (unsigned long)__entry->from, @@ -119,7 +123,8 @@ TRACE_EVENT(pstate_sample, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, (unsigned long)__entry->freq, - (unsigned long)__entry->io_boost + (unsigned long)__entry->io_boost, + (unsigned long)__entry->vlp_status ) ); -- 2.22.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 06/10] cpufreq: intel_pstate: Implement VLP controller target P-state range estimation.
The function introduced here calculates a P-state range derived from the statistics computed in the previous patch which will be used to drive the HWP P-state range or (if HWP is not available) as basis for some additional kernel-side frequency selection mechanism which will choose a single P-state from the range. This is meant to provide a variably low-pass filtering effect that will damp oscillations below a frequency threshold that can be specified by device drivers via PM QoS in order to achieve energy-efficient behavior in cases where the system has an IO bottleneck. Signed-off-by: Francisco Jerez --- drivers/cpufreq/intel_pstate.c | 157 + 1 file changed, 157 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 12ee350db2a9..cecadfec8bc1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -207,17 +207,34 @@ struct vlp_status_sample { int32_t realtime_avg; }; +/** + * VLP controller state used for the estimation of the target P-state + * range, computed by get_vlp_target_range() from the heuristic status + * information defined above in struct vlp_status_sample. + */ +struct vlp_target_range { + unsigned int value[2]; + int32_t p_base; +}; + /** * struct vlp_data - VLP controller parameters and state. * @sample_interval_ns: Update interval in ns. * @sample_frequency_hz: Reciprocal of the update interval in Hz. + * @gain*: Response factor of the controller relative to each + * one of its linear input variables as fixed-point + * fraction. */ struct vlp_data { s64 sample_interval_ns; int32_t sample_frequency_hz; + int32_t gain_aggr; + int32_t gain_rt; + int32_t gain; struct vlp_input_stats stats; struct vlp_status_sample status; + struct vlp_target_range target; }; /** @@ -323,12 +340,18 @@ static struct cpudata **all_cpu_data; /** * struct vlp_params - VLP controller static configuration * @sample_interval_ms: Update interval in ms. 
+ * @setpoint_*_pml: Target CPU utilization at which the controller is + * expected to leave the current P-state untouched, + * as an integer per mille. * @avg*_hz:Exponential averaging frequencies of the various * low-pass filters as an integer in Hz. */ struct vlp_params { int sample_interval_ms; + int setpoint_0_pml; + int setpoint_aggr_pml; int avg_hz; + int realtime_gain_pml; int debug; }; @@ -362,7 +385,10 @@ struct pstate_funcs { static struct pstate_funcs pstate_funcs __read_mostly; static struct vlp_params vlp_params __read_mostly = { .sample_interval_ms = 10, + .setpoint_0_pml = 900, + .setpoint_aggr_pml = 1500, .avg_hz = 2, + .realtime_gain_pml = 12000, .debug = 0, }; @@ -1873,6 +1899,11 @@ static void intel_pstate_reset_vlp(struct cpudata *cpu) vlp->sample_interval_ns = vlp_params.sample_interval_ms * NSEC_PER_MSEC; vlp->sample_frequency_hz = max(1u, (uint32_t)MSEC_PER_SEC / vlp_params.sample_interval_ms); + vlp->gain_rt = div_fp(cpu->pstate.max_pstate * + vlp_params.realtime_gain_pml, 1000); + vlp->gain_aggr = max(1, div_fp(1000, vlp_params.setpoint_aggr_pml)); + vlp->gain = max(1, div_fp(1000, vlp_params.setpoint_0_pml)); + vlp->target.p_base = 0; vlp->stats.last_response_frequency_hz = vlp_params.avg_hz; } @@ -1996,6 +2027,132 @@ static const struct vlp_status_sample *get_vlp_status_sample( return last_status; } +/** + * Calculate the target P-state range for the next update period. + * Uses a variably low-pass-filtering controller intended to improve + * energy efficiency when a CPU response frequency target is specified + * via PM QoS (e.g. under IO-bound conditions). + */ +static const struct vlp_target_range *get_vlp_target_range(struct cpudata *cpu) +{ + struct vlp_data *vlp = &cpu->vlp; + struct vlp_target_range *last_target = &vlp->target; + + /* +* P-state limits in fixed-point as allowed by the policy. 
+*/ + const int32_t p0 = int_tofp(max(cpu->pstate.min_pstate, + cpu->min_perf_ratio)); + const int32_t p1 = int_tofp(cpu->max_perf_ratio); + + /* +* Observed average P-state during the sampling period. The +* conservative path (po_cons) uses the TSC increment as +* denominator which will give the minimum (arguably most +* energy-efficient) P-state able to accomplish the observed +* amount of work during the sampling period. +* +* The downside of that somewhat optimistic estimate is that +* it
[Intel-gfx] [PATCH 05/10] cpufreq: intel_pstate: Implement VLP controller statistics and status calculation.
The goal of the helper code introduced here is to compute two informational data structures: struct vlp_input_stats aggregating various scheduling and PM statistics gathered in every call of the update_util() hook, and struct vlp_status_sample which contains status information derived from the former indicating whether the system is likely to have an IO or CPU bottleneck. This will be used as main heuristic input by the new variably low-pass filtering controller (AKA VLP) that will assist the HWP at finding a reasonably energy-efficient P-state given the additional information available to the kernel about I/O utilization and scheduling behavior. Signed-off-by: Francisco Jerez --- drivers/cpufreq/intel_pstate.c | 230 + 1 file changed, 230 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8cb5bf419b40..12ee350db2a9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,8 @@ #include #include +#include "../../kernel/sched/sched.h" + #define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC) #define INTEL_CPUFREQ_TRANSITION_LATENCY 2 @@ -59,6 +62,11 @@ static inline int32_t mul_fp(int32_t x, int32_t y) return ((int64_t)x * (int64_t)y) >> FRAC_BITS; } +static inline int rnd_fp(int32_t x) +{ + return (x + (1 << (FRAC_BITS - 1))) >> FRAC_BITS; +} + static inline int32_t div_fp(s64 x, s64 y) { return div64_s64((int64_t)x << FRAC_BITS, y); @@ -169,6 +177,49 @@ struct vid_data { int32_t ratio; }; +/** + * Scheduling and PM statistics gathered by update_vlp_sample() at + * every call of the VLP update_state() hook, used as heuristic + * inputs. 
+ */ +struct vlp_input_stats { + int32_t realtime_count; + int32_t io_wait_count; + uint32_t max_response_frequency_hz; + uint32_t last_response_frequency_hz; +}; + +enum vlp_status { + VLP_BOTTLENECK_IO = 1 << 0, + /* +* XXX - Add other status bits here indicating a CPU or TDP +* bottleneck. +*/ +}; + +/** + * Heuristic status information calculated by get_vlp_status_sample() + * from struct vlp_input_stats above, indicating whether the system + * has a potential IO or latency bottleneck. + */ +struct vlp_status_sample { + enum vlp_status value; + int32_t realtime_avg; +}; + +/** + * struct vlp_data - VLP controller parameters and state. + * @sample_interval_ns: Update interval in ns. + * @sample_frequency_hz: Reciprocal of the update interval in Hz. + */ +struct vlp_data { + s64 sample_interval_ns; + int32_t sample_frequency_hz; + + struct vlp_input_stats stats; + struct vlp_status_sample status; +}; + /** * struct global_params - Global parameters, mostly tunable via sysfs. * @no_turbo: Whether or not to use turbo P-states. @@ -239,6 +290,7 @@ struct cpudata { struct pstate_data pstate; struct vid_data vid; + struct vlp_data vlp; u64 last_update; u64 last_sample_time; @@ -268,6 +320,18 @@ struct cpudata { static struct cpudata **all_cpu_data; +/** + * struct vlp_params - VLP controller static configuration + * @sample_interval_ms: Update interval in ms. + * @avg*_hz:Exponential averaging frequencies of the various + * low-pass filters as an integer in Hz. 
+ */ +struct vlp_params { + int sample_interval_ms; + int avg_hz; + int debug; +}; + /** * struct pstate_funcs - Per CPU model specific callbacks * @get_max: Callback to get maximum non turbo effective P state @@ -296,6 +360,11 @@ struct pstate_funcs { }; static struct pstate_funcs pstate_funcs __read_mostly; +static struct vlp_params vlp_params __read_mostly = { + .sample_interval_ms = 10, + .avg_hz = 2, + .debug = 0, +}; static int hwp_active __read_mostly; static int hwp_mode_bdw __read_mostly; @@ -1793,6 +1862,167 @@ static inline int32_t get_target_pstate(struct cpudata *cpu) return target; } +/** + * Initialize the struct vlp_data of the specified CPU to the defaults + * calculated from @vlp_params. + */ +static void intel_pstate_reset_vlp(struct cpudata *cpu) +{ + struct vlp_data *vlp = &cpu->vlp; + + vlp->sample_interval_ns = vlp_params.sample_interval_ms * NSEC_PER_MSEC; + vlp->sample_frequency_hz = max(1u, (uint32_t)MSEC_PER_SEC / + vlp_params.sample_interval_ms); + vlp->stats.last_response_frequency_hz = vlp_params.avg_hz; +} + +/** + * Fixed point representation with twice the usual number of + * fractional bits. + */ +#define DFRAC_BITS 16 +#define DFRAC_ONE (1 << DFRAC_BITS) +#define DFRAC_MAX_INT (0u - (uint32_t)DFRAC_ONE) + +/** + * Fast but rather inaccurate piecewise-linear ap
Re: [Intel-gfx] [PATCH 02/10] drm/i915: Adjust PM QoS response frequency based on GPU load.
Quoting Francisco Jerez (2020-03-10 21:41:55) > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c > b/drivers/gpu/drm/i915/gt/intel_lrc.c > index b9b3f78f1324..a5d7a80b826d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -1577,6 +1577,11 @@ static void execlists_submit_ports(struct > intel_engine_cs *engine) > /* we need to manually load the submit queue */ > if (execlists->ctrl_reg) > writel(EL_CTRL_LOAD, execlists->ctrl_reg); > + > + if (execlists_num_ports(execlists) > 1 && pending[1] is always defined, the minimum submission is one slot, with pending[1] as the sentinel NULL. > + execlists->pending[1] && > + !atomic_xchg(&execlists->overload, 1)) > + intel_gt_pm_active_begin(&engine->i915->gt); engine->gt > } > > static bool ctx_single_port_submission(const struct intel_context *ce) > @@ -2213,6 +2218,12 @@ cancel_port_requests(struct intel_engine_execlists * > const execlists) > clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); > > WRITE_ONCE(execlists->active, execlists->inflight); > + > + if (atomic_xchg(&execlists->overload, 0)) { > + struct intel_engine_cs *engine = > + container_of(execlists, typeof(*engine), execlists); > + intel_gt_pm_active_end(&engine->i915->gt); > + } > } > > static inline void > @@ -2386,6 +2397,9 @@ static void process_csb(struct intel_engine_cs *engine) > /* port0 completed, advanced to port1 */ > trace_ports(execlists, "completed", > execlists->active); > > + if (atomic_xchg(&execlists->overload, 0)) > + intel_gt_pm_active_end(&engine->i915->gt); So this looses track if we preempt a dual-ELSP submission with a single-ELSP submission (and never go back to dual). If you move this to the end of the loop and check if (!execlists->active[1] && atomic_xchg(&execlists->overload, 0)) intel_gt_pm_active_end(engine->gt); so that it covers both preemption/promotion and completion. However, that will fluctuate quite rapidly. (And runs the risk of exceeding the sentinel.) 
An alternative approach would be to couple along schedule_in/schedule_out atomic_set(overload, -1); __execlists_schedule_in: if (!atomic_fetch_inc(overload)) intel_gt_pm_active_begin(engine->gt); __execlists_schedule_out: if (!atomic_dec_return(overload)) intel_gt_pm_active_end(engine->gt); which would mean we are overloaded as soon as we try to submit an overlapping ELSP. The metric feels very multiple client (game + display server, or saturated transcode) centric. In the endless kernel world, we expect 100% engine utilisation from a single context, and never a dual-ELSP submission. They are also likely to want to avoid being throttled to conserve TDP for the CPU. Should we also reduce the overload for the number of clients who are waiting for interrupts from the GPU, so that their wakeup latency is not impacted? -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Fi.CI.BAT: failure for Gen11 workarounds
On Tue, 2020-03-10 at 17:30 +, Patchwork wrote: > == Series Details == > > Series: Gen11 workarounds > URL : https://patchwork.freedesktop.org/series/74475/ > State : failure > > == Summary == > > CI Bug Log - changes from CI_DRM_8106 -> Patchwork_16900 > > > Summary > --- > > **FAILURE** > > Serious unknown changes coming with Patchwork_16900 absolutely need > to be > verified manually. > > If you think the reported changes have nothing to do with the > changes > introduced in Patchwork_16900, please notify your bug team to allow > them > to document this new failure mode, which will reduce false > positives in CI. > > External URL: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/index.html > > Possible new issues > --- > > Here are the unknown changes that may have been introduced in > Patchwork_16900: > > ### IGT changes ### > > Possible regressions > > * igt@i915_selftest@live@dmabuf: > - fi-icl-u2: [PASS][1] -> [DMESG-WARN][2] +33 similar > issues >[1]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-u2/igt@i915_selftest@l...@dmabuf.html >[2]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-u2/igt@i915_selftest@l...@dmabuf.html Looks like there some problem with the WAs in patch 4 and 7. 
> > * igt@i915_selftest@live@memory_region: > - fi-icl-y: [PASS][3] -> [DMESG-WARN][4] +35 similar > issues >[3]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-y/igt@i915_selftest@live@memory_region.html >[4]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-y/igt@i915_selftest@live@memory_region.html > > * igt@i915_selftest@live@perf: > - fi-icl-guc: [PASS][5] -> [DMESG-WARN][6] +35 similar > issues >[5]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-guc/igt@i915_selftest@l...@perf.html >[6]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-guc/igt@i915_selftest@l...@perf.html > > * igt@i915_selftest@live@vma: > - fi-icl-dsi: [PASS][7] -> [DMESG-WARN][8] +35 similar > issues >[7]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-dsi/igt@i915_selftest@l...@vma.html >[8]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-dsi/igt@i915_selftest@l...@vma.html > > > Suppressed > > The following results come from untrusted machines, tests, or > statuses. > They do not affect the overall result. 
> > * igt@i915_selftest@live@mman: > - {fi-ehl-1}: [PASS][9] -> [DMESG-WARN][10] +36 similar > issues >[9]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-ehl-1/igt@i915_selftest@l...@mman.html >[10]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-ehl-1/igt@i915_selftest@l...@mman.html > > * {igt@i915_selftest@live@ring_submission}: > - fi-icl-y: [PASS][11] -> [DMESG-WARN][12] >[11]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-y/igt@i915_selftest@live@ring_submission.html >[12]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-y/igt@i915_selftest@live@ring_submission.html > - fi-icl-u2: [PASS][13] -> [DMESG-WARN][14] >[13]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-u2/igt@i915_selftest@live@ring_submission.html >[14]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-u2/igt@i915_selftest@live@ring_submission.html > - fi-icl-dsi: [PASS][15] -> [DMESG-WARN][16] >[15]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-dsi/igt@i915_selftest@live@ring_submission.html >[16]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-dsi/igt@i915_selftest@live@ring_submission.html > - fi-icl-guc: [PASS][17] -> [DMESG-WARN][18] >[17]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-guc/igt@i915_selftest@live@ring_submission.html >[18]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-guc/igt@i915_selftest@live@ring_submission.html > > > Known issues > > > Here are the changes found in Patchwork_16900 that come from known > issues: > > ### IGT changes ### > > Issues hit > > * igt@gem_exec_suspend@basic-s4-devices: > - fi-tgl-y: [PASS][19] -> [FAIL][20] ([CI#94]) >[19]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-tgl-y/igt@gem_exec_susp...@basic-s4-devices.html >[20]: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-tgl-y/igt@gem_exec_susp...@basic-s4-devices.html > > * igt@i915_selftest@live@requests: > 
- fi-icl-dsi: [PASS][21] -> [DMESG-WARN][22] > ([fdo#109644] / [fdo#110464]) >[21]: > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-dsi/igt@i915_selftest@l...@requests.html >[22]: > ht
[Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/display: conversion to drm_device based logging macros (rev6)
== Series Details == Series: drm/i915/display: conversion to drm_device based logging macros (rev6) URL : https://patchwork.freedesktop.org/series/72760/ State : success == Summary == CI Bug Log - changes from CI_DRM_8106_full -> Patchwork_16901_full Summary --- **SUCCESS** No regressions found. Known issues Here are the changes found in Patchwork_16901_full that come from known issues: ### IGT changes ### Issues hit * igt@gem_exec_balancer@smoke: - shard-iclb: [PASS][1] -> [SKIP][2] ([fdo#110854]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb1/igt@gem_exec_balan...@smoke.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-iclb8/igt@gem_exec_balan...@smoke.html * igt@gem_exec_parallel@vcs1-fds: - shard-iclb: [PASS][3] -> [SKIP][4] ([fdo#112080]) +10 similar issues [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb4/igt@gem_exec_paral...@vcs1-fds.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-iclb5/igt@gem_exec_paral...@vcs1-fds.html * igt@gem_exec_schedule@implicit-read-write-bsd2: - shard-iclb: [PASS][5] -> [SKIP][6] ([fdo#109276] / [i915#677]) +1 similar issue [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb4/igt@gem_exec_sched...@implicit-read-write-bsd2.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-iclb3/igt@gem_exec_sched...@implicit-read-write-bsd2.html * igt@gem_exec_schedule@preempt-other-bsd: - shard-iclb: [PASS][7] -> [SKIP][8] ([fdo#112146]) +3 similar issues [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb8/igt@gem_exec_sched...@preempt-other-bsd.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-iclb2/igt@gem_exec_sched...@preempt-other-bsd.html * igt@gem_exec_schedule@preempt-queue-bsd1: - shard-iclb: [PASS][9] -> [SKIP][10] ([fdo#109276]) +17 similar issues [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb1/igt@gem_exec_sched...@preempt-queue-bsd1.html [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-iclb8/igt@gem_exec_sched...@preempt-queue-bsd1.html * igt@kms_cursor_crc@pipe-a-cursor-suspend: - shard-kbl: [PASS][11] -> [DMESG-WARN][12] ([i915#180]) +3 similar issues [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-kbl7/igt@kms_cursor_...@pipe-a-cursor-suspend.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-kbl1/igt@kms_cursor_...@pipe-a-cursor-suspend.html * igt@kms_flip@flip-vs-expired-vblank: - shard-skl: [PASS][13] -> [FAIL][14] ([i915#46]) [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-skl10/igt@kms_f...@flip-vs-expired-vblank.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-skl2/igt@kms_f...@flip-vs-expired-vblank.html * igt@kms_flip@flip-vs-expired-vblank-interruptible: - shard-skl: [PASS][15] -> [FAIL][16] ([i915#79]) [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-skl10/igt@kms_f...@flip-vs-expired-vblank-interruptible.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-skl2/igt@kms_f...@flip-vs-expired-vblank-interruptible.html * igt@kms_flip@flip-vs-suspend-interruptible: - shard-apl: [PASS][17] -> [DMESG-WARN][18] ([i915#180]) +1 similar issue [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-apl6/igt@kms_f...@flip-vs-suspend-interruptible.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-apl4/igt@kms_f...@flip-vs-suspend-interruptible.html * igt@kms_flip@plain-flip-ts-check-interruptible: - shard-skl: [PASS][19] -> [FAIL][20] ([i915#34]) [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-skl1/igt@kms_f...@plain-flip-ts-check-interruptible.html [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-skl5/igt@kms_f...@plain-flip-ts-check-interruptible.html * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-pwrite: - shard-glk: [PASS][21] -> [FAIL][22] ([i915#49]) [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-glk8/igt@kms_frontbuffer_track...@fbc-2p-primscrn-spr-indfb-draw-pwrite.html [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-glk8/igt@kms_frontbuffer_track...@fbc-2p-primscrn-spr-indfb-draw-pwrite.html * igt@kms_psr2_su@frontbuffer: - shard-iclb: [PASS][23] -> [SKIP][24] ([fdo#109642] / [fdo#111068]) [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/shard-iclb2/igt@kms_psr2...@frontbuffer.html [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16901/shard-iclb1/igt@kms_psr2...@frontbuffer.html * igt@kms_psr@psr2_cursor_render:
Re: [Intel-gfx] [PULL] gvt-next
On Tue, Mar 10, 2020 at 04:19:28PM +0800, Zhenyu Wang wrote: > > Hi, > > Here's more gvt change for -next. Mostly rebase and fix Chris's > cleanup on intel engine and dev_priv usage. And also one fix for CFL > after VFIO edid enabled in last gvt-next pull. pulled to dinq, Thanks, Rodrigo. > > thanks > -- > The following changes since commit a8bb49b64c4f4284fb36169bdd9fc6efd62eb26a: > > drm/i915/gvt: Fix drm_WARN issue where vgpu ptr is unavailable (2020-02-25 > 16:13:04 +0800) > > are available in the Git repository at: > > https://github.com/intel/gvt-linux tags/gvt-next-2020-03-10 > > for you to fetch changes up to a61ac1e75105a077ec1efd6923ae3c619f862304: > > drm/i915/gvt: Wean gvt off using dev_priv (2020-03-06 10:08:10 +0800) > > > gvt-next-2020-03-10 > > - Fix CFL dmabuf display after vfio edid enabling (Tina) > - Clean up scan non-priv batch debugfs entry (Chris) > - Use intel engines initialized in gvt, cleanup previous ring id (Chris) > - Use intel_gt instead (Chris) > > > Chris Wilson (3): > drm/i915/gvt: cleanup debugfs scan_nonprivbb > drm/i915/gvt: Wean gvt off dev_priv->engine[] > drm/i915/gvt: Wean gvt off using dev_priv > > Tina Zhang (1): > drm/i915/gvt: Fix dma-buf display blur issue on CFL > > drivers/gpu/drm/i915/gvt/aperture_gm.c | 84 ++- > drivers/gpu/drm/i915/gvt/cfg_space.c| 8 +- > drivers/gpu/drm/i915/gvt/cmd_parser.c | 204 -- > drivers/gpu/drm/i915/gvt/debugfs.c | 45 +- > drivers/gpu/drm/i915/gvt/display.c | 21 +-- > drivers/gpu/drm/i915/gvt/dmabuf.c | 4 +- > drivers/gpu/drm/i915/gvt/edid.c | 16 +- > drivers/gpu/drm/i915/gvt/execlist.c | 103 +++-- > drivers/gpu/drm/i915/gvt/execlist.h | 5 +- > drivers/gpu/drm/i915/gvt/fb_decoder.c | 6 +- > drivers/gpu/drm/i915/gvt/firmware.c | 16 +- > drivers/gpu/drm/i915/gvt/gtt.c | 50 +++ > drivers/gpu/drm/i915/gvt/gvt.c | 38 ++--- > drivers/gpu/drm/i915/gvt/gvt.h | 25 ++-- > drivers/gpu/drm/i915/gvt/handlers.c | 193 - > drivers/gpu/drm/i915/gvt/interrupt.c| 14 +- > drivers/gpu/drm/i915/gvt/kvmgt.c| 10 
+- > drivers/gpu/drm/i915/gvt/mmio.c | 6 +- > drivers/gpu/drm/i915/gvt/mmio.h | 4 +- > drivers/gpu/drm/i915/gvt/mmio_context.c | 127 > drivers/gpu/drm/i915/gvt/mmio_context.h | 5 +- > drivers/gpu/drm/i915/gvt/sched_policy.c | 25 ++-- > drivers/gpu/drm/i915/gvt/scheduler.c| 249 > +++- > drivers/gpu/drm/i915/gvt/scheduler.h| 9 +- > drivers/gpu/drm/i915/gvt/vgpu.c | 12 +- > 25 files changed, 601 insertions(+), 678 deletions(-) > > -- > Open Source Technology Center, Intel ltd. > > $gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v6 2/2] drm/edid: Add function to parse EDID descriptors for monitor range
Adaptive Sync is a VESA feature so add a DRM core helper to parse the EDID's detailed descriptors to obtain the adaptive sync monitor range. Store this info as part of drm_display_info so it can be used across all drivers. This part of the code is stripped out of amdgpu's function amdgpu_dm_update_freesync_caps() to make it generic and be used across all DRM drivers v6: * Call it monitor_range (Ville) v5: * Use the renamed flags v4: * Use is_display_descriptor() (Ville) * Name the monitor range flags (Ville) v3: * Remove the edid parsing restriction for just DP (Nicholas) * Use drm_for_each_detailed_block (Ville) * Make the drm_get_adaptive_sync_range function static (Harry, Jani) v2: * Change vmin and vmax to use u8 (Ville) * Dont store pixel clock since that is just a max dotclock and not related to VRR mode (Manasi) Cc: Ville Syrjälä Cc: Harry Wentland Cc: Clinton A Taylor Cc: Kazlauskas Nicholas Signed-off-by: Manasi Navare Reviewed-by: Nicholas Kazlauskas --- drivers/gpu/drm/drm_edid.c | 44 + include/drm/drm_connector.h | 22 +++ 2 files changed, 66 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ad41764a4ebe..b269cd7f7679 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4938,6 +4938,47 @@ static void drm_parse_cea_ext(struct drm_connector *connector, } } +static +void get_monitor_range(struct detailed_timing *timing, + void *info_monitor_range) +{ + struct drm_monitor_range_info *monitor_range = info_monitor_range; + const struct detailed_non_pixel *data = &timing->data.other_data; + const struct detailed_data_monitor_range *range = &data->data.range; + + if (!is_display_descriptor((const u8 *)timing, EDID_DETAIL_MONITOR_RANGE)) + return; + + /* +* Check for flag range limits only. If flag == 1 then +* no additional timing information provided. 
+* Default GTF, GTF Secondary curve and CVT are not +* supported +*/ + if (range->flags != DRM_EDID_RANGE_LIMITS_ONLY_FLAG) + return; + + monitor_range->min_vfreq = range->min_vfreq; + monitor_range->max_vfreq = range->max_vfreq; +} + +static +void drm_get_monitor_range(struct drm_connector *connector, + const struct edid *edid) +{ + struct drm_display_info *info = &connector->display_info; + + if (!version_greater(edid, 1, 1)) + return; + + drm_for_each_detailed_block((u8 *)edid, get_monitor_range, + &info->monitor_range); + + DRM_DEBUG_KMS("Supported Monitor Refresh rate range is %d Hz - %d Hz\n", + info->monitor_range.min_vfreq, + info->monitor_range.max_vfreq); +} + /* A connector has no EDID information, so we've got no EDID to compute quirks from. Reset * all of the values which would have been set from EDID */ @@ -4960,6 +5001,7 @@ drm_reset_display_info(struct drm_connector *connector) memset(&info->hdmi, 0, sizeof(info->hdmi)); info->non_desktop = 0; + memset(&info->monitor_range, 0, sizeof(info->monitor_range)); } u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid) @@ -4975,6 +5017,8 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP); + drm_get_monitor_range(connector, edid); + DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop); if (edid->revision < 3) diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 0df7a95ca5d9..19ae6bb5c85b 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -254,6 +254,23 @@ enum drm_panel_orientation { DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +/** + * struct drm_monitor_range_info - Panel's Monitor range in EDID for + * &drm_display_info + * + * This struct is used to store a frequency range supported by panel + * as parsed from EDID's detailed monitor range descriptor block. 
+ * + * @min_vfreq: This is the min supported refresh rate in Hz from + * EDID's detailed monitor range. + * @max_vfreq: This is the max supported refresh rate in Hz from + * EDID's detailed monitor range + */ +struct drm_monitor_range_info { + u8 min_vfreq; + u8 max_vfreq; +}; + /* * This is a consolidated colorimetry list supported by HDMI and * DP protocol standard. The respective connectors will register @@ -473,6 +490,11 @@ struct drm_display_info { * @non_desktop: Non desktop display (HMD). */ bool non_desktop; + + /** +* @monitor_range: Frequency range supported by monitor range descriptor +*/ + struct drm_monitor_range_info monitor_range; }; int drm_display_info_set_bus_formats(str
[Intel-gfx] [PATCH v6 1/2] drm/edid: Name the detailed monitor range flags
This patch adds defines for the detailed monitor range flags as per the EDID specification. v2: * Rename the flags with DRM_EDID_ (Jani N) Suggested-by: Ville Syrjälä Cc: Ville Syrjälä Cc: Harry Wentland Cc: Clinton A Taylor Cc: Kazlauskas Nicholas Cc: Jani Nikula Signed-off-by: Manasi Navare Reviewed-by: Nicholas Kazlauskas --- include/drm/drm_edid.h | 5 + 1 file changed, 5 insertions(+) diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index f0b03d401c27..34b15e3d070c 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -91,6 +91,11 @@ struct detailed_data_string { u8 str[13]; } __attribute__((packed)); +#define DRM_EDID_DEFAULT_GTF_SUPPORT_FLAG 0x00 +#define DRM_EDID_RANGE_LIMITS_ONLY_FLAG 0x01 +#define DRM_EDID_SECONDARY_GTF_SUPPORT_FLAG 0x02 +#define DRM_EDID_CVT_SUPPORT_FLAG 0x04 + struct detailed_data_monitor_range { u8 min_vfreq; u8 max_vfreq; -- 2.19.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Fi.CI.BAT: failure for Gen11 workarounds
On Tue, Mar 10, 2020 at 03:28:01PM -0700, Souza, Jose wrote: > On Tue, 2020-03-10 at 17:30 +, Patchwork wrote: > > == Series Details == > > > > Series: Gen11 workarounds > > URL : https://patchwork.freedesktop.org/series/74475/ > > State : failure > > > > == Summary == > > > > CI Bug Log - changes from CI_DRM_8106 -> Patchwork_16900 > > > > > > Summary > > --- > > > > **FAILURE** > > > > Serious unknown changes coming with Patchwork_16900 absolutely need > > to be > > verified manually. > > > > If you think the reported changes have nothing to do with the > > changes > > introduced in Patchwork_16900, please notify your bug team to allow > > them > > to document this new failure mode, which will reduce false > > positives in CI. > > > > External URL: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/index.html > > > > Possible new issues > > --- > > > > Here are the unknown changes that may have been introduced in > > Patchwork_16900: > > > > ### IGT changes ### > > > > Possible regressions > > > > * igt@i915_selftest@live@dmabuf: > > - fi-icl-u2: [PASS][1] -> [DMESG-WARN][2] +33 similar > > issues > >[1]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-u2/igt@i915_selftest@l...@dmabuf.html > >[2]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-u2/igt@i915_selftest@l...@dmabuf.html > > Looks like there some problem with the WAs in patch 4 and 7. Wa_1406306137 should actually move to the context section; that register is part of the context on gen11, but not on gen12, so I'll do that in version 2. Wa_1409178092 had problems when it was first implemented in the driver (under a different name). My hope was that other MCR changes (and moving the WA to the proper place) would fix those problems, but it still seems that the workaround doesn't stick. I'll probably just drop it again in v2 while we seek more guidance from the hardware people. 
Matt > > > > > * igt@i915_selftest@live@memory_region: > > - fi-icl-y: [PASS][3] -> [DMESG-WARN][4] +35 similar > > issues > >[3]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-y/igt@i915_selftest@live@memory_region.html > >[4]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-y/igt@i915_selftest@live@memory_region.html > > > > * igt@i915_selftest@live@perf: > > - fi-icl-guc: [PASS][5] -> [DMESG-WARN][6] +35 similar > > issues > >[5]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-guc/igt@i915_selftest@l...@perf.html > >[6]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-guc/igt@i915_selftest@l...@perf.html > > > > * igt@i915_selftest@live@vma: > > - fi-icl-dsi: [PASS][7] -> [DMESG-WARN][8] +35 similar > > issues > >[7]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-dsi/igt@i915_selftest@l...@vma.html > >[8]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-dsi/igt@i915_selftest@l...@vma.html > > > > > > Suppressed > > > > The following results come from untrusted machines, tests, or > > statuses. > > They do not affect the overall result. 
> > > > * igt@i915_selftest@live@mman: > > - {fi-ehl-1}: [PASS][9] -> [DMESG-WARN][10] +36 similar > > issues > >[9]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-ehl-1/igt@i915_selftest@l...@mman.html > >[10]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-ehl-1/igt@i915_selftest@l...@mman.html > > > > * {igt@i915_selftest@live@ring_submission}: > > - fi-icl-y: [PASS][11] -> [DMESG-WARN][12] > >[11]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-y/igt@i915_selftest@live@ring_submission.html > >[12]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-y/igt@i915_selftest@live@ring_submission.html > > - fi-icl-u2: [PASS][13] -> [DMESG-WARN][14] > >[13]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-u2/igt@i915_selftest@live@ring_submission.html > >[14]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-u2/igt@i915_selftest@live@ring_submission.html > > - fi-icl-dsi: [PASS][15] -> [DMESG-WARN][16] > >[15]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-dsi/igt@i915_selftest@live@ring_submission.html > >[16]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-dsi/igt@i915_selftest@live@ring_submission.html > > - fi-icl-guc: [PASS][17] -> [DMESG-WARN][18] > >[17]: > > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8106/fi-icl-guc/igt@i915_selftest@live@ring_submission.html > >[18]: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16900/fi-icl-guc/igt@i915_selftest@live@ring_submissio
Re: [Intel-gfx] [PATCH 02/10] drm/i915: Adjust PM QoS response frequency based on GPU load.
Chris Wilson writes: > Quoting Francisco Jerez (2020-03-10 21:41:55) >> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c >> b/drivers/gpu/drm/i915/gt/intel_lrc.c >> index b9b3f78f1324..a5d7a80b826d 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >> @@ -1577,6 +1577,11 @@ static void execlists_submit_ports(struct >> intel_engine_cs *engine) >> /* we need to manually load the submit queue */ >> if (execlists->ctrl_reg) >> writel(EL_CTRL_LOAD, execlists->ctrl_reg); >> + >> + if (execlists_num_ports(execlists) > 1 && > pending[1] is always defined, the minimum submission is one slot, with > pending[1] as the sentinel NULL. > >> + execlists->pending[1] && >> + !atomic_xchg(&execlists->overload, 1)) >> + intel_gt_pm_active_begin(&engine->i915->gt); > > engine->gt > Applied your suggestions above locally, will probably wait to have a few more changes batched up before sending a v2. >> } >> >> static bool ctx_single_port_submission(const struct intel_context *ce) >> @@ -2213,6 +2218,12 @@ cancel_port_requests(struct intel_engine_execlists * >> const execlists) >> clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); >> >> WRITE_ONCE(execlists->active, execlists->inflight); >> + >> + if (atomic_xchg(&execlists->overload, 0)) { >> + struct intel_engine_cs *engine = >> + container_of(execlists, typeof(*engine), execlists); >> + intel_gt_pm_active_end(&engine->i915->gt); >> + } >> } >> >> static inline void >> @@ -2386,6 +2397,9 @@ static void process_csb(struct intel_engine_cs *engine) >> /* port0 completed, advanced to port1 */ >> trace_ports(execlists, "completed", >> execlists->active); >> >> + if (atomic_xchg(&execlists->overload, 0)) >> + intel_gt_pm_active_end(&engine->i915->gt); > > So this looses track if we preempt a dual-ELSP submission with a > single-ELSP submission (and never go back to dual). > Yes, good point. 
You're right that if a dual-ELSP submission gets preempted by a single-ELSP submission "overload" will remain signaled until the first completion interrupt arrives (e.g. from the preempting submission). > If you move this to the end of the loop and check > > if (!execlists->active[1] && atomic_xchg(&execlists->overload, 0)) > intel_gt_pm_active_end(engine->gt); > > so that it covers both preemption/promotion and completion. > That sounds reasonable. > However, that will fluctuate quite rapidly. (And runs the risk of > exceeding the sentinel.) > > An alternative approach would be to couple along > schedule_in/schedule_out > > atomic_set(overload, -1); > > __execlists_schedule_in: > if (!atomic_fetch_inc(overload) > intel_gt_pm_active_begin(engine->gt); > __execlists_schedule_out: > if (!atomic_dec_return(overload) > intel_gt_pm_active_end(engine->gt); > > which would mean we are overloaded as soon as we try to submit an > overlapping ELSP. > That sounds good to me too, and AFAICT would have roughly the same behavior as this metric except for the preemption corner case you mention above. I'll try this and verify that I get approximately the same performance numbers. > > The metric feels very multiple client (game + display server, or > saturated transcode) centric. In the endless kernel world, we expect > 100% engine utilisation from a single context, and never a dual-ELSP > submission. They are also likely to want to avoid being throttled to > converse TDP for the CPU. > Yes, this metric is fairly conservative, it won't trigger in all cases which would potentially benefit from the energy efficiency optimization, only where we can be reasonably certain that CPU latency is not critical in order to keep the GPU busy (e.g. because the CS has an additional ELSP port pending execution that will immediately kick in as soon as the current one completes). 
My original approach was to call intel_gt_pm_active_begin() directly as soon as the first ELSP is submitted to the GPU, which was somewhat more effective at improving the energy efficiency of the system than waiting for the second port to be in use, but it involved a slight execlists submission latency cost that led to some regressions. It would certainly cover the single-context case you have in mind though. I'll get some updated numbers with my previous approach so we can decide which one provides a better trade-off. > Should we also reduce the overload for the number of clients who are > waiting for interrupts from the GPU, so that their wakeup latency is not > impacted? A number of clients waiting doesn't necessarily indicate that wake-up latency is a concern. It frequently indicates the opposite: That the GPU has a bottleneck which will only be exacerbated by attemp
[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gt: Pull checking rps->pm_events under the irq_lock (rev2)
== Series Details == Series: drm/i915/gt: Pull checking rps->pm_events under the irq_lock (rev2) URL : https://patchwork.freedesktop.org/series/74510/ State : warning == Summary == $ dim checkpatch origin/drm-tip 76ddbcfdb7a0 drm/i915/gt: Pull checking rps->pm_events under the irq_lock -:7: WARNING:TYPO_SPELLING: 'diable' may be misspelled - perhaps 'disable'? #7: write in rps_diable_interrupts. -:31: WARNING:TYPO_SPELLING: 'interrrupt' may be misspelled - perhaps 'interrupt'? #31: interrrupt generation are also strongly ordered -- just this may not be total: 0 errors, 2 warnings, 0 checks, 72 lines checked ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 7/7] drm/i915/dp: Program vswing, pre-emphasis, test-pattern
Thank You Animesh. Tested using scope with small set of tests and automation run to completion. Started full DP PHY compliance (~160 tests) overnight. On Tue, 2020-03-10 at 21:07 +0530, Animesh Manna wrote: > This patch process phy compliance request by programming requested > vswing, pre-emphasis and test pattern. > > v1: Initial patch. > v2: Fixes added during testing with test-scope. (Khaled/Clint/Manasi) > - pipe used as argument during registers programming instead of port. > - TRANS_CONF must be disable/enable as well during ddi > disable/enable. > - harcoded PLTPAT 80 bit custom pattern as the DPR-100 does not set > it > in the sink’s DPCDs > - TRANS_DDI_FUNC_CTL DDI_Select (Bits 27:30) need to reset/set during > disable/enable. > > Cc: Clinton Taylor > Cc: Manasi Navare > Signed-off-by: Animesh Manna > Signed-off-by: Khaled Almahallawy > --- > drivers/gpu/drm/i915/display/intel_dp.c | 135 > > drivers/gpu/drm/i915/display/intel_dp.h | 1 + > 2 files changed, 136 insertions(+) > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c > b/drivers/gpu/drm/i915/display/intel_dp.c > index 16a4a48c8168..0239a72537ba 100644 > --- a/drivers/gpu/drm/i915/display/intel_dp.c > +++ b/drivers/gpu/drm/i915/display/intel_dp.c > @@ -5020,6 +5020,139 @@ static u8 intel_dp_prepare_phytest(struct > intel_dp *intel_dp) > return DP_TEST_ACK; > } > > +static void intel_dp_phy_pattern_update(struct intel_dp *intel_dp) > +{ > + struct drm_i915_private *dev_priv = > + to_i915(dp_to_dig_port(intel_dp)- > >base.base.dev); > + struct intel_digital_port *intel_dig_port = > dp_to_dig_port(intel_dp); > + struct drm_dp_phy_test_params *data = > + &intel_dp->compliance.test_data.phytest; > + struct intel_crtc *crtc = to_intel_crtc(intel_dig_port- > >base.base.crtc); > + enum pipe pipe = crtc->pipe; > + u32 temp; > + > + switch (data->phy_pattern) { > + case DP_PHY_TEST_PATTERN_NONE: > + DRM_DEBUG_KMS("Disable Phy Test Pattern\n"); > + intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe), 0x0); > + 
break; > + case DP_PHY_TEST_PATTERN_D10_2: > + DRM_DEBUG_KMS("Set D10.2 Phy Test Pattern\n"); > + intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe), > +DDI_DP_COMP_CTL_ENABLE | > DDI_DP_COMP_CTL_D10_2); > + break; > + case DP_PHY_TEST_PATTERN_ERROR_COUNT: > + DRM_DEBUG_KMS("Set Error Count Phy Test Pattern\n"); > + intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe), > +DDI_DP_COMP_CTL_ENABLE | > +DDI_DP_COMP_CTL_SCRAMBLED_0); > + break; > + case DP_PHY_TEST_PATTERN_PRBS7: > + DRM_DEBUG_KMS("Set PRBS7 Phy Test Pattern\n"); > + intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe), > +DDI_DP_COMP_CTL_ENABLE | > DDI_DP_COMP_CTL_PRBS7); > + break; > + case DP_PHY_TEST_PATTERN_80BIT_CUSTOM: > + DRM_DEBUG_KMS("Set 80Bit Custom Phy Test Pattern > 0x3e0f83e0 0x0f83e0f8 0xf83e\n"); > + temp = 0x3e0f83e0; > + intel_de_write(dev_priv, DDI_DP_COMP_PAT(pipe, 0), > temp); > + temp = 0x0f83e0f8; > + intel_de_write(dev_priv, DDI_DP_COMP_PAT(pipe, 1), > temp); > + temp = 0xf83e; > + intel_de_write(dev_priv, DDI_DP_COMP_PAT(pipe, 2), > temp); > + intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe), > +DDI_DP_COMP_CTL_ENABLE | > +DDI_DP_COMP_CTL_CUSTOM80); > + break; > + case DP_PHY_TEST_PATTERN_CP2520: > + DRM_DEBUG_KMS("Set HBR2 compliance Phy Test > Pattern\n"); > + temp = 0xFB; > + intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe), > +DDI_DP_COMP_CTL_ENABLE | > DDI_DP_COMP_CTL_HBR2 | > +temp); > + break; > + default: > + WARN(1, "Invalid Phy Test PAttern\n"); > + } > +} > + > +static void > +intel_dp_autotest_phy_ddi_disable(struct intel_dp *intel_dp) > +{ > + struct intel_digital_port *intel_dig_port = > dp_to_dig_port(intel_dp); > + struct drm_device *dev = intel_dig_port->base.base.dev; > + struct drm_i915_private *dev_priv = to_i915(dev); > + struct intel_crtc *crtc = to_intel_crtc(intel_dig_port- > >base.base.crtc); > + enum pipe pipe = crtc->pipe; > + u32 trans_ddi_func_ctl_value, trans_conf_value, > dp_tp_ctl_value; > + > + trans_ddi_func_ctl_value = intel_de_read(dev_priv, > + 
TRANS_DDI_FUNC_CTL(pip > e)); > + trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe)); > + dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe)); > + > + trans_ddi_func_ctl_value &= ~(0x1F << 27); Based on Clint's suggestion:
Re: [Intel-gfx] [RFC] GPU-bound energy efficiency improvements for the intel_pstate driver (v2).
On Tue, 2020-03-10 at 14:41 -0700, Francisco Jerez wrote: > [...] > Thanks in advance for any review feed-back and test reports. > > [PATCH 01/10] PM: QoS: Add CPU_RESPONSE_FREQUENCY global PM QoS > limit. > [PATCH 02/10] drm/i915: Adjust PM QoS response frequency based on GPU > load. > [PATCH 03/10] OPTIONAL: drm/i915: Expose PM QoS control parameters > via debugfs. > [PATCH 04/10] Revert "cpufreq: intel_pstate: Drop ->update_util from > pstate_funcs" > [PATCH 05/10] cpufreq: intel_pstate: Implement VLP controller > statistics and status calculation. > [PATCH 06/10] cpufreq: intel_pstate: Implement VLP controller target > P-state range estimation. > [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP > parts. > [PATCH 08/10] cpufreq: intel_pstate: Enable VLP controller based on > ACPI FADT profile and CPUID. > [PATCH 09/10] OPTIONAL: cpufreq: intel_pstate: Add tracing of VLP > controller status. > [PATCH 10/10] OPTIONAL: cpufreq: intel_pstate: Expose VLP controller > parameters via debugfs. > Do you have a debug patch (you don't need to submit it as a patch) which will allow me to dynamically disable/enable all these changes? I want to compare and do some measurements. Thanks, Srinivas > [1] https://marc.info/?l=linux-pm&m=152221943320908&w=2 > [2] > https://github.com/curro/linux/commits/intel_pstate-vlp-v2-hwp-only > [3] https://github.com/curro/linux/commits/intel_pstate-vlp-v2 > [4] > http://people.freedesktop.org/~currojerez/intel_pstate-vlp-v2/benchmark-comparison-ICL.log > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] linux-next: manual merge of the drm-intel tree with the drm-intel-fixes tree
Hi all, Today's linux-next merge of the drm-intel tree got a conflict in: drivers/gpu/drm/i915/gvt/vgpu.c between commit: 04d6067f1f19 ("drm/i915/gvt: Fix unnecessary schedule timer when no vGPU exits") from the drm-intel-fixes tree and commit: 12d5861973c7 ("drm/i915/gvt: Make WARN* drm specific where vgpu ptr is available") from the drm-intel tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc drivers/gpu/drm/i915/gvt/vgpu.c index 345c2aa3b491,abcde8ce1a9a.. --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@@ -271,18 -272,12 +272,19 @@@ void intel_gvt_release_vgpu(struct inte void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; + struct drm_i915_private *i915 = gvt->gt->i915; - WARN(vgpu->active, "vGPU is still active!\n"); - mutex_lock(&vgpu->vgpu_lock); - + drm_WARN(&i915->drm, vgpu->active, "vGPU is still active!\n"); + /* + * remove idr first so later clean can judge if need to stop + * service if no active vgpu. + */ + mutex_lock(&gvt->lock); + idr_remove(&gvt->vgpu_idr, vgpu->id); + mutex_unlock(&gvt->lock); + + mutex_lock(&vgpu->vgpu_lock); intel_gvt_debugfs_remove_vgpu(vgpu); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_submission(vgpu); pgpx0RCjIEAhC.pgp Description: OpenPGP digital signature ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 7/7] drm/i915/perf: add flushing ioctl
On Tue, 10 Mar 2020 13:44:30 -0700, Lionel Landwerlin wrote: > > On 09/03/2020 21:51, Umesh Nerlige Ramappa wrote: > > On Wed, Mar 04, 2020 at 09:56:28PM -0800, Dixit, Ashutosh wrote: > >> On Wed, 04 Mar 2020 00:52:34 -0800, Lionel Landwerlin wrote: > >>> > >>> On 04/03/2020 07:48, Dixit, Ashutosh wrote: > >>> > On Tue, 03 Mar 2020 14:19:05 -0800, Umesh Nerlige Ramappa wrote: > >>> >> From: Lionel Landwerlin > >>> >> > >>> >> With the currently available parameters for the i915-perf stream, > >>> >> there are still situations that are not well covered : > >>> >> > >>> >> If an application opens the stream with polling disable or at very > >>> low > >>> >> frequency and OA interrupt enabled, no data will be available even > >>> >> though somewhere between nothing and half of the OA buffer worth of > >>> >> data might have landed in memory. > >>> >> > >>> >> To solve this issue we have a new flush ioctl on the perf stream > >>> that > >>> >> forces the i915-perf driver to look at the state of the buffer when > >>> >> called and makes any data available through both poll() & read() > >>> type > >>> >> syscalls. > >>> >> > >>> >> v2: Version the ioctl (Joonas) > >>> >> v3: Rebase (Umesh) > >>> >> > >>> >> Signed-off-by: Lionel Landwerlin > >>> >> Signed-off-by: Umesh Nerlige Ramappa > >>> > >>> > [snip] > >>> > > >>> >> +/** > >>> >> + * i915_perf_flush_data - handle `I915_PERF_IOCTL_FLUSH_DATA` ioctl > >>> >> + * @stream: An enabled i915 perf stream > >>> >> + * > >>> >> + * The intention is to flush all the data available for reading > >>> from the OA > >>> >> + * buffer > >>> >> + */ > >>> >> +static void i915_perf_flush_data(struct i915_perf_stream *stream) > >>> >> +{ > >>> >> + stream->pollin = oa_buffer_check(stream, true); > >>> >> +} > >>> > Since this function doesn't actually wake up any thread (which anyway > >>> can > >>> > be done by sending a signal to the blocked thread), is the only > >>> purpose of > >>> > this function to update OA buffer head/tail? 
But in that it is not > >>> clear > >>> > why a separate ioctl should be created for this, can't the read() > >>> call > >>> > itself call oa_buffer_check() to update the OA buffer head/tail? > >>> > > >>> > Again just trying to minimize uapi changes if possible. > >>> > >>> Most applications will call read() after being notified by > >>> poll()/select() > >>> that some data is available. > >> > >> Correct this is the standard non blocking read behavior. > >> > >>> Changing that behavior will break some of the existing perf tests . > >> > >> I am not suggesting changing that (that standard non blocking read > >> behavior). > >> > >>> If any data is available, this new ioctl will wake up existing waiters > >>> on > >>> poll()/select(). > >> > >> The issue is we are not calling wake_up() in the above function to wake > >> up > >> any blocked waiters. The ioctl will just update the OA buffer head/tail > >> so > >> that (a) a subsequent blocking read will not block, or (b) a subsequent > >> non > >> blocking read will return valid data (not -EAGAIN), or (c) a poll/select > >> will not block but return immediately saying data is available. > >> > >> That is why it seems to me the ioctl is not required, updating the OA > >> buffer head/tail can be done as part of the read() (and the poll/select) > >> calls themselves. > >> > >> We will investigate if this can be done and update the patches in the > >> next > >> revision accordingly. Thanks! > > > > In this case, where we are trying to determine if there is any data in > > the oa buffer before the next interrupt has fired, user could call poll > > with a reasonable timeout to determine if data is available or not. That > > would eliminate the need for the flush ioctl. Thoughts? > > > > Thanks, > > Umesh > > > I almost forgot why this would cause problem. > > Checking the state of the buffer every time you call poll() will pretty > much guarantee you have at least one report to read every time. 
> > So that would lead to lot more wakeups :( > > The whole system has to stay "unidirectional" with either interrupts or > timeout driving the wakeups. > > This additional ioctl is the only solution I could find to add one more > input to the wakeup mechanism. Well, aren't we asking the app to sleep for time T and then call flush (followed by read)? Then we might as well ask them to sleep for time T and call poll? Or we can ask them set the hrtimer to T, skip the sleep and call poll (followed by read)? Aren't these 3 mechanisms equivalent? To me the last option seems to be the cleanest. Thanks! ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4] drm/i915: Init lspcon after HPD in intel_dp_detect()
> On Feb 15, 2020, at 01:56, Kai-Heng Feng wrote: > > On HP 800 G4 DM, if HDMI cable isn't plugged before boot, the HDMI port > becomes useless and never responds to cable hotplugging: > [3.031904] [drm:lspcon_init [i915]] *ERROR* Failed to probe lspcon > [3.031945] [drm:intel_ddi_init [i915]] *ERROR* LSPCON init failed on port > D > > Seems like the lspcon chip on the system in question only gets powered > after the cable is plugged. > > So let's call lspcon_init() dynamically to properly initialize the > lspcon chip and make HDMI port work. > > Signed-off-by: Kai-Heng Feng A gentle ping. > --- > v4: > - Trust VBT in intel_infoframe_init(). > - Init lspcon in intel_dp_detect(). > > v3: > - Make sure it's handled under long HPD case. > > v2: > - Move lspcon_init() inside of intel_dp_hpd_pulse(). > > drivers/gpu/drm/i915/display/intel_ddi.c | 17 + > drivers/gpu/drm/i915/display/intel_dp.c | 13 - > drivers/gpu/drm/i915/display/intel_hdmi.c | 2 +- > 3 files changed, 14 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c > b/drivers/gpu/drm/i915/display/intel_ddi.c > index 33f1dc3d7c1a..ca717434b406 100644 > --- a/drivers/gpu/drm/i915/display/intel_ddi.c > +++ b/drivers/gpu/drm/i915/display/intel_ddi.c > @@ -4741,7 +4741,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, > enum port port) > &dev_priv->vbt.ddi_port_info[port]; > struct intel_digital_port *intel_dig_port; > struct intel_encoder *encoder; > - bool init_hdmi, init_dp, init_lspcon = false; > + bool init_hdmi, init_dp; > enum phy phy = intel_port_to_phy(dev_priv, port); > > init_hdmi = port_info->supports_dvi || port_info->supports_hdmi; > @@ -4754,7 +4754,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, > enum port port) >* is initialized before lspcon. 
>*/ > init_dp = true; > - init_lspcon = true; > init_hdmi = false; > DRM_DEBUG_KMS("VBT says port %c has lspcon\n", port_name(port)); > } > @@ -4833,20 +4832,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, > enum port port) > goto err; > } > > - if (init_lspcon) { > - if (lspcon_init(intel_dig_port)) > - /* TODO: handle hdmi info frame part */ > - DRM_DEBUG_KMS("LSPCON init success on port %c\n", > - port_name(port)); > - else > - /* > - * LSPCON init faied, but DP init was success, so > - * lets try to drive as DP++ port. > - */ > - DRM_ERROR("LSPCON init failed on port %c\n", > - port_name(port)); > - } > - > intel_infoframe_init(intel_dig_port); > > return; > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c > b/drivers/gpu/drm/i915/display/intel_dp.c > index c7424e2a04a3..43117aa86292 100644 > --- a/drivers/gpu/drm/i915/display/intel_dp.c > +++ b/drivers/gpu/drm/i915/display/intel_dp.c > @@ -5663,8 +5663,19 @@ intel_dp_detect(struct drm_connector *connector, > /* Can't disconnect eDP */ > if (intel_dp_is_edp(intel_dp)) > status = edp_detect(intel_dp); > - else if (intel_digital_port_connected(encoder)) > + else if (intel_digital_port_connected(encoder)) { > + if (intel_bios_is_lspcon_present(dev_priv, dig_port->base.port) > && > + !dig_port->lspcon.active) { > + if (lspcon_init(dig_port)) > + DRM_DEBUG_KMS("LSPCON init success on port > %c\n", > + port_name(dig_port->base.port)); > + else > + DRM_DEBUG_KMS("LSPCON init failed on port %c\n", > + port_name(dig_port->base.port)); > + } > + > status = intel_dp_detect_dpcd(intel_dp); > + } > else > status = connector_status_disconnected; > > diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c > b/drivers/gpu/drm/i915/display/intel_hdmi.c > index 93ac0f296852..27a5aa8cefc9 100644 > --- a/drivers/gpu/drm/i915/display/intel_hdmi.c > +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c > @@ -3100,7 +3100,7 @@ void intel_infoframe_init(struct intel_digital_port > *intel_dig_port) > 
intel_dig_port->set_infoframes = g4x_set_infoframes; > intel_dig_port->infoframes_enabled = g4x_infoframes_enabled; > } else if (HAS_DDI(dev_priv)) { > - if (intel_dig_port->lspcon.active) { > + if (intel_bios_is_lspcon_present(dev_priv, > intel_dig_port->base.port)) { > intel_dig_port->write_infoframe = > lspcon_write_infoframe; > intel_dig_port->rea
Re: [Intel-gfx] [RFC] GPU-bound energy efficiency improvements for the intel_pstate driver (v2).
"Pandruvada, Srinivas" writes: > On Tue, 2020-03-10 at 14:41 -0700, Francisco Jerez wrote: >> > > [...] > >> Thanks in advance for any review feed-back and test reports. >> >> [PATCH 01/10] PM: QoS: Add CPU_RESPONSE_FREQUENCY global PM QoS >> limit. >> [PATCH 02/10] drm/i915: Adjust PM QoS response frequency based on GPU >> load. >> [PATCH 03/10] OPTIONAL: drm/i915: Expose PM QoS control parameters >> via debugfs. >> [PATCH 04/10] Revert "cpufreq: intel_pstate: Drop ->update_util from >> pstate_funcs" >> [PATCH 05/10] cpufreq: intel_pstate: Implement VLP controller >> statistics and status calculation. >> [PATCH 06/10] cpufreq: intel_pstate: Implement VLP controller target >> P-state range estimation. >> [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP >> parts. >> [PATCH 08/10] cpufreq: intel_pstate: Enable VLP controller based on >> ACPI FADT profile and CPUID. >> [PATCH 09/10] OPTIONAL: cpufreq: intel_pstate: Add tracing of VLP >> controller status. >> [PATCH 10/10] OPTIONAL: cpufreq: intel_pstate: Expose VLP controller >> parameters via debugfs. >> > Do you have debug patch (You don't to submit as a patch), which will > allow me to dynamically disable/enable all these changes? I want to > compare and do some measurements. > Something like this (fully untested) patch? It should prevent the VLP controller from running if you do: echo 16 > /sys/kernel/debug/pstate_snb/lp_debug > Thanks, > Srinivas > >>[...] 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ab893a211746..8749b4a14447 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2411,6 +2411,9 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, static void intel_pstate_update_util_hwp_vlp(struct update_util_data *data, u64 time, unsigned int flags) { + if ((vlp_params.debug & 16)) + return; + struct cpudata *cpu = container_of(data, struct cpudata, update_util); if (update_vlp_sample(cpu, time, flags)) { signature.asc Description: PGP signature ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915/mst: Hookup DRM DP MST late_register/early_unregister callbacks (rev3)
== Series Details == Series: drm/i915/mst: Hookup DRM DP MST late_register/early_unregister callbacks (rev3) URL : https://patchwork.freedesktop.org/series/74532/ State : failure == Summary == Applying: drm/i915/mst: Hookup DRM DP MST late_register/early_unregister callbacks Using index info to reconstruct a base tree... M drivers/gpu/drm/i915/display/intel_dp_mst.c Falling back to patching base and 3-way merge... No changes -- Patch already applied. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Fi.CI.BUILD: failure for GPU-bound energy efficiency improvements for the intel_pstate driver (v2).
== Series Details == Series: GPU-bound energy efficiency improvements for the intel_pstate driver (v2). URL : https://patchwork.freedesktop.org/series/74540/ State : failure == Summary == Applying: PM: QoS: Add CPU_RESPONSE_FREQUENCY global PM QoS limit. error: sha1 information is lacking or useless (include/linux/pm_qos.h). error: could not build fake ancestor hint: Use 'git am --show-current-patch' to see the failed patch Patch failed at 0001 PM: QoS: Add CPU_RESPONSE_FREQUENCY global PM QoS limit. When you have resolved this problem, run "git am --continue". If you prefer to skip this patch, run "git am --skip" instead. To restore the original branch and stop patching, run "git am --abort". ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH -next 026/491] INTEL GVT-g DRIVERS (Intel GPU Virtualization): Use fallthrough;
Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 1793f69..0e792f9 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1225,7 +1225,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; - /* fall through */ + fallthrough; case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps); return PTR_ERR_OR_ZERO(mm); -- 2.24.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v8 02/12] drm/i915: Use 64-bit division macro
Since the PWM framework is switching struct pwm_state.duty_cycle's datatype to u64, prepare for this transition by using DIV_ROUND_UP_ULL to handle a 64-bit dividend. Cc: Jani Nikula Cc: Joonas Lahtinen Cc: David Airlie Cc: Daniel Vetter Cc: Chris Wilson Cc: "Ville Syrjälä" Cc: intel-gfx@lists.freedesktop.org Cc: dri-de...@lists.freedesktop.org Signed-off-by: Guru Das Srinagesh --- drivers/gpu/drm/i915/display/intel_panel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index bc14e9c..843cac1 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1868,7 +1868,7 @@ static int pwm_setup_backlight(struct intel_connector *connector, panel->backlight.min = 0; /* 0% */ panel->backlight.max = 100; /* 100% */ - panel->backlight.level = DIV_ROUND_UP( + panel->backlight.level = DIV_ROUND_UP_ULL( pwm_get_duty_cycle(panel->backlight.pwm) * 100, CRC_PMIC_PWM_PERIOD_NS); panel->backlight.enabled = panel->backlight.level != 0; -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH -next 000/491] treewide: use fallthrough;
There is a new fallthrough pseudo-keyword macro that can be used to replace the various /* fallthrough */ style comments that are used to indicate a case label code block is intended to fallthrough to the next case label block. See commit 294f69e662d1 ("compiler_attributes.h: Add 'fallthrough' pseudo keyword for switch/case use") These patches are intended to allow clang to detect missing switch/case fallthrough uses. Do a depth-first pass on the MAINTAINERS file and find the various F: pattern files and convert the fallthrough comments to fallthrough; for all files matched by all F: patterns in in each section. Done via the perl script below and the previously posted cvt_fallthrough.pl script. Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ These patches are based on next-20200310 and are available in git://repo.or.cz/linux-2.6/trivial-mods.git in branch 20200310_fallthrough_2 $ cat commit_fallthrough.pl #!/usr/bin/env perl use sort 'stable'; # # Reorder a sorted array so file entries are before directory entries # depends on a trailing / for directories # so: # foo/ # foo/bar.c # becomes # foo/bar.c # foo/ # sub file_before_directory { my ($array_ref) = (@_); my $count = scalar(@$array_ref); for (my $i = 1; $i < $count; $i++) { if (substr(@$array_ref[$i - 1], -1) eq '/' && substr(@$array_ref[$i], 0, length(@$array_ref[$i - 1])) eq @$array_ref[$i - 1]) { my $string = @$array_ref[$i - 1]; @$array_ref[$i - 1] = @$array_ref[$i]; @$array_ref[$i] = $string; } } } sub uniq { my (@parms) = @_; my %saw; @parms = grep(!$saw{$_}++, @parms); return @parms; } # Get all the F: file patterns in MAINTAINERS that could be a .[ch] file my $maintainer_patterns = `grep -P '^F:\\s+' MAINTAINERS`; my @patterns = split('\n', $maintainer_patterns); s/^F:\s*// for @patterns; @patterns = grep(!/^(?:Documentation|tools|scripts)\//, @patterns); @patterns = grep(!/\.(?:dtsi?|rst|config)$/, @patterns); @patterns = sort @patterns; @patterns = 
sort { $b =~ tr/\//\// cmp $a =~ tr/\//\// } @patterns; file_before_directory(\@patterns); my %sections_done; foreach my $pattern (@patterns) { # Find the files the pattern matches my $pattern_files = `git ls-files -- $pattern`; my @new_patterns = split('\n', $pattern_files); $pattern_files = join(' ', @new_patterns); next if ($pattern_files =~ /^\s*$/); # Find the section the first file matches my $pattern_file = @new_patterns[0]; my $section_output = `./scripts/get_maintainer.pl --nogit --nogit-fallback --sections --pattern-depth=1 $pattern_file`; my @section = split('\n', $section_output); my $section_header = @section[0]; print("Section: <$section_header>\n"); # Skip the section if it's already done print("Already done '$section_header'\n") if ($sections_done{$section_header}); next if ($sections_done{$section_header}++); # Find all the .[ch] files in all F: lines in that section my @new_section; foreach my $line (@section) { last if ($line =~ /^\s*$/); push(@new_section, $line); } @section = grep(/^F:/, @new_section); s/^F:\s*// for @section; @section = grep(!/^(?:Documentation|tools|scripts)\//, @section); @section = grep(!/\.(?:dtsi?|rst|config)$/, @section); @section = sort @section; @section = uniq(@section); my $section_files = join(' ', @section); print("section_files: <$section_files>\n"); next if ($section_files =~ /^\s*$/); my $cvt_files = `git ls-files -- $section_files`; my @files = split('\n', $cvt_files); @files = grep(!/^(?:Documentation|tools|scripts)\//, @files); @files = grep(!/\.(?:dtsi?|rst|config)$/, @files); @files = grep(/\.[ch]$/, @files); @files = sort @files; @files = uniq(@files); $cvt_files = join(' ', @files); print("files: <$cvt_files>\n"); next if (scalar(@files) < 1); # Convert fallthroughs for all [.ch] files in the section print("doing cvt_fallthrough.pl -- $cvt_files\n"); `cvt_fallthrough.pl -- $cvt_files`; # If nothing changed, nothing to commit `git diff-index --quiet HEAD --`; next if (!$?); # Commit the changes my $fh; 
open($fh, "+>", "cvt_fallthrough.commit_msg") or die "$0: can't create temporary file: $!\n"; print $fh <https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git@perches.com/ EOF ; close $fh; `git commit -s -a -F cvt_fallthrough.commit_msg`; } Joe Perches (491): MELLANOX ETHERNET INNOVA DRIVERS: Use fallthrough; MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRI
[Intel-gfx] [PATCH v8 00/12] Convert PWM period and duty cycle to u64
Because period and duty cycle are defined in the PWM framework structs as ints with units of nanoseconds, the maximum time duration that can be set is limited to ~2.147 seconds. Consequently, applications desiring to set greater time periods via the PWM framework are not be able to do so - like, for instance, causing an LED to blink at an interval of 5 seconds. Redefining the period and duty cycle struct members in the core PWM framework structs as u64 values will enable larger time durations to be set and solve this problem. Such a change to the framework mandates that drivers using these struct members (and corresponding helper functions) also be modified correctly in order to prevent compilation errors. This patch series introduces the changes to all the drivers first, followed by the framework change at the very end so that when the latter is applied, all the drivers are in good shape and there are no compilation errors. Changes from v7: - Changed commit messages of all patches to be brief and to the point. - Added explanation of change in cover letter. - Dropped change to pwm-sti.c as upon review it was unnecessary as struct pwm_capture is not being modified in the PWM core. Changes from v6: - Split out the driver changes out into separate patches, one patch per file for ease of reviewing. Changes from v5: - Dropped the conversion of struct pwm_capture to u64 for reasons mentioned in https://www.spinics.net/lists/linux-pwm/msg11541.html Changes from v4: - Split the patch into two: one for changes to the drivers, and the actual switch to u64 for ease of reverting should the need arise. - Re-examined the patch and made the following corrections: * intel_panel.c: DIV64_U64_ROUND_UP -> DIV_ROUND_UP_ULL (as only the numerator would be 64-bit in this case). * pwm-sti.c: do_div -> div_u64 (do_div is optimized only for x86 architectures, and div_u64's comment block suggests to use this as much as possible). Changes from v3: - Rebased to current tip of for-next. 
Changes from v2: - Fixed %u -> %llu in a dev_dbg in pwm-stm32-lp.c, thanks to kbuild test robot - Added a couple of fixes to pwm-imx-tpm.c and pwm-sifive.c Changes from v1: - Fixed compilation errors seen when compiling for different archs. v1: - Reworked the change pushed upstream earlier [1] so as to not add an extension to an obsolete API. With this change, pwm_ops->apply() can be used to set pwm_state parameters as usual. [1] https://lore.kernel.org/lkml/20190916140048.GB7488@ulmo/ Cc: Lee Jones Cc: Daniel Thompson Cc: Jingoo Han Cc: Bartlomiej Zolnierkiewicz Cc: linux-fb...@vger.kernel.org Cc: Maxime Ripard Cc: Chen-Yu Tsai Cc: Philipp Zabel Cc: Fabrice Gasnier Cc: Maxime Coquelin Cc: Alexandre Torgue Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: linux-ri...@lists.infradead.org Cc: Yash Shah Cc: Atish Patra Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: Alexander Shiyan Cc: Mauro Carvalho Chehab Cc: Richard Fontana Cc: Thomas Gleixner Cc: Kate Stewart Cc: Allison Randal Cc: linux-me...@vger.kernel.org Cc: Kamil Debski Cc: Bartlomiej Zolnierkiewicz Cc: Jean Delvare Cc: Guenter Roeck Cc: Liam Girdwood Cc: Mark Brown Cc: linux-hw...@vger.kernel.org Cc: Jani Nikula Cc: Joonas Lahtinen Cc: David Airlie Cc: Daniel Vetter Cc: Chris Wilson Cc: "Ville Syrjälä" Cc: intel-gfx@lists.freedesktop.org Cc: dri-de...@lists.freedesktop.org Cc: Michael Turquette Cc: Stephen Boyd Cc: linux-...@vger.kernel.org Guru Das Srinagesh (12): clk: pwm: Use 64-bit division function drm/i915: Use 64-bit division macro hwmon: pwm-fan: Use 64-bit division macro ir-rx51: Use 64-bit division macro pwm: clps711x: Use 64-bit division macro pwm: pwm-imx-tpm: Use 64-bit division macro pwm: imx27: Use 64-bit division macro and function pwm: sifive: Use 64-bit division macro pwm: stm32-lp: Use %llu format specifier for period pwm: sun4i: Use 64-bit division function 
backlight: pwm_bl: Use 64-bit division function pwm: core: Convert period and duty cycle to u64 drivers/clk/clk-pwm.c | 2 +- drivers/gpu/drm/i915/display/intel_panel.c | 2 +- drivers/hwmon/pwm-fan.c| 2 +- drivers/media/rc/ir-rx51.c | 3 ++- drivers/pwm/core.c | 4 ++-- drivers/pwm/pwm-clps711x.c | 2 +- drivers/pwm/pwm-imx-tpm.c | 2 +- drivers/pwm/pwm-imx27.c| 5 ++--- drivers/pwm/pwm-sifive.c | 2 +- drivers/pwm/pwm-stm32-lp.c | 2 +- drivers/pwm/pwm-sun4i.c| 2 +- drivers/pwm/sysfs.c| 8 drivers/video/backlight/pwm_bl.c | 3 ++- include/linux/pwm.h|
[Intel-gfx] [PULL] gvt-fixes
Hi, Here's more gvt fixes for 5.6. Fix timer issue caused by idr destroy change and VBT size error. Thanks -- The following changes since commit b549c252b1292aea959cd9b83537fcb9384a6112: drm/i915/gvt: Fix orphan vgpu dmabuf_objs' lifetime (2020-02-25 16:14:20 +0800) are available in the Git repository at: https://github.com/intel/gvt-linux tags/gvt-fixes-2020-03-10 for you to fetch changes up to 2fa7e15c5f466fdd0c0b196b1dc4a65d191efd96: drm/i915/gvt: Fix emulated vbt size issue (2020-03-06 09:35:30 +0800) gvt-fixes-2020-03-10 - Fix vgpu idr destroy causing timer destroy failure (Zhenyu) - Fix VBT size (Tina) Tina Zhang (1): drm/i915/gvt: Fix emulated vbt size issue Zhenyu Wang (1): drm/i915/gvt: Fix unnecessary schedule timer when no vGPU exits drivers/gpu/drm/i915/gvt/opregion.c | 5 ++--- drivers/gpu/drm/i915/gvt/vgpu.c | 12 +--- 2 files changed, 11 insertions(+), 6 deletions(-) -- Open Source Technology Center, Intel ltd. $gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827 signature.asc Description: PGP signature ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PULL] gvt-next
Hi, Here's more gvt change for -next. Mostly rebase and fix Chris's cleanup on intel engine and dev_priv usage. And also one fix for CFL after VFIO edid enabled in last gvt-next pull. thanks -- The following changes since commit a8bb49b64c4f4284fb36169bdd9fc6efd62eb26a: drm/i915/gvt: Fix drm_WARN issue where vgpu ptr is unavailable (2020-02-25 16:13:04 +0800) are available in the Git repository at: https://github.com/intel/gvt-linux tags/gvt-next-2020-03-10 for you to fetch changes up to a61ac1e75105a077ec1efd6923ae3c619f862304: drm/i915/gvt: Wean gvt off using dev_priv (2020-03-06 10:08:10 +0800) gvt-next-2020-03-10 - Fix CFL dmabuf display after vfio edid enabling (Tina) - Clean up scan non-priv batch debugfs entry (Chris) - Use intel engines initialized in gvt, cleanup previous ring id (Chris) - Use intel_gt instead (Chris) Chris Wilson (3): drm/i915/gvt: cleanup debugfs scan_nonprivbb drm/i915/gvt: Wean gvt off dev_priv->engine[] drm/i915/gvt: Wean gvt off using dev_priv Tina Zhang (1): drm/i915/gvt: Fix dma-buf display blur issue on CFL drivers/gpu/drm/i915/gvt/aperture_gm.c | 84 ++- drivers/gpu/drm/i915/gvt/cfg_space.c| 8 +- drivers/gpu/drm/i915/gvt/cmd_parser.c | 204 -- drivers/gpu/drm/i915/gvt/debugfs.c | 45 +- drivers/gpu/drm/i915/gvt/display.c | 21 +-- drivers/gpu/drm/i915/gvt/dmabuf.c | 4 +- drivers/gpu/drm/i915/gvt/edid.c | 16 +- drivers/gpu/drm/i915/gvt/execlist.c | 103 +++-- drivers/gpu/drm/i915/gvt/execlist.h | 5 +- drivers/gpu/drm/i915/gvt/fb_decoder.c | 6 +- drivers/gpu/drm/i915/gvt/firmware.c | 16 +- drivers/gpu/drm/i915/gvt/gtt.c | 50 +++ drivers/gpu/drm/i915/gvt/gvt.c | 38 ++--- drivers/gpu/drm/i915/gvt/gvt.h | 25 ++-- drivers/gpu/drm/i915/gvt/handlers.c | 193 - drivers/gpu/drm/i915/gvt/interrupt.c| 14 +- drivers/gpu/drm/i915/gvt/kvmgt.c| 10 +- drivers/gpu/drm/i915/gvt/mmio.c | 6 +- drivers/gpu/drm/i915/gvt/mmio.h | 4 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 127 drivers/gpu/drm/i915/gvt/mmio_context.h | 5 +- 
drivers/gpu/drm/i915/gvt/sched_policy.c | 25 ++-- drivers/gpu/drm/i915/gvt/scheduler.c| 249 +++- drivers/gpu/drm/i915/gvt/scheduler.h| 9 +- drivers/gpu/drm/i915/gvt/vgpu.c | 12 +- 25 files changed, 601 insertions(+), 678 deletions(-) -- Open Source Technology Center, Intel ltd. $gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827 signature.asc Description: PGP signature ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 01/12] drm/i915: Expose list of clients in sysfs
On 10/03/2020 00:13, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-09 23:26:34) On 09/03/2020 21:34, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-09 18:31:18) +struct i915_drm_client * +i915_drm_client_add(struct i915_drm_clients *clients, struct task_struct *task) +{ + struct i915_drm_client *client; + int ret; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return ERR_PTR(-ENOMEM); + + kref_init(&client->kref); + client->clients = clients; + + ret = mutex_lock_interruptible(&clients->lock); + if (ret) + goto err_id; + ret = xa_alloc_cyclic(&clients->xarray, &client->id, client, + xa_limit_32b, &clients->next_id, GFP_KERNEL); So what's next_id used for that explains having the over-arching mutex? It's to give out client id's "cyclically" - before I apparently misunderstood what xa_alloc_cyclic is supposed to do - I thought after giving out id 1 it would give out 2 next, even if 1 was returned to the pool in the meantime. But it doesn't, I need to track the start point for the next search with "next". Ok. A requirement of the API for the external counter. I want this to make intel_gpu_top's life easier, so it doesn't have to deal with id recycling for all practical purposes. Fair enough. I only worry about the radix nodes and sparse ids :) I only found in docs that it should be efficient when the data is "densely clustered". And given that does appear based on a tree like structure I thought that means a few clusters of ids should be okay. But maybe in practice we would have more than a few clusters. I guess that could indeed be the case.. hm.. Maybe I could use a list and keep pointer to last entry. When u32 next wraps I reset to list head. Downside is any search for next free id potentially has to walk over one used up cluster. May be passable apart for IGT-type stress tests. And a peek into xa implementation told me the internal lock is not protecting "next. 
See xa_alloc_cyclic(), seems to cover __xa_alloc_cycle (where *next is manipulated) under the xa_lock. Ha, true, not sure how I went past top-level and forgot what's in there. :) Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/tgl: WaDisableGPGPUMidThreadPreemption
On 09/03/2020 17:02, Mrozek, Michal wrote: But he asked whether it's possible for Media and OpenCL drivers to also disable mid-thread preemption through the INTERFACE_DESCRIPTOR_DATA, instead of from the >>kernel side, so we could try to experiment with it in the future. Interface Descriptor setting only switches the preemption from mid thread to thread group. It doesn't allow to disable it completely and there are cases where this is required (i.e. VME). For that we need mmio whitelist. With "disable it completely" you mean disable preemption completely - go lower than thread-group in granularity? Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/tgl: WaDisableGPGPUMidThreadPreemption
>>With "disable it completely" you mean disable preemption completely - go >>lower than thread-group in granularity? Yes, disable it completely. Michal ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 04/10] drm/i915/fifo_underrun: convert to drm_device based logging.
From: Wambui Karuga Convert various instances of the printk based drm logging macros to the struct drm_device based logging macros in i915/display/intel_fifo_underrun.c. This was done using the following coccinelle script: @@ identifier fn, T; @@ fn(...,struct drm_i915_private *T,...) { <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } @@ identifier fn, T; @@ fn(...) { ... struct drm_i915_private *T = ...; <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } New checkpatch warnings were addressed manually. 
Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- .../drm/i915/display/intel_fifo_underrun.c| 29 ++- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index 470b3b0b9bdb..813a4f7033e1 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -103,7 +103,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) intel_de_posting_read(dev_priv, reg); trace_intel_cpu_fifo_underrun(dev_priv, crtc->pipe); - DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe)); + drm_err(&dev_priv->drm, "pipe %c underrun\n", pipe_name(crtc->pipe)); } static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, @@ -123,7 +123,8 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, intel_de_posting_read(dev_priv, reg); } else { if (old && intel_de_read(dev_priv, reg) & PIPE_FIFO_UNDERRUN_STATUS) - DRM_ERROR("pipe %c underrun\n", pipe_name(pipe)); + drm_err(&dev_priv->drm, "pipe %c underrun\n", + pipe_name(pipe)); } } @@ -155,7 +156,7 @@ static void ivb_check_fifo_underruns(struct intel_crtc *crtc) intel_de_posting_read(dev_priv, GEN7_ERR_INT); trace_intel_cpu_fifo_underrun(dev_priv, pipe); - DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe)); + drm_err(&dev_priv->drm, "fifo underrun on pipe %c\n", pipe_name(pipe)); } static void ivb_set_fifo_underrun_reporting(struct drm_device *dev, @@ -176,8 +177,9 @@ static void ivb_set_fifo_underrun_reporting(struct drm_device *dev, if (old && intel_de_read(dev_priv, GEN7_ERR_INT) & ERR_INT_FIFO_UNDERRUN(pipe)) { - DRM_ERROR("uncleared fifo underrun on pipe %c\n", - pipe_name(pipe)); + drm_err(&dev_priv->drm, + "uncleared fifo underrun on pipe %c\n", + pipe_name(pipe)); } } } @@ -223,8 +225,8 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) intel_de_posting_read(dev_priv, SERR_INT); 
trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); - DRM_ERROR("pch fifo underrun on pch transcoder %c\n", - pipe_name(pch_transcoder)); + drm_err(&dev_priv->drm, "pch fifo underrun on pch transcoder %c\n", + pipe_name(pch_transcoder)); } static void cpt_set_fifo_underrun_reporting(struct drm_device *dev, @@ -246,8 +248,9 @@ static void cpt_set_fifo_underrun_reporting(struct drm_device *dev, if (old && intel_de_read(dev_priv, SERR_INT) & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) { - DRM_ERROR("uncleared pch fifo underrun on pch transcoder %c\n", - pipe_name(pch_transcoder)); + drm_err(&dev_priv->drm, + "uncleared pch fifo underrun on pch transcoder %c\n", + pipe_name(pch_transcoder)); } } } @@ -381,8 +384,8 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) { trace_intel_cpu_fifo_underrun(dev_priv, pipe); - DRM_ERROR("CPU pipe %c FIFO underrun\n", - pipe_name(pipe)); + drm_err(&dev_priv->drm, "CPU pipe %c FIFO underrun\n", + pipe_name(pipe)); } intel_fbc_handle_fifo_underrun_irq(dev_priv); @@ -403,8 +406,8 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i9
[Intel-gfx] [PATCH 03/10] drm/i915/fbdev: convert to drm_device based logging.
From: Wambui Karuga Convert various instances of printk based drm logging macros to the struct drm_device based logging macros in i915/display/intel_fbdev.c. This also involves extracting the drm_i915_private device from various intel types. v2 by Jani: - fix the final one too Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.c | 96 +- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 3bc804212a99..bd39eb6a21b8 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -146,7 +146,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(dev_priv, size); if (IS_ERR(obj)) { - DRM_ERROR("failed to allocate framebuffer\n"); + drm_err(&dev_priv->drm, "failed to allocate framebuffer\n"); return PTR_ERR(obj); } @@ -183,21 +183,23 @@ static int intelfb_create(struct drm_fb_helper *helper, if (intel_fb && (sizes->fb_width > intel_fb->base.width || sizes->fb_height > intel_fb->base.height)) { - DRM_DEBUG_KMS("BIOS fb too small (%dx%d), we require (%dx%d)," - " releasing it\n", - intel_fb->base.width, intel_fb->base.height, - sizes->fb_width, sizes->fb_height); + drm_dbg_kms(&dev_priv->drm, + "BIOS fb too small (%dx%d), we require (%dx%d)," + " releasing it\n", + intel_fb->base.width, intel_fb->base.height, + sizes->fb_width, sizes->fb_height); drm_framebuffer_put(&intel_fb->base); intel_fb = ifbdev->fb = NULL; } if (!intel_fb || drm_WARN_ON(dev, !intel_fb_obj(&intel_fb->base))) { - DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n"); + drm_dbg_kms(&dev_priv->drm, + "no BIOS fb, allocating a new one\n"); ret = intelfb_alloc(helper, sizes); if (ret) return ret; intel_fb = ifbdev->fb; } else { - DRM_DEBUG_KMS("re-using BIOS fb\n"); + drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n"); prealloc = true; sizes->fb_width = 
intel_fb->base.width; sizes->fb_height = intel_fb->base.height; @@ -220,7 +222,7 @@ static int intelfb_create(struct drm_fb_helper *helper, info = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(info)) { - DRM_ERROR("Failed to allocate fb_info\n"); + drm_err(&dev_priv->drm, "Failed to allocate fb_info\n"); ret = PTR_ERR(info); goto out_unpin; } @@ -240,7 +242,8 @@ static int intelfb_create(struct drm_fb_helper *helper, vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { - DRM_ERROR("Failed to remap framebuffer into virtual memory\n"); + drm_err(&dev_priv->drm, + "Failed to remap framebuffer into virtual memory\n"); ret = PTR_ERR(vaddr); goto out_unpin; } @@ -258,9 +261,9 @@ static int intelfb_create(struct drm_fb_helper *helper, /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n", - ifbdev->fb->base.width, ifbdev->fb->base.height, - i915_ggtt_offset(vma)); + drm_dbg_kms(&dev_priv->drm, "allocated %dx%d fb: 0x%08x\n", + ifbdev->fb->base.width, ifbdev->fb->base.height, + i915_ggtt_offset(vma)); ifbdev->vma = vma; ifbdev->vma_flags = flags; @@ -309,6 +312,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) static bool intel_fbdev_init_bios(struct drm_device *dev, struct intel_fbdev *ifbdev) { + struct drm_i915_private *i915 = to_i915(dev); struct intel_framebuffer *fb = NULL; struct drm_crtc *crtc; struct intel_crtc *intel_crtc; @@ -321,21 +325,24 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, intel_crtc = to_intel_crtc(crtc); if (!crtc->state->active || !obj) { - DRM_DEBUG_KMS("pipe %c not active or no fb, skipping\n", - pipe_name(intel_crtc->pipe)); + drm_dbg_kms(&i915->drm, + "pipe %c not active or no fb, skipping\n", + pipe_name(intel_crtc->pipe)); continue;
[Intel-gfx] [PATCH 10/10] drm/i915/overlay: convert to drm_device based logging.
From: Wambui Karuga Convert various instances of the printk based drm logging macros to the struct drm_device based logging macros in i915/display/intel_overlay.c. This transformation was achieved using the following coccinelle script: @@ identifier fn, T; @@ fn(...,struct drm_i915_private *T,...) { <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } @@ identifier fn, T; @@ fn(...) { ... struct drm_i915_private *T = ...; <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } Note that this converts DRM_DEBUG to drm_dbg(). Checkpatch warnings were addressed manually. 
References: https://lists.freedesktop.org/archives/dri-devel/2020-January/253381.html Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_overlay.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 3b0cb3534e2a..481187223101 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -323,7 +323,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, /* check for underruns */ tmp = intel_de_read(dev_priv, DOVSTA); if (tmp & (1 << 17)) - DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); + drm_dbg(&dev_priv->drm, "overlay underrun, DOVSTA: %x\n", tmp); rq = alloc_request(overlay, NULL); if (IS_ERR(rq)) @@ -1068,7 +1068,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, overlay = dev_priv->overlay; if (!overlay) { - DRM_DEBUG("userspace bug: no overlay\n"); + drm_dbg(&dev_priv->drm, "userspace bug: no overlay\n"); return -ENODEV; } @@ -1092,7 +1092,8 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, drm_modeset_lock_all(dev); if (i915_gem_object_is_tiled(new_bo)) { - DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); + drm_dbg_kms(&dev_priv->drm, + "buffer used for overlay image can not be tiled\n"); ret = -EINVAL; goto out_unlock; } @@ -1227,7 +1228,7 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, overlay = dev_priv->overlay; if (!overlay) { - DRM_DEBUG("userspace bug: no overlay\n"); + drm_dbg(&dev_priv->drm, "userspace bug: no overlay\n"); return -ENODEV; } @@ -1371,7 +1372,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) update_reg_attrs(overlay, overlay->regs); dev_priv->overlay = overlay; - DRM_INFO("Initialized overlay support.\n"); + drm_info(&dev_priv->drm, "Initialized overlay support.\n"); return; out_free: -- 2.20.1 ___ Intel-gfx 
mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 09/10] drm/i915/lvds: convert to drm_device based logging macros.
From: Wambui Karuga Converts various instances of the printk based drm logging macros to the struct drm_device based logging macros in i915/display/intel_lvds.c. This transformation was done by the following coccinelle script that matches based on the existence of a drm_i915_private device: @@ identifier fn, T; @@ fn(...,struct drm_i915_private *T,...) { <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } @@ identifier fn, T; @@ fn(...) { ... struct drm_i915_private *T = ...; <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } New checkpatch warnings were fixed manually. 
Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_lvds.c | 43 +-- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 37755e1c5dd1..9a067effcfa0 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -181,8 +181,9 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) <= 4 && pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) { - DRM_DEBUG_KMS("Panel power timings uninitialized, " - "setting defaults\n"); + drm_dbg_kms(&dev_priv->drm, + "Panel power timings uninitialized, " + "setting defaults\n"); /* Set T2 to 40ms and T5 to 200ms in 100 usec units */ pps->t1_t2 = 40 * 10; pps->t5 = 200 * 10; @@ -191,10 +192,10 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, pps->tx = 200 * 10; } - DRM_DEBUG_DRIVER("LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d " -"divider %d port %d powerdown_on_reset %d\n", -pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx, -pps->divider, pps->port, pps->powerdown_on_reset); + drm_dbg(&dev_priv->drm, "LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d " + "divider %d port %d powerdown_on_reset %d\n", + pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx, + pps->divider, pps->port, pps->powerdown_on_reset); } static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv, @@ -316,7 +317,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder, intel_de_posting_read(dev_priv, lvds_encoder->reg); if (intel_de_wait_for_set(dev_priv, PP_STATUS(0), PP_ON, 5000)) - DRM_ERROR("timed out waiting for panel to power on\n"); + drm_err(&dev_priv->drm, + "timed out waiting for panel to power on\n"); intel_panel_enable_backlight(pipe_config, conn_state); } @@ -331,7 +333,8 @@ static void intel_disable_lvds(struct intel_encoder *encoder, intel_de_write(dev_priv, PP_CONTROL(0), 
intel_de_read(dev_priv, PP_CONTROL(0)) & ~PANEL_POWER_ON); if (intel_de_wait_for_clear(dev_priv, PP_STATUS(0), PP_ON, 1000)) - DRM_ERROR("timed out waiting for panel to power off\n"); + drm_err(&dev_priv->drm, + "timed out waiting for panel to power off\n"); intel_de_write(dev_priv, lvds_encoder->reg, intel_de_read(dev_priv, lvds_encoder->reg) & ~LVDS_PORT_EN); @@ -397,7 +400,7 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, /* Should never happen!! */ if (INTEL_GEN(dev_priv) < 4 && intel_crtc->pipe == 0) { - DRM_ERROR("Can't support LVDS on pipe A\n"); + drm_err(&dev_priv->drm, "Can't support LVDS on pipe A\n"); return -EINVAL; } @@ -407,8 +410,9 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, lvds_bpp = 6*3; if (lvds_bpp != pipe_config->pipe_bpp && !pipe_config->bw_constrained) { - DRM_DEBUG_KMS("forcing display bpp (was %d) to LVDS (%d)\n", - pipe_config->pipe_bpp, lvds_bpp); + drm_dbg_kms(&dev_priv->drm, + "forcing display bpp (was %d) to LVDS (%d)\n", + pipe_config->pipe_bpp, lvds_bpp); pipe_config->pipe_bpp = lvds_bpp; } @@ -832,7 +836,
[Intel-gfx] [PATCH 02/10] drm/i915/fbc: convert to drm_device based logging macros.
From: Wambui Karuga This replaces the uses of the printk based drm logging macros with the struct drm_device based logging macros in i915/display/intel_fbc.c. This transformation was done using the following coccinelle semantic patch that matches based on the existence of a drm_i915_private device pointer: @@ identifier fn, T; @@ fn(...,struct drm_i915_private *T,...) { <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } @@ identifier fn, T; @@ fn(...) { ... struct drm_i915_private *T = ...; <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } New checkpatch warnings were addressed manually. 
v2 by Jani: - also convert pr_info_once to drm based logging Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbc.c | 30 ++-- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 2d982c322be9..ea0c3ecf7230 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -104,7 +104,7 @@ static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv) /* Wait for compressing bit to clear */ if (intel_de_wait_for_clear(dev_priv, FBC_STATUS, FBC_STAT_COMPRESSING, 10)) { - DRM_DEBUG_KMS("FBC idle timed out\n"); + drm_dbg_kms(&dev_priv->drm, "FBC idle timed out\n"); return; } } @@ -485,7 +485,8 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, if (!ret) goto err_llb; else if (ret > 1) { - DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); + drm_info(&dev_priv->drm, +"Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. 
Try to increase stolen memory size if available in BIOS.\n"); } @@ -521,8 +522,9 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, dev_priv->dsm.start + compressed_llb->start); } - DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", - fbc->compressed_fb.size, fbc->threshold); + drm_dbg_kms(&dev_priv->drm, + "reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", + fbc->compressed_fb.size, fbc->threshold); return 0; @@ -531,7 +533,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); err_llb: if (drm_mm_initialized(&dev_priv->mm.stolen)) - pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + drm_info_once(&dev_priv->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } @@ -945,7 +947,8 @@ static void __intel_fbc_disable(struct drm_i915_private *dev_priv) drm_WARN_ON(&dev_priv->drm, !fbc->crtc); drm_WARN_ON(&dev_priv->drm, fbc->active); - DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); + drm_dbg_kms(&dev_priv->drm, "Disabling FBC on pipe %c\n", + pipe_name(crtc->pipe)); __intel_fbc_cleanup_cfb(dev_priv); @@ -1173,7 +1176,8 @@ void intel_fbc_enable(struct intel_atomic_state *state, else cache->gen9_wa_cfb_stride = 0; - DRM_DEBUG_KMS("Enabling FBC on pipe %c\n", pipe_name(crtc->pipe)); + drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n", + pipe_name(crtc->pipe)); fbc->no_fbc_reason = "FBC enabled but not active yet\n"; fbc->crtc = crtc; @@ -1235,7 +1239,7 @@ static void intel_fbc_underrun_work_fn(struct work_struct *work) if (fbc->underrun_detected || !fbc->crtc) goto out; - DRM_DEBUG_KMS("Disabling FBC due to FIFO 
underrun.\n"); + drm_dbg_kms(&dev_priv->drm, "Disabling FBC due to FIFO underrun.\n"); fbc
[Intel-gfx] [PATCH 05/10] drm/i915/gmbus: convert to drm_device based logging.
From: Wambui Karuga Conversion instances of printk based drm logging macros to use the struct drm_device based logging macros in i915/display/intel_gmbus.c. This was done using the following coccinelle semantic patch that transforms based on the existence of an existing drm_i915_private device: @@ identifier fn, T; @@ fn(...,struct drm_i915_private *T,...) { <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } @@ identifier fn, T; @@ fn(...) { ... struct drm_i915_private *T = ...; <+... ( -DRM_INFO( +drm_info(&T->drm, ...) | -DRM_ERROR( +drm_err(&T->drm, ...) | -DRM_WARN( +drm_warn(&T->drm, ...) | -DRM_DEBUG( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_KMS( +drm_dbg_kms(&T->drm, ...) | -DRM_DEBUG_DRIVER( +drm_dbg(&T->drm, ...) | -DRM_DEBUG_ATOMIC( +drm_dbg_atomic(&T->drm, ...) ) ...+> } New checkpatch warnings were addressed manually. Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_gmbus.c | 33 +- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 0ac44973..1fd3a5a6296b 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -631,8 +631,9 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, * till then let it sleep. 
*/ if (gmbus_wait_idle(dev_priv)) { - DRM_DEBUG_KMS("GMBUS [%s] timed out waiting for idle\n", -adapter->name); + drm_dbg_kms(&dev_priv->drm, + "GMBUS [%s] timed out waiting for idle\n", + adapter->name); ret = -ETIMEDOUT; } intel_de_write_fw(dev_priv, GMBUS0, 0); @@ -655,8 +656,9 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, */ ret = -ENXIO; if (gmbus_wait_idle(dev_priv)) { - DRM_DEBUG_KMS("GMBUS [%s] timed out after NAK\n", - adapter->name); + drm_dbg_kms(&dev_priv->drm, + "GMBUS [%s] timed out after NAK\n", + adapter->name); ret = -ETIMEDOUT; } @@ -668,9 +670,9 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, intel_de_write_fw(dev_priv, GMBUS1, 0); intel_de_write_fw(dev_priv, GMBUS0, 0); - DRM_DEBUG_KMS("GMBUS [%s] NAK for addr: %04x %c(%d)\n", -adapter->name, msgs[i].addr, -(msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len); + drm_dbg_kms(&dev_priv->drm, "GMBUS [%s] NAK for addr: %04x %c(%d)\n", + adapter->name, msgs[i].addr, + (msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len); /* * Passive adapters sometimes NAK the first probe. Retry the first @@ -679,16 +681,18 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, * drm_do_probe_ddc_edid, which bails out on the first -ENXIO. 
*/ if (ret == -ENXIO && i == 0 && try++ == 0) { - DRM_DEBUG_KMS("GMBUS [%s] NAK on first message, retry\n", - adapter->name); + drm_dbg_kms(&dev_priv->drm, + "GMBUS [%s] NAK on first message, retry\n", + adapter->name); goto retry; } goto out; timeout: - DRM_DEBUG_KMS("GMBUS [%s] timed out, falling back to bit banging on pin %d\n", - bus->adapter.name, bus->reg0 & 0xff); + drm_dbg_kms(&dev_priv->drm, + "GMBUS [%s] timed out, falling back to bit banging on pin %d\n", + bus->adapter.name, bus->reg0 & 0xff); intel_de_write_fw(dev_priv, GMBUS0, 0); /* @@ -925,9 +929,10 @@ void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit) mutex_lock(&dev_priv->gmbus_mutex); bus->force_bit += force_bit ? 1 : -1; - DRM_DEBUG_KMS("%sabling bit-banging on %s. force bit now %d\n", - force_bit ? "en" : "dis", adapter->name, - bus->force_bit); + drm_dbg_kms(&dev_priv->drm, + "%sabling bit-banging on %s. force bit now %d\n", + force_bit ? "en" : "dis", adapter->name, + bus->force_bit); mutex_unlock(&dev_priv->gmbus_mutex); } -- 2.20.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 06/10] drm/i915/hdcp: convert to struct drm_device based logging.
From: Wambui Karuga Converts various instances of the printk based drm logging macros to the struct drm_device based logging macros in i915/display/intel_hdcp.c. This also involves extracting the drm_i915_private device from the intel_connector type for use in the macros. v2 by Jani: - rebase Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_hdcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index ee0f27ea2810..cd3b686980b2 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1391,6 +1391,7 @@ static int hdcp2_propagate_stream_management_info(struct intel_connector *connector) { struct intel_digital_port *intel_dig_port = intel_attached_dig_port(connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; union { struct hdcp2_rep_stream_manage stream_manage; @@ -1431,7 +1432,7 @@ int hdcp2_propagate_stream_management_info(struct intel_connector *connector) hdcp->seq_num_m++; if (hdcp->seq_num_m > HDCP_2_2_SEQ_NUM_MAX) { - DRM_DEBUG_KMS("seq_num_m roll over.\n"); + drm_dbg_kms(&i915->drm, "seq_num_m roll over.\n"); return -1; } -- 2.20.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 08/10] drm/i915/lpe_audio: convert to drm_device based logging macros.
From: Wambui Karuga Convert various uses of the printk based drm logging macros to the struct drm_device based logging macros in i915/display/intel_lpe_audio.c. Note that this converts DRM_DEBUG to drm_dbg(). References: https://lists.freedesktop.org/archives/dri-devel/2020-January/253381.html Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_lpe_audio.c| 23 +++ 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index 516e7179a5a4..ad5cc13037ae 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -127,7 +127,8 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) kfree(pdata); if (IS_ERR(platdev)) { - DRM_ERROR("Failed to allocate LPE audio platform device\n"); + drm_err(&dev_priv->drm, + "Failed to allocate LPE audio platform device\n"); return platdev; } @@ -190,7 +191,8 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); + drm_info(&dev_priv->drm, +"HDaudio controller not detected, using LPE audio instead\n"); lpe_present = true; } } @@ -203,18 +205,19 @@ static int lpe_audio_setup(struct drm_i915_private *dev_priv) dev_priv->lpe_audio.irq = irq_alloc_desc(0); if (dev_priv->lpe_audio.irq < 0) { - DRM_ERROR("Failed to allocate IRQ desc: %d\n", + drm_err(&dev_priv->drm, "Failed to allocate IRQ desc: %d\n", dev_priv->lpe_audio.irq); ret = dev_priv->lpe_audio.irq; goto err; } - DRM_DEBUG("irq = %d\n", dev_priv->lpe_audio.irq); + drm_dbg(&dev_priv->drm, "irq = %d\n", dev_priv->lpe_audio.irq); ret = lpe_audio_irq_init(dev_priv); if (ret) { - DRM_ERROR("Failed to initialize irqchip for lpe audio: %d\n", + drm_err(&dev_priv->drm, + "Failed to initialize irqchip for lpe audio: %d\n", ret); goto err_free_irq; } @@ -223,7 
+226,8 @@ static int lpe_audio_setup(struct drm_i915_private *dev_priv) if (IS_ERR(dev_priv->lpe_audio.platdev)) { ret = PTR_ERR(dev_priv->lpe_audio.platdev); - DRM_ERROR("Failed to create lpe audio platform device: %d\n", + drm_err(&dev_priv->drm, + "Failed to create lpe audio platform device: %d\n", ret); goto err_free_irq; } @@ -259,8 +263,8 @@ void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv) ret = generic_handle_irq(dev_priv->lpe_audio.irq); if (ret) - DRM_ERROR_RATELIMITED("error handling LPE audio irq: %d\n", - ret); + drm_err_ratelimited(&dev_priv->drm, + "error handling LPE audio irq: %d\n", ret); } /** @@ -278,7 +282,8 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv) if (lpe_audio_detect(dev_priv)) { ret = lpe_audio_setup(dev_priv); if (ret < 0) - DRM_ERROR("failed to setup LPE Audio bridge\n"); + drm_err(&dev_priv->drm, + "failed to setup LPE Audio bridge\n"); } return ret; } -- 2.20.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 07/10] drm/i915/hotplug: convert to drm_device based logging.
From: Wambui Karuga Converts various instances of the printk based drm logging macros to the struct drm_device based logging macros in i915/display/intel_hotplug.c. In some cases, this involves extracting the drm_i915_private pointer from the drm_device struct to be used in the logging macros. Signed-off-by: Wambui Karuga Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_hotplug.c | 40 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 562227d54ccc..a091442efba4 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -170,10 +170,13 @@ static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv, hpd->stats[pin].count += increment; if (hpd->stats[pin].count > threshold) { hpd->stats[pin].state = HPD_MARK_DISABLED; - DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", pin); + drm_dbg_kms(&dev_priv->drm, + "HPD interrupt storm detected on PIN %d\n", pin); storm = true; } else { - DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: %d\n", pin, + drm_dbg_kms(&dev_priv->drm, + "Received HPD interrupt on PIN %d - cnt: %d\n", + pin, hpd->stats[pin].count); } @@ -202,7 +205,8 @@ intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv) dev_priv->hotplug.stats[pin].state != HPD_MARK_DISABLED) continue; - DRM_INFO("HPD interrupt storm detected on connector %s: " + drm_info(&dev_priv->drm, +"HPD interrupt storm detected on connector %s: " "switching from hotplug detection to polling\n", connector->base.name); @@ -244,8 +248,9 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) continue; if (connector->base.polled != connector->polled) - DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n", -connector->base.name); + drm_dbg(&dev_priv->drm, + "Reenabling HPD on connector %s\n", + connector->base.name); connector->base.polled = 
connector->polled; } drm_connector_list_iter_end(&conn_iter); @@ -280,11 +285,12 @@ intel_encoder_hotplug(struct intel_encoder *encoder, if (old_status == connector->base.status) return INTEL_HOTPLUG_UNCHANGED; - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", - connector->base.base.id, - connector->base.name, - drm_get_connector_status_name(old_status), - drm_get_connector_status_name(connector->base.status)); + drm_dbg_kms(&to_i915(dev)->drm, + "[CONNECTOR:%d:%s] status updated from %s to %s\n", + connector->base.base.id, + connector->base.name, + drm_get_connector_status_name(old_status), + drm_get_connector_status_name(connector->base.status)); return INTEL_HOTPLUG_CHANGED; } @@ -358,7 +364,7 @@ static void i915_hotplug_work_func(struct work_struct *work) u32 hpd_retry_bits; mutex_lock(&dev->mode_config.mutex); - DRM_DEBUG_KMS("running encoder hotplug functions\n"); + drm_dbg_kms(&dev_priv->drm, "running encoder hotplug functions\n"); spin_lock_irq(&dev_priv->irq_lock); @@ -386,8 +392,9 @@ static void i915_hotplug_work_func(struct work_struct *work) struct intel_encoder *encoder = intel_attached_encoder(connector); - DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n", - connector->base.name, pin); + drm_dbg_kms(&dev_priv->drm, + "Connector %s (pin %i) received hotplug event.\n", + connector->base.name, pin); switch (encoder->hotplug(encoder, connector, hpd_event_bits & hpd_bit)) { @@ -472,9 +479,10 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, long_hpd = long_mask & BIT(pin); - DRM_DEBUG_DRIVER("digital hpd on [ENCODER:%d:%s] - %s\n", -encoder->base.base.id, encoder->base.name, -long_hpd ? "long" : "short"); + drm_dbg(&dev_priv->drm, +
Re: [Intel-gfx] [PATCH] drm/i915/perf: Invalidate OA TLB when closing perf stream
On 09/03/2020 23:10, Umesh Nerlige Ramappa wrote: On running several back to back perf capture sessions involving closing and opening the perf stream, invalid OA reports are seen in the beginning of the OA buffer in some sessions. Fix this by invalidating OA TLB when the perf stream is closed or disabled on gen12. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Lionel Landwerlin --- drivers/gpu/drm/i915/i915_perf.c | 8 drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1b074bb4a7fe..551be589d6f4 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2700,6 +2700,14 @@ static void gen12_oa_disable(struct i915_perf_stream *stream) 50)) drm_err(&stream->perf->i915->drm, "wait for OA to be disabled timed out\n"); + + intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR, 1); + if (intel_wait_for_register(uncore, + GEN12_OA_TLB_INV_CR, + 1, 0, + 50)) + drm_err(&stream->perf->i915->drm, + "wait for OA tlb invalidate timed out\n"); } /** diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 79ae9654dac9..95725e61d9f1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -693,6 +693,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M(6 << 3) #define OABUFFER_SIZE_16M (7 << 3) +#define GEN12_OA_TLB_INV_CR _MMIO(0xceec) + /* Gen12 OAR unit */ #define GEN12_OAR_OACONTROL _MMIO(0x2960) #define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/tgl: WaEnablePreemptionGranularityControlByUMD
From: Tvrtko Ursulin Certain workloads need the ability to disable preemption completely so allow them to do that by whitelisting GEN8_CS_CHICKEN1. Signed-off-by: Tvrtko Ursulin Cc: Michal Mrozek Cc: Tony Ye Cc: Rafael Antognolli Cc: Jason Ekstrand --- We need confirmation and acks from all three userspace components here. Especially since my impression was some are for and some were against whitelisting this one. --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 391f39b1fb26..37becdf77427 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1276,6 +1276,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) /* Wa_1806527549:tgl */ whitelist_reg(w, HIZ_CHICKEN); + + /* WaEnablePreemptionGranularityControlByUMD:tgl */ + whitelist_reg(w, GEN8_CS_CHICKEN1); break; default: break; -- 2.20.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] list: Prevent compiler reloads inside 'safe' list iteration
Instruct the compiler to read the next element in the list iteration once, and that it is not allowed to reload the value from the stale element later. This is important as during the course of the safe iteration, the stale element may be poisoned (unbeknownst to the compiler). This helps prevent kcsan warnings over 'unsafe' conduct in releasing the list elements during list_for_each_entry_safe() and friends. Signed-off-by: Chris Wilson Cc: Andrew Morton Cc: "Paul E. McKenney" Cc: Randy Dunlap Cc: sta...@vger.kernel.org --- include/linux/list.h | 50 +++- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/include/linux/list.h b/include/linux/list.h index 884216db3246..c4d215d02259 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -536,6 +536,17 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_next_entry(pos, member) \ list_entry((pos)->member.next, typeof(*(pos)), member) +/** + * list_next_entry_safe - get the next element in list [once] + * @pos: the type * to cursor + * @member:the name of the list_head within the struct. + * + * Like list_next_entry() but prevents the compiler from reloading the + * next element. + */ +#define list_next_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.next), typeof(*(pos)), member) + /** * list_prev_entry - get the prev element in list * @pos: the type * to cursor @@ -544,6 +555,17 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_prev_entry(pos, member) \ list_entry((pos)->member.prev, typeof(*(pos)), member) +/** + * list_prev_entry_safe - get the prev element in list [once] + * @pos: the type * to cursor + * @member:the name of the list_head within the struct. + * + * Like list_prev_entry() but prevents the compiler from reloading the + * previous element. 
+ */ +#define list_prev_entry_safe(pos, member) \ + list_entry(READ_ONCE((pos)->member.prev), typeof(*(pos)), member) + /** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. @@ -686,9 +708,9 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_safe(pos, n, head, member) \ for (pos = list_first_entry(head, typeof(*pos), member),\ - n = list_next_entry(pos, member); \ + n = list_next_entry_safe(pos, member); \ &pos->member != (head);\ -pos = n, n = list_next_entry(n, member)) +pos = n, n = list_next_entry_safe(n, member)) /** * list_for_each_entry_safe_continue - continue list iteration safe against removal @@ -700,11 +722,11 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate over list of given type, continuing after current point, * safe against removal of list entry. */ -#define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = list_next_entry(pos, member), \ - n = list_next_entry(pos, member); \ -&pos->member != (head); \ -pos = n, n = list_next_entry(n, member)) +#define list_for_each_entry_safe_continue(pos, n, head, member)\ + for (pos = list_next_entry(pos, member),\ + n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) /** * list_for_each_entry_safe_from - iterate over list from current point safe against removal @@ -716,10 +738,10 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate over list of given type from current point, safe against * removal of list entry. 
*/ -#define list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = list_next_entry(pos, member); \ -&pos->member != (head); \ -pos = n, n = list_next_entry(n, member)) +#define list_for_each_entry_safe_from(pos, n, head, member)\ + for (n = list_next_entry_safe(pos, member); \ +&pos->member != (head);\ +pos = n, n = list_next_entry_safe(n, member)) /** * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal @@ -733,9 +755,9 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_safe_reverse(pos, n, head, member) \ for (pos = list_last_entry(head, typeof(*pos), member),
Re: [Intel-gfx] [PULL] gvt-fixes
On Tue, 10 Mar 2020, Zhenyu Wang wrote: > Hi, > > Here's more gvt fixes for 5.6. Fix timer issue caused by idr destroy > change and VBT size error. Pulled and pushed to drm-intel-fixes, thanks. BR, Jani. > > Thanks > -- > > The following changes since commit b549c252b1292aea959cd9b83537fcb9384a6112: > > drm/i915/gvt: Fix orphan vgpu dmabuf_objs' lifetime (2020-02-25 16:14:20 > +0800) > > are available in the Git repository at: > > https://github.com/intel/gvt-linux tags/gvt-fixes-2020-03-10 > > for you to fetch changes up to 2fa7e15c5f466fdd0c0b196b1dc4a65d191efd96: > > drm/i915/gvt: Fix emulated vbt size issue (2020-03-06 09:35:30 +0800) > > > gvt-fixes-2020-03-10 > > - Fix vgpu idr destroy causing timer destroy failure (Zhenyu) > - Fix VBT size (Tina) > > > Tina Zhang (1): > drm/i915/gvt: Fix emulated vbt size issue > > Zhenyu Wang (1): > drm/i915/gvt: Fix unnecessary schedule timer when no vGPU exits > > drivers/gpu/drm/i915/gvt/opregion.c | 5 ++--- > drivers/gpu/drm/i915/gvt/vgpu.c | 12 +--- > 2 files changed, 11 insertions(+), 6 deletions(-) -- Jani Nikula, Intel Open Source Graphics Center ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/tgl: WaEnablePreemptionGranularityControlByUMD
>> We need confirmation and acks from all three userspace components here. >> Especially since my impression was some are for and some were against >> whitelisting this one. Acked-by: Michal Mrozek ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 03/17] drm/i915: Improve the start alignment of bonded pairs
On 06/03/2020 13:38, Chris Wilson wrote: Always wait on the start of the signaler request to reduce the problem of dequeueing the bonded pair too early -- we want both payloads to start at the same time, with no latency, and yet still allow others to make full use of the slack in the system. This reduce the amount of time we spend waiting on the semaphore used to synchronise the start of the bonded payload. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_request.c | 41 + 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 66efd16c4850..db11006b4ac9 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1128,14 +1128,45 @@ __i915_request_await_execution(struct i915_request *to, &from->fence)) return 0; - /* Ensure both start together [after all semaphores in signal] */ - if (intel_engine_has_semaphores(to->engine)) - err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); - else - err = i915_request_await_start(to, from); + /* +* Wait until the start of this request. +* +* The execution cb fires when we submit the request to HW. But in +* many cases this may be long before the request itself is ready to +* run (consider that we submit 2 requests for the same context, where +* the request of interest is behind an indefinite spinner). So we hook +* up to both to reduce our queues and keep the execution lag minimised +* in the worst case, though we hope that the await_start is elided. +*/ + err = i915_request_await_start(to, from); if (err < 0) return err; + /* +* Ensure both start together [after all semaphores in signal] +* +* Now that we are queued to the HW at roughly the same time (thanks +* to the execute cb) and are ready to run at roughly the same time +* (thanks to the await start), our signaler may still be indefinitely +* delayed by waiting on a semaphore from a remote engine. 
If our +* signaler depends on a semaphore, so indirectly do we, and we do not +* want to start our payload until our signaler also starts theirs. +* So we wait. +* +* However, there is also a second condition for which we need to wait +* for the precise start of the signaler. Consider that the signaler +* was submitted in a chain of requests following another context +* (with just an ordinary intra-engine fence dependency between the +* two). In this case the signaler is queued to HW, but not for +* immediate execution, and so we must wait until it reaches the +* active slot. +*/ + if (intel_engine_has_semaphores(to->engine)) { + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + if (err < 0) + return err; + } + /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { err = i915_sched_node_add_dependency(&to->sched, &from->sched); Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/17] drm/i915: Tweak scheduler's kick_submission()
On 06/03/2020 13:38, Chris Wilson wrote: Skip useless priority bumping on adding a new dependency, but otherwise prevent tasklet scheduling until we have completed all the potential rescheduling. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_scheduler.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 52f71e83e088..603cba36d6a4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -209,6 +209,8 @@ static void kick_submission(struct intel_engine_cs *engine, if (!inflight) goto unlock; + engine->execlists.queue_priority_hint = prio; + What is the significance of moving this up? I couldn't correlate it to the commit message. /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. @@ -216,7 +218,6 @@ static void kick_submission(struct intel_engine_cs *engine, if (inflight->context == rq->context) goto unlock; - engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); @@ -463,11 +464,15 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, if (!dep) return -ENOMEM; + local_bh_disable(); + if (!__i915_sched_node_add_dependency(node, signal, dep, I915_DEPENDENCY_EXTERNAL | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); + local_bh_enable(); /* kick submission tasklet */ + And this presumably postpones the tasklet until __bump_priority -> __i915_schedule is finished. But then why the request submission path which calls __i915_sched_node_add_dependency directly does not need this treatment? Regards, Tvrtko return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 0/7] Asynchronous flip implementation for i915
> -Original Message- > From: Zanoni, Paulo R > Sent: Tuesday, March 10, 2020 5:35 AM > To: B S, Karthik ; intel-gfx@lists.freedesktop.org > Cc: ville.syrj...@linux.intel.com; Kulkarni, Vandita > ; Shankar, Uma > Subject: Re: [RFC 0/7] Asynchronous flip implementation for i915 > > Em sex, 2020-03-06 às 17:09 +0530, Karthik B S escreveu: > > Without async flip support in the kernel, fullscreen apps where game > > resolution is equal to the screen resolution, must perform an extra > > blit per frame prior to flipping. > > > > Asynchronous page flips will also boost the FPS of Mesa benchmarks. > > > Thanks a lot for doing this work! Thanks a lot for the review. > > I did some quick smoke tests on a Gemini Lake and while this appears to be > working fine with xf86-video-modesetting, the "pageflip.c" program I shared > previously breaks when you launch it as "./pageflip -n": this argument makes > the program *not* request for page flip events (by not setting > DRM_MODE_PAGE_FLIP_EVENT) and just try to flip as fast as it can. I didn't > investigate why this breaks, but it's probably some corner case the series is > forgetting. I hadn't tried out this option. Thanks for pointing this out. Will fix this in the next revision. > > Also, doesn't async pageflip interact with some other display features? > Don't we need to disable at least one of FBC, PSR and/or render compression > when using async page flips? > > Ville mentioned some possible interactions with SURF/OFFSET tracking too > (framebuffers not being at the start of the bo), which doesn't seem to be > covered by the series. > Yes, both the above hasn't been taken care of in this series. Thanks for pointing it out. Will check it and update in the next revision. 
Thanks, Karthik > Thanks, > Paulo > > > > > Karthik B S (7): > > drm/i915: Define flip done functions and enable IER > > drm/i915: Add support for async flips in I915 > > drm/i915: Make commit call blocking in case of async flips > > drm/i915: Add checks specific to async flips > > drm/i915: Add flip_done_handler definition > > drm/i915: Enable and handle flip done interrupt > > drm/i915: Do not call drm_crtc_arm_vblank_event in async flips > > > > drivers/gpu/drm/i915/display/intel_display.c | 55 +-- > > drivers/gpu/drm/i915/display/intel_sprite.c | 12 ++-- > > drivers/gpu/drm/i915/i915_irq.c | 58 +++- > > drivers/gpu/drm/i915/i915_irq.h | 2 + > > drivers/gpu/drm/i915/i915_reg.h | 1 + > > 5 files changed, 117 insertions(+), 11 deletions(-) > > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Defer semaphore priority bumping to a workqueue
Since the semaphore fence may be signaled from inside an interrupt handler from inside a request holding its request->lock, we cannot then enter into the engine->active.lock for processing the semaphore priority bump as we may traverse our call tree and end up on another held request. CPU 0: [ 2243.218864] _raw_spin_lock_irqsave+0x9a/0xb0 [ 2243.218867] i915_schedule_bump_priority+0x49/0x80 [i915] [ 2243.218869] semaphore_notify+0x6d/0x98 [i915] [ 2243.218871] __i915_sw_fence_complete+0x61/0x420 [i915] [ 2243.218874] ? kmem_cache_free+0x211/0x290 [ 2243.218876] i915_sw_fence_complete+0x58/0x80 [i915] [ 2243.218879] dma_i915_sw_fence_wake+0x3e/0x80 [i915] [ 2243.218881] signal_irq_work+0x571/0x690 [i915] [ 2243.218883] irq_work_run_list+0xd7/0x120 [ 2243.218885] irq_work_run+0x1d/0x50 [ 2243.218887] smp_irq_work_interrupt+0x21/0x30 [ 2243.218889] irq_work_interrupt+0xf/0x20 CPU 1: [ 2242.173107] _raw_spin_lock+0x8f/0xa0 [ 2242.173110] __i915_request_submit+0x64/0x4a0 [i915] [ 2242.173112] __execlists_submission_tasklet+0x8ee/0x2120 [i915] [ 2242.173114] ? i915_sched_lookup_priolist+0x1e3/0x2b0 [i915] [ 2242.173117] execlists_submit_request+0x2e8/0x2f0 [i915] [ 2242.173119] submit_notify+0x8f/0xc0 [i915] [ 2242.173121] __i915_sw_fence_complete+0x61/0x420 [i915] [ 2242.173124] ? _raw_spin_unlock_irqrestore+0x39/0x40 [ 2242.173137] i915_sw_fence_complete+0x58/0x80 [i915] [ 2242.173140] i915_sw_fence_commit+0x16/0x20 [i915] CPU 2: [ 2242.173107] _raw_spin_lock+0x8f/0xa0 [ 2242.173110] __i915_request_submit+0x64/0x4a0 [i915] [ 2242.173112] __execlists_submission_tasklet+0x8ee/0x2120 [i915] [ 2242.173114] ? 
i915_sched_lookup_priolist+0x1e3/0x2b0 [i915] [ 2242.173117] execlists_submit_request+0x2e8/0x2f0 [i915] [ 2242.173119] submit_notify+0x8f/0xc0 [i915] Closes: https://gitlab.freedesktop.org/drm/intel/issues/1318 Fixes: b7404c7ecb38 ("drm/i915: Bump ready tasks ahead of busywaits") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.2+ --- drivers/gpu/drm/i915/i915_request.c | 22 +- drivers/gpu/drm/i915/i915_request.h | 2 ++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 04b52bf347bf..129357d4b599 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -588,19 +588,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } +static void irq_semaphore_cb(struct irq_work *wrk) +{ + struct i915_request *rq = + container_of(wrk, typeof(*rq), semaphore_work); + + i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); + i915_request_put(rq); +} + static int __i915_sw_fence_call semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { - struct i915_request *request = - container_of(fence, typeof(*request), semaphore); + struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); switch (state) { case FENCE_COMPLETE: - i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE); + if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { + i915_request_get(rq); + init_irq_work(&rq->semaphore_work, irq_semaphore_cb); + irq_work_queue(&rq->semaphore_work); + } break; case FENCE_FREE: - i915_request_put(request); + i915_request_put(rq); break; } @@ -1369,9 +1381,9 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. 
*/ - i915_sw_fence_commit(&rq->semaphore); if (attr && rq->engine->schedule) rq->engine->schedule(rq, attr); + i915_sw_fence_commit(&rq->semaphore); i915_sw_fence_commit(&rq->submit); } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 6020d5b2a3df..3c552bfea67a 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -26,6 +26,7 @@ #define I915_REQUEST_H #include +#include #include #include "gem/i915_gem_context_types.h" @@ -208,6 +209,7 @@ struct i915_request { }; struct list_head execute_cb; struct i915_sw_fence semaphore; + struct irq_work semaphore_work; /* * A list of everyone we wait upon, and everyone who waits upon us. -- 2.20.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 06/17] drm/i915: Extend i915_request_await_active to use all timelines
On 06/03/2020 13:38, Chris Wilson wrote: Extend i915_request_await_active() to be able to asynchronously wait on all the tracked timelines simultaneously. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_active.c | 51 +++--- drivers/gpu/drm/i915/i915_active.h | 5 ++- drivers/gpu/drm/i915/i915_vma.c| 2 +- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 1826de14d2da..e659688db043 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -518,23 +518,52 @@ int i915_active_wait(struct i915_active *ref) return 0; } -int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) +static int await_active(struct i915_request *rq, + struct i915_active_fence *active) +{ + struct dma_fence *fence; + + if (is_barrier(active)) + return 0; + + fence = i915_active_fence_get(active); + if (fence) { + int err; + + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + if (err < 0) + return err; + } + + return 0; +} + +int i915_request_await_active(struct i915_request *rq, + struct i915_active *ref, + unsigned int flags) { int err = 0; + /* We must always wait for the exclusive fence! 
*/ if (rcu_access_pointer(ref->excl.fence)) { - struct dma_fence *fence; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&ref->excl.fence); - rcu_read_unlock(); - if (fence) { - err = i915_request_await_dma_fence(rq, fence); - dma_fence_put(fence); - } + err = await_active(rq, &ref->excl); + if (err) + return err; } - /* In the future we may choose to await on all fences */ + if (flags & I915_ACTIVE_AWAIT_ALL && i915_active_acquire_if_busy(ref)) { + struct active_node *it, *n; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + err = await_active(rq, &it->base); + if (err) + break; + } + i915_active_release(ref); + if (err) + return err; + } return err; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 7e438501333e..e3c13060c4c7 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -183,7 +183,10 @@ static inline bool i915_active_has_exclusive(struct i915_active *ref) int i915_active_wait(struct i915_active *ref); -int i915_request_await_active(struct i915_request *rq, struct i915_active *ref); +int i915_request_await_active(struct i915_request *rq, + struct i915_active *ref, + unsigned int flags); +#define I915_ACTIVE_AWAIT_ALL BIT(0) int i915_active_acquire(struct i915_active *ref); bool i915_active_acquire_if_busy(struct i915_active *ref); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 3dde671145f7..5b3efb43a8ef 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1173,7 +1173,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) GEM_BUG_ON(!i915_vma_is_pinned(vma)); /* Wait for the vma to be bound before we start! 
*/ - err = i915_request_await_active(rq, &vma->active); + err = i915_request_await_active(rq, &vma->active, 0); if (err) return err; Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf
On 06/03/2020 13:38, Chris Wilson wrote: Under ideal circumstances, the driver should be able to keep the GPU fully saturated with work. Measure how close to ideal we get under the harshest of conditions with no user payload. Signed-off-by: Chris Wilson --- .../drm/i915/selftests/i915_perf_selftests.h | 1 + drivers/gpu/drm/i915/selftests/i915_request.c | 285 +- 2 files changed, 285 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index 3bf7f53e9924..d8da142985eb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -16,5 +16,6 @@ * Tests are executed in order by igt/i915_selftest */ selftest(engine_cs, intel_engine_cs_perf_selftests) +selftest(request, i915_request_perf_selftests) selftest(blt, i915_gem_object_blt_perf_selftests) selftest(region, intel_memory_region_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index f89d9c42f1fa..d4c088cfe4e1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -23,6 +23,7 @@ */ #include +#include #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" @@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg) struct igt_live_test t; unsigned int idx; - snprintf(name, sizeof(name), "%pS", fn); + snprintf(name, sizeof(name), "%ps", *fn); err = igt_live_test_begin(&t, i915, __func__, name); if (err) break; @@ -1470,3 +1471,285 @@ int i915_request_live_selftests(struct drm_i915_private *i915) return i915_subtests(tests, i915); } + +struct perf_parallel { + struct intel_engine_cs *engine; + unsigned long count; + ktime_t time; + ktime_t busy; + u64 runtime; +}; + +static int switch_to_kernel_sync(struct intel_context *ce, int err) +{ + struct i915_request *rq; + struct dma_fence *fence; + + rq = 
intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (fence) { + i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + rq = i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err) + err = -ETIME; + i915_request_put(rq); + + while (!err && !intel_engine_is_idle(ce->engine)) + intel_engine_flush_submission(ce->engine); + + return err; +} + +static int perf_sync(void *arg) +{ + struct perf_parallel *p = arg; + struct intel_engine_cs *engine = p->engine; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + bool busy; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine) && + !intel_enable_engine_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + p->time = ktime_get(); + count = 0; + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + break; + + count++; + } while (!__igt_timeout(end_time, NULL)); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + p->busy); + intel_disable_engine_stats(engine); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int perf_many(void *arg) +{ + struct perf_parallel *p = arg; + struct intel_engine_cs *engine = p->engine; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long 
count; + int err = 0; + bool busy; + + ce = intel_context
Re: [Intel-gfx] [PATCH] drm/i915: Defer semaphore priority bumping to a workqueue
Quoting Chris Wilson (2020-03-10 10:17:20) > Since the semaphore fence may be signaled from inside an interrupt > handler from inside a request holding its request->lock, we cannot then > enter into the engine->active.lock for processing the semaphore priority > bump as we may traverse our call tree and end up on another held > request. > > CPU 0: > [ 2243.218864] _raw_spin_lock_irqsave+0x9a/0xb0 > [ 2243.218867] i915_schedule_bump_priority+0x49/0x80 [i915] > [ 2243.218869] semaphore_notify+0x6d/0x98 [i915] > [ 2243.218871] __i915_sw_fence_complete+0x61/0x420 [i915] > [ 2243.218874] ? kmem_cache_free+0x211/0x290 > [ 2243.218876] i915_sw_fence_complete+0x58/0x80 [i915] > [ 2243.218879] dma_i915_sw_fence_wake+0x3e/0x80 [i915] > [ 2243.218881] signal_irq_work+0x571/0x690 [i915] > [ 2243.218883] irq_work_run_list+0xd7/0x120 > [ 2243.218885] irq_work_run+0x1d/0x50 > [ 2243.218887] smp_irq_work_interrupt+0x21/0x30 > [ 2243.218889] irq_work_interrupt+0xf/0x20 > > CPU 1: > [ 2242.173107] _raw_spin_lock+0x8f/0xa0 > [ 2242.173110] __i915_request_submit+0x64/0x4a0 [i915] > [ 2242.173112] __execlists_submission_tasklet+0x8ee/0x2120 [i915] > [ 2242.173114] ? i915_sched_lookup_priolist+0x1e3/0x2b0 [i915] > [ 2242.173117] execlists_submit_request+0x2e8/0x2f0 [i915] > [ 2242.173119] submit_notify+0x8f/0xc0 [i915] > [ 2242.173121] __i915_sw_fence_complete+0x61/0x420 [i915] > [ 2242.173124] ? _raw_spin_unlock_irqrestore+0x39/0x40 > [ 2242.173137] i915_sw_fence_complete+0x58/0x80 [i915] > [ 2242.173140] i915_sw_fence_commit+0x16/0x20 [i915] > > CPU 2: > [ 2242.173107] _raw_spin_lock+0x8f/0xa0 > [ 2242.173110] __i915_request_submit+0x64/0x4a0 [i915] > [ 2242.173112] __execlists_submission_tasklet+0x8ee/0x2120 [i915] > [ 2242.173114] ? i915_sched_lookup_priolist+0x1e3/0x2b0 [i915] > [ 2242.173117] execlists_submit_request+0x2e8/0x2f0 [i915] > [ 2242.173119] submit_notify+0x8f/0xc0 [i915] Ignore this, I thought this was a third interesting chunk, but I copied the same one twice. 
-Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 1/7] drm/i915: Define flip done functions and enable IER
> -Original Message- > From: Zanoni, Paulo R > Sent: Tuesday, March 10, 2020 4:48 AM > To: B S, Karthik ; intel-gfx@lists.freedesktop.org > Cc: ville.syrj...@linux.intel.com; Kulkarni, Vandita > ; Shankar, Uma > Subject: Re: [RFC 1/7] drm/i915: Define flip done functions and enable IER > > Em sex, 2020-03-06 às 17:09 +0530, Karthik B S escreveu: > > Add enable/disable flip done functions and enable the flip done > > interrupt in IER. > > > > Flip done interrupt is used to send the page flip event as soon as the > > surface address is written as per the requirement of async flips. > > > > Signed-off-by: Karthik B S > > --- > > drivers/gpu/drm/i915/i915_irq.c | 37 > > - > drivers/gpu/drm/i915/i915_irq.h | > > 2 ++ > > 2 files changed, 38 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > > b/drivers/gpu/drm/i915/i915_irq.c index ecf07b0faad2..5955e737a45d > > 100644 > > --- a/drivers/gpu/drm/i915/i915_irq.c > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > @@ -2626,6 +2626,27 @@ int bdw_enable_vblank(struct drm_crtc *crtc) > > return 0; > > } > > > > +void icl_enable_flip_done(struct drm_crtc *crtc) > > > Platform prefixes indicate the first platform that is able to run this > function. > In this case I can't even see which platforms will run the function because > it's > only later in the series that this function will get called. I'm not a fan of > this > patch splitting style where a function gets added in patch X and then used in > patch X+Y. IMHO functions should only be introduced in patches where they > are used. > This makes the code much easier to review. Thanks for the review. Will update the patches as per your feedback. > > So, shouldn't this be skl_enable_flip_done()? Agreed. Will update the function name. 
> > > +{ > > + struct drm_i915_private *dev_priv = to_i915(crtc->dev); > > + enum pipe pipe = to_intel_crtc(crtc)->pipe; > > + struct drm_vblank_crtc *vblank = &dev_priv->drm.vblank[pipe]; > > + unsigned long irqflags; > > + > > + /* Make sure that vblank is not enabled, as we are already sending > > +* the page flip event in the flip_done_handler. > > +*/ > > + if (atomic_read(&vblank->refcount) != 0) > > + drm_crtc_vblank_put(crtc); > > This is the kind of thing that will be much easier to review when this patch > gets squashed in the one that makes use of these functions. Will update the patches as per your feedback. > > Even after reading the whole series, this put() doesn't seem correct to me. > What is the problem with having vblanks enabled? Is it because we were > sending duplicate vblank events without these lines? Where is the > get() that triggers this put()? Please help me understand this. Checked the code once more after your review and I agree that this wouldn't be needed as there is no get() called for which this put() would be needed. And as the event is sent in the flip_done_handler in this series, there is no need to disable vblank. 
> > > > + > > + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); > > + > > + bdw_enable_pipe_irq(dev_priv, pipe, > GEN9_PIPE_PLANE1_FLIP_DONE); > > + > > + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); > > + > > +} > > + > > /* Called from drm generic code, passed 'crtc' which > > * we use as a pipe index > > */ > > @@ -2686,6 +2707,20 @@ void bdw_disable_vblank(struct drm_crtc *crtc) > > spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } > > > > + > > +void icl_disable_flip_done(struct drm_crtc *crtc) { > > + struct drm_i915_private *dev_priv = to_i915(crtc->dev); > > + enum pipe pipe = to_intel_crtc(crtc)->pipe; > > + unsigned long irqflags; > > + > > + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); > > + > > + bdw_disable_pipe_irq(dev_priv, pipe, > GEN9_PIPE_PLANE1_FLIP_DONE); > > + > > + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } > > + > > static void ibx_irq_reset(struct drm_i915_private *dev_priv) { > > struct intel_uncore *uncore = &dev_priv->uncore; @@ -3375,7 > +3410,7 > > @@ static void gen8_de_irq_postinstall(struct drm_i915_private > *dev_priv) > > de_port_masked |= CNL_AUX_CHANNEL_F; > > > > de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | > > - GEN8_PIPE_FIFO_UNDERRUN; > > + GEN8_PIPE_FIFO_UNDERRUN | > GEN9_PIPE_PLANE1_FLIP_DONE; > > This is going to set this bit for gen8 too, which is something we probably > don't > want since it doesn't exist there. Will add a gen check here in the next revision. > > The patch also does not add the handler for the interrupt, which doesn't > make sense (see my point above). Noted. > > Also, don't we want to do like GEN8_PIPE_VBLANK and also set it on the > power_well_post_enable hook? If not, why? This is probably a case we > should write an IGT subtest for. Will check this and update in the next revision. > > > > > de_p
Re: [Intel-gfx] [RFC 2/7] drm/i915: Add support for async flips in I915
> -Original Message- > From: Zanoni, Paulo R > Sent: Tuesday, March 10, 2020 4:48 AM > To: B S, Karthik ; intel-gfx@lists.freedesktop.org > Cc: ville.syrj...@linux.intel.com; Kulkarni, Vandita > ; Shankar, Uma > Subject: Re: [RFC 2/7] drm/i915: Add support for async flips in I915 > > Em sex, 2020-03-06 às 17:09 +0530, Karthik B S escreveu: > > Enable support for async flips in I915. > > Set the Async Address Update Enable bit in plane ctl when async flip > > is requested. > > > > Signed-off-by: Karthik B S > > --- > > drivers/gpu/drm/i915/display/intel_display.c | 4 > > drivers/gpu/drm/i915/i915_reg.h | 1 + > > 2 files changed, 5 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c > > b/drivers/gpu/drm/i915/display/intel_display.c > > index dd47eb65b563..4ce9897f5c58 100644 > > --- a/drivers/gpu/drm/i915/display/intel_display.c > > +++ b/drivers/gpu/drm/i915/display/intel_display.c > > @@ -4756,6 +4756,9 @@ u32 skl_plane_ctl(const struct intel_crtc_state > *crtc_state, > > plane_ctl |= > PLANE_CTL_YUV_RANGE_CORRECTION_DISABLE; > > } > > > > + if (crtc_state->uapi.async_flip) > > + plane_ctl |= PLANE_CTL_ASYNC_FLIP; > > + > > plane_ctl |= skl_plane_ctl_format(fb->format->format); > > plane_ctl |= skl_plane_ctl_tiling(fb->modifier); > > plane_ctl |= skl_plane_ctl_rotate(rotation & > DRM_MODE_ROTATE_MASK); > > @@ -17738,6 +17741,7 @@ static void intel_mode_config_init(struct > > drm_i915_private *i915) > > > > mode_config->funcs = &intel_mode_funcs; > > > > + mode_config->async_page_flip = true; > > We should only enable the feature to user space after it has been fully > implemented inside the Kernel. Think about the case where git-bisect > decides to land at exactly this commit when someone is debugging a failure > unrelated to async vblanks. > > Also, when features have trivial on/off switches like the line above, it's > better if the patch that enables the feature only contains the line that > toggles > the on/off switch. 
This way, if a revert is needed, we can just switch it to > off > without removing more code. Also, it enables us to land the rest of the code > while keeping the feature off for stabilization. > > Also, the line above is enabling the feature for every platform, which is > probably not a goal of this series. Agreed. Will make the on/off part a separate patch and also add a gen check for it. > > > > /* > > * Maximum framebuffer dimensions, chosen to match > > * the maximum render engine surface size on gen4+. > > diff --git a/drivers/gpu/drm/i915/i915_reg.h > > b/drivers/gpu/drm/i915/i915_reg.h index 80cf02a6eec1..42037aee9b78 > > 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -6794,6 +6794,7 @@ enum { > > #define PLANE_CTL_TILED_X(1 << 10) > > #define PLANE_CTL_TILED_Y(4 << 10) > > #define PLANE_CTL_TILED_YF (5 << 10) > > +#define PLANE_CTL_ASYNC_FLIP (1 << 9) > > #define PLANE_CTL_FLIP_HORIZONTAL(1 << 8) > > #define PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE (1 << 4) /* > TGL+ */ > > #define PLANE_CTL_ALPHA_MASK (0x3 << 4) /* Pre-GLK > */ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [drm-intel:topic/core-for-CI 18/20] init/Kconfig:77: symbol BROKEN is selected by DRM_I915_DEBUG
tree: git://anongit.freedesktop.org/drm-intel topic/core-for-CI head: 72212a758bdd916331477e782bdad1fa3f625322 commit: 19f102d485b9f5e03677f73133d9922e2650686f [18/20] Revert "drm/i915: Don't select BROKEN" config: powerpc-randconfig-a001-20200310 compiler: powerpc-linux-gcc (GCC) 9.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout 19f102d485b9f5e03677f73133d9922e2650686f GCC_VERSION=9.2.0 make.cross ARCH=powerpc randconfig GCC_VERSION=9.2.0 make.cross ARCH=powerpc If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): >> arch/powerpc/platforms/embedded6xx/Kconfig:2:error: recursive dependency >> detected! arch/powerpc/platforms/embedded6xx/Kconfig:2: symbol EMBEDDED6xx depends on BROKEN_ON_SMP >> init/Kconfig:80: symbol BROKEN_ON_SMP depends on BROKEN >> init/Kconfig:77: symbol BROKEN is selected by DRM_I915_DEBUG >> drivers/gpu/drm/i915/Kconfig.debug:19: symbol DRM_I915_DEBUG depends on >> DRM_I915 >> drivers/gpu/drm/i915/Kconfig:2: symbol DRM_I915 depends on DRM >> drivers/gpu/drm/Kconfig:8: symbol DRM depends on AGP >> drivers/char/agp/Kconfig:2: symbol AGP depends on PCI drivers/pci/Kconfig:16: symbol PCI depends on HAVE_PCI >> drivers/pci/Kconfig:7: symbol HAVE_PCI is selected by FORCE_PCI >> drivers/pci/Kconfig:11: symbol FORCE_PCI is selected by MVME5100 arch/powerpc/platforms/embedded6xx/Kconfig:51: symbol MVME5100 depends on EMBEDDED6xx For a resolution refer to Documentation/kbuild/kconfig-language.rst subsection "Kconfig recursive dependency limitations" vim +77 init/Kconfig ^1da177e4c3f41 Linus Torvalds 2005-04-16 76 ^1da177e4c3f41 Linus Torvalds 2005-04-16 @77 config BROKEN ^1da177e4c3f41 Linus Torvalds 2005-04-16 78bool ^1da177e4c3f41 Linus Torvalds 2005-04-16 79 ^1da177e4c3f41 Linus Torvalds 2005-04-16 @80 config BROKEN_ON_SMP ^1da177e4c3f41 Linus Torvalds 2005-04-16 81bool ^1da177e4c3f41 
Linus Torvalds 2005-04-16 82depends on BROKEN || !SMP ^1da177e4c3f41 Linus Torvalds 2005-04-16 83default y ^1da177e4c3f41 Linus Torvalds 2005-04-16 84 :: The code at line 77 was first introduced by commit :: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2 :: TO: Linus Torvalds :: CC: Linus Torvalds --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/17] drm/i915: Tweak scheduler's kick_submission()
Quoting Tvrtko Ursulin (2020-03-10 10:07:33) > > On 06/03/2020 13:38, Chris Wilson wrote: > > Skip useless priority bumping on adding a new dependency, but otherwise > > prevent tasklet scheduling until we have completed all the potential > > rescheduling. > > > > Signed-off-by: Chris Wilson > > --- > > drivers/gpu/drm/i915/i915_scheduler.c | 7 ++- > > 1 file changed, 6 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c > > b/drivers/gpu/drm/i915/i915_scheduler.c > > index 52f71e83e088..603cba36d6a4 100644 > > --- a/drivers/gpu/drm/i915/i915_scheduler.c > > +++ b/drivers/gpu/drm/i915/i915_scheduler.c > > @@ -209,6 +209,8 @@ static void kick_submission(struct intel_engine_cs > > *engine, > > if (!inflight) > > goto unlock; > > > > + engine->execlists.queue_priority_hint = prio; > > + > > What is the significance of moving this up? I couldn't correlate it to > the commit message. It's correcting the priority bumping. If we have the same context as active, we should not be skipping the hint update and so avoid the useless bump on a later dependency. > > /* > >* If we are already the currently executing context, don't > >* bother evaluating if we should preempt ourselves. 
> > @@ -216,7 +218,6 @@ static void kick_submission(struct intel_engine_cs > > *engine, > > if (inflight->context == rq->context) > > goto unlock; > > > > - engine->execlists.queue_priority_hint = prio; > > if (need_preempt(prio, rq_prio(inflight))) > > tasklet_hi_schedule(&engine->execlists.tasklet); > > > > @@ -463,11 +464,15 @@ int i915_sched_node_add_dependency(struct > > i915_sched_node *node, > > if (!dep) > > return -ENOMEM; > > > > + local_bh_disable(); > > + > > if (!__i915_sched_node_add_dependency(node, signal, dep, > > I915_DEPENDENCY_EXTERNAL | > > I915_DEPENDENCY_ALLOC)) > > i915_dependency_free(dep); > > > > + local_bh_enable(); /* kick submission tasklet */ > > + > > And this presumably postpones the tasklet until __bump_priority -> > __i915_schedule is finished. But then why the request submission path > which calls __i915_sched_node_add_dependency directly does not need this > treatment? Because we haven't completed the updates by that point, and upon actual request submission the tasklet is flushed. Plus on not all request submission paths would it be legal. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [drm-intel:topic/core-for-CI 18/20] arch/powerpc/platforms/embedded6xx/Kconfig:2: symbol EMBEDDED6xx depends on BROKEN_ON_SMP
tree: git://anongit.freedesktop.org/drm-intel topic/core-for-CI head: 72212a758bdd916331477e782bdad1fa3f625322 commit: 19f102d485b9f5e03677f73133d9922e2650686f [18/20] Revert "drm/i915: Don't select BROKEN" config: powerpc-defconfig compiler: powerpc64-linux-gcc (GCC) 9.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout 19f102d485b9f5e03677f73133d9922e2650686f GCC_VERSION=9.2.0 make.cross ARCH=powerpc defconfig GCC_VERSION=9.2.0 make.cross ARCH=powerpc If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): arch/powerpc/platforms/embedded6xx/Kconfig:2:error: recursive dependency detected! >> arch/powerpc/platforms/embedded6xx/Kconfig:2: symbol EMBEDDED6xx depends on >> BROKEN_ON_SMP init/Kconfig:80: symbol BROKEN_ON_SMP depends on BROKEN init/Kconfig:77: symbol BROKEN is selected by DRM_I915_DEBUG drivers/gpu/drm/i915/Kconfig.debug:19: symbol DRM_I915_DEBUG depends on DRM_I915 drivers/gpu/drm/i915/Kconfig:2: symbol DRM_I915 depends on DRM drivers/gpu/drm/Kconfig:8: symbol DRM depends on AGP drivers/char/agp/Kconfig:2: symbol AGP depends on PCI >> drivers/pci/Kconfig:16: symbol PCI depends on HAVE_PCI drivers/pci/Kconfig:7: symbol HAVE_PCI is selected by FORCE_PCI drivers/pci/Kconfig:11: symbol FORCE_PCI is selected by MVME5100 >> arch/powerpc/platforms/embedded6xx/Kconfig:51: symbol MVME5100 depends on >> EMBEDDED6xx For a resolution refer to Documentation/kbuild/kconfig-language.rst subsection "Kconfig recursive dependency limitations" vim +2 arch/powerpc/platforms/embedded6xx/Kconfig a35e370cfd2ddf Arnd Bergmann 2007-08-30 @2 config EMBEDDED6xx a35e370cfd2ddf Arnd Bergmann 2007-08-30 3 bool "Embedded 6xx/7xx/7xxx-based boards" be34fff07c3755 Christophe Leroy 2018-11-17 4 depends on PPC_BOOK3S_32 && BROKEN_ON_SMP 14cf11af6cf608 Paul Mackerras2005-09-26 5 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 6 
config LINKSTATION 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 7 bool "Linkstation / Kurobox(HG) from Buffalo" a35e370cfd2ddf Arnd Bergmann 2007-08-30 8 depends on EMBEDDED6xx 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 9 select MPIC 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 10 select FSL_SOC 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 11 select PPC_UDBG_16550 if SERIAL_8250 44d7631bdb1621 Segher Boessenkool2007-02-22 12 select DEFAULT_UIMAGE 3490cba56f7f8a Jon Loeliger 2008-01-23 13 select MPC10X_BRIDGE 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 14 help 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 15 Select LINKSTATION if configuring for one of PPC- (MPC8241) 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 16 based NAS systems from Buffalo Technology. So far only 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 17 KuroboxHG has been tested. In the future classical Kurobox, 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 18 Linkstation-I HD-HLAN and HD-HGLAN versions, and PPC-based 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 19 Terastation systems should be supported too. 04d76b937bdf60 Guennadi Liakhovetski 2006-12-01 20 3490cba56f7f8a Jon Loeliger 2008-01-23 21 config STORCENTER 3490cba56f7f8a Jon Loeliger 2008-01-23 22 bool "IOMEGA StorCenter" 3490cba56f7f8a Jon Loeliger 2008-01-23 23 depends on EMBEDDED6xx 3490cba56f7f8a Jon Loeliger 2008-01-23 24 select MPIC 3490cba56f7f8a Jon Loeliger 2008-01-23 25 select FSL_SOC 3490cba56f7f8a Jon Loeliger 2008-01-23 26 select PPC_UDBG_16550 if SERIAL_8250 3490cba56f7f8a Jon Loeliger 2008-01-23 27 select MPC10X_BRIDGE 3490cba56f7f8a Jon Loeliger 2008-01-23 28 help 3490cba56f7f8a Jon Loeliger 2008-01-23 29 Select STORCENTER if configuring for the iomega StorCenter 3490cba56f7f8a Jon Loeliger 2008-01-23 30 with an 8241 CPU in it. 
3490cba56f7f8a Jon Loeliger 2008-01-23 31 c5d56332fd6c2f Zang Roy-r61911 2006-06-13 32 config MPC7448HPC2 c5d56332fd6c2f Zang Roy-r61911 2006-06-13 33 bool "Freescale MPC7448HPC2(Taiga)" a35e370cfd2ddf Arnd Bergmann 2007-08-30 34 depends on EMBEDDED6xx c5d56332fd6c2f Zang Roy-r61911 2006-06-13 35 select TSI108_BRIDGE c5d56332fd6c2f Zang Roy-r61911 2006-06-13 36 select DEFAULT_UIMAGE c5d56332fd6c2f Zang Roy-r61911 2006-06-13 37 select PPC_UDBG_16550 c5d56332fd6c2f Zang Roy-r61911 2006-06-13 38 help c5d56332fd6c2f Zang Roy-r61911 2006-06-13 39 Select MPC7448HPC2 if configuring for Freescale MPC7448HPC2 (Taiga) c5d56332fd6c2f Zang Roy-r61911 2006-06-13 40
Re: [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf
Quoting Tvrtko Ursulin (2020-03-10 10:38:21) > > On 06/03/2020 13:38, Chris Wilson wrote: > > +static int perf_many(void *arg) > > +{ > > + struct perf_parallel *p = arg; > > + struct intel_engine_cs *engine = p->engine; > > + struct intel_context *ce; > > + IGT_TIMEOUT(end_time); > > + unsigned long count; > > + int err = 0; > > + bool busy; > > + > > + ce = intel_context_create(engine); > > + if (IS_ERR(ce)) > > + return PTR_ERR(ce); > > + > > + err = intel_context_pin(ce); > > + if (err) { > > + intel_context_put(ce); > > + return err; > > + } > > + > > + busy = false; > > + if (intel_engine_supports_stats(engine) && > > + !intel_enable_engine_stats(engine)) { > > + p->busy = intel_engine_get_busy_time(engine); > > + busy = true; > > + } > > + > > + count = 0; > > + p->time = ktime_get(); > > + do { > > + struct i915_request *rq; > > + > > + rq = i915_request_create(ce); > > + if (IS_ERR(rq)) { > > + err = PTR_ERR(rq); > > + break; > > + } > > + > > + i915_request_add(rq); > > Any concerns on ring size here and maybe managing the wait explicitly? No concern, the intention is to flood the ring. If we are able to wait on the ring, we have succeeded in submitting faster than the engine can retire. (Which might be another issue for us to resolve, as it may be our own interrupt latency that is then the bottleneck.) If we did a sync0, sync1, many; that could give us some more insight into the interrupt latency in comparison to engine latency. 
> > > + count++; > > + } while (!__igt_timeout(end_time, NULL)); > > + p->time = ktime_sub(ktime_get(), p->time); > > + > > + if (busy) { > > + p->busy = ktime_sub(intel_engine_get_busy_time(engine), > > + p->busy); > > + intel_disable_engine_stats(engine); > > + } > > + > > + err = switch_to_kernel_sync(ce, err); > > + p->runtime = intel_context_get_total_runtime_ns(ce); > > + p->count = count; > > + > > + intel_context_unpin(ce); > > + intel_context_put(ce); > > + return err; > > +} > > + > > +static int perf_parallel_engines(void *arg) > > +{ > > + struct drm_i915_private *i915 = arg; > > + static int (* const func[])(void *arg) = { > > + perf_sync, > > + perf_many, > > + NULL, > > + }; > > + const unsigned int nengines = num_uabi_engines(i915); > > + struct intel_engine_cs *engine; > > + int (* const *fn)(void *arg); > > + struct pm_qos_request *qos; > > + struct { > > + struct perf_parallel p; > > + struct task_struct *tsk; > > + } *engines; > > + int err = 0; > > + > > + engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL); > > + if (!engines) > > + return -ENOMEM; > > + > > + qos = kzalloc(sizeof(*qos), GFP_KERNEL); > > + if (qos) > > + pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0); > > + > > + for (fn = func; *fn; fn++) { > > + char name[KSYM_NAME_LEN]; > > + struct igt_live_test t; > > + unsigned int idx; > > + > > + snprintf(name, sizeof(name), "%ps", *fn); > > Is this any better than just storing the name in local static array? It's easier for sure, and since the name is already in a static array, why not use it :) > > + err = igt_live_test_begin(&t, i915, __func__, name); > > + if (err) > > + break; > > + > > + atomic_set(&i915->selftest.counter, nengines); > > + > > + idx = 0; > > + for_each_uabi_engine(engine, i915) { > > For a pure driver overhead test I would suggest this to be a gt live test. It's a request performance test, so sits above the gt. 
My thinking is that this is a more of a high level request/scheduler test than execlists/guc (though it depends on those backends). > > + intel_engine_pm_get(engine); > > + > > + memset(&engines[idx].p, 0, sizeof(engines[idx].p)); > > + engines[idx].p.engine = engine; > > + > > + engines[idx].tsk = kthread_run(*fn, &engines[idx].p, > > +"igt:%s", > > engine->name); > > Test will get affected by the host CPU core count. How about we only > measure num_cpu engines? Might be even more important with discrete. No. We want to be able to fill the GPU with the different processors. Comparing glk to kbl helps highlight any inefficiencies we have -- we have to be efficient enough that core count is simply not a critical factor to offset our submission overhead. So we can run the same test and see how it scaled with engines vs cpus just by running it on dif
Re: [Intel-gfx] [PATCH 07/17] drm/i915/perf: Schedule oa_config after modifying the contexts
Quoting Chris Wilson (2020-03-06 13:38:42) > static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) > { > - struct i915_request *rq; > + struct i915_active *active; > + int err; > > - rq = stream->perf->ops.enable_metric_set(stream); > - if (IS_ERR(rq)) > - return PTR_ERR(rq); > + active = i915_active_create(); > + if (!active) > + return -ENOMEM; > > - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); > - i915_request_put(rq); > + err = stream->perf->ops.enable_metric_set(stream, active); > + if (err == 0) > + i915_active_wait(active, TASK_UNINTERRUPTIBLE); Why UNINTERRUPTIBLE you might ask? Because if you've demonstrated that by having scheduled the oa config update that by not waiting for the change, the machine becomes unusable, that seems like a risk not worth taking. Hence why the i915_request_wait() was uninterruptible and the i915_active_wait() keeps the uninterruptible nature. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2] drm/i915: Limit audio CDCLK>=2*BCLK constraint back to GLK only
Hey, On Mon, 9 Mar 2020, Takashi Iwai wrote: > On Fri, 06 Mar 2020 17:45:44 +0100, Kai Vehmanen wrote: >> unfortunately it seems this fix that was done is not holding up in wider >> testing. It now looks we need to enforce the constraint in one form or [...] >> So how about: We move the glk_force_audio_cdclk() logic from >> intel_audio.c:i915_audio_component_get_power() to acomp init. >> This has some notable implications: > > That sounds reasonable to me. But it's basically the i915 stuff, so > I'd leave the decision to you guys :) thanks Takashi -- let's wait for the comments. I'll add also Ville who wrote the original glk_force_audio() code directly to the thread. > My another quick thought after reading this mail is whether we can > simply remove glk_force_audio_cdclk(false) in > i915_audio_component_put_power(). In this way, a flicker should be > reduced, at most only once at boot time, and the CDCLK is lowered only > when the audio is really used (once). If we could really limit this to actual first-time use (i.e. only if actual playback to HDMI/DP is done), that would be interesting compromise indeed, but as the ALSA side probe will call get_power, this will have limited benefit. I think this is in the end same as: > Or, similarly, it can be put into *_component_bind() and *_unbind() > instead of *_get_power() and *_put_power(). This indicates that the > corresponding audio device really exists. ... doing it at bind. But yes, you are right, bind() and unbind() would be the appropriate places. Then if audio driver is not loaded, the freq constraint is not put into effect, and similarly if audio driver is unloaded, cdclk constraint is released. Br, Kai ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 01/12] drm/i915: Expose list of clients in sysfs
Quoting Tvrtko Ursulin (2020-03-09 18:31:18) > +static int > +__i915_drm_client_register(struct i915_drm_client *client, > + struct task_struct *task) > +{ > + struct i915_drm_clients *clients = client->clients; > + struct device_attribute *attr; > + int ret = -ENOMEM; > + char idstr[32]; > + > + client->pid = get_task_pid(task, PIDTYPE_PID); > + > + client->name = kstrdup(task->comm, GFP_KERNEL); > + if (!client->name) > + goto err_name; > + > + if (!clients->root) > + return 0; /* intel_fbdev_init registers a client before sysfs > */ > + > + snprintf(idstr, sizeof(idstr), "%u", client->id); > + client->root = kobject_create_and_add(idstr, clients->root); > + if (!client->root) > + goto err_client; > + > + attr = &client->attr.name; > + sysfs_attr_init(&attr->attr); > + attr->attr.name = "name"; > + attr->attr.mode = 0444; > + attr->show = show_client_name; > + > + ret = sysfs_create_file(client->root, (struct attribute *)attr); > + if (ret) > + goto err_attr; > + > + attr = &client->attr.pid; > + sysfs_attr_init(&attr->attr); > + attr->attr.name = "pid"; > + attr->attr.mode = 0444; > + attr->show = show_client_pid; > + > + ret = sysfs_create_file(client->root, (struct attribute *)attr); > + if (ret) > + goto err_attr; How do we think we will extend this (e.g. for client/1/(trace,debug))? i915_drm_client_add_attr() ? Or should we put all the attr here and make them known a priori? I think I prefer i915_drm_client_add_attr, but that will also require a notification chain? And that smells like overengineering. At any rate we have 2 other definite users around the corner for the client sysfs, so we should look at what API suits us. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] list: Prevent compiler reloads inside 'safe' list iteration
From: Chris Wilson > Sent: 10 March 2020 09:21 > Instruct the compiler to read the next element in the list iteration > once, and that it is not allowed to reload the value from the stale > element later. This is important as during the course of the safe > iteration, the stale element may be poisoned (unbeknownst to the > compiler). Eh? I thought any function call will stop the compiler being allowed to reload the value. The 'safe' loop iterators are only 'safe' against called code removing the current item from the list. > This helps prevent kcsan warnings over 'unsafe' conduct in releasing the > list elements during list_for_each_entry_safe() and friends. Sounds like kcsan is buggy. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/17] drm/i915: Tweak scheduler's kick_submission()
On 10/03/2020 11:00, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-10 10:07:33) On 06/03/2020 13:38, Chris Wilson wrote: Skip useless priority bumping on adding a new dependency, but otherwise prevent tasklet scheduling until we have completed all the potential rescheduling. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_scheduler.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 52f71e83e088..603cba36d6a4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -209,6 +209,8 @@ static void kick_submission(struct intel_engine_cs *engine, if (!inflight) goto unlock; + engine->execlists.queue_priority_hint = prio; + What is the significance of moving this up? I couldn't correlate it to the commit message. It's correcting the priority bumping. If we have the same context as active, we should not be skipping the hint update and so avoid the useless bump on a later dependency. /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. @@ -216,7 +218,6 @@ static void kick_submission(struct intel_engine_cs *engine, if (inflight->context == rq->context) goto unlock; - engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); @@ -463,11 +464,15 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, if (!dep) return -ENOMEM; + local_bh_disable(); + if (!__i915_sched_node_add_dependency(node, signal, dep, I915_DEPENDENCY_EXTERNAL | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); + local_bh_enable(); /* kick submission tasklet */ + And this presumably postpones the tasklet until __bump_priority -> __i915_schedule is finished. But then why the request submission path which calls __i915_sched_node_add_dependency directly does not need this treatment? 
Because we haven't completed the updates by that point, and upon actual request submission the tasklet is flushed. Plus on not all request submission paths would it be legal. Okay, Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] list: Prevent compiler reloads inside 'safe' list iteration
Quoting David Laight (2020-03-10 11:36:41) > From: Chris Wilson > > Sent: 10 March 2020 09:21 > > Instruct the compiler to read the next element in the list iteration > > once, and that it is not allowed to reload the value from the stale > > element later. This is important as during the course of the safe > > iteration, the stale element may be poisoned (unbeknownst to the > > compiler). > > Eh? > I thought any function call will stop the compiler being allowed > to reload the value. > The 'safe' loop iterators are only 'safe' against called > code removing the current item from the list. > > > This helps prevent kcsan warnings over 'unsafe' conduct in releasing the > > list elements during list_for_each_entry_safe() and friends. > > Sounds like kcsan is buggy The warning kcsan gave made sense (a strange case where emptying the list from inside the safe iterator would allow that list to be taken under a global mutex and have one extra request added to it). The list_for_each_entry_safe() should be ok in this scenario, so long as the next element is read before this element is dropped, and the compiler is instructed not to reload the element. kcsan is a little more insistent on having that annotation :) In this instance I would say it was a false positive from kcsan, but I can see why it would complain and suspect that given a sufficiently aggressive compiler, we may be caught out by a late reload of the next element. That's my conjecture, but I leave it to the lkmm experts to decide :) -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf
On 10/03/2020 11:09, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-10 10:38:21) On 06/03/2020 13:38, Chris Wilson wrote: +static int perf_many(void *arg) +{ + struct perf_parallel *p = arg; + struct intel_engine_cs *engine = p->engine; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + int err = 0; + bool busy; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine) && + !intel_enable_engine_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + count = 0; + p->time = ktime_get(); + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); Any concerns on ring size here and maybe managing the wait explicitly? No concern, the intention is to flood the ring. If we are able to wait on the ring, we have succeeded in submitting faster than the engine can retire. (Which might be another issue for us to resolve, as it may be our own interrupt latency that is then the bottleneck.) If we did a sync0, sync1, many; that could give us some more insight into the interrupt latency in comparison to engine latency. 
+ count++; + } while (!__igt_timeout(end_time, NULL)); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + p->busy); + intel_disable_engine_stats(engine); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int perf_parallel_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + perf_sync, + perf_many, + NULL, + }; + const unsigned int nengines = num_uabi_engines(i915); + struct intel_engine_cs *engine; + int (* const *fn)(void *arg); + struct pm_qos_request *qos; + struct { + struct perf_parallel p; + struct task_struct *tsk; + } *engines; + int err = 0; + + engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL); + if (!engines) + return -ENOMEM; + + qos = kzalloc(sizeof(*qos), GFP_KERNEL); + if (qos) + pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0); + + for (fn = func; *fn; fn++) { + char name[KSYM_NAME_LEN]; + struct igt_live_test t; + unsigned int idx; + + snprintf(name, sizeof(name), "%ps", *fn); Is this any better than just storing the name in local static array? It's easier for sure, and since the name is already in a static array, why not use it :) It looks weird, it needs KSYM_NAME_LEN of stack space and the special %ps. But okay. + err = igt_live_test_begin(&t, i915, __func__, name); + if (err) + break; + + atomic_set(&i915->selftest.counter, nengines); + + idx = 0; + for_each_uabi_engine(engine, i915) { For a pure driver overhead test I would suggest this to be a gt live test. It's a request performance test, so sits above the gt. My thinking is that this is a more of a high level request/scheduler test than execlists/guc (though it depends on those backends). Okay, yeah, it makes sense. 
+ intel_engine_pm_get(engine); + + memset(&engines[idx].p, 0, sizeof(engines[idx].p)); + engines[idx].p.engine = engine; + + engines[idx].tsk = kthread_run(*fn, &engines[idx].p, +"igt:%s", engine->name); Test will get affected by the host CPU core count. How about we only measure num_cpu engines? Might be even more important with discrete. No. We want to be able to fill the GPU with the different processors. Comparing glk to kbl helps highlight any inefficiencies we have -- we have to be efficient enough that core count is simply not a critical factor to offset our submission overhead. So we can run the same test and see how it scaled with engines vs cpus just by running it on different machines and look for problems. Normally you would expect one core per engine is enough to saturate the engine. I am afraid adding more combinations will be confusing when reading test results. (Same GPU, same engine count, different CPU core count.) How a
[Intel-gfx] [CI] drm/i915: Tweak scheduler's kick_submission()
Skip useless priority bumping on adding a new dependency by making sure that we do update the priority if we would have rescheduled the active context. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_scheduler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index af51810dc78c..68b06a7ba667 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -209,6 +209,8 @@ static void kick_submission(struct intel_engine_cs *engine, if (!inflight) goto unlock; + engine->execlists.queue_priority_hint = prio; + /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. @@ -216,7 +218,6 @@ static void kick_submission(struct intel_engine_cs *engine, if (inflight->context == rq->context) goto unlock; - engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); -- 2.20.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 07/17] drm/i915/perf: Schedule oa_config after modifying the contexts
On 10/03/2020 13:17, Chris Wilson wrote: Quoting Chris Wilson (2020-03-06 13:38:42) static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) { - struct i915_request *rq; + struct i915_active *active; + int err; - rq = stream->perf->ops.enable_metric_set(stream); - if (IS_ERR(rq)) - return PTR_ERR(rq); + active = i915_active_create(); + if (!active) + return -ENOMEM; - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + err = stream->perf->ops.enable_metric_set(stream, active); + if (err == 0) + i915_active_wait(active, TASK_UNINTERRUPTIBLE); Why UNINTERRUPTIBLE you might ask? Because if you've demonstrated that by having scheduled the oa config update that by not waiting for the change, the machine becomes unusable, that seems like a risk not worth taking. Just to confirm, the risk would be that the task could be interrupted and that we would schedule another configuration request, without any way and that would bring us back to the buggy scenario we saw. -Lionel Hence why the i915_request_wait() was uninterruptible and the i915_active_wait() keeps the uninterruptible nature. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 01/12] drm/i915: Expose list of clients in sysfs
On 10/03/2020 11:41, Chris Wilson wrote: Quoting Tvrtko Ursulin (2020-03-09 18:31:18) +static int +__i915_drm_client_register(struct i915_drm_client *client, + struct task_struct *task) +{ + struct i915_drm_clients *clients = client->clients; + struct device_attribute *attr; + int ret = -ENOMEM; + char idstr[32]; + + client->pid = get_task_pid(task, PIDTYPE_PID); + + client->name = kstrdup(task->comm, GFP_KERNEL); + if (!client->name) + goto err_name; + + if (!clients->root) + return 0; /* intel_fbdev_init registers a client before sysfs */ + + snprintf(idstr, sizeof(idstr), "%u", client->id); + client->root = kobject_create_and_add(idstr, clients->root); + if (!client->root) + goto err_client; + + attr = &client->attr.name; + sysfs_attr_init(&attr->attr); + attr->attr.name = "name"; + attr->attr.mode = 0444; + attr->show = show_client_name; + + ret = sysfs_create_file(client->root, (struct attribute *)attr); + if (ret) + goto err_attr; + + attr = &client->attr.pid; + sysfs_attr_init(&attr->attr); + attr->attr.name = "pid"; + attr->attr.mode = 0444; + attr->show = show_client_pid; + + ret = sysfs_create_file(client->root, (struct attribute *)attr); + if (ret) + goto err_attr; How do we think we will extend this (e.g. for client/1/(trace,debug))? i915_drm_client_add_attr() ? Or should we put all the attr here and make them known a priori? I think I prefer i915_drm_client_add_attr, but that will also require a notification chain? And that smells like overengineering. At any rate we have 2 other definite users around the corner for the client sysfs, so we should look at what API suits us. It sounds acceptable to me to just call their setup from here. __i915_drm_client_register sounds clear enough place. 
We potentially just split the function into "client core" and "add-on users" for better readability: __i915_drm_client_register { ...register_client(); ...register_client_busy(client, ...); ...register_client_xxx(client, ...); } Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf
Quoting Tvrtko Ursulin (2020-03-10 11:58:26) > > On 10/03/2020 11:09, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2020-03-10 10:38:21) > >> > >> On 06/03/2020 13:38, Chris Wilson wrote: > >>> + intel_engine_pm_get(engine); > >>> + > >>> + memset(&engines[idx].p, 0, sizeof(engines[idx].p)); > >>> + engines[idx].p.engine = engine; > >>> + > >>> + engines[idx].tsk = kthread_run(*fn, &engines[idx].p, > >>> +"igt:%s", > >>> engine->name); > >> > >> Test will get affected by the host CPU core count. How about we only > >> measure num_cpu engines? Might be even more important with discrete. > > > > No. We want to be able to fill the GPU with the different processors. > > Comparing glk to kbl helps highlight any inefficiencies we have -- we > > have to be efficient enough that core count is simply not a critical > > factor to offset our submission overhead. > > > > So we can run the same test and see how it scaled with engines vs cpus > > just by running it on different machines and look for problems. > > Normally you would expect one core per engine is enough to saturate the > engine. I am afraid adding more combinations will be confusing when > reading test results. (Same GPU, same engine count, different CPU core > count.) How about two subtest variants? One is 1:1 CPU core to engine, > and another can be all engines like here? Each machine will have its own consistent configuration. The question I have in mind is "can we saturate this machine"? This machine remains constant for all the runs. And our goal is that the driver is not a bottleneck on any machine. > Or possibly: > > 1. 1 CPU core - 1 engine - purest latency/overhead > 2. 1 CPU core - N engines (N = all engines) - more > 3. N CPU cores - N engines (N = min(engines, cores) - global lock > contention, stable setup > 4. M CPU cores - N engines (N, M = max) - lock contention stress > 5. 
N CPU cores - 1 engine (N = all cores) - more extreme lock contention I hear you in that you would like to have a serial test as well. Where we just use 1 cpu thread to submit to all engines as fast as we can and see how close we get with just "1 core". (There will still be parallelism one hopes from our interrupt handler.) -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Defer semaphore priority bumping to a workqueue
On 10/03/2020 10:17, Chris Wilson wrote: Since the semaphore fence may be signaled from inside an interrupt handler from inside a request holding its request->lock, we cannot then enter into the engine->active.lock for processing the semaphore priority bump as we may traverse our call tree and end up on another held request. CPU 0: [ 2243.218864] _raw_spin_lock_irqsave+0x9a/0xb0 [ 2243.218867] i915_schedule_bump_priority+0x49/0x80 [i915] [ 2243.218869] semaphore_notify+0x6d/0x98 [i915] [ 2243.218871] __i915_sw_fence_complete+0x61/0x420 [i915] [ 2243.218874] ? kmem_cache_free+0x211/0x290 [ 2243.218876] i915_sw_fence_complete+0x58/0x80 [i915] [ 2243.218879] dma_i915_sw_fence_wake+0x3e/0x80 [i915] [ 2243.218881] signal_irq_work+0x571/0x690 [i915] [ 2243.218883] irq_work_run_list+0xd7/0x120 [ 2243.218885] irq_work_run+0x1d/0x50 [ 2243.218887] smp_irq_work_interrupt+0x21/0x30 [ 2243.218889] irq_work_interrupt+0xf/0x20 CPU 1: [ 2242.173107] _raw_spin_lock+0x8f/0xa0 [ 2242.173110] __i915_request_submit+0x64/0x4a0 [i915] [ 2242.173112] __execlists_submission_tasklet+0x8ee/0x2120 [i915] [ 2242.173114] ? i915_sched_lookup_priolist+0x1e3/0x2b0 [i915] [ 2242.173117] execlists_submit_request+0x2e8/0x2f0 [i915] [ 2242.173119] submit_notify+0x8f/0xc0 [i915] [ 2242.173121] __i915_sw_fence_complete+0x61/0x420 [i915] [ 2242.173124] ? _raw_spin_unlock_irqrestore+0x39/0x40 [ 2242.173137] i915_sw_fence_complete+0x58/0x80 [i915] [ 2242.173140] i915_sw_fence_commit+0x16/0x20 [i915] CPU 2: [ 2242.173107] _raw_spin_lock+0x8f/0xa0 [ 2242.173110] __i915_request_submit+0x64/0x4a0 [i915] [ 2242.173112] __execlists_submission_tasklet+0x8ee/0x2120 [i915] [ 2242.173114] ? 
i915_sched_lookup_priolist+0x1e3/0x2b0 [i915] [ 2242.173117] execlists_submit_request+0x2e8/0x2f0 [i915] [ 2242.173119] submit_notify+0x8f/0xc0 [i915] Closes: https://gitlab.freedesktop.org/drm/intel/issues/1318 Fixes: b7404c7ecb38 ("drm/i915: Bump ready tasks ahead of busywaits") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.2+ --- drivers/gpu/drm/i915/i915_request.c | 22 +- drivers/gpu/drm/i915/i915_request.h | 2 ++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 04b52bf347bf..129357d4b599 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -588,19 +588,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } +static void irq_semaphore_cb(struct irq_work *wrk) +{ + struct i915_request *rq = + container_of(wrk, typeof(*rq), semaphore_work); + + i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); + i915_request_put(rq); +} + static int __i915_sw_fence_call semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { - struct i915_request *request = - container_of(fence, typeof(*request), semaphore); + struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); switch (state) { case FENCE_COMPLETE: - i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE); + if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { + i915_request_get(rq); + init_irq_work(&rq->semaphore_work, irq_semaphore_cb); + irq_work_queue(&rq->semaphore_work); + } break; case FENCE_FREE: - i915_request_put(request); + i915_request_put(rq); break; } @@ -1369,9 +1381,9 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. 
*/ - i915_sw_fence_commit(&rq->semaphore); if (attr && rq->engine->schedule) rq->engine->schedule(rq, attr); + i915_sw_fence_commit(&rq->semaphore); i915_sw_fence_commit(&rq->submit); } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 6020d5b2a3df..3c552bfea67a 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -26,6 +26,7 @@ #define I915_REQUEST_H #include +#include #include #include "gem/i915_gem_context_types.h" @@ -208,6 +209,7 @@ struct i915_request { }; struct list_head execute_cb; struct i915_sw_fence semaphore; + struct irq_work semaphore_work; /* * A list of everyone we wait upon, and everyone who waits upon us. Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] list: Prevent compiler reloads inside 'safe' list iteration
From: Chris Wilson > Sent: 10 March 2020 11:50 > > Quoting David Laight (2020-03-10 11:36:41) > > From: Chris Wilson > > > Sent: 10 March 2020 09:21 > > > Instruct the compiler to read the next element in the list iteration > > > once, and that it is not allowed to reload the value from the stale > > > element later. This is important as during the course of the safe > > > iteration, the stale element may be poisoned (unbeknownst to the > > > compiler). > > > > Eh? > > I thought any function call will stop the compiler being allowed > > to reload the value. > > The 'safe' loop iterators are only 'safe' against called > > code removing the current item from the list. > > > > > This helps prevent kcsan warnings over 'unsafe' conduct in releasing the > > > list elements during list_for_each_entry_safe() and friends. > > > > Sounds like kcsan is buggy > > The warning kcsan gave made sense (a strange case where the emptying the > list from inside the safe iterator would allow that list to be taken > under a global mutex and have one extra request added to it. The > list_for_each_entry_safe() should be ok in this scenario, so long as the > next element is read before this element is dropped, and the compiler is > instructed not to reload the element. Normally the loop iteration code has to hold the mutex. I guess it can be released inside the loop provided no other code can ever delete entries. > kcsan is a little more insistent on having that annotation :) > > In this instance I would say it was a false positive from kcsan, but I > can see why it would complain and suspect that given a sufficiently > aggressive compiler, we may be caught out by a late reload of the next > element. If you have: for (; p; p = next) { next = p->next; external_function_call(void); } the compiler must assume that the function call can change 'p->next' and read it before the call. Is this a list with strange locking rules? The only deletes are from within the loop. 
Adds and deletes are locked. The list traversal isn't locked. I suspect kcsan bleats because it doesn't assume the compiler will use a single instruction/memory operation to read p->next. That is just stupid. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] list: Prevent compiler reloads inside 'safe' list iteration
Quoting David Laight (2020-03-10 12:23:34) > From: Chris Wilson > > Sent: 10 March 2020 11:50 > > > > Quoting David Laight (2020-03-10 11:36:41) > > > From: Chris Wilson > > > > Sent: 10 March 2020 09:21 > > > > Instruct the compiler to read the next element in the list iteration > > > > once, and that it is not allowed to reload the value from the stale > > > > element later. This is important as during the course of the safe > > > > iteration, the stale element may be poisoned (unbeknownst to the > > > > compiler). > > > > > > Eh? > > > I thought any function call will stop the compiler being allowed > > > to reload the value. > > > The 'safe' loop iterators are only 'safe' against called > > > code removing the current item from the list. > > > > > > > This helps prevent kcsan warnings over 'unsafe' conduct in releasing the > > > > list elements during list_for_each_entry_safe() and friends. > > > > > > Sounds like kcsan is buggy > > > > The warning kcsan gave made sense (a strange case where the emptying the > > list from inside the safe iterator would allow that list to be taken > > under a global mutex and have one extra request added to it. The > > list_for_each_entry_safe() should be ok in this scenario, so long as the > > next element is read before this element is dropped, and the compiler is > > instructed not to reload the element. > > Normally the loop iteration code has to hold the mutex. > I guess it can be released inside the loop provided no other > code can ever delete entries. > > > kcsan is a little more insistent on having that annotation :) > > > > In this instance I would say it was a false positive from kcsan, but I > > can see why it would complain and suspect that given a sufficiently > > aggressive compiler, we may be caught out by a late reload of the next > > element. 
> > If you have: > for (; p; p = next) { > next = p->next; > external_function_call(void); > } > the compiler must assume that the function call > can change 'p->next' and read it before the call. > > Is this a list with strange locking rules? Yes. > The only deletes are from within the loop. All deletes are within the mutex. > Adds and deletes are locked. There's just one special case where after the very last element of all lists for an engine is removed, a global mutex is taken and one new element is added to one of the lists to track powering off the engine. > The list traversal isn't locked. There's rcu traversal of the list as well. > I suspect kcsan bleats because it doesn't assume the compiler > will use a single instruction/memory operation to read p->next. > That is just stupid. kcsan is looking for a write to a pointer after a read that is not in the same locking chain. While I have satisfied lockdep that I am not insane, I'm worrying in case kcsan has a valid objection to the potential data race in the safe list iterator. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 4/7] drm/i915: Add checks specific to async flips
> -Original Message- > From: Zanoni, Paulo R > Sent: Tuesday, March 10, 2020 4:49 AM > To: B S, Karthik ; intel-gfx@lists.freedesktop.org > Cc: ville.syrj...@linux.intel.com; Kulkarni, Vandita > ; Shankar, Uma > Subject: Re: [RFC 4/7] drm/i915: Add checks specific to async flips > > Em sex, 2020-03-06 às 17:09 +0530, Karthik B S escreveu: > > Support added only for async flips on primary plane. > > If flip is requested on any other plane, reject it. > > > > Signed-off-by: Karthik B S > > --- > > drivers/gpu/drm/i915/display/intel_display.c | 29 > > > > 1 file changed, 29 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c > > b/drivers/gpu/drm/i915/display/intel_display.c > > index 25fad5d01e67..a8de08c3773e 100644 > > --- a/drivers/gpu/drm/i915/display/intel_display.c > > +++ b/drivers/gpu/drm/i915/display/intel_display.c > > @@ -14732,6 +14732,31 @@ static bool > intel_cpu_transcoders_need_modeset(struct intel_atomic_state *state, > > return false; > > } > > > > +static int intel_atomic_check_async(struct intel_atomic_state *state) > > +{ > > + struct drm_plane *plane; > > + struct drm_plane_state *plane_state; > > + struct intel_crtc_state *crtc_state; > > + struct intel_crtc *crtc; > > + int i, j; > > + > > + /*FIXME: Async flip is only supported for primary plane currently > > +* Support for overlays to be added. > > +*/ > > + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { > > + if (crtc_state->uapi.async_flip) { > > + for_each_new_plane_in_state(&state->base, > > + plane, plane_state, j) { > > + if (plane->type != > DRM_PLANE_TYPE_PRIMARY) { > > + DRM_ERROR("Async flips is NOT > supported for non-primary > > +plane\n"); > > My understanding is that this is not a case of DRM_ERROR, since it's just user > space doing something it shouldn't. Sure. Will fix that in the next revision. 
> > Have we checked if xf86-video-modesetting or some other current user > space is going to try these calls on non-primary when async_flips are > enabled? Specifically, how does it react when it gets the EINVAL? We should > try to avoid the case where we release a Kernel that current user space is not > prepared for (even if it's not the Kernel's fault). Will check the user space behavior and update accordingly in the next revision. > > > > + return -EINVAL; > > + } > > + } > > + } > > + } > > + return 0; > > +} > > + > > /** > > * intel_atomic_check - validate state object > > * @dev: drm device > > @@ -14760,6 +14785,10 @@ static int intel_atomic_check(struct > drm_device *dev, > > if (ret) > > goto fail; > > > > + ret = intel_atomic_check_async(state); > > + if (ret) > > + goto fail; > > + > > for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, > > new_crtc_state, i) { > > if (!needs_modeset(new_crtc_state)) { ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 5/7] drm/i915: Add flip_done_handler definition
> -Original Message- > From: Zanoni, Paulo R > Sent: Tuesday, March 10, 2020 4:49 AM > To: B S, Karthik ; intel-gfx@lists.freedesktop.org > Cc: ville.syrj...@linux.intel.com; Kulkarni, Vandita > ; Shankar, Uma > Subject: Re: [RFC 5/7] drm/i915: Add flip_done_handler definition > > Em sex, 2020-03-06 às 17:09 +0530, Karthik B S escreveu: > > Send the flip done event in the handler and disable the interrupt. > > > > Signed-off-by: Karthik B S > > --- > > drivers/gpu/drm/i915/i915_irq.c | 18 ++ > > 1 file changed, 18 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > > b/drivers/gpu/drm/i915/i915_irq.c index 5955e737a45d..1feda9aecf4a > > 100644 > > --- a/drivers/gpu/drm/i915/i915_irq.c > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > @@ -1243,6 +1243,24 @@ display_pipe_crc_irq_handler(struct > drm_i915_private *dev_priv, > > u32 crc4) {} > > #endif > > > > +static void flip_done_handler(struct drm_i915_private *dev_priv, > > + unsigned int pipe) > > The compiler is going to complain that we added a static function with no > caller. > > See my comment on commit 1: please squash this patch with the one that > makes use of the new function. Sure. Will restructure the patches as per your feedback. Thanks. 
> > > +{ > > + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); > > + struct drm_crtc_state *crtc_state = crtc->base.state; > > + struct drm_device *dev = &dev_priv->drm; > > + unsigned long irqflags; > > + > > + spin_lock_irqsave(&dev->event_lock, irqflags); > > + > > + if (crtc_state->event->base.event->type == > DRM_EVENT_FLIP_COMPLETE) { > > + drm_crtc_send_vblank_event(&crtc->base, crtc_state- > >event); > > + crtc_state->event = NULL; > > + } > > + > > + spin_unlock_irqrestore(&dev->event_lock, irqflags); > > + icl_disable_flip_done(&crtc->base); > > +} > > > > static void hsw_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, > > enum pipe pipe) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/gt: Pull checking rps->pm_events under the irq_lock
Avoid angering kcsan by serialising the read of the pm_events with the write in rps_disable_interrupts. [ 6268.713419] BUG: KCSAN: data-race in intel_rps_park [i915] / rps_work [i915] [ 6268.713437] [ 6268.713449] write to 0x8881eda8efac of 4 bytes by task 1127 on cpu 3: [ 6268.713680] intel_rps_park+0x136/0x260 [i915] [ 6268.713905] __gt_park+0x61/0xa0 [i915] [ 6268.714128] intel_wakeref_put_last+0x42/0x90 [i915] [ 6268.714352] __intel_wakeref_put_work+0xd3/0xf0 [i915] [ 6268.714369] process_one_work+0x3b1/0x690 [ 6268.714384] worker_thread+0x80/0x670 [ 6268.714398] kthread+0x19a/0x1e0 [ 6268.714412] ret_from_fork+0x1f/0x30 [ 6268.714423] [ 6268.714435] read to 0x8881eda8efac of 4 bytes by task 950 on cpu 2: [ 6268.714664] rps_work+0xc2/0x680 [i915] [ 6268.714679] process_one_work+0x3b1/0x690 [ 6268.714693] worker_thread+0x80/0x670 [ 6268.714707] kthread+0x19a/0x1e0 [ 6268.714720] ret_from_fork+0x1f/0x30 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 506738dede16..dbecfb5a5bb1 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1459,12 +1459,12 @@ static void rps_work(struct work_struct *work) u32 pm_iir = 0; spin_lock_irq(&gt->irq_lock); - pm_iir = fetch_and_zero(&rps->pm_iir); + pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; client_boost = atomic_read(&rps->num_waiters); spin_unlock_irq(&gt->irq_lock); /* Make sure we didn't queue anything we're not going to process. */ - if ((pm_iir & rps->pm_events) == 0 && !client_boost) + if (!pm_iir && !client_boost) goto out; mutex_lock(&rps->lock); -- 2.20.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 14/17] drm/i915: have *_debugfs_init() functions return void.
Since commit 987d65d01356 (drm: debugfs: make drm_debugfs_create_files() never fail), drm_debugfs_create_files() never fails and should return void. Therefore, remove its use as the return value of debugfs_init() functions and have the functions return void. v2: convert intel_display_debugfs_register() stub to return void too. Signed-off-by: Wambui Karuga --- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 8 drivers/gpu/drm/i915/display/intel_display_debugfs.h | 4 ++-- drivers/gpu/drm/i915/i915_debugfs.c | 8 drivers/gpu/drm/i915/i915_debugfs.h | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 1e6eb7f2f72d..424f4e52f783 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1927,7 +1927,7 @@ static const struct { {"i915_edp_psr_debug", &i915_edp_psr_debug_fops}, }; -int intel_display_debugfs_register(struct drm_i915_private *i915) +void intel_display_debugfs_register(struct drm_i915_private *i915) { struct drm_minor *minor = i915->drm.primary; int i; @@ -1940,9 +1940,9 @@ int intel_display_debugfs_register(struct drm_i915_private *i915) intel_display_debugfs_files[i].fops); } - return drm_debugfs_create_files(intel_display_debugfs_list, - ARRAY_SIZE(intel_display_debugfs_list), - minor->debugfs_root, minor); + drm_debugfs_create_files(intel_display_debugfs_list, +ARRAY_SIZE(intel_display_debugfs_list), +minor->debugfs_root, minor); } static int i915_panel_show(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.h b/drivers/gpu/drm/i915/display/intel_display_debugfs.h index a3bea1ce04c2..c922c1745bfe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.h +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.h @@ -10,10 +10,10 @@ struct drm_connector; struct drm_i915_private; #ifdef 
CONFIG_DEBUG_FS -int intel_display_debugfs_register(struct drm_i915_private *i915); +void intel_display_debugfs_register(struct drm_i915_private *i915); int intel_connector_debugfs_add(struct drm_connector *connector); #else -static inline int intel_display_debugfs_register(struct drm_i915_private *i915) { return 0; } +static inline void intel_display_debugfs_register(struct drm_i915_private *i915) {} static inline int intel_connector_debugfs_add(struct drm_connector *connector) { return 0; } #endif diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8f2525e4ce0f..de313199c714 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2392,7 +2392,7 @@ static const struct i915_debugfs_files { {"i915_guc_log_relay", &i915_guc_log_relay_fops}, }; -int i915_debugfs_register(struct drm_i915_private *dev_priv) +void i915_debugfs_register(struct drm_i915_private *dev_priv) { struct drm_minor *minor = dev_priv->drm.primary; int i; @@ -2409,7 +2409,7 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv) i915_debugfs_files[i].fops); } - return drm_debugfs_create_files(i915_debugfs_list, - I915_DEBUGFS_ENTRIES, - minor->debugfs_root, minor); + drm_debugfs_create_files(i915_debugfs_list, +I915_DEBUGFS_ENTRIES, +minor->debugfs_root, minor); } diff --git a/drivers/gpu/drm/i915/i915_debugfs.h b/drivers/gpu/drm/i915/i915_debugfs.h index 6da39c76ab5e..1de2736f1248 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.h +++ b/drivers/gpu/drm/i915/i915_debugfs.h @@ -12,10 +12,10 @@ struct drm_i915_private; struct seq_file; #ifdef CONFIG_DEBUG_FS -int i915_debugfs_register(struct drm_i915_private *dev_priv); +void i915_debugfs_register(struct drm_i915_private *dev_priv); void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj); #else -static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) { return 0; } +static inline void i915_debugfs_register(struct 
drm_i915_private *dev_priv) {} static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {} #endif -- 2.25.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx