Re: [Intel-gfx] [PATCH] drm/i915/perf: Do not clear pollin for small user read buffers

2020-03-30 Thread Lionel Landwerlin

On 28/03/2020 01:16, Ashutosh Dixit wrote:

It is wrong to block the user thread in the next poll when OA data is
already available which could not fit in the user buffer provided in
the previous read. In several cases the exact user buffer size is not
known. Blocking user space in poll can lead to data loss when the
buffer size used is smaller than the available data.

This change fixes this issue and allows user space to read all OA data
even when using a buffer size smaller than the available data using
multiple non-blocking reads rather than staying blocked in poll till
the next timer interrupt.

v2: Fix ret value for blocking reads (Umesh)
v3: Mistake during patch send (Ashutosh)
v4: Remove -EAGAIN from comment (Umesh)

Cc: Umesh Nerlige Ramappa 
Cc: Lionel Landwerlin 
Signed-off-by: Ashutosh Dixit 



Looks like your change makes more sense than what I suggested.

I have a few nits below.


Thanks,


-Lionel



---
  drivers/gpu/drm/i915/i915_perf.c | 59 +++-
  1 file changed, 12 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c74ebac50015..5f6d9bff99c8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2914,49 +2914,6 @@ void i915_oa_init_reg_state(const struct intel_context 
*ce,
gen8_update_reg_state_unlocked(ce, stream);
  }
  
-/**

- * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
- * @stream: An i915 perf stream
- * @file: An i915 perf stream file
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @ppos: (inout) file seek position (unused)
- *
- * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
- * ensure that if we've successfully copied any data then reporting that takes
- * precedence over any internal error status, so the data isn't lost.
- *
- * For example ret will be -ENOSPC whenever there is more buffered data than
- * can be copied to userspace, but that's only interesting if we weren't able
- * to copy some data because it implies the userspace buffer is too small to
- * receive a single record (and we never split records).
- *
- * Another case with ret == -EFAULT is more of a grey area since it would seem
- * like bad form for userspace to ask us to overrun its buffer, but the user
- * knows best:
- *
- *   http://yarchive.net/comp/linux/partial_reads_writes.html
- *
- * Returns: The number of bytes copied or a negative error code on failure.
- */
-static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
-struct file *file,
-char __user *buf,
-size_t count,
-loff_t *ppos)
-{
-   /* Note we keep the offset (aka bytes read) separate from any
-* error status so that the final check for whether we return
-* the bytes read with a higher precedence than any error (see
-* comment below) doesn't need to be handled/duplicated in
-* stream->ops->read() implementations.
-*/
-   size_t offset = 0;
-   int ret = stream->ops->read(stream, buf, count, &offset);
-
-   return offset ?: (ret ?: -EAGAIN);
-}
-
  /**
   * i915_perf_read - handles read() FOP for i915 perf stream FDs
   * @file: An i915 perf stream file
@@ -2982,6 +2939,8 @@ static ssize_t i915_perf_read(struct file *file,
  {
struct i915_perf_stream *stream = file->private_data;
struct i915_perf *perf = stream->perf;
+   size_t offset = 0;
+   int __ret;
ssize_t ret;
  
  	/* To ensure it's handled consistently we simply treat all reads of a

@@ -3005,16 +2964,19 @@ static ssize_t i915_perf_read(struct file *file,
return ret;
  
  			mutex_lock(&perf->lock);

-   ret = i915_perf_read_locked(stream, file,
-   buf, count, ppos);
+   __ret = stream->ops->read(stream, buf, count, &offset);
+   ret = offset ?: (__ret ?: -EAGAIN);

I would drop this line above and move it to the end of the function.

mutex_unlock(&perf->lock);
} while (ret == -EAGAIN);
} else {
mutex_lock(&perf->lock);
-   ret = i915_perf_read_locked(stream, file, buf, count, ppos);
+   __ret = stream->ops->read(stream, buf, count, &offset);
+   ret = offset ?: (__ret ?: -EAGAIN);

I would drop this line above and move it to the end of the function.

mutex_unlock(&perf->lock);
}
  
+	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EIO, ... */

+
/* We allow the poll checking to sometimes report false positive EPOLLIN
 * events where we might actually report EAGAIN on read() if there's

[Intel-gfx] [PATCH] drm/i915/perf: don't read head/tail pointers outside critical section

2020-03-30 Thread Lionel Landwerlin
Reading or writing those fields should only happen under
stream->oa_buffer.ptr_lock.

Signed-off-by: Lionel Landwerlin 
Fixes: d1df41eb72ef ("drm/i915/perf: rework aging tail workaround")
---
 drivers/gpu/drm/i915/i915_perf.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c74ebac50015..ec9421f02ebd 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -463,6 +463,7 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format_size;
unsigned long flags;
+   bool pollin;
u32 hw_tail;
u64 now;
 
@@ -532,10 +533,13 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
stream->oa_buffer.aging_timestamp = now;
}
 
+   pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
+ stream->oa_buffer.head - gtt_offset) >= report_size;
+
+
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
-   return OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
-   stream->oa_buffer.head - gtt_offset) >= report_size;
+   return pollin;
 }
 
 /**
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t 1/4] i915/gem_bad_reloc: Reduce negative testing

2020-03-30 Thread Chris Wilson
The plain negative-reloc tests do not execute anything on the GPU and so
cannot determine if the GPU would fall over; they only exercise the
kernel's placement, which is uniform across the engines. We should also
cover the engines with perhaps MI_STORE_DWORD, but for the moment the
solitary exercise of blt remains.

Signed-off-by: Chris Wilson 
---
 tests/i915/gem_bad_reloc.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/tests/i915/gem_bad_reloc.c b/tests/i915/gem_bad_reloc.c
index 96c9babe6..57efc77c3 100644
--- a/tests/i915/gem_bad_reloc.c
+++ b/tests/i915/gem_bad_reloc.c
@@ -50,7 +50,7 @@ IGT_TEST_DESCRIPTION("Simulates SNA behaviour using negative 
self-relocations"
  * than the total size of the GTT), the GPU will hang.
  * See https://bugs.freedesktop.org/show_bug.cgi?id=78533
  */
-static void negative_reloc(int fd, unsigned engine, unsigned flags)
+static void negative_reloc(int fd, unsigned flags)
 {
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_exec_object2 obj;
@@ -60,7 +60,6 @@ static void negative_reloc(int fd, unsigned engine, unsigned 
flags)
uint64_t *offsets;
int i;
 
-   gem_require_ring(fd, engine);
igt_require(intel_gen(intel_get_drm_devid(fd)) >= 7);
 
memset(&obj, 0, sizeof(obj));
@@ -70,7 +69,7 @@ static void negative_reloc(int fd, unsigned engine, unsigned 
flags)
memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = (uintptr_t)&obj;
execbuf.buffer_count = 1;
-   execbuf.flags = engine | (flags & USE_LUT);
+   execbuf.flags = flags & USE_LUT;
igt_require(__gem_execbuf(fd, &execbuf) == 0);
 
igt_info("Found offset %lld for 4k batch\n", (long long)obj.offset);
@@ -185,7 +184,6 @@ static void negative_reloc_blt(int fd)
 
 igt_main
 {
-   const struct intel_execution_engine *e;
int fd = -1;
 
igt_fixture {
@@ -194,13 +192,11 @@ igt_main
gem_require_blitter(fd);
}
 
-   for (e = intel_execution_engines; e->name; e++) {
-   igt_subtest_f("negative-reloc-%s", e->name)
-   negative_reloc(fd, eb_ring(e), 0);
+   igt_subtest("negative-reloc")
+   negative_reloc(fd, 0);
 
-   igt_subtest_f("negative-reloc-lut-%s", e->name)
-   negative_reloc(fd, eb_ring(e), USE_LUT);
-   }
+   igt_subtest("negative-reloc-lut")
+   negative_reloc(fd, USE_LUT);
 
igt_subtest("negative-reloc-bltcopy")
negative_reloc_blt(fd);
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t 4/4] i915/gem_exec_parallel: Dynamise per-engine tests

2020-03-30 Thread Chris Wilson
Convert the per-engine tests into a dynamic subtest.

Signed-off-by: Chris Wilson 
---
 tests/i915/gem_exec_parallel.c| 28 ++-
 tests/intel-ci/fast-feedback.testlist |  4 +---
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/i915/gem_exec_parallel.c b/tests/i915/gem_exec_parallel.c
index 98316af40..66fe18534 100644
--- a/tests/i915/gem_exec_parallel.c
+++ b/tests/i915/gem_exec_parallel.c
@@ -196,7 +196,6 @@ static void all(int fd, struct intel_execution_engine2 
*engine, unsigned flags)
engines[nengine++] = e->flags;
}
} else {
-   igt_require(gem_class_can_store_dword(fd, engine->class));
engines[nengine++] = engine->flags;
}
igt_require(nengine);
@@ -254,7 +253,7 @@ igt_main
const char *name;
unsigned flags;
} modes[] = {
-   { "", 0 },
+   { "basic", 0 },
{ "contexts", CONTEXTS },
{ "fds", FDS },
{ NULL }
@@ -268,18 +267,21 @@ igt_main
igt_fork_hang_detector(fd);
}
 
-   for (const struct mode *m = modes; m->name; m++)
-   igt_subtest_f("%s", *m->name ? m->name : "basic")
-   /* NULL value means all engines */
-   all(fd, NULL, m->flags);
-
-   __for_each_physical_engine(fd, e) {
+   igt_subtest_with_dynamic("engines") {
for (const struct mode *m = modes; m->name; m++)
-   igt_subtest_f("%s%s%s",
- e->name,
- *m->name ? "-" : "",
- m->name)
-   all(fd, e, m->flags);
+   igt_dynamic(m->name)
+   /* NULL value means all engines */
+   all(fd, NULL, m->flags);
+   }
+
+   for (const struct mode *m = modes; m->name; m++) {
+   igt_subtest_with_dynamic(m->name) {
+   __for_each_physical_engine(fd, e) {
+   if (gem_class_can_store_dword(fd, e->class))
+   igt_dynamic(e->name)
+   all(fd, e, m->flags);
+   }
+   }
}
 
igt_fixture {
diff --git a/tests/intel-ci/fast-feedback.testlist 
b/tests/intel-ci/fast-feedback.testlist
index b41fb4a01..06367f822 100644
--- a/tests/intel-ci/fast-feedback.testlist
+++ b/tests/intel-ci/fast-feedback.testlist
@@ -21,9 +21,7 @@ igt@gem_exec_fence@basic-wait
 igt@gem_exec_fence@basic-await
 igt@gem_exec_fence@nb-await
 igt@gem_exec_gttfill@basic
-igt@gem_exec_parallel@basic
-igt@gem_exec_parallel@contexts
-igt@gem_exec_parallel@fds
+igt@gem_exec_parallel@engines
 igt@gem_exec_store@basic-all
 igt@gem_exec_suspend@basic
 igt@gem_exec_suspend@basic-s0
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t 3/4] i915/gem_exec_capture: Dynamise per-engine tests

2020-03-30 Thread Chris Wilson
Convert the per-engine tests into a dynamic subtest.

Signed-off-by: Chris Wilson 
---
 tests/i915/gem_exec_capture.c | 20 
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/tests/i915/gem_exec_capture.c b/tests/i915/gem_exec_capture.c
index fe2c4bd12..bc13d8632 100644
--- a/tests/i915/gem_exec_capture.c
+++ b/tests/i915/gem_exec_capture.c
@@ -524,9 +524,14 @@ static size_t safer_strlen(const char *s)
return s ? strlen(s) : 0;
 }
 
+#define test_each_engine(T, i915, e) \
+   igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
+   for_each_if(gem_class_can_store_dword(i915, (e)->class)) \
+   igt_dynamic_f("%s", (e)->name)
+
 igt_main
 {
-   const struct intel_execution_engine *e;
+   const struct intel_execution_engine2 *e;
igt_hang_t hang;
int fd = -1;
int dir = -1;
@@ -550,17 +555,8 @@ igt_main
igt_require(safer_strlen(igt_sysfs_get(dir, "error")) > 0);
}
 
-   for (e = intel_execution_engines; e->name; e++) {
-   /* default exec-id is purely symbolic */
-   if (e->exec_id == 0)
-   continue;
-
-   igt_subtest_f("capture-%s", e->name) {
-   igt_require(gem_ring_has_physical_engine(fd, 
eb_ring(e)));
-   igt_require(gem_can_store_dword(fd, eb_ring(e)));
-   capture(fd, dir, eb_ring(e));
-   }
-   }
+   test_each_engine("capture", fd, e)
+   capture(fd, dir, e->flags);
 
igt_subtest_f("many-4K-zero") {
igt_require(gem_can_store_dword(fd, 0));
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t 2/4] i915/gem_exec_async: Dynamise per-engine tests

2020-03-30 Thread Chris Wilson
Convert the per-engine tests into a dynamic subtest.

Signed-off-by: Chris Wilson 
---
 tests/i915/gem_exec_async.c | 37 -
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/tests/i915/gem_exec_async.c b/tests/i915/gem_exec_async.c
index 623493963..3187b4f63 100644
--- a/tests/i915/gem_exec_async.c
+++ b/tests/i915/gem_exec_async.c
@@ -80,9 +80,10 @@ static void store_dword(int fd, unsigned ring,
gem_close(fd, obj[1].handle);
 }
 
-static void one(int fd, unsigned ring, uint32_t flags)
+static void one(int fd, unsigned engine)
 {
const int gen = intel_gen(intel_get_drm_devid(fd));
+   const struct intel_execution_engine2 *e;
struct drm_i915_gem_exec_object2 obj[2];
 #define SCRATCH 0
 #define BATCH 1
@@ -137,22 +138,20 @@ static void one(int fd, unsigned ring, uint32_t flags)
memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = to_user_pointer(obj);
execbuf.buffer_count = 2;
-   execbuf.flags = ring | flags;
-
-   igt_require(gem_engine_has_mutable_submission(fd, ring));
+   execbuf.flags = engine;
 
igt_require(__gem_execbuf(fd, &execbuf) == 0);
gem_close(fd, obj[BATCH].handle);
 
i = 0;
-   for_each_physical_engine(e, fd) {
-   if (eb_ring(e) == ring)
+   __for_each_physical_engine(fd, e) {
+   if (e->flags == engine)
continue;
 
-   if (!gem_can_store_dword(fd, eb_ring(e)))
+   if (!gem_class_can_store_dword(fd, e->class))
continue;
 
-   store_dword(fd, eb_ring(e), obj[SCRATCH].handle, 4*i, i);
+   store_dword(fd, e->flags, obj[SCRATCH].handle, 4*i, i);
i++;
}
 
@@ -185,9 +184,15 @@ static bool has_async_execbuf(int fd)
return async > 0;
 }
 
+#define test_each_engine(T, i915, e) \
+   igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
+   for_each_if(gem_class_can_store_dword(i915, (e)->class) && \
+   gem_class_has_mutable_submission(i915, (e)->class))\
+   igt_dynamic_f("%s", (e)->name)
+
 igt_main
 {
-   const struct intel_execution_engine *e;
+   const struct intel_execution_engine2 *e;
int fd = -1;
 
igt_fixture {
@@ -195,21 +200,11 @@ igt_main
igt_require_gem(fd);
gem_require_mmap_wc(fd);
igt_require(has_async_execbuf(fd));
-   igt_require(gem_can_store_dword(fd, 0));
igt_fork_hang_detector(fd);
}
 
-   for (e = intel_execution_engines; e->name; e++) {
-   /* default exec-id is purely symbolic */
-   if (e->exec_id == 0)
-   continue;
-
-   igt_subtest_f("concurrent-writes-%s", e->name) {
-   igt_require(gem_ring_has_physical_engine(fd, 
eb_ring(e)));
-   igt_require(gem_can_store_dword(fd, eb_ring(e)));
-   one(fd, e->exec_id, e->flags);
-   }
-   }
+   test_each_engine("concurrent-writes", fd, e)
+   one(fd, e->flags);
 
igt_fixture {
igt_stop_hang_detector();
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/perf: don't read head/tail pointers outside critical section

2020-03-30 Thread Patchwork
== Series Details ==

Series: drm/i915/perf: don't read head/tail pointers outside critical section
URL   : https://patchwork.freedesktop.org/series/75220/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
97a65e4f417f drm/i915/perf: don't read head/tail pointers outside critical 
section
-:32: CHECK:LINE_SPACING: Please don't use multiple blank lines
#32: FILE: drivers/gpu/drm/i915/i915_perf.c:539:
+
+

total: 0 errors, 0 warnings, 1 checks, 22 lines checked

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: Extend hotplug detect retry on TypeC connectors to 5 seconds

2020-03-30 Thread Imre Deak
On TypeC ports, if a sink deasserts/reasserts its HPD signal, generating
a hotplug interrupt without the sink getting unplugged/replugged from
the connector, there can be a delay of up to 3 seconds until the AUX
channel becomes functional. To avoid the detection failures this delay
causes, retry the detection for 5 seconds.

I noticed this on ICL/TGL RVPs and a DELL XPS 13 7390 ICL laptop.

References: https://gitlab.freedesktop.org/drm/intel/issues/1067
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
b/drivers/gpu/drm/i915/display/intel_ddi.c
index 4f508bf70f3b..2d947ff83488 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4371,7 +4371,10 @@ static enum intel_hotplug_state
 intel_ddi_hotplug(struct intel_encoder *encoder,
  struct intel_connector *connector)
 {
+   struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+   enum phy phy = intel_port_to_phy(i915, encoder->port);
+   bool is_tc = intel_phy_is_tc(i915, phy);
struct drm_modeset_acquire_ctx ctx;
enum intel_hotplug_state state;
int ret;
@@ -4414,8 +4417,15 @@ intel_ddi_hotplug(struct intel_encoder *encoder,
 * valid EDID. To solve this schedule another detection cycle if this
 * time around we didn't detect any change in the sink's connection
 * status.
+*
+* Type-c connectors which get their HPD signal deasserted then
+* reasserted, without unplugging/replugging the sink from the
+* connector, introduce a delay until the AUX channel communication
+* becomes functional. Retry the detection for 5 seconds on type-c
+* connectors to account for this delay.
 */
-   if (state == INTEL_HOTPLUG_UNCHANGED && !connector->hotplug_retries &&
+   if (state == INTEL_HOTPLUG_UNCHANGED &&
+   connector->hotplug_retries < (is_tc ? 5 : 1) &&
!dig_port->dp.is_mst)
state = INTEL_HOTPLUG_RETRY;
 
-- 
2.23.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915: Add a retry counter for hotplug detect retries

2020-03-30 Thread Imre Deak
On TypeC connectors we need to retry the detection after hotplug events
for a longer time, so add a retry counter to support this. The next
patch will add detection retries on TypeC ports needing this.

Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/display/intel_ddi.c |  7 +++
 .../gpu/drm/i915/display/intel_display_types.h   |  6 --
 drivers/gpu/drm/i915/display/intel_dp.c  |  7 +++
 drivers/gpu/drm/i915/display/intel_hdmi.c|  6 +++---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 16 ++--
 drivers/gpu/drm/i915/display/intel_hotplug.h |  3 +--
 drivers/gpu/drm/i915/display/intel_sdvo.c|  5 ++---
 7 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
b/drivers/gpu/drm/i915/display/intel_ddi.c
index 916a802af788..4f508bf70f3b 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4369,15 +4369,14 @@ static int intel_hdmi_reset_link(struct intel_encoder 
*encoder,
 
 static enum intel_hotplug_state
 intel_ddi_hotplug(struct intel_encoder *encoder,
- struct intel_connector *connector,
- bool irq_received)
+ struct intel_connector *connector)
 {
struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
struct drm_modeset_acquire_ctx ctx;
enum intel_hotplug_state state;
int ret;
 
-   state = intel_encoder_hotplug(encoder, connector, irq_received);
+   state = intel_encoder_hotplug(encoder, connector);
 
drm_modeset_acquire_init(&ctx, 0);
 
@@ -4416,7 +4415,7 @@ intel_ddi_hotplug(struct intel_encoder *encoder,
 * time around we didn't detect any change in the sink's connection
 * status.
 */
-   if (state == INTEL_HOTPLUG_UNCHANGED && irq_received &&
+   if (state == INTEL_HOTPLUG_UNCHANGED && !connector->hotplug_retries &&
!dig_port->dp.is_mst)
state = INTEL_HOTPLUG_RETRY;
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h 
b/drivers/gpu/drm/i915/display/intel_display_types.h
index 176ab5f1e867..671721e74075 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -132,8 +132,7 @@ struct intel_encoder {
u16 cloneable;
u8 pipe_mask;
enum intel_hotplug_state (*hotplug)(struct intel_encoder *encoder,
-   struct intel_connector *connector,
-   bool irq_received);
+   struct intel_connector *connector);
enum intel_output_type (*compute_output_type)(struct intel_encoder *,
  struct intel_crtc_state *,
  struct 
drm_connector_state *);
@@ -425,6 +424,9 @@ struct intel_connector {
struct edid *edid;
struct edid *detect_edid;
 
+   /* Number of times hotplug detection was tried after an HPD interrupt */
+   int hotplug_retries;
+
/* since POLL and HPD connectors may use the same HPD line keep the 
native
   state of connector->polled in case hotplug storm detection changes 
it */
u8 polled;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
b/drivers/gpu/drm/i915/display/intel_dp.c
index 2e715e6d7bb4..ab676d5b389b 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -5556,14 +5556,13 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
  */
 static enum intel_hotplug_state
 intel_dp_hotplug(struct intel_encoder *encoder,
-struct intel_connector *connector,
-bool irq_received)
+struct intel_connector *connector)
 {
struct drm_modeset_acquire_ctx ctx;
enum intel_hotplug_state state;
int ret;
 
-   state = intel_encoder_hotplug(encoder, connector, irq_received);
+   state = intel_encoder_hotplug(encoder, connector);
 
drm_modeset_acquire_init(&ctx, 0);
 
@@ -5587,7 +5586,7 @@ intel_dp_hotplug(struct intel_encoder *encoder,
 * Keeping it consistent with intel_ddi_hotplug() and
 * intel_hdmi_hotplug().
 */
-   if (state == INTEL_HOTPLUG_UNCHANGED && irq_received)
+   if (state == INTEL_HOTPLUG_UNCHANGED && !connector->hotplug_retries)
state = INTEL_HOTPLUG_RETRY;
 
return state;
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c 
b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 0076abc63851..74ee7b2d83ce 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -3262,11 +3262,11 @@ void intel_hdmi_init_connector(struct 
intel_digital_port *intel_dig_port,
 
 static enum intel_hotplug_state
 intel_hdmi_hotplug(struct intel_encoder *encoder,
-  struct

[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/perf: don't read head/tail pointers outside critical section

2020-03-30 Thread Patchwork
== Series Details ==

Series: drm/i915/perf: don't read head/tail pointers outside critical section
URL   : https://patchwork.freedesktop.org/series/75220/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8212 -> Patchwork_17124


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/index.html

Known issues


  Here are the changes found in Patchwork_17124 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@i915_selftest@live@execlists:
- fi-icl-y:   [PASS][1] -> [DMESG-FAIL][2] ([fdo#108569])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/fi-icl-y/igt@i915_selftest@l...@execlists.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/fi-icl-y/igt@i915_selftest@l...@execlists.html
- fi-kbl-soraka:  [PASS][3] -> [INCOMPLETE][4] ([fdo#112259] / 
[i915#656])
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/fi-kbl-soraka/igt@i915_selftest@l...@execlists.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/fi-kbl-soraka/igt@i915_selftest@l...@execlists.html

  
 Possible fixes 

  * igt@i915_selftest@live@gt_timelines:
- {fi-tgl-u}: [DMESG-FAIL][5] -> [PASS][6]
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/fi-tgl-u/igt@i915_selftest@live@gt_timelines.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/fi-tgl-u/igt@i915_selftest@live@gt_timelines.html

  
  {name}: This element is suppressed. This means it is ignored when computing
  the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#112259]: https://bugs.freedesktop.org/show_bug.cgi?id=112259
  [i915#656]: https://gitlab.freedesktop.org/drm/intel/issues/656


Participating hosts (45 -> 44)
--

  Additional (5): fi-cml-u2 fi-cml-s fi-skl-6770hq fi-cfl-8700k fi-cfl-8109u 
  Missing(6): fi-byt-squawks fi-bsw-cyan fi-apl-guc fi-ctg-p8600 
fi-byt-clapper fi-bdw-samus 


Build changes
-

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8212 -> Patchwork_17124

  CI-20190529: 20190529
  CI_DRM_8212: 68b152390f915c189e2dd0b29eec557d5d8be9a8 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5544: 477c562fc9932939083d732b77dd7b083c6bc0a1 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17124: 97a65e4f417fc62caa75e155ad3d50179e5db9dd @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

97a65e4f417f drm/i915/perf: don't read head/tail pointers outside critical 
section

== Logs ==

For more details see: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/index.html
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/perf: don't read head/tail pointers outside critical section

2020-03-30 Thread Chris Wilson
Quoting Lionel Landwerlin (2020-03-30 10:14:11)
> Reading or writing those fields should only happen under
> stream->oa_buffer.ptr_lock.

Writing, yes. Reading as a pair, sure. There are other ways you can
ensure that the tail/head are read as one, but fair enough.

> Signed-off-by: Lionel Landwerlin 
> Fixes: d1df41eb72ef ("drm/i915/perf: rework aging tail workaround")
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index c74ebac50015..ec9421f02ebd 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -463,6 +463,7 @@ static bool oa_buffer_check_unlocked(struct 
> i915_perf_stream *stream)
> u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> int report_size = stream->oa_buffer.format_size;
> unsigned long flags;
> +   bool pollin;
> u32 hw_tail;
> u64 now;
>  
> @@ -532,10 +533,13 @@ static bool oa_buffer_check_unlocked(struct 
> i915_perf_stream *stream)
> stream->oa_buffer.aging_timestamp = now;
> }
>  
> +   pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> + stream->oa_buffer.head - gtt_offset) >= report_size;
> +
> +

Bonus \n

Reviewed-by: Chris Wilson 

> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>  
> -   return OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> -   stream->oa_buffer.head - gtt_offset) >= report_size;
> +   return pollin;

You could always leave the calculation here, and just have the read
inside.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/perf: don't read head/tail pointers outside critical section

2020-03-30 Thread Patchwork
== Series Details ==

Series: drm/i915/perf: don't read head/tail pointers outside critical section
URL   : https://patchwork.freedesktop.org/series/75220/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8212_full -> Patchwork_17124_full


Summary
---

  **SUCCESS**

  No regressions found.

  

Known issues


  Here are the changes found in Patchwork_17124_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_balancer@smoke:
- shard-iclb: [PASS][1] -> [SKIP][2] ([fdo#110854])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-iclb1/igt@gem_exec_balan...@smoke.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-iclb7/igt@gem_exec_balan...@smoke.html

  * igt@gem_exec_schedule@implicit-both-bsd:
- shard-iclb: [PASS][3] -> [SKIP][4] ([i915#677]) +1 similar issue
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-iclb8/igt@gem_exec_sched...@implicit-both-bsd.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-iclb4/igt@gem_exec_sched...@implicit-both-bsd.html

  * igt@gem_exec_schedule@implicit-read-write-bsd1:
- shard-iclb: [PASS][5] -> [SKIP][6] ([fdo#109276] / [i915#677]) +1 
similar issue
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-iclb4/igt@gem_exec_sched...@implicit-read-write-bsd1.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-iclb6/igt@gem_exec_sched...@implicit-read-write-bsd1.html

  * igt@gem_exec_schedule@independent-bsd2:
- shard-iclb: [PASS][7] -> [SKIP][8] ([fdo#109276]) +15 similar 
issues
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-iclb2/igt@gem_exec_sched...@independent-bsd2.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-iclb7/igt@gem_exec_sched...@independent-bsd2.html

  * igt@gem_exec_schedule@preempt-bsd:
- shard-iclb: [PASS][9] -> [SKIP][10] ([fdo#112146]) +7 similar 
issues
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-iclb3/igt@gem_exec_sched...@preempt-bsd.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-iclb1/igt@gem_exec_sched...@preempt-bsd.html

  * igt@gem_exec_store@pages-vcs1:
- shard-iclb: [PASS][11] -> [SKIP][12] ([fdo#112080]) +10 similar 
issues
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-iclb4/igt@gem_exec_st...@pages-vcs1.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-iclb6/igt@gem_exec_st...@pages-vcs1.html

  * igt@gen9_exec_parse@allowed-all:
- shard-glk:  [PASS][13] -> [DMESG-WARN][14] ([i915#716])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-glk8/igt@gen9_exec_pa...@allowed-all.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-glk1/igt@gen9_exec_pa...@allowed-all.html

  * igt@i915_pm_rc6_residency@rc6-idle:
- shard-glk:  [PASS][15] -> [FAIL][16] ([i915#1527])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-glk1/igt@i915_pm_rc6_reside...@rc6-idle.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-glk2/igt@i915_pm_rc6_reside...@rc6-idle.html

  * igt@kms_cursor_crc@pipe-b-cursor-64x64-sliding:
- shard-skl:  [PASS][17] -> [FAIL][18] ([i915#54])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-skl3/igt@kms_cursor_...@pipe-b-cursor-64x64-sliding.html
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-skl1/igt@kms_cursor_...@pipe-b-cursor-64x64-sliding.html

  * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic:
- shard-hsw:  [PASS][19] -> [FAIL][20] ([i915#96])
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-hsw1/igt@kms_cursor_leg...@2x-long-cursor-vs-flip-atomic.html
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-hsw8/igt@kms_cursor_leg...@2x-long-cursor-vs-flip-atomic.html

  * igt@kms_fbcon_fbt@fbc-suspend:
- shard-kbl:  [PASS][21] -> [DMESG-WARN][22] ([i915#180] / 
[i915#93] / [i915#95])
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-kbl3/igt@kms_fbcon_...@fbc-suspend.html
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-kbl7/igt@kms_fbcon_...@fbc-suspend.html

  * igt@kms_flip_tiling@flip-changes-tiling-yf:
- shard-kbl:  [PASS][23] -> [FAIL][24] ([i915#699] / [i915#93] / 
[i915#95])
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8212/shard-kbl4/igt@kms_flip_til...@flip-changes-tiling-yf.html
   [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17124/shard-kbl7/igt@kms_flip_til...@flip-changes-tiling-yf.html

  * igt@kms_hdr@bpc-switch-dpms:
- shard-skl:  [PASS][25] -> [FAIL][26] ([i915#1188])
   [25]: 
https://intel-gfx-ci.01.org/tree/drm-tip

[Intel-gfx] [PATCH] drm/i915/execlists: Include priority info in trace_ports

2020-03-30 Thread Chris Wilson
Add some extra information into trace_ports to help with reviewing
correctness.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9104796673dc..9332269d2a79 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1415,6 +1415,24 @@ static inline void write_desc(struct 
intel_engine_execlists *execlists, u64 desc
}
 }
 
+static char *dump_port(char *buf, int buflen,
+  const char *prefix,
+  struct i915_request *rq)
+{
+   if (!rq)
+   return "";
+
+   snprintf(buf, buflen, "%s%llx:%lld%s prio %d",
+prefix,
+rq->fence.context, rq->fence.seqno,
+i915_request_completed(rq) ? "!" :
+i915_request_started(rq) ? "*" :
+"",
+rq_prio(rq));
+
+   return buf;
+}
+
 static __maybe_unused void
 trace_ports(const struct intel_engine_execlists *execlists,
const char *msg,
@@ -1422,18 +1440,14 @@ trace_ports(const struct intel_engine_execlists 
*execlists,
 {
const struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
+   char p0[40], p1[40];
 
if (!ports[0])
return;
 
-   ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
-ports[0]->fence.context,
-ports[0]->fence.seqno,
-i915_request_completed(ports[0]) ? "!" :
-i915_request_started(ports[0]) ? "*" :
-"",
-ports[1] ? ports[1]->fence.context : 0,
-ports[1] ? ports[1]->fence.seqno : 0);
+   ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
+dump_port(p0, sizeof(p0), "", ports[0]),
+dump_port(p1, sizeof(p1), ", ", ports[1]));
 }
 
 static inline bool
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS

2020-03-30 Thread Michal Wajdeczko
There might be many reasons why we failed to successfully
load and authenticate HuC firmware, but today we only use
a single error in case of no HuC hardware. Add some more
error codes for most common cases (disabled, not installed,
corrupted or mismatched firmware).

Signed-off-by: Michal Wajdeczko 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Tony Ye 
Cc: Robert M. Fosha 
---
 drivers/gpu/drm/i915/gt/uc/intel_huc.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d6097b46600c..1e8073ec343f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -200,9 +200,13 @@ int intel_huc_auth(struct intel_huc *huc)
  * This function reads status register to verify if HuC
  * firmware was successfully loaded.
  *
- * Returns: 1 if HuC firmware is loaded and verified,
- * 0 if HuC firmware is not loaded and -ENODEV if HuC
- * is not present on this platform.
+ * Returns:
+ *  * -ENODEV if HuC is not present on this platform,
+ *  * -EOPNOTSUPP if HuC firmware is disabled,
+ *  * -ENOPKG if HuC firmware was not installed,
+ *  * -ENOEXEC if HuC firmware is invalid or mismatched,
+ *  * 0 if HuC firmware is not running,
+ *  * 1 if HuC firmware is authenticated and running.
  */
 int intel_huc_check_status(struct intel_huc *huc)
 {
@@ -210,8 +214,18 @@ int intel_huc_check_status(struct intel_huc *huc)
intel_wakeref_t wakeref;
u32 status = 0;
 
-   if (!intel_huc_is_supported(huc))
+   switch (__intel_uc_fw_status(&huc->fw)) {
+   case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
return -ENODEV;
+   case INTEL_UC_FIRMWARE_DISABLED:
+   return -EOPNOTSUPP;
+   case INTEL_UC_FIRMWARE_MISSING:
+   return -ENOPKG;
+   case INTEL_UC_FIRMWARE_ERROR:
+   return -ENOEXEC;
+   default:
+   break;
+   }
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
status = intel_uncore_read(gt->uncore, huc->status.reg);
-- 
2.19.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/huc: Fix HuC register used in debugfs

2020-03-30 Thread Michal Wajdeczko
We report HuC status in debugfs using a register read, but
we missed that on Gen11+ HuC uses a different register.
Use the correct one.

While here, correct placement of the colon.

Signed-off-by: Michal Wajdeczko 
Cc: Daniele Ceraolo Spurio 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/uc/intel_huc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d6097b46600c..3fee65308474 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -244,6 +244,6 @@ void intel_huc_load_status(struct intel_huc *huc, struct 
drm_printer *p)
intel_uc_fw_dump(&huc->fw, p);
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   drm_printf(p, "\nHuC status 0x%08x:\n",
-  intel_uncore_read(gt->uncore, HUC_STATUS2));
+   drm_printf(p, "HuC status: 0x%08x\n",
+  intel_uncore_read(gt->uncore, huc->status.reg));
 }
-- 
2.19.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/selftests: Check timeout before flush and cond checks

2020-03-30 Thread Chris Wilson
Allow a bit of leniency for the CPU scheduler to be distracted while we
flush the tasklet and so ensure that we always check the status of the
request once more before timing out.

v2: Wait until the HW acked the submit, and we do any secondary actions
for the submit (e.g. timeslices)

Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 30 ++
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f06ba750a0a..dd6c63a2fb96 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -68,26 +68,38 @@ static void engine_heartbeat_enable(struct intel_engine_cs 
*engine,
engine->props.heartbeat_interval_ms = saved;
 }
 
+static bool is_active(struct i915_request *rq)
+{
+   if (i915_request_is_active(rq))
+   return true;
+
+   if (i915_request_on_hold(rq))
+   return true;
+
+   return false;
+}
+
 static int wait_for_submit(struct intel_engine_cs *engine,
   struct i915_request *rq,
   unsigned long timeout)
 {
timeout += jiffies;
do {
-   cond_resched();
-   intel_engine_flush_submission(engine);
+   bool done = time_after(jiffies, timeout);
 
-   if (READ_ONCE(engine->execlists.pending[0]))
-   continue;
-
-   if (i915_request_is_active(rq))
+   if (i915_request_completed(rq)) /* that was quick! */
return 0;
 
-   if (i915_request_started(rq)) /* that was quick! */
+   /* Wait until the HW has acknowleged the submission (or err) */
+   intel_engine_flush_submission(engine);
+   if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
return 0;
-   } while (time_before(jiffies, timeout));
 
-   return -ETIME;
+   if (done)
+   return -ETIME;
+
+   cond_resched();
+   } while (1);
 }
 
 static int wait_for_reset(struct intel_engine_cs *engine,
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/huc: Fix HuC register used in debugfs

2020-03-30 Thread Chris Wilson
Quoting Michal Wajdeczko (2020-03-30 12:33:38)
> We report HuC status in debugfs using register read, but
> we missed that on Gen11+ HuC uses different register.
> Use correct one.
> 
> While here, correct placement of the colon.
> 
> Signed-off-by: Michal Wajdeczko 
> Cc: Daniele Ceraolo Spurio 
> Cc: Chris Wilson 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index d6097b46600c..3fee65308474 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -244,6 +244,6 @@ void intel_huc_load_status(struct intel_huc *huc, struct 
> drm_printer *p)
> intel_uc_fw_dump(&huc->fw, p);
>  
> with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> -   drm_printf(p, "\nHuC status 0x%08x:\n",
> -  intel_uncore_read(gt->uncore, HUC_STATUS2));
> +   drm_printf(p, "HuC status: 0x%08x\n",
> +  intel_uncore_read(gt->uncore, huc->status.reg));

drivers/gpu/drm/i915/gt/uc/intel_huc.c: huc->status.reg = 
GEN11_HUC_KERNEL_LOAD_INFO;
drivers/gpu/drm/i915/gt/uc/intel_huc.c: huc->status.reg = HUC_STATUS2;
drivers/gpu/drm/i915/gt/uc/intel_huc.c: 
huc->status.reg,
drivers/gpu/drm/i915/gt/uc/intel_huc.c: status = 
intel_uncore_read(gt->uncore, huc->status.reg);

Reviewed-by: Chris Wilson 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915: HDCP: fix Ri prime check done during link check

2020-03-30 Thread Oliver Barta
From: Oliver Barta 

The check was always succeeding even in case of a mismatch due to the
HDCP_STATUS_ENC bit being set. Make sure both bits are actually set.

Signed-off-by: Oliver Barta 
Fixes: 2320175feb74 ("drm/i915: Implement HDCP for HDMI")
---
 [v2] rebased on top of latest changes

 drivers/gpu/drm/i915/display/intel_hdmi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c 
b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 0076abc63851..51a69f330588 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -1561,7 +1561,8 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port 
*intel_dig_port)
intel_de_write(i915, HDCP_RPRIME(i915, cpu_transcoder, port), ri.reg);
 
/* Wait for Ri prime match */
-   if (wait_for(intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, 
port)) &
+   if (wait_for((intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder,
+port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC)) ==
 (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) {
drm_err(&i915->drm,
"Ri' mismatch detected, link check failed (%x)\n",
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 0/5] Consider DBuf bandwidth when calculating CDCLK

2020-03-30 Thread Stanislav Lisovskiy
We need to calculate cdclk after watermarks/ddb have been calculated,
as with recent hw CDCLK needs to be adjusted according to DBuf
requirements, which is not possible with the current code organization.

Setting CDCLK according to DBuf BW requirements and not just rejecting
if it doesn't satisfy BW requirements, will allow us to save power when
it is possible and gain additional bandwidth when it's needed - i.e.
boosting both our power management and performance capabilities.

This patch is preparation for that, first we now extract modeset
calculation from modeset checks, in order to call it after wm/ddb
has been calculated.

Stanislav Lisovskiy (5):
  drm/i915: Decouple cdclk calculation from modeset checks
  drm/i915: Force recalculate min_cdclk if planes config changed
  drm/i915: Introduce for_each_dbuf_slice_in_mask macro
  drm/i915: Adjust CDCLK accordingly to our DBuf bw needs
  drm/i915: Remove unneeded hack now for CDCLK

 drivers/gpu/drm/i915/display/intel_bw.c   | 61 ++-
 drivers/gpu/drm/i915/display/intel_bw.h   |  8 +++
 drivers/gpu/drm/i915/display/intel_cdclk.c| 31 +++---
 drivers/gpu/drm/i915/display/intel_display.c  | 36 ---
 drivers/gpu/drm/i915/display/intel_display.h  |  7 +++
 .../drm/i915/display/intel_display_power.h|  5 ++
 drivers/gpu/drm/i915/intel_pm.c   | 34 ++-
 drivers/gpu/drm/i915/intel_pm.h   |  3 +
 8 files changed, 163 insertions(+), 22 deletions(-)

-- 
2.24.1.485.gad05a3d8e5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 1/5] drm/i915: Decouple cdclk calculation from modeset checks

2020-03-30 Thread Stanislav Lisovskiy
We need to calculate cdclk after watermarks/ddb have been calculated,
as with recent hw CDCLK needs to be adjusted according to DBuf
requirements, which is not possible with the current code organization.

Setting CDCLK according to DBuf BW requirements and not just rejecting
if it doesn't satisfy BW requirements, will allow us to save power when
it is possible and gain additional bandwidth when it's needed - i.e.
boosting both our power management and performance capabilities.

This patch is preparation for that, first we now extract modeset
calculation from modeset checks, in order to call it after wm/ddb
has been calculated.

v2: - Extract only intel_modeset_calc_cdclk from intel_modeset_checks
  (Ville Syrjälä)

Signed-off-by: Stanislav Lisovskiy 
---
 drivers/gpu/drm/i915/display/intel_display.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 7c45d676c9b7..17d83f37f49f 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -14545,10 +14545,6 @@ static int intel_modeset_checks(struct 
intel_atomic_state *state)
return ret;
}
 
-   ret = intel_modeset_calc_cdclk(state);
-   if (ret)
-   return ret;
-
intel_modeset_clear_plls(state);
 
if (IS_HASWELL(dev_priv))
@@ -14882,10 +14878,6 @@ static int intel_atomic_check(struct drm_device *dev,
goto fail;
}
 
-   ret = intel_atomic_check_crtcs(state);
-   if (ret)
-   goto fail;
-
intel_fbc_choose_crtc(dev_priv, state);
ret = calc_watermark_data(state);
if (ret)
@@ -14895,6 +14887,16 @@ static int intel_atomic_check(struct drm_device *dev,
if (ret)
goto fail;
 
+   if (any_ms) {
+   ret = intel_modeset_calc_cdclk(state);
+   if (ret)
+   return ret;
+   }
+
+   ret = intel_atomic_check_crtcs(state);
+   if (ret)
+   goto fail;
+
for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
new_crtc_state, i) {
if (!needs_modeset(new_crtc_state) &&
-- 
2.24.1.485.gad05a3d8e5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 4/5] drm/i915: Adjust CDCLK accordingly to our DBuf bw needs

2020-03-30 Thread Stanislav Lisovskiy
According to BSpec max BW per slice is calculated using formula
Max BW = CDCLK * 64. Currently when calculating min CDCLK we
account only per plane requirements, however in order to avoid
FIFO underruns we need to estimate accumulated BW consumed by
all planes(ddb entries basically) residing on that particular
DBuf slice. This will allow us to put CDCLK lower and save power
when we don't need that much bandwidth or gain additional
performance once plane consumption grows.

v2: - Fix long line warning
- Limited new DBuf bw checks to only gens >= 11

v3: - Lets track used Dbuf bw per slice and per crtc in bw state
  (or may be in DBuf state in future), that way we don't need
  to have all crtcs in state and those only if we detect if
  are actually going to change cdclk, just same way as we
  do with other stuff, i.e intel_atomic_serialize_global_state
  and co. Just as per Ville's paradigm.
- Made dbuf bw calculation procedure look nicer by introducing
  for_each_dbuf_slice_in_mask - we often will now need to iterate
  slices using mask.
- According to experimental results CDCLK * 64 accounts for
  overall bandwidth across all dbufs, not per dbuf.

Signed-off-by: Stanislav Lisovskiy 
---
 drivers/gpu/drm/i915/display/intel_bw.c   | 61 ++-
 drivers/gpu/drm/i915/display/intel_bw.h   |  8 +++
 drivers/gpu/drm/i915/display/intel_cdclk.c| 25 
 drivers/gpu/drm/i915/display/intel_display.c  |  8 +++
 .../drm/i915/display/intel_display_power.h|  2 +
 drivers/gpu/drm/i915/intel_pm.c   | 34 ++-
 drivers/gpu/drm/i915/intel_pm.h   |  3 +
 7 files changed, 138 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bw.c 
b/drivers/gpu/drm/i915/display/intel_bw.c
index 573a1c206b60..e9d65820fb76 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -6,6 +6,7 @@
 #include 
 
 #include "intel_bw.h"
+#include "intel_pm.h"
 #include "intel_display_types.h"
 #include "intel_sideband.h"
 #include "intel_atomic.h"
@@ -338,7 +339,6 @@ static unsigned int intel_bw_crtc_data_rate(const struct 
intel_crtc_state *crtc_
 
return data_rate;
 }
-
 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
  const struct intel_crtc_state *crtc_state)
 {
@@ -419,6 +419,65 @@ intel_atomic_bw_get_state(struct intel_atomic_state *state)
return to_intel_bw_state(bw_state);
 }
 
+int intel_bw_calc_min_cdclk(struct intel_atomic_state *state)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   int i = 0;
+   enum plane_id plane_id;
+   struct intel_crtc_state *crtc_state;
+   struct intel_crtc *crtc;
+   int max_bw = 0;
+   int min_cdclk;
+   enum pipe pipe;
+   struct intel_bw_state *bw_state;
+   int slice_id = 0;
+
+   bw_state = intel_atomic_bw_get_state(state);
+
+   if (IS_ERR(bw_state))
+   return PTR_ERR(bw_state);
+
+   for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+   struct intel_crtc_bw *crtc_bw = 
&bw_state->dbuf_bw_used[crtc->pipe];
+
+   memset(&crtc_bw->dbuf_bw, 0, sizeof(crtc_bw->dbuf_bw));
+
+   for_each_plane_id_on_crtc(crtc, plane_id) {
+   struct skl_ddb_entry *plane_alloc =
+   &crtc_state->wm.skl.plane_ddb_y[plane_id];
+   struct skl_ddb_entry *uv_plane_alloc =
+   &crtc_state->wm.skl.plane_ddb_uv[plane_id];
+   unsigned int data_rate = 
crtc_state->data_rate[plane_id];
+
+   unsigned int dbuf_mask = 
skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc);
+
+   dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, 
uv_plane_alloc);
+
+   DRM_DEBUG_KMS("Got dbuf mask %x for pipe %c ddb %d-%d 
plane %d data rate %d\n",
+ dbuf_mask, pipe_name(crtc->pipe), 
plane_alloc->start,
+ plane_alloc->end, plane_id, data_rate);
+
+   for_each_dbuf_slice_in_mask(slice_id, dbuf_mask)
+   crtc_bw->dbuf_bw[slice_id] += data_rate;
+   }
+   }
+
+   for_each_dbuf_slice(slice_id) {
+   int total_bw_per_slice = 0;
+
+   for_each_pipe(dev_priv, pipe) {
+   struct intel_crtc_bw *crtc_bw = 
&bw_state->dbuf_bw_used[pipe];
+
+   total_bw_per_slice += crtc_bw->dbuf_bw[slice_id];
+   }
+   max_bw += total_bw_per_slice;
+   }
+
+   min_cdclk = max_bw / 64;
+
+   return min_cdclk;
+}
+
 int intel_bw_atomic_check(struct intel_atomic_state *state)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h 
b/drivers/gpu/drm/i915/display

[Intel-gfx] [PATCH v3 5/5] drm/i915: Remove unneeded hack now for CDCLK

2020-03-30 Thread Stanislav Lisovskiy
No need to bump up CDCLK now, as it is now correctly
calculated, accounting for DBuf BW as BSpec says.

Signed-off-by: Stanislav Lisovskiy 
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 036774e7f3ec..13e7ea6f471e 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2077,18 +2077,6 @@ int intel_crtc_compute_min_cdclk(const struct 
intel_crtc_state *crtc_state)
/* Account for additional needs from the planes */
min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
 
-   /*
-* HACK. Currently for TGL platforms we calculate
-* min_cdclk initially based on pixel_rate divided
-* by 2, accounting for also plane requirements,
-* however in some cases the lowest possible CDCLK
-* doesn't work and causing the underruns.
-* Explicitly stating here that this seems to be currently
-* rather a Hack, than final solution.
-*/
-   if (IS_TIGERLAKE(dev_priv))
-   min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
-
/*
 * Similar story as with skl_write_plane_wm and intel_enable_sagv
 * - in some certain driver parts, we don't have any guarantee that
-- 
2.24.1.485.gad05a3d8e5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 3/5] drm/i915: Introduce for_each_dbuf_slice_in_mask macro

2020-03-30 Thread Stanislav Lisovskiy
We quite often need now to iterate only particular dbuf slices
in mask, whether they are active or related to particular crtc.

Let's make our life a bit easier and use a macro for that.

Signed-off-by: Stanislav Lisovskiy 
---
 drivers/gpu/drm/i915/display/intel_display.h   | 7 +++
 drivers/gpu/drm/i915/display/intel_display_power.h | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display.h 
b/drivers/gpu/drm/i915/display/intel_display.h
index adb1225a3480..c898285f0dc3 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -187,6 +187,13 @@ enum plane_id {
for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \
for_each_if((__crtc)->plane_ids_mask & BIT(__p))
 
+#define for_each_dbuf_slice_in_mask(__slice, __mask) \
+   for ((__slice) = 0; (__slice) < I915_MAX_DBUF_SLICES; (__slice)++) \
+   for_each_if((1 << (__slice)) & (__mask))
+
+#define for_each_dbuf_slice(__slice) \
+   for_each_dbuf_slice_in_mask(__slice, (1 << I915_MAX_DBUF_SLICES) - 1)
+
 enum port {
PORT_NONE = -1,
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
b/drivers/gpu/drm/i915/display/intel_display_power.h
index da64a5edae7a..468e8fb0203a 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -311,8 +311,11 @@ intel_display_power_put_async(struct drm_i915_private 
*i915,
 enum dbuf_slice {
DBUF_S1,
DBUF_S2,
+   DBUF_SLICE_MAX
 };
 
+#define I915_DBUF_MAX_SLICES DBUF_SLICE_MAX
+
 #define with_intel_display_power(i915, domain, wf) \
for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
 intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0)
-- 
2.24.1.485.gad05a3d8e5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 2/5] drm/i915: Force recalculate min_cdclk if planes config changed

2020-03-30 Thread Stanislav Lisovskiy
In Gen11+ whenever we might exceed DBuf bandwidth we might need to
recalculate CDCLK which DBuf bandwidth is scaled with.
Total Dbuf bw used might change based on particular plane needs.

Signed-off-by: Stanislav Lisovskiy 
---
 drivers/gpu/drm/i915/display/intel_display.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 17d83f37f49f..9fd32d61ebfe 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -14623,7 +14623,7 @@ static bool active_planes_affects_min_cdclk(struct 
drm_i915_private *dev_priv)
/* See {hsw,vlv,ivb}_plane_ratio() */
return IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv) ||
IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) ||
-   IS_IVYBRIDGE(dev_priv);
+   IS_IVYBRIDGE(dev_priv) || (INTEL_GEN(dev_priv) >= 11);
 }
 
 static int intel_atomic_check_planes(struct intel_atomic_state *state,
@@ -14669,7 +14669,13 @@ static int intel_atomic_check_planes(struct 
intel_atomic_state *state,
old_active_planes = old_crtc_state->active_planes & 
~BIT(PLANE_CURSOR);
new_active_planes = new_crtc_state->active_planes & 
~BIT(PLANE_CURSOR);
 
-   if (hweight8(old_active_planes) == hweight8(new_active_planes))
+   /*
+* Not only the number of planes, but if the plane 
configuration had
+* changed might already mean we need to recompute min CDCLK,
+* because different planes might consume different amount of 
Dbuf bandwidth
+* according to formula: Bw per plane = Pixel rate * bpp * 
pipe/plane scale factor
+*/
+   if (old_active_planes == new_active_planes)
continue;
 
ret = intel_crtc_add_planes_to_state(state, crtc, 
new_active_planes);
-- 
2.24.1.485.gad05a3d8e5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS

2020-03-30 Thread Chris Wilson
Quoting Michal Wajdeczko (2020-03-30 12:33:02)
> There might be many reasons why we failed to successfully
> load and authenticate HuC firmware, but today we only use
> single error in case of no HuC hardware. Add some more
> error codes for most common cases (disabled, not installed,
> corrupted or mismatched firmware).
> 
> Signed-off-by: Michal Wajdeczko 
> Cc: Joonas Lahtinen 
> Cc: Chris Wilson 
> Cc: Daniele Ceraolo Spurio 
> Cc: Tony Ye 
> Cc: Robert M. Fosha 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 22 ++
>  1 file changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index d6097b46600c..1e8073ec343f 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -200,9 +200,13 @@ int intel_huc_auth(struct intel_huc *huc)
>   * This function reads status register to verify if HuC
>   * firmware was successfully loaded.
>   *
> - * Returns: 1 if HuC firmware is loaded and verified,
> - * 0 if HuC firmware is not loaded and -ENODEV if HuC
> - * is not present on this platform.
> + * Returns:
> + *  * -ENODEV if HuC is not present on this platform,
> + *  * -EOPNOTSUPP if HuC firmware is disabled,
> + *  * -ENOPKG if HuC firmware was not installed,
> + *  * -ENOEXEC if HuC firmware is invalid or mismatched,
> + *  * 0 if HuC firmware is not running,
> + *  * 1 if HuC firmware is authenticated and running.
>   */
>  int intel_huc_check_status(struct intel_huc *huc)
>  {
> @@ -210,8 +214,18 @@ int intel_huc_check_status(struct intel_huc *huc)
> intel_wakeref_t wakeref;
> u32 status = 0;
>  
> -   if (!intel_huc_is_supported(huc))
> +   switch (__intel_uc_fw_status(&huc->fw)) {
> +   case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
> return -ENODEV;

No HW support.

> +   case INTEL_UC_FIRMWARE_DISABLED:
> +   return -EOPNOTSUPP;

Override by user [sysadmin]

> +   case INTEL_UC_FIRMWARE_MISSING:
> +   return -ENOPKG;

FILENOTFOUND.

> +   case INTEL_UC_FIRMWARE_ERROR:
> +   return -ENOEXEC;

File corruption.

There's nothing else between us loading the fw and the huc rejecting
it?

FIRMWARE_FAIL? That's set as the opposite of FIRMWARE_TRANSFERRED in
that we failed to upload the image to the HW. The firmware itself hasn't
had a chance to run.

case INTEL_UC_FIRMWARE_FAIL:
return -ENXIO;

Or is that being overridden to FIRMWARE_ERROR?

Other than the question of whether there's one more step before the fw
is being run [and then able to set HUC_STATUS as it determines for
itself],

Reviewed-by: Chris Wilson 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Add a retry counter for hotplug detect retries

2020-03-30 Thread Patchwork
== Series Details ==

Series: series starting with [1/2] drm/i915: Add a retry counter for hotplug 
detect retries
URL   : https://patchwork.freedesktop.org/series/75224/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8213 -> Patchwork_17125


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17125/index.html

Known issues


  Here are the changes found in Patchwork_17125 that come from known issues:

### IGT changes ###

 Possible fixes 

  * igt@i915_selftest@live@execlists:
- fi-bxt-dsi: [INCOMPLETE][1] ([i915#656]) -> [PASS][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8213/fi-bxt-dsi/igt@i915_selftest@l...@execlists.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17125/fi-bxt-dsi/igt@i915_selftest@l...@execlists.html

  
  [i915#656]: https://gitlab.freedesktop.org/drm/intel/issues/656


Participating hosts (47 -> 43)
--

  Additional (4): fi-hsw-4770r fi-byt-j1900 fi-glk-dsi fi-bsw-n3050 
  Missing(8): fi-ilk-m540 fi-hsw-4200u fi-skl-6770hq fi-byt-squawks 
fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8213 -> Patchwork_17125

  CI-20190529: 20190529
  CI_DRM_8213: 3cebf14c87d0d4508d4cc9c49db14061af752c37 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5544: 477c562fc9932939083d732b77dd7b083c6bc0a1 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17125: fa5ee4c1786539998360fd6f6f4796fdb26882ed @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

fa5ee4c17865 drm/i915: Extend hotplug detect retry on TypeC connectors to 5 
seconds
a05d922e417e drm/i915: Add a retry counter for hotplug detect retries

== Logs ==

For more details see: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17125/index.html
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/execlists: Explicitly reset both reg and context runtime

2020-03-30 Thread Chris Wilson
Upon a GPU reset, we copy the default context image over top of the
guilty image. This will rollback the CTX_TIMESTAMP register to before
our value of ce->runtime.last. Reset both back to 0 so that we do not
encounter an underflow on the next schedule out after resume.

This should not be a huge issue in practice, as hangs should be rare in
correct code.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9104796673dc..d53078b345be 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -238,6 +238,17 @@ __execlists_update_reg_state(const struct intel_context 
*ce,
 const struct intel_engine_cs *engine,
 u32 head);
 
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+   /*
+* We can use either ppHWSP[16] which is recorded before the context
+* switch (and so excludes the cost of context switches) or use the
+* value from the context image itself, which is saved/restored earlier
+* and so includes the cost of the save.
+*/
+   return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
 static void mark_eio(struct i915_request *rq)
 {
if (i915_request_completed(rq))
@@ -1154,6 +1165,7 @@ static void restore_default_state(struct intel_context 
*ce,
   engine->context_size - PAGE_SIZE);
 
execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+   ce->runtime.last = intel_context_get_runtime(ce);
 }
 
 static void reset_active(struct i915_request *rq,
@@ -1195,17 +1207,6 @@ static void reset_active(struct i915_request *rq,
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }
 
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
-   /*
-* We can use either ppHWSP[16] which is recorded before the context
-* switch (and so excludes the cost of context switches) or use the
-* value from the context image itself, which is saved/restored earlier
-* and so includes the cost of the save.
-*/
-   return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
 {
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -4601,6 +4602,7 @@ static void init_common_reg_state(u32 * const regs,
regs[CTX_CONTEXT_CONTROL] = ctl;
 
regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+   regs[CTX_TIMESTAMP] = 0;
 }
 
 static void init_wa_bb_reg_state(u32 * const regs,
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] rcu_barrier() no longer allowed within mmap_sem?

2020-03-30 Thread Daniel Vetter
Hi all, for all = rcu, cpuhotplug and perf maintainers

We've hit an interesting new lockdep splat in our drm/i915 CI:

https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17096/shard-tglb7/igt@kms_frontbuffer_track...@fbcpsr-rgb101010-draw-mmap-gtt.html#dmesg-warnings861

Summarizing away the driver parts we have

< gpu locks which are held within mm->mmap_sem in various gpu fault handlers >

-> #4 (&mm->mmap_sem#2){}:
<4> [604.892615] __might_fault+0x63/0x90
<4> [604.892617] _copy_to_user+0x1e/0x80
<4> [604.892619] perf_read+0x200/0x2b0
<4> [604.892621] vfs_read+0x96/0x160
<4> [604.892622] ksys_read+0x9f/0xe0
<4> [604.892623] do_syscall_64+0x4f/0x220
<4> [604.892624] entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [604.892625]
-> #3 (&cpuctx_mutex){+.+.}:
<4> [604.892626] __mutex_lock+0x9a/0x9c0
<4> [604.892627] perf_event_init_cpu+0xa4/0x140
<4> [604.892629] perf_event_init+0x19d/0x1cd
<4> [604.892630] start_kernel+0x362/0x4e4
<4> [604.892631] secondary_startup_64+0xa4/0xb0
<4> [604.892631]
-> #2 (pmus_lock){+.+.}:
<4> [604.892633] __mutex_lock+0x9a/0x9c0
<4> [604.892633] perf_event_init_cpu+0x6b/0x140
<4> [604.892635] cpuhp_invoke_callback+0x9b/0x9d0
<4> [604.892636] _cpu_up+0xa2/0x140
<4> [604.892637] do_cpu_up+0x61/0xa0
<4> [604.892639] smp_init+0x57/0x96
<4> [604.892639] kernel_init_freeable+0x87/0x1dc
<4> [604.892640] kernel_init+0x5/0x100
<4> [604.892642] ret_from_fork+0x24/0x50
<4> [604.892642]
-> #1 (cpu_hotplug_lock.rw_sem){}:
<4> [604.892643] cpus_read_lock+0x34/0xd0
<4> [604.892644] rcu_barrier+0xaa/0x190
<4> [604.892645] kernel_init+0x21/0x100
<4> [604.892647] ret_from_fork+0x24/0x50
<4> [604.892647]
-> #0 (rcu_state.barrier_mutex){+.+.}:
<4> [604.892649] __lock_acquire+0x1328/0x15d0
<4> [604.892650] lock_acquire+0xa7/0x1c0
<4> [604.892651] __mutex_lock+0x9a/0x9c0
<4> [604.892652] rcu_barrier+0x23/0x190
<4> [604.892680] i915_gem_object_unbind+0x29d/0x3f0 [i915]
<4> [604.892707] i915_gem_object_pin_to_display_plane+0x141/0x270 [i915]
<4> [604.892737] intel_pin_and_fence_fb_obj+0xec/0x1f0 [i915]
<4> [604.892767] intel_plane_pin_fb+0x3f/0xd0 [i915]
<4> [604.892797] intel_prepare_plane_fb+0x13b/0x5c0 [i915]
<4> [604.892798] drm_atomic_helper_prepare_planes+0x85/0x110
<4> [604.892827] intel_atomic_commit+0xda/0x390 [i915]
<4> [604.892828] drm_atomic_helper_set_config+0x57/0xa0
<4> [604.892830] drm_mode_setcrtc+0x1c4/0x720
<4> [604.892830] drm_ioctl_kernel+0xb0/0xf0
<4> [604.892831] drm_ioctl+0x2e1/0x390
<4> [604.892833] ksys_ioctl+0x7b/0x90
<4> [604.892835] __x64_sys_ioctl+0x11/0x20
<4> [604.892835] do_syscall_64+0x4f/0x220
<4> [604.892836] entry_SYSCALL_64_after_hwframe+0x49/0xbe

The last backtrace boils down to i915 driver code which holds the same
locks we are holding within mm->mmap_sem, and then ends up calling
rcu_barrier(). From what I can see i915 is just the messenger here,
any driver with this pattern of a lock held within mmap_sem which also
has a path of calling rcu_barrier while holding that lock should be
hitting this splat.

Two questions:
- This suggests that calling rcu_barrier() isn't ok anymore while
holding mmap_sem, or anything that has a dependency upon mmap_sem. I
guess that's not the idea, please confirm.
- Assuming this dependency is indeed not intended, where should the
loop be broken? It goes through perf, cpuhotplug and rcu subsystems,
and I don't have a clue about any of those.

Thanks a lot.

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915/execlists: Include priority info in trace_ports

2020-03-30 Thread Patchwork
== Series Details ==

Series: drm/i915/execlists: Include priority info in trace_ports
URL   : https://patchwork.freedesktop.org/series/75229/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8213 -> Patchwork_17126


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_17126 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_17126, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17126/index.html

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_17126:

### IGT changes ###

 Possible regressions 

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
- fi-cml-u2:  [PASS][1] -> [DMESG-WARN][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8213/fi-cml-u2/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-atomic.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17126/fi-cml-u2/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-atomic.html

  
Known issues


  Here are the changes found in Patchwork_17126 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@i915_selftest@live@execlists:
- fi-icl-y:   [PASS][3] -> [DMESG-FAIL][4] ([fdo#108569])
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8213/fi-icl-y/igt@i915_selftest@l...@execlists.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17126/fi-icl-y/igt@i915_selftest@l...@execlists.html

  * igt@i915_selftest@live@requests:
- fi-icl-u2:  [PASS][5] -> [INCOMPLETE][6] ([fdo#109644] / 
[fdo#110464])
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8213/fi-icl-u2/igt@i915_selftest@l...@requests.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17126/fi-icl-u2/igt@i915_selftest@l...@requests.html

  
 Possible fixes 

  * igt@i915_selftest@live@execlists:
- fi-bxt-dsi: [INCOMPLETE][7] ([i915#656]) -> [PASS][8]
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8213/fi-bxt-dsi/igt@i915_selftest@l...@execlists.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17126/fi-bxt-dsi/igt@i915_selftest@l...@execlists.html

  
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#109644]: https://bugs.freedesktop.org/show_bug.cgi?id=109644
  [fdo#110464]: https://bugs.freedesktop.org/show_bug.cgi?id=110464
  [i915#656]: https://gitlab.freedesktop.org/drm/intel/issues/656


Participating hosts (47 -> 37)
--

  Additional (3): fi-byt-j1900 fi-glk-dsi fi-tgl-y 
  Missing(13): fi-ilk-m540 fi-hsw-4200u fi-skl-6770hq fi-byt-squawks 
fi-bsw-cyan fi-ctg-p8600 fi-gdg-551 fi-cfl-8109u fi-elk-e7500 fi-byt-n2820 
fi-byt-clapper fi-bdw-samus fi-snb-2600 


Build changes
-

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8213 -> Patchwork_17126

  CI-20190529: 20190529
  CI_DRM_8213: 3cebf14c87d0d4508d4cc9c49db14061af752c37 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5544: 477c562fc9932939083d732b77dd7b083c6bc0a1 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17126: 50d4d7aa6e6b5f5916662908a47cdf1e46bb5edf @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

50d4d7aa6e6b drm/i915/execlists: Include priority info in trace_ports

== Logs ==

For more details see: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17126/index.html
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2

2020-03-30 Thread Lionel Landwerlin

On 27/03/2020 12:40, Chris Wilson wrote:

Quoting Lionel Landwerlin (2020-03-27 10:32:07)

We want to enable performance monitoring on multiple contexts to cover
the Iris use case of using 2 GEM contexts (3D & compute).

So start by breaking the OA configuration BO which contains global &
per context register writes.

NOA muxes & OA configurations are global, while FLEXEU register
configurations are per context.

Signed-off-by: Lionel Landwerlin 
---
  drivers/gpu/drm/i915/i915_perf.c | 194 ++-
  1 file changed, 137 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3222f6cd8255..f524f50abdef 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -376,7 +376,8 @@ struct i915_oa_config_bo {
 struct llist_node node;
  
 struct i915_oa_config *oa_config;

-   struct i915_vma *vma;
+   struct i915_vma *ctx_vma;
+   struct i915_vma *global_vma;

What's the allocation like? Worth packing into one vma and use an
offset?
-Chris


Good point, thanks!


-Lionel

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 0/3] drm/i915/perf: add support for multi context filtering

2020-03-30 Thread Lionel Landwerlin

On 27/03/2020 12:42, Chris Wilson wrote:

Quoting Lionel Landwerlin (2020-03-27 10:32:06)

Hi all,

i915/perf currently has support for single context filtering. This
allows mesa to read the content of the OA buffer and cut out any
unrelated context running in the middle of a query.

Iris currently uses 2 GEM contexts for 3D & compute commands. In order
to support performance queries on the compute context we need to be
able to also filter on the second GEM context used for compute
commands.

This series adds support for filtering up to 4 GEM contexts in
i915/perf.

Why make it a fixed size? [From a quick look it's just fixed storage as
you use a dynamically sized array. Considered sorting and bsearching?]
-Chris


I figured you might not like too many contexts to be pinned.

Also a small size makes bsort kind of pointless ;)


I'll see how that looks. What's the go-to bsort utility in the kernel?


-Lionel

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/3] drm/i915/perf: Schedule oa_config after modifying the contexts

2020-03-30 Thread Lionel Landwerlin

On 27/03/2020 13:22, Chris Wilson wrote:

We wish that the scheduler emit the context modification commands prior
to enabling the oa_config, for which we must explicitly inform it of the
ordering constraints. This is especially important as we now wait for
the final oa_config setup to be completed and as this wait may be on a
distinct context to the state modifications, we need that command packet
to be always last in the queue.

We borrow the i915_active for its ability to track multiple timelines
and the last dma_fence on each; a flexible dma_resv. Keeping track of
each dma_fence is important for us so that we can efficiently schedule
the requests and reprioritise as required.

Reported-by: Lionel Landwerlin 
Signed-off-by: Chris Wilson 
Cc: Lionel Landwerlin 

Reviewed-by: Lionel Landwerlin 

---
  drivers/gpu/drm/i915/i915_perf.c   | 154 -
  drivers/gpu/drm/i915/i915_perf_types.h |   5 +-
  2 files changed, 102 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3222f6cd8255..faf4b0970775 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1961,10 +1961,11 @@ get_oa_vma(struct i915_perf_stream *stream, struct 
i915_oa_config *oa_config)
return i915_vma_get(oa_bo->vma);
  }
  
-static struct i915_request *

+static int
  emit_oa_config(struct i915_perf_stream *stream,
   struct i915_oa_config *oa_config,
-  struct intel_context *ce)
+  struct intel_context *ce,
+  struct i915_active *active)
  {
struct i915_request *rq;
struct i915_vma *vma;
@@ -1972,7 +1973,7 @@ emit_oa_config(struct i915_perf_stream *stream,
  
  	vma = get_oa_vma(stream, oa_config);

if (IS_ERR(vma))
-   return ERR_CAST(vma);
+   return PTR_ERR(vma);
  
  	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);

if (err)
@@ -1986,6 +1987,18 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_vma_unpin;
}
  
+	if (!IS_ERR_OR_NULL(active)) {

+   /* After all individual context modifications */
+   err = i915_request_await_active(rq, active,
+   I915_ACTIVE_AWAIT_ALL);
+   if (err)
+   goto err_add_request;
+
+   err = i915_active_add_request(active, rq);
+   if (err)
+   goto err_add_request;
+   }
+
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
@@ -2000,14 +2013,13 @@ emit_oa_config(struct i915_perf_stream *stream,
if (err)
goto err_add_request;
  
-	i915_request_get(rq);

  err_add_request:
i915_request_add(rq);
  err_vma_unpin:
i915_vma_unpin(vma);
  err_vma_put:
i915_vma_put(vma);
-   return err ? ERR_PTR(err) : rq;
+   return err;
  }
  
  static struct intel_context *oa_context(struct i915_perf_stream *stream)

@@ -2015,8 +2027,9 @@ static struct intel_context *oa_context(struct 
i915_perf_stream *stream)
return stream->pinned_ctx ?: stream->engine->kernel_context;
  }
  
-static struct i915_request *

-hsw_enable_metric_set(struct i915_perf_stream *stream)
+static int
+hsw_enable_metric_set(struct i915_perf_stream *stream,
+ struct i915_active *active)
  {
struct intel_uncore *uncore = stream->uncore;
  
@@ -2035,7 +2048,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream)

intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
  
-	return emit_oa_config(stream, stream->oa_config, oa_context(stream));

+   return emit_oa_config(stream,
+ stream->oa_config, oa_context(stream),
+ active);
  }
  
  static void hsw_disable_metric_set(struct i915_perf_stream *stream)

@@ -2182,8 +2197,10 @@ static int gen8_modify_context(struct intel_context *ce,
return err;
  }
  
-static int gen8_modify_self(struct intel_context *ce,

-   const struct flex *flex, unsigned int count)
+static int
+gen8_modify_self(struct intel_context *ce,
+const struct flex *flex, unsigned int count,
+struct i915_active *active)
  {
struct i915_request *rq;
int err;
@@ -2194,8 +2211,17 @@ static int gen8_modify_self(struct intel_context *ce,
if (IS_ERR(rq))
return PTR_ERR(rq);
  
+	if (!IS_ERR_OR_NULL(active)) {

+   err = i915_active_add_request(active, rq);
+   if (err)
+   goto err_add_request;
+   }
+
err = gen8_load_flex(rq, ce, flex, count);
+   if (err)
+   goto err_add_request;
  
+err_add_request:

i915_request_add(rq);
return err;
  }
@@ -2229,7 +2255,8 @@ static int gen8_configure_conte

Re: [Intel-gfx] [PATCH 0/3] drm/i915/perf: add support for multi context filtering

2020-03-30 Thread Chris Wilson
Quoting Lionel Landwerlin (2020-03-30 14:14:18)
> On 27/03/2020 12:42, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2020-03-27 10:32:06)
> >> Hi all,
> >>
> >> i915/perf has currently support for single context filtering. This
> >> allows mesa to read the content of the OA buffer and cut out any
> >> unrelated context running in a middle of a query.
> >>
> >> Iris currently uses 2 GEM contexts for 3D & compute commands. In order
> >> to support performance queries on the compute context we need to be
> >> able to also filter on the second GEM context used for compute
> >> commands.
> >>
> >> This series add support for filtering up to 4 GEM contexts in
> >> i915/perf.
> > Why make it a fixed size? [From a quick look it's just fixed storage as
> > you use a dynamically sized array. Considered sorting and bsearching?]
> > -Chris
> 
> I figured you might not like too many contexts to be pinned.

Well if you can pin 65,356 contexts and still be able to profile, I'll
be impressed. Having things pinned will bite us [that fragmentation is
going to cause issues in the long run], but here there's definitely a
natural limit in being able to pin everything the user requests. If we
can do that, we can likely profile their workload. Failure here will mean
that other users start seeing ENOSPC randomly (which is what we want to
avoid).

I think the first limit that will be hit will be the unique sw id space
for contexts. icl+ brings that down to 1023, minus the 3 bits we use
internally, so 127.

> Also a small size makes bsort kind of pointless ;)

Yeah, but otoh bsearch is such a small amount of overhead for a small array
that unless it is very hot, I'm sure we can find other things to fill
the profiles.

> I'll see how that looks. What the goto bsort utility in the kernel?

#include <linux/sort.h>
#include <linux/bsearch.h>

I do wish the latter would have a macro generator.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/gem: Split eb_vma into its own allocation

2020-03-30 Thread Chris Wilson
Use a separate array allocation for the execbuf vma, so that we can
track their lifetime independently from the copy of the user arguments.
With luck, this has a secondary benefit of splitting the malloc size to
within reason and avoiding vmalloc. The downside is that we might require
two separate vmallocs -- but much less likely.

In the process, this prevents a memory leak on the ww_mutex error
unwind.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1390
Signed-off-by: Chris Wilson 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 131 ++
 1 file changed, 73 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f347e595a773..cda35e6dfc44 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -40,6 +40,11 @@ struct eb_vma {
u32 handle;
 };
 
+struct eb_vma_array {
+   struct kref kref;
+   struct eb_vma vma[];
+};
+
 enum {
FORCE_CPU_RELOC = 1,
FORCE_GTT_RELOC,
@@ -52,7 +57,6 @@ enum {
 #define __EXEC_OBJECT_NEEDS_MAPBIT(29)
 #define __EXEC_OBJECT_NEEDS_BIAS   BIT(28)
 #define __EXEC_OBJECT_INTERNAL_FLAGS   (~0u << 28) /* all of the above */
-#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | 
__EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC   BIT(31)
 #define __EXEC_INTERNAL_FLAGS  (~0u << 31)
@@ -283,6 +287,7 @@ struct i915_execbuffer {
 */
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+   struct eb_vma_array *array;
 };
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -292,8 +297,62 @@ static inline bool eb_use_cmdparser(const struct 
i915_execbuffer *eb)
 eb->args->batch_len);
 }
 
+static struct eb_vma_array *eb_vma_array_create(unsigned int count)
+{
+   struct eb_vma_array *arr;
+
+   arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
+   if (!arr)
+   return NULL;
+
+   kref_init(&arr->kref);
+   arr->vma[0].vma = NULL;
+
+   return arr;
+}
+
+static inline void eb_unreserve_vma(struct eb_vma *ev)
+{
+   struct i915_vma *vma = ev->vma;
+
+   if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+   __i915_vma_unpin_fence(vma);
+
+   if (ev->flags & __EXEC_OBJECT_HAS_PIN)
+   __i915_vma_unpin(vma);
+
+   ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
+  __EXEC_OBJECT_HAS_FENCE);
+}
+
+static void eb_vma_array_destroy(struct kref *kref)
+{
+   struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
+   struct eb_vma *ev = arr->vma;
+
+   while (ev->vma) {
+   eb_unreserve_vma(ev);
+   i915_vma_put(ev->vma);
+   ev++;
+   }
+
+   kvfree(arr);
+}
+
+static void eb_vma_array_put(struct eb_vma_array *arr)
+{
+   kref_put(&arr->kref, eb_vma_array_destroy);
+}
+
 static int eb_create(struct i915_execbuffer *eb)
 {
+   /* Allocate an extra slot for use by the command parser + sentinel */
+   eb->array = eb_vma_array_create(eb->buffer_count + 2);
+   if (!eb->array)
+   return -ENOMEM;
+
+   eb->vma = eb->array->vma;
+
if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
unsigned int size = 1 + ilog2(eb->buffer_count);
 
@@ -327,8 +386,10 @@ static int eb_create(struct i915_execbuffer *eb)
break;
} while (--size);
 
-   if (unlikely(!size))
+   if (unlikely(!size)) {
+   eb_vma_array_put(eb->array);
return -ENOMEM;
+   }
 
eb->lut_size = size;
} else {
@@ -402,26 +463,6 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
 }
 
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
-   GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
-   if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
-   __i915_vma_unpin_fence(vma);
-
-   __i915_vma_unpin(vma);
-}
-
-static inline void
-eb_unreserve_vma(struct eb_vma *ev)
-{
-   if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
-   return;
-
-   __eb_unreserve_vma(ev->vma, ev->flags);
-   ev->flags &= ~__EXEC_OBJECT_RESERVED;
-}
-
 static int
 eb_validate_vma(struct i915_execbuffer *eb,
struct drm_i915_gem_exec_object2 *entry,
@@ -863,31 +904,13 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned 
long handle)
}
 }
 
-static void eb_release_vmas(const struct i915_execbuffer *eb)
-{
-   const unsigned int count = eb->buffer_count;
-   unsigned int i;
-
-   for (i = 0; i < count; i++) {
-   struct eb_vma *ev = &eb->vma[i];
-   struct i915_vma *vma = ev->vma;
-
-   if (!vma)
-   

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Check timeout before flush and cond checks

2020-03-30 Thread Matthew Auld

On 30/03/2020 13:16, Chris Wilson wrote:

Allow a bit of leniency for the CPU scheduler to be distracted while we
flush the tasklet and so ensure that we always check the status of the
request once more before timing out.

v2: Wait until the HW acked the submit, and we do any secondary actions
for the submit (e.g. timeslices)

Signed-off-by: Chris Wilson 
Cc: Matthew Auld 

Reviewed-by: Matthew Auld 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/selftests: Check timeout before flush and cond checks

2020-03-30 Thread Matthew Auld
On Mon, 30 Mar 2020 at 13:17, Chris Wilson  wrote:
>
> Allow a bit of leniency for the CPU scheduler to be distracted while we
> flush the tasklet and so ensure that we always check the status of the
> request once more before timing out.
>
> v2: Wait until the HW acked the submit, and we do any secondary actions
> for the submit (e.g. timeslices)
>
> Signed-off-by: Chris Wilson 
> Cc: Matthew Auld 

Rejecting mails again.
Reviewed-by: Matthew Auld 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/selftests: Check timeout before flush and cond checks

2020-03-30 Thread Chris Wilson
Quoting Matthew Auld (2020-03-30 14:48:52)
> On Mon, 30 Mar 2020 at 13:17, Chris Wilson  wrote:
> >
> > Allow a bit of leniency for the CPU scheduler to be distracted while we
> > flush the tasklet and so ensure that we always check the status of the
> > request once more before timing out.
> >
> > v2: Wait until the HW acked the submit, and we do any secondary actions
> > for the submit (e.g. timeslices)
> >
> > Signed-off-by: Chris Wilson 
> > Cc: Matthew Auld 
> 
> Rejecting mails again.
> Reviewed-by: Matthew Auld 

But now I've told the list not to remove me from the CC, so at least I'm
now getting the ml copy :(
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] rcu_barrier() no longer allowed within mmap_sem?

2020-03-30 Thread Peter Zijlstra
On Mon, Mar 30, 2020 at 03:00:35PM +0200, Daniel Vetter wrote:
> Hi all, for all = rcu, cpuhotplug and perf maintainers
> 
> We've hit an interesting new lockdep splat in our drm/i915 CI:
> 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17096/shard-tglb7/igt@kms_frontbuffer_track...@fbcpsr-rgb101010-draw-mmap-gtt.html#dmesg-warnings861
> 
> Summarizing away the driver parts we have
> 
> < gpu locks which are held within mm->mmap_sem in various gpu fault handlers >
> 
> -> #4 (&mm->mmap_sem#2){}:
> <4> [604.892615] __might_fault+0x63/0x90
> <4> [604.892617] _copy_to_user+0x1e/0x80
> <4> [604.892619] perf_read+0x200/0x2b0
> <4> [604.892621] vfs_read+0x96/0x160
> <4> [604.892622] ksys_read+0x9f/0xe0
> <4> [604.892623] do_syscall_64+0x4f/0x220
> <4> [604.892624] entry_SYSCALL_64_after_hwframe+0x49/0xbe
> <4> [604.892625]
> -> #3 (&cpuctx_mutex){+.+.}:
> <4> [604.892626] __mutex_lock+0x9a/0x9c0
> <4> [604.892627] perf_event_init_cpu+0xa4/0x140
> <4> [604.892629] perf_event_init+0x19d/0x1cd
> <4> [604.892630] start_kernel+0x362/0x4e4
> <4> [604.892631] secondary_startup_64+0xa4/0xb0
> <4> [604.892631]
> -> #2 (pmus_lock){+.+.}:
> <4> [604.892633] __mutex_lock+0x9a/0x9c0
> <4> [604.892633] perf_event_init_cpu+0x6b/0x140
> <4> [604.892635] cpuhp_invoke_callback+0x9b/0x9d0
> <4> [604.892636] _cpu_up+0xa2/0x140
> <4> [604.892637] do_cpu_up+0x61/0xa0
> <4> [604.892639] smp_init+0x57/0x96
> <4> [604.892639] kernel_init_freeable+0x87/0x1dc
> <4> [604.892640] kernel_init+0x5/0x100
> <4> [604.892642] ret_from_fork+0x24/0x50
> <4> [604.892642]
> -> #1 (cpu_hotplug_lock.rw_sem){}:
> <4> [604.892643] cpus_read_lock+0x34/0xd0
> <4> [604.892644] rcu_barrier+0xaa/0x190
> <4> [604.892645] kernel_init+0x21/0x100
> <4> [604.892647] ret_from_fork+0x24/0x50
> <4> [604.892647]
> -> #0 (rcu_state.barrier_mutex){+.+.}:


> The last backtrace boils down to i915 driver code which holds the same
> locks we are holding within mm->mmap_sem, and then ends up calling
> rcu_barrier(). From what I can see i915 is just the messenger here,
> any driver with this pattern of a lock held within mmap_sem which also
> has a path of calling rcu_barrier while holding that lock should be
> hitting this splat.
> 
> Two questions:
> - This suggests that calling rcu_barrier() isn't ok anymore while
> holding mmap_sem, or anything that has a dependency upon mmap_sem. I
> guess that's not the idea, please confirm.
> - Assuming this depedency is indeed not intended, where should the
> loop be broken? It goes through perf, cpuhotplug and rcu subsystems,
> and I don't have a clue about any of those.

I wonder what is new here; the 1-4 chain there has been true for a long
time, see also the comment at perf_event_ctx_lock_nested().

That said; it _might_ be possible to break 3->4, that is, all the
copy_{to,from}_user() usage in perf can be lifted out from under the
various locks by re-arranging code, but I have a nagging feeling there
was more to it than that. Of course, while I did document the locking
rules, I seem to have forgotten to comment on exactly why these rules
are as they are.. oh well.


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS

2020-03-30 Thread Michal Wajdeczko



On 30.03.2020 14:28, Chris Wilson wrote:
> Quoting Michal Wajdeczko (2020-03-30 12:33:02)
>> There might be many reasons why we failed to successfully
>> load and authenticate HuC firmware, but today we only use
>> single error in case of no HuC hardware. Add some more
>> error codes for most common cases (disabled, not installed,
>> corrupted or mismatched firmware).
>>
>> Signed-off-by: Michal Wajdeczko 
>> Cc: Joonas Lahtinen 
>> Cc: Chris Wilson 
>> Cc: Daniele Ceraolo Spurio 
>> Cc: Tony Ye 
>> Cc: Robert M. Fosha 
>> ---
>>  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 22 ++
>>  1 file changed, 18 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
>> index d6097b46600c..1e8073ec343f 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
>> @@ -200,9 +200,13 @@ int intel_huc_auth(struct intel_huc *huc)
>>   * This function reads status register to verify if HuC
>>   * firmware was successfully loaded.
>>   *
>> - * Returns: 1 if HuC firmware is loaded and verified,
>> - * 0 if HuC firmware is not loaded and -ENODEV if HuC
>> - * is not present on this platform.
>> + * Returns:
>> + *  * -ENODEV if HuC is not present on this platform,
>> + *  * -EOPNOTSUPP if HuC firmware is disabled,
>> + *  * -ENOPKG if HuC firmware was not installed,
>> + *  * -ENOEXEC if HuC firmware is invalid or mismatched,
>> + *  * 0 if HuC firmware is not running,
>> + *  * 1 if HuC firmware is authenticated and running.
>>   */
>>  int intel_huc_check_status(struct intel_huc *huc)
>>  {
>> @@ -210,8 +214,18 @@ int intel_huc_check_status(struct intel_huc *huc)
>> intel_wakeref_t wakeref;
>> u32 status = 0;
>>  
>> -   if (!intel_huc_is_supported(huc))
>> +   switch (__intel_uc_fw_status(&huc->fw)) {
>> +   case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
>> return -ENODEV;
> 
> No HW support.
> 
>> +   case INTEL_UC_FIRMWARE_DISABLED:
>> +   return -EOPNOTSUPP;
> 
> Override by user [sysadmin]
> 
>> +   case INTEL_UC_FIRMWARE_MISSING:
>> +   return -ENOPKG;
> 
> FILENOTFOUND.
> 
>> +   case INTEL_UC_FIRMWARE_ERROR:
>> +   return -ENOEXEC;
> 
> File corruption.
> 
> There's nothing else between us loading the fw and the huc rejecting
> it?
> 
> FIRMWARE_FAIL? That's set as the opposite of FIRMWARE_TRANSFERRED in
> that we failed to upload the image to the HW. The firmware itself hasn't
> had a chance to run.
> 
> case INTEL_UC_FIRMWARE_FAIL:
>   return -ENXIO;
> 
> Or is that being overridden to FIRMWARE_ERROR?

No, it's not overridden by FIRMWARE_ERROR (since we use FIRMWARE_ERROR
as final state, while with FIRMWARE_FAIL there is a chance for recovery
during reset)

Also note that FIRMWARE_FAIL case is covered by the register check that
we have below, which provides HuC runtime status.

And if we decide to use FIRMWARE_FAIL to report -ENXIO, then it is
unlikely that we will ever report 0 again for any other fw error that
could prevent fw from successful load (now recall your and Joonas
position that this param shall stay as reflection of register read).

Michal

ps. on the other hand, if we trust our uc_fw_status() then we can drop that
register read, finally decouple GET_PARAM from MMIO_READ and fully rely
on cached status:

case INTEL_UC_FIRMWARE_RUNNING:
return 1;
default:
return 0;

see [1] for my earlier attempt, before uc_fw.status was added

[1] https://patchwork.freedesktop.org/patch/306179/?series=60928&rev=1

> 
> Other than the question of whether there's one more step before the fw
> is being run [and then able to set HUC_STATUS as it determines for
> itself],
> 
> Reviewed-by: Chris Wilson 
> -Chris
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 06/22] drm/i915: Use per object locking in execbuf, v7.

2020-03-30 Thread Maarten Lankhorst
Now that we changed execbuf submission slightly to allow us to do all
pinning in one place, we can now simply add ww versions on top of
struct_mutex. All we have to do is add a separate path for -EDEADLK
handling, which needs to unpin all gem bo's before dropping the lock,
then start over.

This finally allows us to do parallel submission, but because not
all of the pinning code uses the ww ctx yet, we cannot completely
drop struct_mutex yet.

Changes since v1:
- Keep struct_mutex for now. :(
Changes since v2:
- Make sure we always lock the ww context in slowpath.
Changes since v3:
- Don't call __eb_unreserve_vma in eb_move_to_gpu now; this can be
  done on normal unlock path.
- Unconditionally release vmas and context.
Changes since v4:
- Rebased on top of struct_mutex reduction.
Changes since v5:
- Remove training wheels.
Changes since v6:
- Fix accidentally broken -ENOSPC handling.

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 273 ++
 1 file changed, 148 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 55b06d7a1329..a337f3054ce3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -249,6 +249,8 @@ struct i915_execbuffer {
/** list of vma that have execobj.relocation_count */
struct list_head relocs;
 
+   struct i915_gem_ww_ctx ww;
+
/**
 * Track the most recently used object for relocations, as we
 * frequently have to perform multiple relocations within the same
@@ -404,24 +406,18 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
 }
 
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
-   GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
-   if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
-   __i915_vma_unpin_fence(vma);
-
-   __i915_vma_unpin(vma);
-}
-
 static inline void
 eb_unreserve_vma(struct eb_vma *ev)
 {
if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
return;
 
-   __eb_unreserve_vma(ev->vma, ev->flags);
ev->flags &= ~__EXEC_OBJECT_RESERVED;
+
+   if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+   __i915_vma_unpin_fence(ev->vma);
+
+   __i915_vma_unpin(ev->vma);
 }
 
 static int
@@ -515,16 +511,6 @@ eb_add_vma(struct i915_execbuffer *eb,
 
eb->batch = ev;
}
-
-   if (eb_pin_vma(eb, entry, ev)) {
-   if (entry->offset != vma->node.start) {
-   entry->offset = vma->node.start | UPDATE;
-   eb->args->flags |= __EXEC_HAS_RELOC;
-   }
-   } else {
-   eb_unreserve_vma(ev);
-   list_add_tail(&ev->bind_link, &eb->unbound);
-   }
 }
 
 static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -628,10 +614,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 * This avoid unnecessary unbinding of later objects in order to make
 * room for the earlier objects *unless* we need to defragment.
 */
-
-   if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
-   return -EINTR;
-
pass = 0;
do {
list_for_each_entry(ev, &eb->unbound, bind_link) {
@@ -639,8 +621,8 @@ static int eb_reserve(struct i915_execbuffer *eb)
if (err)
break;
}
-   if (!(err == -ENOSPC || err == -EAGAIN))
-   break;
+   if (err != -ENOSPC)
+   return err;
 
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
@@ -670,13 +652,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
}
list_splice_tail(&last, &eb->unbound);
 
-   if (err == -EAGAIN) {
-   mutex_unlock(&eb->i915->drm.struct_mutex);
-   flush_workqueue(eb->i915->mm.userptr_wq);
-   mutex_lock(&eb->i915->drm.struct_mutex);
-   continue;
-   }
-
switch (pass++) {
case 0:
break;
@@ -687,20 +662,15 @@ static int eb_reserve(struct i915_execbuffer *eb)
err = i915_gem_evict_vm(eb->context->vm);
mutex_unlock(&eb->context->vm->mutex);
if (err)
-   goto unlock;
+   return err;
break;
 
default:
-   err = -ENOSPC;
-   goto unlock;
+   return -ENOSPC;
}
 
pin_flags = PIN_USER;
} while (1);
-
-unlock:
-   mutex_unlock(&eb-

[Intel-gfx] [PATCH 05/22] drm/i915: Parse command buffer earlier in eb_relocate(slow)

2020-03-30 Thread Maarten Lankhorst
We want to introduce backoff logic, but we need to lock the
pool object as well for command parsing. Because of this, we
will need backoff logic for the engine pool obj. Move the batch
validation up slightly to eb_lookup_vmas, and move the actual command
parsing into a separate function which can be called from both the
execbuf relocation fastpath and slowpath.

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 68 ++-
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cc2be6964037..55b06d7a1329 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -285,6 +285,8 @@ struct i915_execbuffer {
struct hlist_head *buckets; /** ht for relocation handles */
 };
 
+static int eb_parse(struct i915_execbuffer *eb);
+
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
return intel_engine_requires_cmd_parser(eb->engine) ||
@@ -814,6 +816,7 @@ static struct i915_vma *eb_lookup_vma(struct 
i915_execbuffer *eb, u32 handle)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
+   struct drm_i915_private *i915 = eb->i915;
unsigned int batch = eb_batch_index(eb);
unsigned int i;
int err = 0;
@@ -827,18 +830,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
vma = eb_lookup_vma(eb, eb->exec[i].handle);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
-   break;
+   goto err;
}
 
err = eb_validate_vma(eb, &eb->exec[i], vma);
if (unlikely(err)) {
i915_vma_put(vma);
-   break;
+   goto err;
}
 
eb_add_vma(eb, i, batch, vma);
}
 
+   if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
+   drm_dbg(&i915->drm,
+   "Attempting to use self-modifying batch buffer\n");
+   return -EINVAL;
+   }
+
+   if (range_overflows_t(u64,
+ eb->batch_start_offset, eb->batch_len,
+ eb->batch->vma->size)) {
+   drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+   return -EINVAL;
+   }
+
+   if (eb->batch_len == 0)
+   eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+
+   return 0;
+
+err:
eb->vma[i].vma = NULL;
return err;
 }
@@ -1688,7 +1710,7 @@ static int eb_prefault_relocations(const struct 
i915_execbuffer *eb)
return 0;
 }
 
-static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 {
bool have_copy = false;
struct eb_vma *ev;
@@ -1739,6 +1761,11 @@ static noinline int eb_relocate_slow(struct 
i915_execbuffer *eb)
}
}
 
+   /* as last step, parse the command buffer */
+   err = eb_parse(eb);
+   if (err)
+   goto err;
+
/*
 * Leave the user relocations as are, this is the painfully slow path,
 * and we want to avoid the complication of dropping the lock whilst
@@ -1771,7 +1798,7 @@ static noinline int eb_relocate_slow(struct 
i915_execbuffer *eb)
return err;
 }
 
-static int eb_relocate(struct i915_execbuffer *eb)
+static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
int err;
 
@@ -1791,11 +1818,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
 
list_for_each_entry(ev, &eb->relocs, reloc_link) {
if (eb_relocate_vma(eb, ev))
-   return eb_relocate_slow(eb);
+   return eb_relocate_parse_slow(eb);
}
}
 
-   return 0;
+   return eb_parse(eb);
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -2731,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_context;
 
-   err = eb_relocate(&eb);
+   err = eb_relocate_parse(&eb);
if (err) {
/*
 * If the user expects the execobject.offset and
@@ -2744,33 +2771,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
 
-   if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
-   drm_dbg(&i915->drm,
-   "Attempting to use self-modifying batch buffer\n");
-   err = -EINVAL;
-   goto err_vma;
-   }
-
-   if (range_overflows_t(u64,
- eb.batch_start_offset, eb.batch_len,
- eb.batch->vma->size)) {
-   drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
-   

[Intel-gfx] [PATCH 01/22] Revert "drm/i915/gem: Drop relocation slowpath"

2020-03-30 Thread Maarten Lankhorst
This reverts commit 7dc8f1143778 ("drm/i915/gem: Drop relocation
slowpath"). We need the slowpath relocation for taking ww-mutex
inside the page fault handler, and we will take this mutex when
pinning all objects.

Cc: Chris Wilson 
Cc: Matthew Auld 
Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 239 +-
 1 file changed, 235 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f347e595a773..347c929b508d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1480,7 +1480,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, 
struct eb_vma *ev)
 * we would try to acquire the struct mutex again. Obviously
 * this is bad and so lockdep complains vehemently.
 */
-   copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
+   pagefault_disable();
+   copied = __copy_from_user_inatomic(r, urelocs, count * 
sizeof(r[0]));
+   pagefault_enable();
if (unlikely(copied)) {
remain = -EFAULT;
goto out;
@@ -1530,6 +1532,236 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, 
struct eb_vma *ev)
return remain;
 }
 
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
+{
+   const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+   struct drm_i915_gem_relocation_entry *relocs =
+   u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+   unsigned int i;
+   int err;
+
+   for (i = 0; i < entry->relocation_count; i++) {
+   u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
+
+   if ((s64)offset < 0) {
+   err = (int)offset;
+   goto err;
+   }
+   }
+   err = 0;
+err:
+   reloc_cache_reset(&eb->reloc_cache);
+   return err;
+}
+
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+{
+   const char __user *addr, *end;
+   unsigned long size;
+   char __maybe_unused c;
+
+   size = entry->relocation_count;
+   if (size == 0)
+   return 0;
+
+   if (size > N_RELOC(ULONG_MAX))
+   return -EINVAL;
+
+   addr = u64_to_user_ptr(entry->relocs_ptr);
+   size *= sizeof(struct drm_i915_gem_relocation_entry);
+   if (!access_ok(addr, size))
+   return -EFAULT;
+
+   end = addr + size;
+   for (; addr < end; addr += PAGE_SIZE) {
+   int err = __get_user(c, addr);
+   if (err)
+   return err;
+   }
+   return __get_user(c, end - 1);
+}
+
+static int eb_copy_relocations(const struct i915_execbuffer *eb)
+{
+   struct drm_i915_gem_relocation_entry *relocs;
+   const unsigned int count = eb->buffer_count;
+   unsigned int i;
+   int err;
+
+   for (i = 0; i < count; i++) {
+   const unsigned int nreloc = eb->exec[i].relocation_count;
+   struct drm_i915_gem_relocation_entry __user *urelocs;
+   unsigned long size;
+   unsigned long copied;
+
+   if (nreloc == 0)
+   continue;
+
+   err = check_relocations(&eb->exec[i]);
+   if (err)
+   goto err;
+
+   urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+   size = nreloc * sizeof(*relocs);
+
+   relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+   if (!relocs) {
+   err = -ENOMEM;
+   goto err;
+   }
+
+   /* copy_from_user is limited to < 4GiB */
+   copied = 0;
+   do {
+   unsigned int len =
+   min_t(u64, BIT_ULL(31), size - copied);
+
+   if (__copy_from_user((char *)relocs + copied,
+(char __user *)urelocs + copied,
+len))
+   goto end;
+
+   copied += len;
+   } while (copied < size);
+
+   /*
+* As we do not update the known relocation offsets after
+* relocating (due to the complexities in lock handling),
+* we need to mark them as invalid now so that we force the
+* relocation processing next time. Just in case the target
+* object is evicted and then rebound into its old
+* presumed_offset before the next execbuffer - if that
+* happened we would make the mistake of assuming that the
+* relocations were valid.
+*/
+   if (!user_access_begin

[Intel-gfx] [PATCH 12/22] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin.

2020-03-30 Thread Maarten Lankhorst
As a preparation step for full object locking and wait/wound handling
during pin and object mapping, ensure that we always pass the ww context
in i915_gem_execbuffer.c to i915_vma_pin, and use lockdep to ensure this
happens.

This also requires changing the order of eb_parse slightly, to ensure
we pass ww at a point where we could still handle -EDEADLK safely.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 125 ++
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |   4 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.c   |  65 +
 drivers/gpu/drm/i915/gt/intel_context.h   |  13 ++
 drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|   2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +-
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_ring.c  |  10 +-
 drivers/gpu/drm/i915/gt/intel_ring.h  |   3 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  15 +--
 drivers/gpu/drm/i915/gt/intel_timeline.c  |  12 +-
 drivers/gpu/drm/i915/gt/intel_timeline.h  |   3 +-
 drivers/gpu/drm/i915/gt/mock_engine.c |   3 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |   4 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|   2 +-
 drivers/gpu/drm/i915/i915_drv.h   |  13 +-
 drivers/gpu/drm/i915/i915_gem.c   |  11 +-
 drivers/gpu/drm/i915/i915_vma.c   |  13 +-
 drivers/gpu/drm/i915/i915_vma.h   |  13 +-
 24 files changed, 207 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 2e2e5ce82dc2..a429e90956f5 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3441,7 +3441,7 @@ initial_plane_vma(struct drm_i915_private *i915,
if (IS_ERR(vma))
goto err_obj;
 
-   if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+   if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
goto err_obj;
 
if (i915_gem_object_is_tiled(obj) &&
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 062848951095..f5b01e70eb61 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1145,7 +1145,7 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
 
i915_gem_ww_ctx_init(&ww, true);
 retry:
-   err = intel_context_pin(ce);
+   err = intel_context_pin_ww(ce, &ww);
if (err)
goto err;
 
@@ -1238,7 +1238,7 @@ static int pin_ppgtt_update(struct intel_context *ce, 
struct i915_gem_ww_ctx *ww
 
if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
/* ppGTT is not part of the legacy context image */
-   return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+   return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 05f6e1a94977..0a2121429913 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -394,7 +394,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
 
-   if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+   if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags)))
return false;
 
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
@@ -535,7 +535,7 @@ static inline int use_cpu_reloc(const struct reloc_cache 
*cache,
obj->cache_level != I915_CACHE_NONE);
 }
 
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
+static int eb_reserve_vma(struct i915_execbuffer *eb,
  struct eb_vma *ev,
  u64 pin_flags)
 {
@@ -569,7 +569,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
return err;
}
 
-   err = i915_vma_pin(vma,
+   err = i915_vma_pin_ww(vma, &eb->ww,
   entry->pad_to_size, entry->alignment,
   pin_flags);
if (err)
@@ -1060,9 +1060,10 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 }
 
 static void *reloc_iomap(struct drm_i915_gem_object *obj,
-struct reloc_cache *cache,
+struct i915_execbuffer *eb,
 unsigned long page)
 {
+   struct reloc_cache *cache = &eb-

[Intel-gfx] [PATCH 03/22] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.

2020-03-30 Thread Maarten Lankhorst
i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
eviction. We don't use it yet, but let's start by adding the definition
first.

To use it, we have to pass a non-NULL ww to gem_object_lock, and must not
unlock directly; unlocking is done in i915_gem_ww_ctx_fini.

Changes since v1:
- Change ww_ctx and obj order in locking functions (Jonas Lahtinen)

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 10 ++--
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h| 38 +++---
 .../gpu/drm/i915/gem/i915_gem_object_blt.c|  2 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 
 drivers/gpu/drm/i915/gem/i915_gem_pm.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c|  2 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
 .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
 .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  4 +-
 .../drm/i915/gem/selftests/i915_gem_phys.c|  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|  2 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c|  2 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c |  2 +-
 drivers/gpu/drm/i915/i915_gem.c   | 52 +--
 drivers/gpu/drm/i915/i915_gem.h   | 11 
 drivers/gpu/drm/i915/selftests/i915_gem.c | 41 +++
 drivers/gpu/drm/i915/selftests/i915_vma.c |  2 +-
 .../drm/i915/selftests/intel_memory_region.c  |  2 +-
 26 files changed, 175 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index e09a11b1e509..2e2e5ce82dc2 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2303,7 +2303,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-   i915_gem_object_lock(vma->obj);
+   i915_gem_object_lock(vma->obj, NULL);
if (flags & PLANE_HAS_FENCE)
i915_vma_unpin_fence(vma);
i915_gem_object_unpin_from_display_plane(vma);
@@ -17047,7 +17047,7 @@ static int intel_framebuffer_init(struct 
intel_framebuffer *intel_fb,
if (!intel_fb->frontbuffer)
return -ENOMEM;
 
-   i915_gem_object_lock(obj);
+   i915_gem_object_lock(obj, NULL);
tiling = i915_gem_object_get_tiling(obj);
stride = i915_gem_object_get_stride(obj);
i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 0598e5382a1d..5d94a77f9bdd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -287,7 +287,7 @@ int i915_gem_schedule_fill_pages_blt(struct 
drm_i915_gem_object *obj,
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
 
-   i915_gem_object_lock(obj);
+   i915_gem_object_lock(obj, NULL);
err = i915_sw_fence_await_reservation(&work->wait,
  obj->base.resv, NULL,
  true, I915_FENCE_TIMEOUT,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 50e7580f9337..ac2b88ca00ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -113,7 +113,7 @@ static void lut_close(struct i915_gem_context *ctx)
continue;
 
rcu_read_unlock();
-   i915_gem_object_lock(obj);
+   i915_gem_object_lock(obj, NULL);
list_for_each_entry(lut, &obj->lut_list, obj_link) {
if (lut->ctx != ctx)
continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 7db5a793739d..cfadccfc2990 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf 
*dma_buf, enum dma_data_dire
if (err)
return err;
 
-   err = i915_gem_object_lock_interruptible(obj);
+   err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
 
@@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, 
enum dma_data_direct
if (err)
 

[Intel-gfx] [PATCH 16/22] drm/i915: Dirty hack to fix selftests locking inversion

2020-03-30 Thread Maarten Lankhorst
Some i915 selftests still use i915_vma_lock() as the inner lock, and the
intel_timeline->mutex taken by intel_context_create_request() as the outer
lock. Fortunately this is not an issue for selftests; they should be fixed,
but we can move ahead and clean up lockdep now.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_context.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 113d0bda1bcf..5c7acddf9651 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -460,6 +460,18 @@ struct i915_request *intel_context_create_request(struct 
intel_context *ce)
rq = i915_request_create(ce);
intel_context_unpin(ce);
 
+   if (IS_ERR(rq))
+   return rq;
+
+   /*
+* timeline->mutex should be the inner lock, but is used as outer lock.
+* Hack around this to shut up lockdep in selftests..
+*/
+   lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
+   mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
+   mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, 
_RET_IP_);
+   rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+
return rq;
 }
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 08/22] drm/i915: Add ww context handling to context_barrier_task

2020-03-30 Thread Maarten Lankhorst
This is required if we want to pass a ww context in intel_context_pin
and gen6_ppgtt_pin().

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 55 ++-
 .../drm/i915/gem/selftests/i915_gem_context.c | 22 +++-
 2 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ac2b88ca00ce..062848951095 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1097,6 +1097,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t 
context_barrier_inject_fault);
 static int context_barrier_task(struct i915_gem_context *ctx,
intel_engine_mask_t engines,
bool (*skip)(struct intel_context *ce, void 
*data),
+   int (*pin)(struct intel_context *ce, struct 
i915_gem_ww_ctx *ww, void *data),
int (*emit)(struct i915_request *rq, void 
*data),
void (*task)(void *data),
void *data)
@@ -1104,6 +1105,7 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
struct context_barrier_task *cb;
struct i915_gem_engines_iter it;
struct i915_gem_engines *e;
+   struct i915_gem_ww_ctx ww;
struct intel_context *ce;
int err = 0;
 
@@ -1141,10 +1143,21 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
if (skip && skip(ce, data))
continue;
 
-   rq = intel_context_create_request(ce);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   err = intel_context_pin(ce);
+   if (err)
+   goto err;
+
+   if (pin)
+   err = pin(ce, &ww, data);
+   if (err)
+   goto err_unpin;
+
+   rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   break;
+   goto err_unpin;
}
 
err = 0;
@@ -1154,6 +1167,16 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
err = i915_active_add_request(&cb->base, rq);
 
i915_request_add(rq);
+err_unpin:
+   intel_context_unpin(ce);
+err:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+
if (err)
break;
}
@@ -1209,6 +1232,17 @@ static void set_ppgtt_barrier(void *data)
i915_vm_close(old);
 }
 
+static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx 
*ww, void *data)
+{
+   struct i915_address_space *vm = ce->vm;
+
+   if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
+   /* ppGTT is not part of the legacy context image */
+   return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+
+   return 0;
+}
+
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
struct i915_address_space *vm = rq->context->vm;
@@ -1265,20 +1299,10 @@ static int emit_ppgtt_update(struct i915_request *rq, 
void *data)
 
 static bool skip_ppgtt_update(struct intel_context *ce, void *data)
 {
-   if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
-   return true;
-
if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
-   return false;
-
-   if (!atomic_read(&ce->pin_count))
-   return true;
-
-   /* ppGTT is not part of the legacy context image */
-   if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
-   return true;
-
-   return false;
+   return !ce->state;
+   else
+   return !atomic_read(&ce->pin_count);
 }
 
 static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1329,6 +1353,7 @@ static int set_ppgtt(struct drm_i915_file_private 
*file_priv,
 */
err = context_barrier_task(ctx, ALL_ENGINES,
   skip_ppgtt_update,
+  pin_ppgtt_update,
   emit_ppgtt_update,
   set_ppgtt_barrier,
   old);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 42edbd0f3c14..78356031ec61 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1903,8 +1903,8 @@ static int mock_context_barrier(void *arg)
return -ENOMEM;
 
counter = 0;
-   err = context_barrier_task(ctx, 0,
-

[Intel-gfx] [PATCH 22/22] drm/i915: Ensure we hold the pin mutex

2020-03-30 Thread Maarten Lankhorst
Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_renderstate.c | 2 +-
 drivers/gpu/drm/i915/i915_vma.c | 9 -
 drivers/gpu/drm/i915/i915_vma.h | 1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c 
b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index c39d73142950..df42ba06711a 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -207,7 +207,7 @@ int intel_renderstate_init(struct intel_renderstate *so,
if (err)
goto err_context;
 
-   err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+   err = i915_vma_pin_ww(so->vma, &so->ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err_context;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e3d82be503dc..e22f287ba382 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -892,6 +892,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct 
i915_gem_ww_ctx *ww,
 #ifdef CONFIG_PROVE_LOCKING
if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex))
WARN_ON(!ww);
+   if (debug_locks && ww && vma->resv)
+   assert_vma_held(vma);
 #endif
 
BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
@@ -1032,8 +1034,13 @@ int i915_ggtt_pin(struct i915_vma *vma, struct 
i915_gem_ww_ctx *ww,
 
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 
+   WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
+
do {
-   err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
+   if (ww)
+   err = i915_vma_pin_ww(vma, ww, 0, align, flags | 
PIN_GLOBAL);
+   else
+   err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
if (err != -ENOSPC) {
if (!err) {
err = i915_vma_wait_for_bind(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index da577729931f..b730f86e54f4 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -242,6 +242,7 @@ i915_vma_pin_ww(struct i915_vma *vma, struct 
i915_gem_ww_ctx *ww,
 static inline int __must_check
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
+   WARN_ON_ONCE(vma->resv && dma_resv_held(vma->resv));
return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
 }
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 19/22] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2.

2020-03-30 Thread Maarten Lankhorst
Make sure vma_lock is not used as inner lock when kernel context is used,
and add ww handling where appropriate.

Signed-off-by: Maarten Lankhorst 
---
 .../i915/gem/selftests/i915_gem_coherency.c   | 26 ++--
 .../drm/i915/gem/selftests/i915_gem_mman.c| 41 ++-
 drivers/gpu/drm/i915/selftests/i915_request.c | 18 +---
 3 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 99f8466a108a..d93b7d9ad174 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -199,25 +199,25 @@ static int gpu_set(struct context *ctx, unsigned long 
offset, u32 v)
 
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
-   i915_gem_object_unlock(ctx->obj);
if (err)
-   return err;
+   goto out_unlock;
 
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
-   if (IS_ERR(vma))
-   return PTR_ERR(vma);
+   if (IS_ERR(vma)) {
+   err = PTR_ERR(vma);
+   goto out_unlock;
+   }
 
rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
-   i915_vma_unpin(vma);
-   return PTR_ERR(rq);
+   err = PTR_ERR(rq);
+   goto out_unpin;
}
 
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) {
-   i915_request_add(rq);
-   i915_vma_unpin(vma);
-   return PTR_ERR(cs);
+   err = PTR_ERR(cs);
+   goto out_rq;
}
 
if (INTEL_GEN(ctx->engine->i915) >= 8) {
@@ -238,14 +238,16 @@ static int gpu_set(struct context *ctx, unsigned long 
offset, u32 v)
}
intel_ring_advance(rq, cs);
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-   i915_vma_unlock(vma);
-   i915_vma_unpin(vma);
 
+out_rq:
i915_request_add(rq);
+out_unpin:
+   i915_vma_unpin(vma);
+out_unlock:
+   i915_gem_object_unlock(ctx->obj);
 
return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index a67d9e59fe12..d4aaf603a78f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
int err;
 
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
 
-   err = i915_vma_pin(vma, 0, 0, PIN_USER);
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(obj, &ww);
+   if (!err)
+   err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
-   return err;
+   goto err;
 
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
-   i915_vma_unpin(vma);
-   return PTR_ERR(rq);
+   err = PTR_ERR(rq);
+   goto err_unpin;
}
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq,
  EXEC_OBJECT_WRITE);
-   i915_vma_unlock(vma);
 
i915_request_add(rq);
+err_unpin:
i915_vma_unpin(vma);
+err:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
if (err)
return err;
}
@@ -1000,6 +1011,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
 
vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
if (IS_ERR(vma)) {
@@ -1007,9 +1019,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unmap;
}
 
-   err = i915_vma_pin(vma, 0, 0, PIN_USER);
+   i915_gem

[Intel-gfx] [PATCH 18/22] drm/i915: Use ww pinning for intel_context_create_request()

2020-03-30 Thread Maarten Lankhorst
We want to get rid of intel_context_pin(), so convert
intel_context_create_request() first. :)

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_context.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 5c7acddf9651..f70135685552 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -450,15 +450,25 @@ int intel_context_prepare_remote_request(struct 
intel_context *ce,
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
 {
+   struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err;
 
-   err = intel_context_pin(ce);
-   if (unlikely(err))
-   return ERR_PTR(err);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   err = intel_context_pin_ww(ce, &ww);
+   if (!err) {
+   rq = i915_request_create(ce);
+   intel_context_unpin(ce);
+   } else if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   } else {
+   rq = ERR_PTR(err);
+   }
 
-   rq = i915_request_create(ce);
-   intel_context_unpin(ce);
+   i915_gem_ww_ctx_fini(&ww);
 
if (IS_ERR(rq))
return rq;
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 20/22] drm/i915: Add ww locking to vm_fault_gtt

2020-03-30 Thread Maarten Lankhorst
Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 51 +++-
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index b39c24dae64e..e35e8d0b6938 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct intel_runtime_pm *rpm = &i915->runtime_pm;
struct i915_ggtt *ggtt = &i915->ggtt;
bool write = area->vm_flags & VM_WRITE;
+   struct i915_gem_ww_ctx ww;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
 
-   /* Sanity check that we allow writing into this object */
-   if (i915_gem_object_is_readonly(obj) && write)
-   return VM_FAULT_SIGBUS;
-
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
trace_i915_gem_object_fault(obj, page_offset, true, write);
 
-   ret = i915_gem_object_pin_pages(obj);
+   wakeref = intel_runtime_pm_get(rpm);
+
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   ret = i915_gem_object_lock(obj, &ww);
if (ret)
-   goto err;
+   goto err_rpm;
 
-   wakeref = intel_runtime_pm_get(rpm);
+   /* Sanity check that we allow writing into this object */
+   if (i915_gem_object_is_readonly(obj) && write) {
+   ret = -EFAULT;
+   goto err_rpm;
+   }
 
-   ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+   ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_rpm;
 
+   ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+   if (ret)
+   goto err_pages;
+
/* Now pin it into the GTT as needed */
-   vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-  PIN_MAPPABLE |
-  PIN_NONBLOCK /* NOWARN */ |
-  PIN_NOEVICT);
-   if (IS_ERR(vma)) {
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+   if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
/* Use a partial view if it is bigger than available space */
struct i915_ggtt_view view =
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
@@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 * all hope that the hardware is able to track future writes.
 */
 
-   vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-   if (IS_ERR(vma)) {
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+   if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
flags = PIN_MAPPABLE;
view.type = I915_GGTT_VIEW_PARTIAL;
-   vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 
0, flags);
}
 
/* The entire mappable GGTT is pinned? Unexpected! */
@@ -389,10 +398,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
__i915_vma_unpin(vma);
 err_reset:
intel_gt_reset_unlock(ggtt->vm.gt, srcu);
+err_pages:
+   i915_gem_object_unpin_pages(obj);
 err_rpm:
+   if (ret == -EDEADLK) {
+   ret = i915_gem_ww_ctx_backoff(&ww);
+   if (!ret)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
intel_runtime_pm_put(rpm, wakeref);
-   i915_gem_object_unpin_pages(obj);
-err:
return i915_error_to_vmf_fault(ret);
 }
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 21/22] drm/i915: Add ww locking to pin_to_display_plane

2020-03-30 Thread Maarten Lankhorst
Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 65 --
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  1 +
 2 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index e9d3b587f562..def8254b5fc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -37,6 +37,12 @@ void i915_gem_object_flush_if_display(struct 
drm_i915_gem_object *obj)
i915_gem_object_unlock(obj);
 }
 
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
+{
+   if (i915_gem_object_is_framebuffer(obj))
+   __i915_gem_object_flush_for_display(obj);
+}
+
 /**
  * Moves a single object to the WC read, and possibly write domain.
  * @obj: object to act on
@@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
if (ret)
return ret;
 
-   ret = i915_gem_object_lock_interruptible(obj, NULL);
-   if (ret)
-   return ret;
-
/* Always invalidate stale cachelines */
if (obj->cache_level != cache_level) {
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true;
}
 
-   i915_gem_object_unlock(obj);
-
/* The cache-level will be applied when each vma is rebound. */
return i915_gem_object_unbind(obj,
  I915_GEM_OBJECT_UNBIND_ACTIVE |
@@ -255,6 +255,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
struct drm_i915_gem_caching *args = data;
struct drm_i915_gem_object *obj;
enum i915_cache_level level;
+   struct i915_gem_ww_ctx ww;
int ret = 0;
 
switch (args->caching) {
@@ -293,7 +294,18 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, 
void *data,
goto out;
}
 
-   ret = i915_gem_object_set_cache_level(obj, level);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   ret = i915_gem_object_lock(obj, &ww);
+   if (!ret)
+   ret = i915_gem_object_set_cache_level(obj, level);
+
+   if (ret == -EDEADLK) {
+   ret = i915_gem_ww_ctx_backoff(&ww);
+   if (!ret)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
 
 out:
i915_gem_object_put(obj);
@@ -313,6 +325,7 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
 unsigned int flags)
 {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_gem_ww_ctx ww;
struct i915_vma *vma;
int ret;
 
@@ -320,6 +333,11 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
return ERR_PTR(-EINVAL);
 
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   ret = i915_gem_object_lock(obj, &ww);
+   if (ret)
+   goto err;
/*
 * The display engine is not coherent with the LLC cache on gen6.  As
 * a result, we make sure that the pinning that is about to occur is
@@ -334,7 +352,7 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
  HAS_WT(i915) ?
  I915_CACHE_WT : I915_CACHE_NONE);
if (ret)
-   return ERR_PTR(ret);
+   goto err;
 
/*
 * As the user may map the buffer once pinned in the display plane
@@ -347,18 +365,31 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
vma = ERR_PTR(-ENOSPC);
if ((flags & PIN_MAPPABLE) == 0 &&
(!view || view->type == I915_GGTT_VIEW_NORMAL))
-   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-  flags |
-  PIN_MAPPABLE |
-  PIN_NONBLOCK);
-   if (IS_ERR(vma))
-   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-   if (IS_ERR(vma))
-   return vma;
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
+ flags | PIN_MAPPABLE |
+ PIN_NONBLOCK);
+   if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
+ alignment, flags);
+   if (IS_ERR(vma)) {
+   ret = PTR_ERR(vma);
+   goto err;
+   }
 
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-   i915_gem_object_flush_if_display(obj);
+   i915_gem_object_flush_if_display_loc

[Intel-gfx] (For CI testing) [PATCH 02/22] perf/core: Only copy-to-user after completely unlocking all locks.

2020-03-30 Thread Maarten Lankhorst
We inadvertently create a dependency on mmap_sem with a whole chain.

This breaks any user who wants to take a lock and call rcu_barrier(),
while also taking that lock inside mmap_sem:

<4> [604.892532] ==
<4> [604.892534] WARNING: possible circular locking dependency detected
<4> [604.892536] 5.6.0-rc7-CI-Patchwork_17096+ #1 Tainted: G U
<4> [604.892537] --
<4> [604.892538] kms_frontbuffer/2595 is trying to acquire lock:
<4> [604.892540] 8264a558 (rcu_state.barrier_mutex){+.+.}, at: 
rcu_barrier+0x23/0x190
<4> [604.892547]
but task is already holding lock:
<4> [604.892547] 888484716050 (reservation_ww_class_mutex){+.+.}, at: 
i915_gem_object_pin_to_display_plane+0x89/0x270 [i915]
<4> [604.892592]
which lock already depends on the new lock.
<4> [604.892593]
the existing dependency chain (in reverse order) is:
<4> [604.892594]
-> #6 (reservation_ww_class_mutex){+.+.}:
<4> [604.892597]__ww_mutex_lock.constprop.15+0xc3/0x1090
<4> [604.892598]ww_mutex_lock+0x39/0x70
<4> [604.892600]dma_resv_lockdep+0x10e/0x1f5
<4> [604.892602]do_one_initcall+0x58/0x300
<4> [604.892604]kernel_init_freeable+0x17b/0x1dc
<4> [604.892605]kernel_init+0x5/0x100
<4> [604.892606]ret_from_fork+0x24/0x50
<4> [604.892607]
-> #5 (reservation_ww_class_acquire){+.+.}:
<4> [604.892609]dma_resv_lockdep+0xec/0x1f5
<4> [604.892610]do_one_initcall+0x58/0x300
<4> [604.892610]kernel_init_freeable+0x17b/0x1dc
<4> [604.892611]kernel_init+0x5/0x100
<4> [604.892612]ret_from_fork+0x24/0x50
<4> [604.892613]
-> #4 (&mm->mmap_sem#2){}:
<4> [604.892615]__might_fault+0x63/0x90
<4> [604.892617]_copy_to_user+0x1e/0x80
<4> [604.892619]perf_read+0x200/0x2b0
<4> [604.892621]vfs_read+0x96/0x160
<4> [604.892622]ksys_read+0x9f/0xe0
<4> [604.892623]do_syscall_64+0x4f/0x220
<4> [604.892624]entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [604.892625]
-> #3 (&cpuctx_mutex){+.+.}:
<4> [604.892626]__mutex_lock+0x9a/0x9c0
<4> [604.892627]perf_event_init_cpu+0xa4/0x140
<4> [604.892629]perf_event_init+0x19d/0x1cd
<4> [604.892630]start_kernel+0x362/0x4e4
<4> [604.892631]secondary_startup_64+0xa4/0xb0
<4> [604.892631]
-> #2 (pmus_lock){+.+.}:
<4> [604.892633]__mutex_lock+0x9a/0x9c0
<4> [604.892633]perf_event_init_cpu+0x6b/0x140
<4> [604.892635]cpuhp_invoke_callback+0x9b/0x9d0
<4> [604.892636]_cpu_up+0xa2/0x140
<4> [604.892637]do_cpu_up+0x61/0xa0
<4> [604.892639]smp_init+0x57/0x96
<4> [604.892639]kernel_init_freeable+0x87/0x1dc
<4> [604.892640]kernel_init+0x5/0x100
<4> [604.892642]ret_from_fork+0x24/0x50
<4> [604.892642]
-> #1 (cpu_hotplug_lock.rw_sem){}:
<4> [604.892643]cpus_read_lock+0x34/0xd0
<4> [604.892644]rcu_barrier+0xaa/0x190
<4> [604.892645]kernel_init+0x21/0x100
<4> [604.892647]ret_from_fork+0x24/0x50
<4> [604.892647]
-> #0 (rcu_state.barrier_mutex){+.+.}:
<4> [604.892649]__lock_acquire+0x1328/0x15d0
<4> [604.892650]lock_acquire+0xa7/0x1c0
<4> [604.892651]__mutex_lock+0x9a/0x9c0
<4> [604.892652]rcu_barrier+0x23/0x190
<4> [604.892680]i915_gem_object_unbind+0x29d/0x3f0 [i915]
<4> [604.892707]i915_gem_object_pin_to_display_plane+0x141/0x270 [i915]
<4> [604.892737]intel_pin_and_fence_fb_obj+0xec/0x1f0 [i915]
<4> [604.892767]intel_plane_pin_fb+0x3f/0xd0 [i915]
<4> [604.892797]intel_prepare_plane_fb+0x13b/0x5c0 [i915]
<4> [604.892798]drm_atomic_helper_prepare_planes+0x85/0x110
<4> [604.892827]intel_atomic_commit+0xda/0x390 [i915]
<4> [604.892828]drm_atomic_helper_set_config+0x57/0xa0
<4> [604.892830]drm_mode_setcrtc+0x1c4/0x720
<4> [604.892830]drm_ioctl_kernel+0xb0/0xf0
<4> [604.892831]drm_ioctl+0x2e1/0x390
<4> [604.892833]ksys_ioctl+0x7b/0x90
<4> [604.892835]__x64_sys_ioctl+0x11/0x20
<4> [604.892835]do_syscall_64+0x4f/0x220
<4> [604.892836]entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [604.892837]

Signed-off-by: Maarten Lankhorst 
---
 kernel/events/core.c | 59 +++-
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 085d9263d595..8b95a6512e31 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4926,20 +4926,20 @@ static int __perf_read_group_add(struct perf_event 
*leader,
 }
 
 static int perf_read_group(struct perf_event *event,
-  u64 read_format, char __user *buf)
+  u64 read_format, char __user *buf,
+  u64 **values)
 {
struct perf_event *leader = event->group_leader, *child;
struc

[Intel-gfx] [PATCH 10/22] drm/i915: Pin engine before pinning all objects, v3.

2020-03-30 Thread Maarten Lankhorst
We want to lock all gem objects, including the engine context objects.
Rework the throttling to ensure that we can do this. Now we only throttle
once, but can take eb_pin_engine while acquiring objects. This means we
will have to drop the lock to wait. If we don't have to throttle we can
still take the fastpath, if not we will take the slowpath and wait for
the throttle request while unlocked.

The engine has to be pinned as first step, otherwise gpu relocations
won't work.

Changes since v1:
- Only need to get a throttled request in the fastpath, no need for
  a global flag any more.
- Always free the waited request correctly.
Changes since v2:
- Use intel_engine_pm_get()/put() to keep engine pool alive during
  EDEADLK handling.

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 174 --
 1 file changed, 118 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 74146623b8ae..05f6e1a94977 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -16,6 +16,7 @@
 #include "gem/i915_gem_ioctls.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pool.h"
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_ring.h"
@@ -55,7 +56,8 @@ enum {
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | 
__EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC   BIT(31)
-#define __EXEC_INTERNAL_FLAGS  (~0u << 31)
+#define __EXEC_ENGINE_PINNED   BIT(30)
+#define __EXEC_INTERNAL_FLAGS  (~0u << 30)
 #define UPDATE PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -288,6 +290,9 @@ struct i915_execbuffer {
 };
 
 static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+ bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
@@ -896,7 +901,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long 
handle)
}
 }
 
-static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
 {
const unsigned int count = eb->buffer_count;
unsigned int i;
@@ -913,6 +918,8 @@ static void eb_release_vmas(const struct i915_execbuffer 
*eb, bool final)
if (final)
i915_vma_put(vma);
}
+
+   eb_unpin_engine(eb);
 }
 
 static void eb_destroy(const struct i915_execbuffer *eb)
@@ -1713,7 +1720,8 @@ static int eb_prefault_relocations(const struct 
i915_execbuffer *eb)
return 0;
 }
 
-static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+  struct i915_request *rq)
 {
bool have_copy = false;
struct eb_vma *ev;
@@ -1729,6 +1737,21 @@ static noinline int eb_relocate_parse_slow(struct 
i915_execbuffer *eb)
eb_release_vmas(eb, false);
i915_gem_ww_ctx_fini(&eb->ww);
 
+   if (rq) {
+   /* nonblocking is always false */
+   if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+   i915_request_put(rq);
+   rq = NULL;
+
+   err = -EINTR;
+   goto err_relock;
+   }
+
+   i915_request_put(rq);
+   rq = NULL;
+   }
+
/*
 * We take 3 passes through the slowpatch.
 *
@@ -1752,14 +1775,25 @@ static noinline int eb_relocate_parse_slow(struct 
i915_execbuffer *eb)
err = 0;
}
 
-   flush_workqueue(eb->i915->mm.userptr_wq);
+   if (!err)
+   flush_workqueue(eb->i915->mm.userptr_wq);
 
+err_relock:
i915_gem_ww_ctx_init(&eb->ww, true);
if (err)
goto out;
 
/* reacquire the objects */
 repeat_validate:
+   rq = eb_pin_engine(eb, false);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto err;
+   }
+
+   /* We didn't throttle, should be NULL */
+   GEM_WARN_ON(rq);
+
err = eb_validate_vmas(eb);
if (err)
goto err;
@@ -1823,14 +1857,47 @@ static noinline int eb_relocate_parse_slow(struct 
i915_execbuffer *eb)
}
}
 
+   if (rq)
+   i915_request_put(rq);
+
return err;
 }
 
 static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
int err;
+   struct i915_request *rq = NULL;
+   bool throttle = true;
 
 retry:
+   rq = eb_pin_engine(eb, throttle);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+ 

[Intel-gfx] [PATCH 07/22] drm/i915: Use ww locking in intel_renderstate.

2020-03-30 Thread Maarten Lankhorst
We want to start using ww locking in intel_context_pin. For this
we need to lock multiple objects, and the single i915_gem_object_lock
is not enough.

Convert to using ww-waiting, and make sure we always pin intel_context_state,
even if we don't have a renderstate object.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  | 21 +++---
 drivers/gpu/drm/i915/gt/intel_renderstate.c | 71 ++---
 drivers/gpu/drm/i915/gt/intel_renderstate.h |  9 ++-
 3 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 6eae4c791007..c11e89472ad8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt)
/* We must be able to switch to something! */
GEM_BUG_ON(!engine->kernel_context);
 
-   err = intel_renderstate_init(&so, engine);
-   if (err)
-   goto out;
-
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
}
 
-   rq = intel_context_create_request(ce);
+   err = intel_renderstate_init(&so, ce);
+   if (err)
+   goto err;
+
+   rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   intel_context_put(ce);
-   goto out;
+   goto err_fini;
}
 
err = intel_engine_emit_ctx_wa(rq);
@@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
 err_rq:
requests[id] = i915_request_get(rq);
i915_request_add(rq);
-   intel_renderstate_fini(&so);
-   if (err)
+err_fini:
+   intel_renderstate_fini(&so, ce);
+err:
+   if (err) {
+   intel_context_put(ce);
goto out;
+   }
}
 
/* Flush the default context image to memory, and enable powersaving. */
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c 
b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index ca533d98d14d..c65554c431f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_renderstate.h"
+#include "gt/intel_context.h"
 #include "intel_ring.h"
 
 static const struct intel_renderstate_rodata *
@@ -74,10 +75,9 @@ static int render_state_setup(struct intel_renderstate *so,
u32 *d;
int ret;
 
-   i915_gem_object_lock(so->vma->obj, NULL);
ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush);
if (ret)
-   goto out_unlock;
+   return ret;
 
d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0));
 
@@ -158,8 +158,6 @@ static int render_state_setup(struct intel_renderstate *so,
ret = 0;
 out:
i915_gem_object_finish_access(so->vma->obj);
-out_unlock:
-   i915_gem_object_unlock(so->vma->obj);
return ret;
 
 err:
@@ -171,33 +169,47 @@ static int render_state_setup(struct intel_renderstate 
*so,
 #undef OUT_BATCH
 
 int intel_renderstate_init(struct intel_renderstate *so,
-  struct intel_engine_cs *engine)
+  struct intel_context *ce)
 {
-   struct drm_i915_gem_object *obj;
+   struct intel_engine_cs *engine = ce->engine;
+   struct drm_i915_gem_object *obj = NULL;
int err;
 
memset(so, 0, sizeof(*so));
 
so->rodata = render_state_get_rodata(engine);
-   if (!so->rodata)
-   return 0;
+   if (so->rodata) {
+   if (so->rodata->batch_items * 4 > PAGE_SIZE)
+   return -EINVAL;
+
+   obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+   if (IS_ERR(obj))
+   return PTR_ERR(obj);
+
+   so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+   if (IS_ERR(so->vma)) {
+   err = PTR_ERR(so->vma);
+   goto err_obj;
+   }
+   }
 
-   if (so->rodata->batch_items * 4 > PAGE_SIZE)
-   return -EINVAL;
+   i915_gem_ww_ctx_init(&so->ww, true);
+retry:
+   err = intel_context_pin(ce);
+   if (err)
+   goto err_fini;
 
-   obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
-   if (IS_ERR(obj))
-   return PTR_ERR(obj);
+   /* return early if there's nothing to setup */
+   if (!err && !so->rodata)
+   return 0;
 
-   so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
-   if (

[Intel-gfx] [PATCH 04/22] drm/i915: Remove locking from i915_gem_object_prepare_read/write

2020-03-30 Thread Maarten Lankhorst
Execbuffer submission will perform its own WW locking, and we
cannot rely on the implicit lock there.

This also makes it clear that the GVT code will get a lockdep splat when
multiple batchbuffer shadows need to be performed in the same instance;
fix that up.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 20 ++-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 13 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  1 -
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  5 -
 .../i915/gem/selftests/i915_gem_coherency.c   | 14 +
 .../drm/i915/gem/selftests/i915_gem_context.c | 12 ---
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |  5 -
 drivers/gpu/drm/i915/gvt/cmd_parser.c |  9 -
 drivers/gpu/drm/i915/i915_gem.c   | 20 +--
 9 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index f4602faa8db9..e9d3b587f562 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -581,19 +581,17 @@ int i915_gem_object_prepare_read(struct 
drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
 
-   ret = i915_gem_object_lock_interruptible(obj, NULL);
-   if (ret)
-   return ret;
+   assert_object_held(obj);
 
ret = i915_gem_object_wait(obj,
   I915_WAIT_INTERRUPTIBLE,
   MAX_SCHEDULE_TIMEOUT);
if (ret)
-   goto err_unlock;
+   return ret;
 
ret = i915_gem_object_pin_pages(obj);
if (ret)
-   goto err_unlock;
+   return ret;
 
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -621,8 +619,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object 
*obj,
 
 err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
-   i915_gem_object_unlock(obj);
return ret;
 }
 
@@ -635,20 +631,18 @@ int i915_gem_object_prepare_write(struct 
drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
 
-   ret = i915_gem_object_lock_interruptible(obj, NULL);
-   if (ret)
-   return ret;
+   assert_object_held(obj);
 
ret = i915_gem_object_wait(obj,
   I915_WAIT_INTERRUPTIBLE |
   I915_WAIT_ALL,
   MAX_SCHEDULE_TIMEOUT);
if (ret)
-   goto err_unlock;
+   return ret;
 
ret = i915_gem_object_pin_pages(obj);
if (ret)
-   goto err_unlock;
+   return ret;
 
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -685,7 +679,5 @@ int i915_gem_object_prepare_write(struct 
drm_i915_gem_object *obj,
 
 err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
-   i915_gem_object_unlock(obj);
return ret;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index df931e84173c..cc2be6964037 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -960,11 +960,14 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 
vaddr = unmask_page(cache->vaddr);
if (cache->vaddr & KMAP) {
+   struct drm_i915_gem_object *obj =
+   (struct drm_i915_gem_object *)cache->node.mm;
if (cache->vaddr & CLFLUSH_AFTER)
mb();
 
kunmap_atomic(vaddr);
-   i915_gem_object_finish_access((struct drm_i915_gem_object 
*)cache->node.mm);
+   i915_gem_object_finish_access(obj);
+   i915_gem_object_unlock(obj);
} else {
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 
@@ -999,10 +1002,16 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
unsigned int flushes;
int err;
 
-   err = i915_gem_object_prepare_write(obj, &flushes);
+   err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
return ERR_PTR(err);
 
+   err = i915_gem_object_prepare_write(obj, &flushes);
+   if (err) {
+   i915_gem_object_unlock(obj);
+   return ERR_PTR(err);
+   }
+
BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 5103067269b0..11b8e27350

[Intel-gfx] [PATCH 14/22] drm/i915: Kill last user of intel_context_create_request outside of selftests

2020-03-30 Thread Maarten Lankhorst
Instead of using intel_context_create_request(), use intel_context_pin()
and i915_request_create() directly.

Now all those calls are gone outside of selftests. :)

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 ++---
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index e96cc7fa0936..d866f5903554 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1744,6 +1744,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
const struct i915_wa *wa;
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
unsigned int i;
u32 *results;
int err;
@@ -1756,29 +1757,34 @@ static int engine_wa_list_verify(struct intel_context 
*ce,
return PTR_ERR(vma);
 
intel_engine_pm_get(ce->engine);
-   rq = intel_context_create_request(ce);
-   intel_engine_pm_put(ce->engine);
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(vma->obj, &ww);
+   if (err == 0)
+   err = intel_context_pin_ww(ce, &ww);
+   if (err)
+   goto err_pm;
+
+   rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   goto err_vma;
+   goto err_unpin;
}
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-   i915_vma_unlock(vma);
-   if (err) {
-   i915_request_add(rq);
-   goto err_vma;
-   }
-
-   err = wa_list_srm(rq, wal, vma);
-   if (err)
-   goto err_vma;
+   if (err == 0)
+   err = wa_list_srm(rq, wal, vma);
 
i915_request_get(rq);
+   if (err)
+   i915_request_set_error_once(rq, err);
i915_request_add(rq);
+
+   if (err)
+   goto err_rq;
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
goto err_rq;
@@ -1803,7 +1809,16 @@ static int engine_wa_list_verify(struct intel_context 
*ce,
 
 err_rq:
i915_request_put(rq);
-err_vma:
+err_unpin:
+   intel_context_unpin(ce);
+err_pm:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+   intel_engine_pm_put(ce->engine);
i915_vma_unpin(vma);
i915_vma_put(vma);
return err;
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 13/22] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2.

2020-03-30 Thread Maarten Lankhorst
This is the last part outside of selftests that still doesn't use the
correct lock ordering of timeline->mutex vs resv_lock.

With gem fixed, there are a few places that still get locking wrong:
- gvt/scheduler.c
- i915_perf.c
- Most if not all selftests.

Changes since v1:
- Add intel_engine_pm_get/put() calls to fix use-after-free when using
  intel_engine_get_pool().

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c|  80 +++--
 .../gpu/drm/i915/gem/i915_gem_object_blt.c| 156 +++---
 .../gpu/drm/i915/gem/i915_gem_object_blt.h|   3 +
 3 files changed, 165 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 5d94a77f9bdd..10df576e785f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -157,6 +157,7 @@ static void clear_pages_worker(struct work_struct *work)
struct clear_pages_work *w = container_of(work, typeof(*w), work);
struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
struct i915_vma *vma = w->sleeve->vma;
+   struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
int err = w->dma.error;
@@ -172,17 +173,20 @@ static void clear_pages_worker(struct work_struct *work)
obj->read_domains = I915_GEM_GPU_DOMAINS;
obj->write_domain = 0;
 
-   err = i915_vma_pin(vma, 0, 0, PIN_USER);
-   if (unlikely(err))
+   i915_gem_ww_ctx_init(&ww, false);
+   intel_engine_pm_get(w->ce->engine);
+retry:
+   err = intel_context_pin_ww(w->ce, &ww);
+   if (err)
goto out_signal;
 
-   batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
+   batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
-   goto out_unpin;
+   goto out_ctx;
}
 
-   rq = intel_context_create_request(w->ce);
+   rq = i915_request_create(w->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -224,9 +228,19 @@ static void clear_pages_worker(struct work_struct *work)
i915_request_add(rq);
 out_batch:
intel_emit_vma_release(w->ce, batch);
-out_unpin:
-   i915_vma_unpin(vma);
+out_ctx:
+   intel_context_unpin(w->ce);
 out_signal:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+
+   i915_vma_unpin(w->sleeve->vma);
+   intel_engine_pm_put(w->ce->engine);
+
if (unlikely(err)) {
dma_fence_set_error(&w->dma, err);
dma_fence_signal(&w->dma);
@@ -234,6 +248,45 @@ static void clear_pages_worker(struct work_struct *work)
}
 }
 
+static int pin_wait_clear_pages_work(struct clear_pages_work *w,
+struct intel_context *ce)
+{
+   struct i915_vma *vma = w->sleeve->vma;
+   struct i915_gem_ww_ctx ww;
+   int err;
+
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(vma->obj, &ww);
+   if (err)
+   goto out;
+
+   err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+   if (unlikely(err))
+   goto out;
+
+   err = i915_sw_fence_await_reservation(&w->wait,
+ vma->obj->base.resv, NULL,
+ true, I915_FENCE_TIMEOUT,
+ I915_FENCE_GFP);
+   if (err)
+   goto err_unpin_vma;
+
+   dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
+
+err_unpin_vma:
+   if (err)
+   i915_vma_unpin(vma);
+out:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+   return err;
+}
+
 static int __i915_sw_fence_call
 clear_pages_work_notify(struct i915_sw_fence *fence,
enum i915_sw_fence_notify state)
@@ -287,18 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct 
drm_i915_gem_object *obj,
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
 
-   i915_gem_object_lock(obj, NULL);
-   err = i915_sw_fence_await_reservation(&work->wait,
- obj->base.resv, NULL,
- true, I915_FENCE_TIMEOUT,
- I915_FENCE_GFP);
-   if (err < 0) {
+   err = pin_wait_clear_pages_work(work, ce);
+   if (err < 0)
dma_fence_set_error(&work->dma, err);
-   } else {
-   dma_resv_add_e

[Intel-gfx] [PATCH 11/22] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex

2020-03-30 Thread Maarten Lankhorst
Instead of doing everything inside of pin_mutex, we move all pinning
outside. Because i915_active has its own reference counting, and
pinning has the same issues vs. mutexes, we make sure
everything is pinned first, so the pinning in i915_active only needs
to bump refcounts. This allows us to take pin refcounts correctly
all the time.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 233 +++---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c   |  34 ++-
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |   1 -
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  13 +-
 drivers/gpu/drm/i915/gt/mock_engine.c |  13 +-
 6 files changed, 191 insertions(+), 107 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index e4aece20bc80..bc0ed268ccb8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,79 +93,6 @@ static void intel_context_active_release(struct 
intel_context *ce)
i915_active_release(&ce->active);
 }
 
-int __intel_context_do_pin(struct intel_context *ce)
-{
-   int err;
-
-   if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
-   err = intel_context_alloc_state(ce);
-   if (err)
-   return err;
-   }
-
-   err = i915_active_acquire(&ce->active);
-   if (err)
-   return err;
-
-   if (mutex_lock_interruptible(&ce->pin_mutex)) {
-   err = -EINTR;
-   goto out_release;
-   }
-
-   if (unlikely(intel_context_is_closed(ce))) {
-   err = -ENOENT;
-   goto out_unlock;
-   }
-
-   if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
-   err = intel_context_active_acquire(ce);
-   if (unlikely(err))
-   goto out_unlock;
-
-   err = ce->ops->pin(ce);
-   if (unlikely(err))
-   goto err_active;
-
-   CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
-i915_ggtt_offset(ce->ring->vma),
-ce->ring->head, ce->ring->tail);
-
-   smp_mb__before_atomic(); /* flush pin before it is visible */
-   atomic_inc(&ce->pin_count);
-   }
-
-   GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-   GEM_BUG_ON(i915_active_is_idle(&ce->active));
-   goto out_unlock;
-
-err_active:
-   intel_context_active_release(ce);
-out_unlock:
-   mutex_unlock(&ce->pin_mutex);
-out_release:
-   i915_active_release(&ce->active);
-   return err;
-}
-
-void intel_context_unpin(struct intel_context *ce)
-{
-   if (!atomic_dec_and_test(&ce->pin_count))
-   return;
-
-   CE_TRACE(ce, "unpin\n");
-   ce->ops->unpin(ce);
-
-   /*
-* Once released, we may asynchronously drop the active reference.
-* As that may be the only reference keeping the context alive,
-* take an extra now so that it is not freed before we finish
-* dereferencing it.
-*/
-   intel_context_get(ce);
-   intel_context_active_release(ce);
-   intel_context_put(ce);
-}
-
 static int __context_pin_state(struct i915_vma *vma)
 {
unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
@@ -225,6 +152,138 @@ static void __ring_retire(struct intel_ring *ring)
i915_active_release(&ring->vma->active);
 }
 
+static int intel_context_pre_pin(struct intel_context *ce)
+{
+   int err;
+
+   CE_TRACE(ce, "active\n");
+
+   err = __ring_active(ce->ring);
+   if (err)
+   return err;
+
+   err = intel_timeline_pin(ce->timeline);
+   if (err)
+   goto err_ring;
+
+   if (!ce->state)
+   return 0;
+
+   err = __context_pin_state(ce->state);
+   if (err)
+   goto err_timeline;
+
+
+   return 0;
+
+err_timeline:
+   intel_timeline_unpin(ce->timeline);
+err_ring:
+   __ring_retire(ce->ring);
+   return err;
+}
+
+static void intel_context_post_unpin(struct intel_context *ce)
+{
+   if (ce->state)
+   __context_unpin_state(ce->state);
+
+   intel_timeline_unpin(ce->timeline);
+   __ring_retire(ce->ring);
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+   bool handoff = false;
+   void *vaddr;
+   int err = 0;
+
+   if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+   err = intel_context_alloc_state(ce);
+   if (err)
+   return err;
+   }
+
+   /*
+* We always pin the context/ring/timeline here, to ensure a pin
+* refcount for __intel_context_active(), which prevent a lock
+* inversion of ce->pin_mutex vs dma_resv_lock().
+*/
+   err = intel_context_pre_pin

[Intel-gfx] [PATCH 09/22] drm/i915: Nuke arguments to eb_pin_engine

2020-03-30 Thread Maarten Lankhorst
Those arguments are already set as eb.file and eb.args, so kill off
the extra arguments. This will allow us to move eb_pin_engine() to
after we have reserved all BOs.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 17 +++--
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index a337f3054ce3..74146623b8ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2412,11 +2412,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 }
 
 static unsigned int
-eb_select_legacy_ring(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_select_legacy_ring(struct i915_execbuffer *eb)
 {
struct drm_i915_private *i915 = eb->i915;
+   struct drm_i915_gem_execbuffer2 *args = eb->args;
unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 
if (user_ring_id != I915_EXEC_BSD &&
@@ -2431,7 +2430,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-   bsd_idx = gen8_dispatch_bsd_engine(i915, file);
+   bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
   bsd_idx <= I915_EXEC_BSD_RING2) {
bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2456,18 +2455,16 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_pin_engine(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb)
 {
struct intel_context *ce;
unsigned int idx;
int err;
 
if (i915_gem_context_user_engines(eb->gem_context))
-   idx = args->flags & I915_EXEC_RING_MASK;
+   idx = eb->args->flags & I915_EXEC_RING_MASK;
else
-   idx = eb_select_legacy_ring(eb, file, args);
+   idx = eb_select_legacy_ring(eb);
 
ce = i915_gem_context_get_engine(eb->gem_context, idx);
if (IS_ERR(ce))
@@ -2765,7 +2762,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
 
-   err = eb_pin_engine(&eb, file, args);
+   err = eb_pin_engine(&eb);
if (unlikely(err))
goto err_context;
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 15/22] drm/i915: Convert i915_perf to ww locking as well

2020-03-30 Thread Maarten Lankhorst
We have the ordering of timeline->mutex vs resv_lock wrong,
convert the i915_pin_vma and intel_context_pin as well to
future-proof this.

We may need to do future changes to do this more transaction-like,
and only get down to a single i915_gem_ww_ctx, but for now this
should work.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/i915_perf.c | 57 +++-
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c74ebac50015..718ea9a743c7 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1192,24 +1192,39 @@ static struct intel_context *oa_pin_context(struct 
i915_perf_stream *stream)
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx = stream->ctx;
struct intel_context *ce;
-   int err;
+   struct i915_gem_ww_ctx ww;
+   int err = -ENODEV;
 
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
if (ce->engine != stream->engine) /* first match! */
continue;
 
-   /*
-* As the ID is the gtt offset of the context's vma we
-* pin the vma to ensure the ID remains fixed.
-*/
-   err = intel_context_pin(ce);
-   if (err == 0) {
-   stream->pinned_ctx = ce;
-   break;
-   }
+   err = 0;
+   break;
}
i915_gem_context_unlock_engines(ctx);
 
+   if (err)
+   return ERR_PTR(err);
+
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   /*
+* As the ID is the gtt offset of the context's vma we
+* pin the vma to ensure the ID remains fixed.
+*/
+   err = intel_context_pin_ww(ce, &ww);
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+
+   if (err)
+   return ERR_PTR(err);
+
+   stream->pinned_ctx = ce;
return stream->pinned_ctx;
 }
 
@@ -1923,15 +1938,22 @@ emit_oa_config(struct i915_perf_stream *stream,
 {
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
int err;
 
vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
return ERR_CAST(vma);
 
-   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   err = i915_gem_object_lock(vma->obj, &ww);
if (err)
-   goto err_vma_put;
+   goto err;
+
+   err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
+   if (err)
+   goto err;
 
intel_engine_pm_get(ce->engine);
rq = i915_request_create(ce);
@@ -1941,11 +1963,9 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_vma_unpin;
}
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
-   i915_vma_unlock(vma);
if (err)
goto err_add_request;
 
@@ -1960,7 +1980,14 @@ emit_oa_config(struct i915_perf_stream *stream,
i915_request_add(rq);
 err_vma_unpin:
i915_vma_unpin(vma);
-err_vma_put:
+err:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+
+   i915_gem_ww_ctx_fini(&ww);
i915_vma_put(vma);
return err ? ERR_PTR(err) : rq;
 }
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 17/22] drm/i915/selftests: Fix locking inversion in lrc selftest.

2020-03-30 Thread Maarten Lankhorst
This function does not use intel_context_create_request, so it has
to use the same locking order as normal code. This is required to
shut up lockdep in selftests.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f06ba750a0a..64959a0c68ce 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -4283,6 +4283,7 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
 {
struct intel_context *ce;
struct i915_request *rq;
+   struct i915_gem_ww_ctx ww;
enum {
RING_START_IDX = 0,
RING_TAIL_IDX,
@@ -4297,7 +4298,11 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
if (IS_ERR(ce))
return PTR_ERR(ce);
 
-   err = intel_context_pin(ce);
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(scratch->obj, &ww);
+   if (!err)
+   err = intel_context_pin_ww(ce, &ww);
if (err)
goto err_put;
 
@@ -4326,11 +4331,9 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
*cs++ = 0;
 
-   i915_vma_lock(scratch);
err = i915_request_await_object(rq, scratch->obj, true);
if (!err)
err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
-   i915_vma_unlock(scratch);
 
i915_request_get(rq);
i915_request_add(rq);
@@ -4367,6 +4370,12 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
 err_unpin:
intel_context_unpin(ce);
 err_put:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
intel_context_put(ce);
return err;
 }
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS

2020-03-30 Thread Chris Wilson
Quoting Michal Wajdeczko (2020-03-30 15:02:53)
> 
> 
> On 30.03.2020 14:28, Chris Wilson wrote:
> > There's nothing else between us loading the fw and the huc rejecting
> > it?
> > 
> > FIRMWARE_FAIL? That's set as the opposite of FIRMWARE_TRANSFERRED in
> > that we failed to upload the image to the HW. The firmware itself hasn't
> > had a chance to run.
> > 
> > case INTEL_UC_FIRMWARE_FAIL:
> >   return -ENXIO;
> > 
> > Or is that being overridden to FIRMWARE_ERROR?
> 
> No, it's not overridden by FIRMWARE_ERROR (since we use FIRMWARE_ERROR
> as final state, while with FIRMWARE_FAIL there is a chance for recovery
> during reset)
> 
> Also note that FIRMWARE_FAIL case is covered by the register check that
> we have below, which provides HuC runtime status.

Yes, if it only reports on the auth failure.

> And if we decide to use FIRMWARE_FAIL to report -ENXIO, then it is
> unlikely that we will ever report 0 again for any other fw error that
> could prevent fw from successful load (now recall your and Joonas
> position that this param shall stay as reflection of register read).
> 
> Michal
> 
> ps. on the other hand, if we trust our uc_fw_status() then we can drop that
> register read, finally decouple GET_PARAM from MMIO_READ and fully rely
> on cached status:

imo, that register read is the icing on the cake. We can tell whether
the FW got to the HW, but we can't tell if the HW was truly happy with
the FW without asking it.

I look at it as exposing an interface for the final capability bits to
userspace that the kernel does not have to understand, that go above and
beyond the kernel loading the firmware and confirming execution.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC PATCH v1 12/50] Treewide: Extirpate prandom_u32_state(rnd) % range

2020-03-30 Thread George Spelvin
There's no prandom_u32_state_max, so we're using reciprocal_scale()
here directly.

(Also add a missing "const" to drivers/gpu/drm/i915/selftests/scatterlist.c)

Signed-off-by: George Spelvin 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: intel-gfx@lists.freedesktop.org
Cc: Davidlohr Bueso 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/selftests/scatterlist.c | 4 ++--
 lib/interval_tree_test.c | 7 ---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c 
b/drivers/gpu/drm/i915/selftests/scatterlist.c
index d599186d5b714..be9ff9d03eada 100644
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@@ -195,13 +195,13 @@ static unsigned int random_page_size_pages(unsigned long 
n,
   struct rnd_state *rnd)
 {
/* 4K, 64K, 2M */
-   static unsigned int page_count[] = {
+   static const unsigned int page_count[] = {
BIT(12) >> PAGE_SHIFT,
BIT(16) >> PAGE_SHIFT,
BIT(21) >> PAGE_SHIFT,
};
 
-   return page_count[(prandom_u32_state(rnd) % 3)];
+   return page_count[reciprocal_scale(prandom_u32_state(rnd), 3)];
 }
 
 static inline bool page_contiguous(struct page *first,
diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index f37f4d44faa90..8c129c8c638b9 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -43,8 +43,8 @@ static void init(void)
int i;
 
for (i = 0; i < nnodes; i++) {
-   u32 b = (prandom_u32_state(&rnd) >> 4) % max_endpoint;
-   u32 a = (prandom_u32_state(&rnd) >> 4) % b;
+   u32 b = reciprocal_scale(prandom_u32_state(&rnd), max_endpoint);
+   u32 a = reciprocal_scale(prandom_u32_state(&rnd), b);
 
nodes[i].start = a;
nodes[i].last = b;
@@ -56,7 +56,8 @@ static void init(void)
 * which is pointless.
 */
for (i = 0; i < nsearches; i++)
-   queries[i] = (prandom_u32_state(&rnd) >> 4) % max_endpoint;
+   queries[i] = reciprocal_scale(prandom_u32_state(&rnd),
+ max_endpoint);
 }
 
 static int interval_tree_test_init(void)
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] Kernel 5.2 to current: possible i915 related problems

2020-03-30 Thread Dirk Gouders
Hello,

because of the current pandemic situation the usage of my laptop has
changed.  It is now running at home 24/7 with a monitor attached to it
and after about 12 days running a somewhat older kernel (5.2), it
stopped working.

After a reboot I found some information in the syslog that I attach to
this mail.  The next hang happened one day later but without any
information.

With a current 5.6.0-rc7+ I seem to get more frequent hangs but without any
information in the log file and somewhat non-reproducible.  Today, I
experienced two hangs when starting xterms or other programs but after
this (and necessary reboots) I am unable to reproduce a hang.

Perhaps someone has a suggestion for how I can produce debugging
information that survives the hangs and reboots.

Dirk


Mar 27 19:36:51 lena kernel: [drm:intel_cpu_fifo_underrun_irq_handler [i915]] 
*ERROR* CPU pipe B FIFO underrun
Mar 27 21:45:19 lena kernel: usb 1-1: USB disconnect, device number 15
Mar 27 21:45:19 lena kernel: sd 2:0:0:0: [sdb] Synchronizing SCSI cache
Mar 27 21:45:19 lena kernel: sd 2:0:0:0: [sdb] Synchronize Cache(10) failed: 
Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK
Mar 27 22:00:53 lena kernel: [drm:intel_cpu_fifo_underrun_irq_handler [i915]] 
*ERROR* CPU pipe B FIFO underrun
Mar 27 23:46:13 lena kernel: ------------[ cut here ]------------
Mar 27 23:46:13 lena kernel: vblank wait timed out on crtc 1
Mar 27 23:46:13 lena kernel: WARNING: CPU: 0 PID: 4221 at 
drm_wait_one_vblank+0xfa/0x12a [drm]
Mar 27 23:46:13 lena kernel: Modules linked in: usblp uas usb_storage uvcvideo 
videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common snd_hda_codec
_hdmi snd_hda_codec_realtek snd_hda_codec_generic crc32_pclmul crc32c_intel 
ghash_clmulni_intel i915 aesni_intel drm_kms_helper cfbfillrect crypto_simd 
glue_he
lper syscopyarea cfbimgblt sysfillrect sysimgblt snd_hda_intel fb_sys_fops 
cfbcopyarea snd_hda_codec sdhci_acpi drm xhci_pci snd_hwdep sdhci 
drm_panel_orientat
ion_quirks snd_hda_core intel_gtt mmc_core xhci_hcd iosf_mbi
Mar 27 23:46:13 lena kernel: CPU: 0 PID: 4221 Comm: X Not tainted 5.2.0+ #44
Mar 27 23:46:13 lena kernel: Hardware name: Acer Aspire ES1-131/Garp_BA, BIOS 
V1.23 06/22/2016
Mar 27 23:46:13 lena kernel: RIP: 0010:drm_wait_one_vblank+0xfa/0x12a [drm]
Mar 27 23:46:13 lena kernel: Code: 89 e7 e8 31 eb 74 e1 49 89 c4 eb bf 48 89 e6 
4c 89 f7 e8 d5 b5 ff e0 45 85 e4 75 10 89 de 48 c7 c7 cf de 0d a0 e8 2e bd fc e
0 <0f> 0b 89 de 48 89 ef e8 82 fe ff ff 48 8b 44 24 28 65 48 33 04 25
Mar 27 23:46:13 lena kernel: RSP: 0018:c9e73ac0 EFLAGS: 00010296
Mar 27 23:46:13 lena kernel: RAX:  RBX: 0001 RCX: 
0007
Mar 27 23:46:13 lena kernel: RDX:  RSI: 0002 RDI: 
888277a163a0
Mar 27 23:46:13 lena kernel: RBP: 888271b4 R08: 0306 R09: 
0001
Mar 27 23:46:13 lena kernel: R10: c9e739d0 R11: 000597d6da905e00 R12: 

Mar 27 23:46:13 lena kernel: R13: 00bd6280 R14: 8882765eb160 R15: 
0001
Mar 27 23:46:13 lena kernel: FS:  7f6d744bd200() 
GS:888277a0() knlGS:
Mar 27 23:46:13 lena kernel: CS:  0010 DS:  ES:  CR0: 80050033
Mar 27 23:46:13 lena kernel: CR2: 7f6d6f20d000 CR3: 000268a7 CR4: 
001006f0
Mar 27 23:46:13 lena kernel: Call Trace:
Mar 27 23:46:13 lena kernel: ? wait_woken+0x68/0x68
Mar 27 23:46:13 lena kernel: intel_pre_plane_update+0x165/0x1ea [i915]
Mar 27 23:46:13 lena kernel: intel_atomic_commit_tail+0xcb/0xf10 [i915]
Mar 27 23:46:13 lena kernel: ? flush_workqueue+0x2ab/0x2d4
Mar 27 23:46:13 lena kernel: intel_atomic_commit+0x23a/0x248 [i915]
Mar 27 23:46:13 lena kernel: drm_atomic_connector_commit_dpms+0xc0/0xda [drm]
Mar 27 23:46:13 lena kernel: drm_mode_obj_set_property_ioctl+0x133/0x241 [drm]
Mar 27 23:46:13 lena kernel: ? drm_connector_set_obj_prop+0x67/0x67 [drm]
Mar 27 23:46:13 lena kernel: drm_connector_property_set_ioctl+0x39/0x53 [drm]
Mar 27 23:46:13 lena kernel: drm_ioctl_kernel+0x8e/0xe2 [drm]
Mar 27 23:46:13 lena kernel: drm_ioctl+0x1fd/0x2dc [drm]
Mar 27 23:46:13 lena kernel: ? drm_connector_set_obj_prop+0x67/0x67 [drm]
Mar 27 23:46:13 lena kernel: ? hrtimer_cancel+0xc/0x16
Mar 27 23:46:13 lena kernel: ? schedule_hrtimeout_range_clock+0xb3/0xef
Mar 27 23:46:13 lena kernel: ? hrtimer_init+0x2/0x2
Mar 27 23:46:13 lena kernel: vfs_ioctl+0x19/0x26
Mar 27 23:46:13 lena kernel: do_vfs_ioctl+0x52c/0x554
Mar 27 23:46:13 lena kernel: ? wake_up_q+0x4e/0x4e
Mar 27 23:46:13 lena kernel: ksys_ioctl+0x39/0x58
Mar 27 23:46:13 lena kernel: __x64_sys_ioctl+0x11/0x14
Mar 27 23:46:13 lena kernel: do_syscall_64+0x4a/0xf4
Mar 27 23:46:13 lena kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9
Mar 27 23:46:13 lena kernel: RIP: 0033:0x7f6d74ce12b7
Mar 27 23:46:13 lena kernel: Code: 00 00 00 75 0c 48 c7 c0 ff ff ff ff 48 83 c4 
18

[Intel-gfx] [PATCH] drm/i915: check to see if the FPU is available before using it

2020-03-30 Thread Jason A. Donenfeld
It's not safe to just grab the FPU willy nilly without first checking to
see if it's available. This patch adds the usual call to may_use_simd()
and falls back to boring memcpy if it's not available.

Signed-off-by: Jason A. Donenfeld 
---
 drivers/gpu/drm/i915/i915_memcpy.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_memcpy.c 
b/drivers/gpu/drm/i915/i915_memcpy.c
index fdd550405fd3..7c0e022586bc 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -24,6 +24,7 @@
 
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
+#include <asm/simd.h>
 
 #include "i915_memcpy.h"
 
@@ -38,6 +39,12 @@ static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
 #ifdef CONFIG_AS_MOVNTDQA
 static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
 {
+   if (unlikely(!may_use_simd())) {
+   memcpy(dst, src, len);
+   return;
+   }
+
+
kernel_fpu_begin();
 
while (len >= 4) {
@@ -67,6 +74,11 @@ static void __memcpy_ntdqa(void *dst, const void *src, 
unsigned long len)
 
 static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
 {
+   if (unlikely(!may_use_simd())) {
+   memcpy(dst, src, len);
+   return;
+   }
+
kernel_fpu_begin();
 
while (len >= 4) {
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] Kernel 5.2 to current: possible i915 related problems

2020-03-30 Thread Dirk Gouders
Dirk Gouders  writes:

> Some additional information:
>
> I tried to get more information by using netconsole with kernel
> 5.6.0-rc7+.  After some time, the system stopped to respond and I
> checked the messages sent to the remote machine.  Unfortunately they
> gave no other information than the local logfile.
>
> Dirk
>
> Dirk Gouders  writes:
>
>> Hello,
>>
>> because of the current pandemic situation the usage of my laptop has
>> changed.  It is now running at home 24/7 with a monitor attached to it
>> and after about 12 days running a somewhat older kernel (5.2), it
>> stopped working.
>>
>> After a reboot I found some information in the syslog that I attach to
>> this mail.  The next hang happened one day later but without any
>> information.
>>
>> With a current 5.6.0-rc7+ I seem to get more frequent hangs but without any
>> information in the log file and somewhat non-reproducable.  Today, I
>> experienced two hangs when starting xterms or other programs but after
>> this (and necessary reboots) I am unable to reproduce a hang.
>>
>> Perhaps, someone has suggestion for me how to produce debugging
>> information that survives the hangs and reboots.
>>

This time, I have some information from a hang with the current kernel
5.6.0-rc7+ that obviously could be written to the logfile while the
system was starting to get problems.  A minute later or so, it
completely stopped responding and a hard reset was necessary:

Mar 30 10:37:52 lena kernel: i915 0000:00:02.0: GPU HANG: ecode 8:1:85d9, 
in X [4278]
Mar 30 10:37:52 lena kernel: GPU hangs can indicate a bug anywhere in the 
entire gfx stack, including userspace.
Mar 30 10:37:52 lena kernel: Please file a _new_ bug report at 
https://gitlab.freedesktop.org/drm/intel/issues/new.
Mar 30 10:37:52 lena kernel: Please see 
https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for 
details.
Mar 30 10:37:52 lena kernel: drm/i915 developers can then reassign to the right 
component if it's not a kernel issue.
Mar 30 10:37:52 lena kernel: The GPU crash dump is required to analyze GPU 
hangs, so please always attach it.
Mar 30 10:37:52 lena kernel: GPU crash dump saved to /sys/class/drm/card0/error
Mar 30 10:37:52 lena kernel: i915 0000:00:02.0: Resetting rcs0 for stopped 
heartbeat on rcs0
Mar 30 10:37:52 lena kernel: i915 0000:00:02.0: X[4278] context reset due to 
GPU hang

Adding the i915 maintainers because I doubt the data in
/sys/class/drm/card0/error is useful after a reboot.  Should I file a
bug report as suggested above, anyway?

Dirk

>>
>> 
>> Mar 27 19:36:51 lena kernel: [drm:intel_cpu_fifo_underrun_irq_handler
>> [i915]] *ERROR* CPU pipe B FIFO underrun
>> Mar 27 21:45:19 lena kernel: usb 1-1: USB disconnect, device number 15
>> Mar 27 21:45:19 lena kernel: sd 2:0:0:0: [sdb] Synchronizing SCSI cache
>> Mar 27 21:45:19 lena kernel: sd 2:0:0:0: [sdb] Synchronize Cache(10)
>> failed: Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK
>> Mar 27 22:00:53 lena kernel: [drm:intel_cpu_fifo_underrun_irq_handler
>> [i915]] *ERROR* CPU pipe B FIFO underrun
>> Mar 27 23:46:13 lena kernel: [ cut here ]
>> Mar 27 23:46:13 lena kernel: vblank wait timed out on crtc 1
>> Mar 27 23:46:13 lena kernel: WARNING: CPU: 0 PID: 4221 at 
>> drm_wait_one_vblank+0xfa/0x12a [drm]
>> Mar 27 23:46:13 lena kernel: Modules linked in: usblp uas usb_storage
>> uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2
>> videobuf2_common snd_hda_codec
>> _hdmi snd_hda_codec_realtek snd_hda_codec_generic crc32_pclmul
>> crc32c_intel ghash_clmulni_intel i915 aesni_intel drm_kms_helper
>> cfbfillrect crypto_simd glue_he
>> lper syscopyarea cfbimgblt sysfillrect sysimgblt snd_hda_intel
>> fb_sys_fops cfbcopyarea snd_hda_codec sdhci_acpi drm xhci_pci
>> snd_hwdep sdhci drm_panel_orientat
>> ion_quirks snd_hda_core intel_gtt mmc_core xhci_hcd iosf_mbi
>> Mar 27 23:46:13 lena kernel: CPU: 0 PID: 4221 Comm: X Not tainted 5.2.0+ #44
>> Mar 27 23:46:13 lena kernel: Hardware name: Acer Aspire ES1-131/Garp_BA, 
>> BIOS V1.23 06/22/2016
>> Mar 27 23:46:13 lena kernel: RIP: 0010:drm_wait_one_vblank+0xfa/0x12a [drm]
>> Mar 27 23:46:13 lena kernel: Code: 89 e7 e8 31 eb 74 e1 49 89 c4 eb bf
>> 48 89 e6 4c 89 f7 e8 d5 b5 ff e0 45 85 e4 75 10 89 de 48 c7 c7 cf de
>> 0d a0 e8 2e bd fc e
>> 0 <0f> 0b 89 de 48 89 ef e8 82 fe ff ff 48 8b 44 24 28 65 48 33 04 25
>> Mar 27 23:46:13 lena kernel: RSP: 0018:c9e73ac0 EFLAGS: 00010296
>> Mar 27 23:46:13 lena kernel: RAX:  RBX: 0001 
>> RCX: 0007
>> Mar 27 23:46:13 lena kernel: RDX:  RSI: 0002 
>> RDI: 888277a163a0
>> Mar 27 23:46:13 lena kernel: RBP: 888271b4 R08: 0306 
>> R09: 0001
>> Mar 27 23:46:13 lena kernel: R10: c9e739d0 R11: 000597d6da905e00 
>> R12: 
>> Mar 27 23:46:13 lena kernel: R13: 00bd6280

Re: [Intel-gfx] Kernel 5.2 to current: possible i915 related problems

2020-03-30 Thread Dirk Gouders
Some additional information:

I tried to get more information by using netconsole with kernel
5.6.0-rc7+.  After some time, the system stopped responding and I
checked the messages sent to the remote machine.  Unfortunately they
gave no other information than the local logfile.

Dirk

Dirk Gouders  writes:

> Hello,
>
> because of the current pandemic situation the usage of my laptop has
> changed.  It is now running at home 24/7 with a monitor attached to it
> and after about 12 days running a somewhat older kernel (5.2), it
> stopped working.
>
> After a reboot I found some information in the syslog that I attach to
> this mail.  The next hang happened one day later but without any
> information.
>
> With a current 5.6.0-rc7+ I seem to get more frequent hangs but without any
> information in the log file and somewhat non-reproducable.  Today, I
> experienced two hangs when starting xterms or other programs but after
> this (and necessary reboots) I am unable to reproduce a hang.
>
> Perhaps, someone has suggestion for me how to produce debugging
> information that survives the hangs and reboots.
>
> Dirk
>
> 
> Mar 27 19:36:51 lena kernel: [drm:intel_cpu_fifo_underrun_irq_handler
> [i915]] *ERROR* CPU pipe B FIFO underrun
> Mar 27 21:45:19 lena kernel: usb 1-1: USB disconnect, device number 15
> Mar 27 21:45:19 lena kernel: sd 2:0:0:0: [sdb] Synchronizing SCSI cache
> Mar 27 21:45:19 lena kernel: sd 2:0:0:0: [sdb] Synchronize Cache(10)
> failed: Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK
> Mar 27 22:00:53 lena kernel: [drm:intel_cpu_fifo_underrun_irq_handler
> [i915]] *ERROR* CPU pipe B FIFO underrun
> Mar 27 23:46:13 lena kernel: [ cut here ]
> Mar 27 23:46:13 lena kernel: vblank wait timed out on crtc 1
> Mar 27 23:46:13 lena kernel: WARNING: CPU: 0 PID: 4221 at 
> drm_wait_one_vblank+0xfa/0x12a [drm]
> Mar 27 23:46:13 lena kernel: Modules linked in: usblp uas usb_storage
> uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2
> videobuf2_common snd_hda_codec
> _hdmi snd_hda_codec_realtek snd_hda_codec_generic crc32_pclmul
> crc32c_intel ghash_clmulni_intel i915 aesni_intel drm_kms_helper
> cfbfillrect crypto_simd glue_he
> lper syscopyarea cfbimgblt sysfillrect sysimgblt snd_hda_intel
> fb_sys_fops cfbcopyarea snd_hda_codec sdhci_acpi drm xhci_pci
> snd_hwdep sdhci drm_panel_orientat
> ion_quirks snd_hda_core intel_gtt mmc_core xhci_hcd iosf_mbi
> Mar 27 23:46:13 lena kernel: CPU: 0 PID: 4221 Comm: X Not tainted 5.2.0+ #44
> Mar 27 23:46:13 lena kernel: Hardware name: Acer Aspire ES1-131/Garp_BA, BIOS 
> V1.23 06/22/2016
> Mar 27 23:46:13 lena kernel: RIP: 0010:drm_wait_one_vblank+0xfa/0x12a [drm]
> Mar 27 23:46:13 lena kernel: Code: 89 e7 e8 31 eb 74 e1 49 89 c4 eb bf
> 48 89 e6 4c 89 f7 e8 d5 b5 ff e0 45 85 e4 75 10 89 de 48 c7 c7 cf de
> 0d a0 e8 2e bd fc e
> 0 <0f> 0b 89 de 48 89 ef e8 82 fe ff ff 48 8b 44 24 28 65 48 33 04 25
> Mar 27 23:46:13 lena kernel: RSP: 0018:c9e73ac0 EFLAGS: 00010296
> Mar 27 23:46:13 lena kernel: RAX:  RBX: 0001 RCX: 
> 0007
> Mar 27 23:46:13 lena kernel: RDX:  RSI: 0002 RDI: 
> 888277a163a0
> Mar 27 23:46:13 lena kernel: RBP: 888271b4 R08: 0306 R09: 
> 0001
> Mar 27 23:46:13 lena kernel: R10: c9e739d0 R11: 000597d6da905e00 R12: 
> 
> Mar 27 23:46:13 lena kernel: R13: 00bd6280 R14: 8882765eb160 R15: 
> 0001
> Mar 27 23:46:13 lena kernel: FS:  7f6d744bd200() 
> GS:888277a0() knlGS:
> Mar 27 23:46:13 lena kernel: CS:  0010 DS:  ES:  CR0: 80050033
> Mar 27 23:46:13 lena kernel: CR2: 7f6d6f20d000 CR3: 000268a7 CR4: 
> 001006f0
> Mar 27 23:46:13 lena kernel: Call Trace:
> Mar 27 23:46:13 lena kernel: ? wait_woken+0x68/0x68
> Mar 27 23:46:13 lena kernel: intel_pre_plane_update+0x165/0x1ea [i915]
> Mar 27 23:46:13 lena kernel: intel_atomic_commit_tail+0xcb/0xf10 [i915]
> Mar 27 23:46:13 lena kernel: ? flush_workqueue+0x2ab/0x2d4
> Mar 27 23:46:13 lena kernel: intel_atomic_commit+0x23a/0x248 [i915]
> Mar 27 23:46:13 lena kernel: drm_atomic_connector_commit_dpms+0xc0/0xda [drm]
> Mar 27 23:46:13 lena kernel: drm_mode_obj_set_property_ioctl+0x133/0x241 [drm]
> Mar 27 23:46:13 lena kernel: ? drm_connector_set_obj_prop+0x67/0x67 [drm]
> Mar 27 23:46:13 lena kernel: drm_connector_property_set_ioctl+0x39/0x53 [drm]
> Mar 27 23:46:13 lena kernel: drm_ioctl_kernel+0x8e/0xe2 [drm]
> Mar 27 23:46:13 lena kernel: drm_ioctl+0x1fd/0x2dc [drm]
> Mar 27 23:46:13 lena kernel: ? drm_connector_set_obj_prop+0x67/0x67 [drm]
> Mar 27 23:46:13 lena kernel: ? hrtimer_cancel+0xc/0x16
> Mar 27 23:46:13 lena kernel: ? schedule_hrtimeout_range_clock+0xb3/0xef
> Mar 27 23:46:13 lena kernel: ? hrtimer_init+0x2/0x2
> Mar 27 23:46:13 lena kernel: vfs_ioctl+0x19/0x26
> Mar 

Re: [Intel-gfx] rcu_barrier() no longer allowed within mmap_sem?

2020-03-30 Thread Paul E. McKenney
On Mon, Mar 30, 2020 at 03:00:35PM +0200, Daniel Vetter wrote:
> Hi all, for all = rcu, cpuhotplug and perf maintainers
> 
> We've hit an interesting new lockdep splat in our drm/i915 CI:
> 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17096/shard-tglb7/igt@kms_frontbuffer_track...@fbcpsr-rgb101010-draw-mmap-gtt.html#dmesg-warnings861
> 
> Summarizing away the driver parts we have
> 
> < gpu locks which are held within mm->mmap_sem in various gpu fault handlers >
> 
> -> #4 (&mm->mmap_sem#2){}:
> <4> [604.892615] __might_fault+0x63/0x90
> <4> [604.892617] _copy_to_user+0x1e/0x80
> <4> [604.892619] perf_read+0x200/0x2b0
> <4> [604.892621] vfs_read+0x96/0x160
> <4> [604.892622] ksys_read+0x9f/0xe0
> <4> [604.892623] do_syscall_64+0x4f/0x220
> <4> [604.892624] entry_SYSCALL_64_after_hwframe+0x49/0xbe
> <4> [604.892625]
> -> #3 (&cpuctx_mutex){+.+.}:
> <4> [604.892626] __mutex_lock+0x9a/0x9c0
> <4> [604.892627] perf_event_init_cpu+0xa4/0x140
> <4> [604.892629] perf_event_init+0x19d/0x1cd
> <4> [604.892630] start_kernel+0x362/0x4e4
> <4> [604.892631] secondary_startup_64+0xa4/0xb0
> <4> [604.892631]
> -> #2 (pmus_lock){+.+.}:
> <4> [604.892633] __mutex_lock+0x9a/0x9c0
> <4> [604.892633] perf_event_init_cpu+0x6b/0x140
> <4> [604.892635] cpuhp_invoke_callback+0x9b/0x9d0
> <4> [604.892636] _cpu_up+0xa2/0x140
> <4> [604.892637] do_cpu_up+0x61/0xa0
> <4> [604.892639] smp_init+0x57/0x96
> <4> [604.892639] kernel_init_freeable+0x87/0x1dc
> <4> [604.892640] kernel_init+0x5/0x100
> <4> [604.892642] ret_from_fork+0x24/0x50
> <4> [604.892642]
> -> #1 (cpu_hotplug_lock.rw_sem){}:
> <4> [604.892643] cpus_read_lock+0x34/0xd0
> <4> [604.892644] rcu_barrier+0xaa/0x190
> <4> [604.892645] kernel_init+0x21/0x100
> <4> [604.892647] ret_from_fork+0x24/0x50
> <4> [604.892647]
> -> #0 (rcu_state.barrier_mutex){+.+.}:
> <4> [604.892649] __lock_acquire+0x1328/0x15d0
> <4> [604.892650] lock_acquire+0xa7/0x1c0
> <4> [604.892651] __mutex_lock+0x9a/0x9c0
> <4> [604.892652] rcu_barrier+0x23/0x190
> <4> [604.892680] i915_gem_object_unbind+0x29d/0x3f0 [i915]
> <4> [604.892707] i915_gem_object_pin_to_display_plane+0x141/0x270 [i915]
> <4> [604.892737] intel_pin_and_fence_fb_obj+0xec/0x1f0 [i915]
> <4> [604.892767] intel_plane_pin_fb+0x3f/0xd0 [i915]
> <4> [604.892797] intel_prepare_plane_fb+0x13b/0x5c0 [i915]
> <4> [604.892798] drm_atomic_helper_prepare_planes+0x85/0x110
> <4> [604.892827] intel_atomic_commit+0xda/0x390 [i915]
> <4> [604.892828] drm_atomic_helper_set_config+0x57/0xa0
> <4> [604.892830] drm_mode_setcrtc+0x1c4/0x720
> <4> [604.892830] drm_ioctl_kernel+0xb0/0xf0
> <4> [604.892831] drm_ioctl+0x2e1/0x390
> <4> [604.892833] ksys_ioctl+0x7b/0x90
> <4> [604.892835] __x64_sys_ioctl+0x11/0x20
> <4> [604.892835] do_syscall_64+0x4f/0x220
> <4> [604.892836] entry_SYSCALL_64_after_hwframe+0x49/0xbe
> 
> The last backtrace boils down to i915 driver code which holds the same
> locks we are holding within mm->mmap_sem, and then ends up calling
> rcu_barrier(). From what I can see i915 is just the messenger here,
> any driver with this pattern of a lock held within mmap_sem which also
> has a path of calling rcu_barrier while holding that lock should be
> hitting this splat.
> 
> Two questions:
> - This suggests that calling rcu_barrier() isn't ok anymore while
> holding mmap_sem, or anything that has a dependency upon mmap_sem. I
> guess that's not the idea, please confirm.
> - Assuming this dependency is indeed not intended, where should the
> loop be broken? It goes through perf, cpuhotplug and rcu subsystems,
> and I don't have a clue about any of those.

Indeed, rcu_barrier() excludes CPU hotplug in order to eliminate a number
of interesting races.

Am I interpreting the above trace correctly in thinking that the various
calls to cpus_read_lock() are with mmap_sem held?  If so, can the calls
to rcu_barrier() be moved out from under the regions of code protected
by cpus_read_lock()?  Invoking rcu_barrier() with cpus_read_lock() held
is an immediate self-deadlock.

Or is rcu_barrier() somehow indirectly sometimes acquiring mmap_sem
or pmus_lock?  (Not seeing it myself, but...)

Thanx, Paul
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC PATCH v1 03/50] fault-inject: Shrink struct fault_attr

2020-03-30 Thread George Spelvin
Probability has a useful range of 0 to 100.
Verbose has a useful range of 0 to 2.

Reduce them both from unsigned long to unsigned char.  Since there
was already a hole they can fit into, this saves 16 bytes.

There's one consequential fix required: i915 selftests set the
probability to 999 for some reason, which had the same effect as
100.  Leaving it alone would have worked with a compiler warning
(999 % 256 = 231 is also >= 100, so would have the same effect),
but it seemed better to clean it up.

Signed-off-by: George Spelvin 
Cc: Akinobu Mita 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: intel-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
 include/linux/fault-inject.h  |  4 ++--
 lib/fault-inject.c| 10 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 3f7e80fb3bbd1..3342e3c0ec10b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -901,7 +901,7 @@ static int shrink_hole(struct drm_i915_private *i915,
unsigned long prime;
int err;
 
-   vm->fault_attr.probability = 999;
+   vm->fault_attr.probability = 100;
atomic_set(&vm->fault_attr.times, -1);
 
for_each_prime_number_from(prime, 0, ULONG_MAX - 1) {
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index e525f6957c49f..094dd7d2a2ce6 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -14,11 +14,11 @@
  * Documentation/fault-injection/fault-injection.rst
  */
 struct fault_attr {
-   unsigned long probability;
unsigned long interval;
atomic_t times;
atomic_t space;
-   unsigned long verbose;
+   unsigned char probability;
+   unsigned char verbose;
bool task_filter;
unsigned long stacktrace_depth;
unsigned long require_start;
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index e20151fa5515e..406e27ba8e60f 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -17,13 +17,13 @@
  */
 int setup_fault_attr(struct fault_attr *attr, char *str)
 {
-   unsigned long probability;
+   unsigned char probability;
unsigned long interval;
int times;
int space;
 
/* "<interval>,<probability>,<space>,<times>" */
-   if (sscanf(str, "%lu,%lu,%d,%d",
+   if (sscanf(str, "%lu,%hhu,%d,%d",
&interval, &probability, &space, &times) < 4) {
printk(KERN_WARNING
"FAULT_INJECTION: failed to parse arguments\n");
@@ -43,7 +43,7 @@ static void fail_dump(struct fault_attr *attr)
 {
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
-  "name %pd, interval %lu, probability %lu, "
+  "name %pd, interval %lu, probability %d, "
   "space %d, times %d\n", attr->dname,
   attr->interval, attr->probability,
   atomic_read(&attr->space),
@@ -204,11 +204,11 @@ struct dentry *fault_create_debugfs_attr(const char *name,
if (IS_ERR(dir))
return dir;
 
-   debugfs_create_ul("probability", mode, dir, &attr->probability);
+   debugfs_create_u8("probability", mode, dir, &attr->probability);
debugfs_create_ul("interval", mode, dir, &attr->interval);
debugfs_create_atomic_t("times", mode, dir, &attr->times);
debugfs_create_atomic_t("space", mode, dir, &attr->space);
-   debugfs_create_ul("verbose", mode, dir, &attr->verbose);
+   debugfs_create_u8("verbose", mode, dir, &attr->verbose);
debugfs_create_u32("verbose_ratelimit_interval_ms", mode, dir,
   &attr->ratelimit_state.interval);
debugfs_create_u32("verbose_ratelimit_burst", mode, dir,
-- 
2.26.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS

2020-03-30 Thread Patchwork
== Series Details ==

Series: drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS
URL   : https://patchwork.freedesktop.org/series/75230/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8213 -> Patchwork_17127


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17127/index.html

Known issues


  Here are the changes found in Patchwork_17127 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@i915_selftest@live@execlists:
- fi-icl-y:   [PASS][1] -> [DMESG-FAIL][2] ([fdo#108569])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8213/fi-icl-y/igt@i915_selftest@l...@execlists.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17127/fi-icl-y/igt@i915_selftest@l...@execlists.html

  
 Possible fixes 

  * igt@i915_selftest@live@execlists:
- fi-bxt-dsi: [INCOMPLETE][3] ([i915#656]) -> [PASS][4]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8213/fi-bxt-dsi/igt@i915_selftest@l...@execlists.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17127/fi-bxt-dsi/igt@i915_selftest@l...@execlists.html

  
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [i915#656]: https://gitlab.freedesktop.org/drm/intel/issues/656


Participating hosts (47 -> 42)
--

  Additional (5): fi-hsw-4770r fi-bsw-n3050 fi-byt-j1900 fi-glk-dsi fi-tgl-y 
  Missing(10): fi-ilk-m540 fi-hsw-4200u fi-skl-6770hq fi-byt-squawks 
fi-bsw-cyan fi-ctg-p8600 fi-gdg-551 fi-bsw-kefka fi-byt-clapper fi-bdw-samus 


Build changes
-

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8213 -> Patchwork_17127

  CI-20190529: 20190529
  CI_DRM_8213: 3cebf14c87d0d4508d4cc9c49db14061af752c37 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5544: 477c562fc9932939083d732b77dd7b083c6bc0a1 @ 
git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17127: 05764bf4913c5b0ea98ec5ffe935b58f12623805 @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

05764bf4913c drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS

== Logs ==

For more details see: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17127/index.html
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 02/22] perf/core: Only copy-to-user after completely unlocking all locks. (CI test)

2020-03-30 Thread Maarten Lankhorst
We inadvertently create a dependency on mmap_sem with a whole chain.

This breaks any user who wants to take a lock and call rcu_barrier(),
while also taking that lock inside mmap_sem:

<4> [604.892532] ==
<4> [604.892534] WARNING: possible circular locking dependency detected
<4> [604.892536] 5.6.0-rc7-CI-Patchwork_17096+ #1 Tainted: G U
<4> [604.892537] --
<4> [604.892538] kms_frontbuffer/2595 is trying to acquire lock:
<4> [604.892540] 8264a558 (rcu_state.barrier_mutex){+.+.}, at: 
rcu_barrier+0x23/0x190
<4> [604.892547]
but task is already holding lock:
<4> [604.892547] 888484716050 (reservation_ww_class_mutex){+.+.}, at: 
i915_gem_object_pin_to_display_plane+0x89/0x270 [i915]
<4> [604.892592]
which lock already depends on the new lock.
<4> [604.892593]
the existing dependency chain (in reverse order) is:
<4> [604.892594]
-> #6 (reservation_ww_class_mutex){+.+.}:
<4> [604.892597]__ww_mutex_lock.constprop.15+0xc3/0x1090
<4> [604.892598]ww_mutex_lock+0x39/0x70
<4> [604.892600]dma_resv_lockdep+0x10e/0x1f5
<4> [604.892602]do_one_initcall+0x58/0x300
<4> [604.892604]kernel_init_freeable+0x17b/0x1dc
<4> [604.892605]kernel_init+0x5/0x100
<4> [604.892606]ret_from_fork+0x24/0x50
<4> [604.892607]
-> #5 (reservation_ww_class_acquire){+.+.}:
<4> [604.892609]dma_resv_lockdep+0xec/0x1f5
<4> [604.892610]do_one_initcall+0x58/0x300
<4> [604.892610]kernel_init_freeable+0x17b/0x1dc
<4> [604.892611]kernel_init+0x5/0x100
<4> [604.892612]ret_from_fork+0x24/0x50
<4> [604.892613]
-> #4 (&mm->mmap_sem#2){}:
<4> [604.892615]__might_fault+0x63/0x90
<4> [604.892617]_copy_to_user+0x1e/0x80
<4> [604.892619]perf_read+0x200/0x2b0
<4> [604.892621]vfs_read+0x96/0x160
<4> [604.892622]ksys_read+0x9f/0xe0
<4> [604.892623]do_syscall_64+0x4f/0x220
<4> [604.892624]entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [604.892625]
-> #3 (&cpuctx_mutex){+.+.}:
<4> [604.892626]__mutex_lock+0x9a/0x9c0
<4> [604.892627]perf_event_init_cpu+0xa4/0x140
<4> [604.892629]perf_event_init+0x19d/0x1cd
<4> [604.892630]start_kernel+0x362/0x4e4
<4> [604.892631]secondary_startup_64+0xa4/0xb0
<4> [604.892631]
-> #2 (pmus_lock){+.+.}:
<4> [604.892633]__mutex_lock+0x9a/0x9c0
<4> [604.892633]perf_event_init_cpu+0x6b/0x140
<4> [604.892635]cpuhp_invoke_callback+0x9b/0x9d0
<4> [604.892636]_cpu_up+0xa2/0x140
<4> [604.892637]do_cpu_up+0x61/0xa0
<4> [604.892639]smp_init+0x57/0x96
<4> [604.892639]kernel_init_freeable+0x87/0x1dc
<4> [604.892640]kernel_init+0x5/0x100
<4> [604.892642]ret_from_fork+0x24/0x50
<4> [604.892642]
-> #1 (cpu_hotplug_lock.rw_sem){}:
<4> [604.892643]cpus_read_lock+0x34/0xd0
<4> [604.892644]rcu_barrier+0xaa/0x190
<4> [604.892645]kernel_init+0x21/0x100
<4> [604.892647]ret_from_fork+0x24/0x50
<4> [604.892647]
-> #0 (rcu_state.barrier_mutex){+.+.}:
<4> [604.892649]__lock_acquire+0x1328/0x15d0
<4> [604.892650]lock_acquire+0xa7/0x1c0
<4> [604.892651]__mutex_lock+0x9a/0x9c0
<4> [604.892652]rcu_barrier+0x23/0x190
<4> [604.892680]i915_gem_object_unbind+0x29d/0x3f0 [i915]
<4> [604.892707]i915_gem_object_pin_to_display_plane+0x141/0x270 [i915]
<4> [604.892737]intel_pin_and_fence_fb_obj+0xec/0x1f0 [i915]
<4> [604.892767]intel_plane_pin_fb+0x3f/0xd0 [i915]
<4> [604.892797]intel_prepare_plane_fb+0x13b/0x5c0 [i915]
<4> [604.892798]drm_atomic_helper_prepare_planes+0x85/0x110
<4> [604.892827]intel_atomic_commit+0xda/0x390 [i915]
<4> [604.892828]drm_atomic_helper_set_config+0x57/0xa0
<4> [604.892830]drm_mode_setcrtc+0x1c4/0x720
<4> [604.892830]drm_ioctl_kernel+0xb0/0xf0
<4> [604.892831]drm_ioctl+0x2e1/0x390
<4> [604.892833]ksys_ioctl+0x7b/0x90
<4> [604.892835]__x64_sys_ioctl+0x11/0x20
<4> [604.892835]do_syscall_64+0x4f/0x220
<4> [604.892836]entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [604.892837]

Signed-off-by: Maarten Lankhorst 
---
 kernel/events/core.c | 59 +++-
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 085d9263d595..8b95a6512e31 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4926,20 +4926,20 @@ static int __perf_read_group_add(struct perf_event 
*leader,
 }
 
 static int perf_read_group(struct perf_event *event,
-  u64 read_format, char __user *buf)
+  u64 read_format, char __user *buf,
+  u64 **values)
 {
struct perf_event *leader = event->group_leader, *child;
struc

[Intel-gfx] [PATCH 12/22] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin.

2020-03-30 Thread Maarten Lankhorst
As a preparation step for full object locking and wait/wound handling
during pin and object mapping, ensure that we always pass the ww context
in i915_gem_execbuffer.c to i915_vma_pin, use lockdep to ensure this
happens.

This also requires changing the order of eb_parse slightly, to ensure
we pass ww at a point where we could still handle -EDEADLK safely.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 125 ++
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |   4 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.c   |  65 +
 drivers/gpu/drm/i915/gt/intel_context.h   |  13 ++
 drivers/gpu/drm/i915/gt/intel_context_types.h |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|   2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +-
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_ring.c  |  10 +-
 drivers/gpu/drm/i915/gt/intel_ring.h  |   3 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  15 +--
 drivers/gpu/drm/i915/gt/intel_timeline.c  |  12 +-
 drivers/gpu/drm/i915/gt/intel_timeline.h  |   3 +-
 drivers/gpu/drm/i915/gt/mock_engine.c |   3 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |   4 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|   2 +-
 drivers/gpu/drm/i915/i915_drv.h   |  13 +-
 drivers/gpu/drm/i915/i915_gem.c   |  11 +-
 drivers/gpu/drm/i915/i915_vma.c   |  13 +-
 drivers/gpu/drm/i915/i915_vma.h   |  13 +-
 24 files changed, 207 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 2e2e5ce82dc2..a429e90956f5 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3441,7 +3441,7 @@ initial_plane_vma(struct drm_i915_private *i915,
if (IS_ERR(vma))
goto err_obj;
 
-   if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+   if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
goto err_obj;
 
if (i915_gem_object_is_tiled(obj) &&
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 062848951095..f5b01e70eb61 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1145,7 +1145,7 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
 
i915_gem_ww_ctx_init(&ww, true);
 retry:
-   err = intel_context_pin(ce);
+   err = intel_context_pin_ww(ce, &ww);
if (err)
goto err;
 
@@ -1238,7 +1238,7 @@ static int pin_ppgtt_update(struct intel_context *ce, 
struct i915_gem_ww_ctx *ww
 
if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
/* ppGTT is not part of the legacy context image */
-   return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+   return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 05f6e1a94977..0a2121429913 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -394,7 +394,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
 
-   if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+   if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags)))
return false;
 
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
@@ -535,7 +535,7 @@ static inline int use_cpu_reloc(const struct reloc_cache 
*cache,
obj->cache_level != I915_CACHE_NONE);
 }
 
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
+static int eb_reserve_vma(struct i915_execbuffer *eb,
  struct eb_vma *ev,
  u64 pin_flags)
 {
@@ -569,7 +569,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
return err;
}
 
-   err = i915_vma_pin(vma,
+   err = i915_vma_pin_ww(vma, &eb->ww,
   entry->pad_to_size, entry->alignment,
   pin_flags);
if (err)
@@ -1060,9 +1060,10 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 }
 
 static void *reloc_iomap(struct drm_i915_gem_object *obj,
-struct reloc_cache *cache,
+struct i915_execbuffer *eb,
 unsigned long page)
 {
+   struct reloc_cache *cache = &eb-

[Intel-gfx] [PATCH 16/22] drm/i915: Dirty hack to fix selftests locking inversion

2020-03-30 Thread Maarten Lankhorst
Some i915 selftests still use i915_vma_lock() as the inner lock, and
intel_context_create_request()'s intel_timeline->mutex as the outer lock.
Fortunately for selftests this is not an issue; they should be fixed,
but we can move ahead and make lockdep clean now.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_context.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 113d0bda1bcf..5c7acddf9651 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -460,6 +460,18 @@ struct i915_request *intel_context_create_request(struct 
intel_context *ce)
rq = i915_request_create(ce);
intel_context_unpin(ce);
 
+   if (IS_ERR(rq))
+   return rq;
+
+   /*
+* timeline->mutex should be the inner lock, but is used as outer lock.
+* Hack around this to shut up lockdep in selftests..
+*/
+   lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
+   mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
+   mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, 
_RET_IP_);
+   rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+
return rq;
 }
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 01/22] Revert "drm/i915/gem: Drop relocation slowpath"

2020-03-30 Thread Maarten Lankhorst
This reverts commit 7dc8f1143778 ("drm/i915/gem: Drop relocation
slowpath"). We need the slowpath relocation for taking ww-mutex
inside the page fault handler, and we will take this mutex when
pinning all objects.

Cc: Chris Wilson 
Cc: Matthew Auld 
Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 239 +-
 1 file changed, 235 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f347e595a773..347c929b508d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1480,7 +1480,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, 
struct eb_vma *ev)
 * we would try to acquire the struct mutex again. Obviously
 * this is bad and so lockdep complains vehemently.
 */
-   copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
+   pagefault_disable();
+   copied = __copy_from_user_inatomic(r, urelocs, count * 
sizeof(r[0]));
+   pagefault_enable();
if (unlikely(copied)) {
remain = -EFAULT;
goto out;
@@ -1530,6 +1532,236 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, 
struct eb_vma *ev)
return remain;
 }
 
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
+{
+   const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+   struct drm_i915_gem_relocation_entry *relocs =
+   u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+   unsigned int i;
+   int err;
+
+   for (i = 0; i < entry->relocation_count; i++) {
+   u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
+
+   if ((s64)offset < 0) {
+   err = (int)offset;
+   goto err;
+   }
+   }
+   err = 0;
+err:
+   reloc_cache_reset(&eb->reloc_cache);
+   return err;
+}
+
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+{
+   const char __user *addr, *end;
+   unsigned long size;
+   char __maybe_unused c;
+
+   size = entry->relocation_count;
+   if (size == 0)
+   return 0;
+
+   if (size > N_RELOC(ULONG_MAX))
+   return -EINVAL;
+
+   addr = u64_to_user_ptr(entry->relocs_ptr);
+   size *= sizeof(struct drm_i915_gem_relocation_entry);
+   if (!access_ok(addr, size))
+   return -EFAULT;
+
+   end = addr + size;
+   for (; addr < end; addr += PAGE_SIZE) {
+   int err = __get_user(c, addr);
+   if (err)
+   return err;
+   }
+   return __get_user(c, end - 1);
+}
+
+static int eb_copy_relocations(const struct i915_execbuffer *eb)
+{
+   struct drm_i915_gem_relocation_entry *relocs;
+   const unsigned int count = eb->buffer_count;
+   unsigned int i;
+   int err;
+
+   for (i = 0; i < count; i++) {
+   const unsigned int nreloc = eb->exec[i].relocation_count;
+   struct drm_i915_gem_relocation_entry __user *urelocs;
+   unsigned long size;
+   unsigned long copied;
+
+   if (nreloc == 0)
+   continue;
+
+   err = check_relocations(&eb->exec[i]);
+   if (err)
+   goto err;
+
+   urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+   size = nreloc * sizeof(*relocs);
+
+   relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+   if (!relocs) {
+   err = -ENOMEM;
+   goto err;
+   }
+
+   /* copy_from_user is limited to < 4GiB */
+   copied = 0;
+   do {
+   unsigned int len =
+   min_t(u64, BIT_ULL(31), size - copied);
+
+   if (__copy_from_user((char *)relocs + copied,
+(char __user *)urelocs + copied,
+len))
+   goto end;
+
+   copied += len;
+   } while (copied < size);
+
+   /*
+* As we do not update the known relocation offsets after
+* relocating (due to the complexities in lock handling),
+* we need to mark them as invalid now so that we force the
+* relocation processing next time. Just in case the target
+* object is evicted and then rebound into its old
+* presumed_offset before the next execbuffer - if that
+* happened we would make the mistake of assuming that the
+* relocations were valid.
+*/
+   if (!user_access_begin

[Intel-gfx] [PATCH 10/22] drm/i915: Pin engine before pinning all objects, v3.

2020-03-30 Thread Maarten Lankhorst
We want to lock all gem objects, including the engine context objects,
rework the throttling to ensure that we can do this. Now we only throttle
once, but can take eb_pin_engine while acquiring objects. This means we
will have to drop the lock to wait. If we don't have to throttle we can
still take the fastpath, if not we will take the slowpath and wait for
the throttle request while unlocked.

The engine has to be pinned as first step, otherwise gpu relocations
won't work.

Changes since v1:
- Only need to get a throttled request in the fastpath, no need for
  a global flag any more.
- Always free the waited request correctly.
Changes since v2:
- Use intel_engine_pm_get()/put() to keep engine pool alive during
  EDEADLK handling.

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 174 --
 1 file changed, 118 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 74146623b8ae..05f6e1a94977 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -16,6 +16,7 @@
 #include "gem/i915_gem_ioctls.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pool.h"
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_ring.h"
@@ -55,7 +56,8 @@ enum {
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | 
__EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC   BIT(31)
-#define __EXEC_INTERNAL_FLAGS  (~0u << 31)
+#define __EXEC_ENGINE_PINNED   BIT(30)
+#define __EXEC_INTERNAL_FLAGS  (~0u << 30)
 #define UPDATE PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -288,6 +290,9 @@ struct i915_execbuffer {
 };
 
 static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+ bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
@@ -896,7 +901,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long 
handle)
}
 }
 
-static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
 {
const unsigned int count = eb->buffer_count;
unsigned int i;
@@ -913,6 +918,8 @@ static void eb_release_vmas(const struct i915_execbuffer 
*eb, bool final)
if (final)
i915_vma_put(vma);
}
+
+   eb_unpin_engine(eb);
 }
 
 static void eb_destroy(const struct i915_execbuffer *eb)
@@ -1713,7 +1720,8 @@ static int eb_prefault_relocations(const struct 
i915_execbuffer *eb)
return 0;
 }
 
-static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+  struct i915_request *rq)
 {
bool have_copy = false;
struct eb_vma *ev;
@@ -1729,6 +1737,21 @@ static noinline int eb_relocate_parse_slow(struct 
i915_execbuffer *eb)
eb_release_vmas(eb, false);
i915_gem_ww_ctx_fini(&eb->ww);
 
+   if (rq) {
+   /* nonblocking is always false */
+   if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+   i915_request_put(rq);
+   rq = NULL;
+
+   err = -EINTR;
+   goto err_relock;
+   }
+
+   i915_request_put(rq);
+   rq = NULL;
+   }
+
/*
 * We take 3 passes through the slowpatch.
 *
@@ -1752,14 +1775,25 @@ static noinline int eb_relocate_parse_slow(struct 
i915_execbuffer *eb)
err = 0;
}
 
-   flush_workqueue(eb->i915->mm.userptr_wq);
+   if (!err)
+   flush_workqueue(eb->i915->mm.userptr_wq);
 
+err_relock:
i915_gem_ww_ctx_init(&eb->ww, true);
if (err)
goto out;
 
/* reacquire the objects */
 repeat_validate:
+   rq = eb_pin_engine(eb, false);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto err;
+   }
+
+   /* We didn't throttle, should be NULL */
+   GEM_WARN_ON(rq);
+
err = eb_validate_vmas(eb);
if (err)
goto err;
@@ -1823,14 +1857,47 @@ static noinline int eb_relocate_parse_slow(struct 
i915_execbuffer *eb)
}
}
 
+   if (rq)
+   i915_request_put(rq);
+
return err;
 }
 
 static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
int err;
+   struct i915_request *rq = NULL;
+   bool throttle = true;
 
 retry:
+   rq = eb_pin_engine(eb, throttle);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+ 

[Intel-gfx] [PATCH 08/22] drm/i915: Add ww context handling to context_barrier_task

2020-03-30 Thread Maarten Lankhorst
This is required if we want to pass a ww context in intel_context_pin
and gen6_ppgtt_pin().

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 55 ++-
 .../drm/i915/gem/selftests/i915_gem_context.c | 22 +++-
 2 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ac2b88ca00ce..062848951095 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1097,6 +1097,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t 
context_barrier_inject_fault);
 static int context_barrier_task(struct i915_gem_context *ctx,
intel_engine_mask_t engines,
bool (*skip)(struct intel_context *ce, void 
*data),
+   int (*pin)(struct intel_context *ce, struct 
i915_gem_ww_ctx *ww, void *data),
int (*emit)(struct i915_request *rq, void 
*data),
void (*task)(void *data),
void *data)
@@ -1104,6 +1105,7 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
struct context_barrier_task *cb;
struct i915_gem_engines_iter it;
struct i915_gem_engines *e;
+   struct i915_gem_ww_ctx ww;
struct intel_context *ce;
int err = 0;
 
@@ -1141,10 +1143,21 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
if (skip && skip(ce, data))
continue;
 
-   rq = intel_context_create_request(ce);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   err = intel_context_pin(ce);
+   if (err)
+   goto err;
+
+   if (pin)
+   err = pin(ce, &ww, data);
+   if (err)
+   goto err_unpin;
+
+   rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   break;
+   goto err_unpin;
}
 
err = 0;
@@ -1154,6 +1167,16 @@ static int context_barrier_task(struct i915_gem_context 
*ctx,
err = i915_active_add_request(&cb->base, rq);
 
i915_request_add(rq);
+err_unpin:
+   intel_context_unpin(ce);
+err:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+
if (err)
break;
}
@@ -1209,6 +1232,17 @@ static void set_ppgtt_barrier(void *data)
i915_vm_close(old);
 }
 
+static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx 
*ww, void *data)
+{
+   struct i915_address_space *vm = ce->vm;
+
+   if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
+   /* ppGTT is not part of the legacy context image */
+   return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+
+   return 0;
+}
+
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
struct i915_address_space *vm = rq->context->vm;
@@ -1265,20 +1299,10 @@ static int emit_ppgtt_update(struct i915_request *rq, 
void *data)
 
 static bool skip_ppgtt_update(struct intel_context *ce, void *data)
 {
-   if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
-   return true;
-
if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
-   return false;
-
-   if (!atomic_read(&ce->pin_count))
-   return true;
-
-   /* ppGTT is not part of the legacy context image */
-   if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
-   return true;
-
-   return false;
+   return !ce->state;
+   else
+   return !atomic_read(&ce->pin_count);
 }
 
 static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1329,6 +1353,7 @@ static int set_ppgtt(struct drm_i915_file_private 
*file_priv,
 */
err = context_barrier_task(ctx, ALL_ENGINES,
   skip_ppgtt_update,
+  pin_ppgtt_update,
   emit_ppgtt_update,
   set_ppgtt_barrier,
   old);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 42edbd0f3c14..78356031ec61 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1903,8 +1903,8 @@ static int mock_context_barrier(void *arg)
return -ENOMEM;
 
counter = 0;
-   err = context_barrier_task(ctx, 0,
-

[Intel-gfx] [PATCH 14/22] drm/i915: Kill last user of intel_context_create_request outside of selftests

2020-03-30 Thread Maarten Lankhorst
Instead of using intel_context_create_request(), use intel_context_pin()
and i915_request_create() directly.

Now all those calls are gone outside of selftests. :)

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 ++---
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index e96cc7fa0936..d866f5903554 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1744,6 +1744,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
const struct i915_wa *wa;
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
unsigned int i;
u32 *results;
int err;
@@ -1756,29 +1757,34 @@ static int engine_wa_list_verify(struct intel_context 
*ce,
return PTR_ERR(vma);
 
intel_engine_pm_get(ce->engine);
-   rq = intel_context_create_request(ce);
-   intel_engine_pm_put(ce->engine);
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(vma->obj, &ww);
+   if (err == 0)
+   err = intel_context_pin_ww(ce, &ww);
+   if (err)
+   goto err_pm;
+
+   rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   goto err_vma;
+   goto err_unpin;
}
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-   i915_vma_unlock(vma);
-   if (err) {
-   i915_request_add(rq);
-   goto err_vma;
-   }
-
-   err = wa_list_srm(rq, wal, vma);
-   if (err)
-   goto err_vma;
+   if (err == 0)
+   err = wa_list_srm(rq, wal, vma);
 
i915_request_get(rq);
+   if (err)
+   i915_request_set_error_once(rq, err);
i915_request_add(rq);
+
+   if (err)
+   goto err_rq;
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
goto err_rq;
@@ -1803,7 +1809,16 @@ static int engine_wa_list_verify(struct intel_context 
*ce,
 
 err_rq:
i915_request_put(rq);
-err_vma:
+err_unpin:
+   intel_context_unpin(ce);
+err_pm:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+   intel_engine_pm_put(ce->engine);
i915_vma_unpin(vma);
i915_vma_put(vma);
return err;
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 18/22] drm/i915: Use ww pinning for intel_context_create_request()

2020-03-30 Thread Maarten Lankhorst
We want to get rid of intel_context_pin(), convert
intel_context_create_request() first. :)

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_context.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 5c7acddf9651..f70135685552 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -450,15 +450,25 @@ int intel_context_prepare_remote_request(struct 
intel_context *ce,
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
 {
+   struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err;
 
-   err = intel_context_pin(ce);
-   if (unlikely(err))
-   return ERR_PTR(err);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   err = intel_context_pin_ww(ce, &ww);
+   if (!err) {
+   rq = i915_request_create(ce);
+   intel_context_unpin(ce);
+   } else if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   } else {
+   rq = ERR_PTR(err);
+   }
 
-   rq = i915_request_create(ce);
-   intel_context_unpin(ce);
+   i915_gem_ww_ctx_fini(&ww);
 
if (IS_ERR(rq))
return rq;
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 20/22] drm/i915: Add ww locking to vm_fault_gtt

2020-03-30 Thread Maarten Lankhorst
Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 51 +++-
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index b39c24dae64e..e35e8d0b6938 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct intel_runtime_pm *rpm = &i915->runtime_pm;
struct i915_ggtt *ggtt = &i915->ggtt;
bool write = area->vm_flags & VM_WRITE;
+   struct i915_gem_ww_ctx ww;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
 
-   /* Sanity check that we allow writing into this object */
-   if (i915_gem_object_is_readonly(obj) && write)
-   return VM_FAULT_SIGBUS;
-
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
trace_i915_gem_object_fault(obj, page_offset, true, write);
 
-   ret = i915_gem_object_pin_pages(obj);
+   wakeref = intel_runtime_pm_get(rpm);
+
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   ret = i915_gem_object_lock(obj, &ww);
if (ret)
-   goto err;
+   goto err_rpm;
 
-   wakeref = intel_runtime_pm_get(rpm);
+   /* Sanity check that we allow writing into this object */
+   if (i915_gem_object_is_readonly(obj) && write) {
+   ret = -EFAULT;
+   goto err_rpm;
+   }
 
-   ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+   ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_rpm;
 
+   ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+   if (ret)
+   goto err_pages;
+
/* Now pin it into the GTT as needed */
-   vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-  PIN_MAPPABLE |
-  PIN_NONBLOCK /* NOWARN */ |
-  PIN_NOEVICT);
-   if (IS_ERR(vma)) {
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+   if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
/* Use a partial view if it is bigger than available space */
struct i915_ggtt_view view =
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
@@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 * all hope that the hardware is able to track future writes.
 */
 
-   vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-   if (IS_ERR(vma)) {
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+   if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
flags = PIN_MAPPABLE;
view.type = I915_GGTT_VIEW_PARTIAL;
-   vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 
0, flags);
}
 
/* The entire mappable GGTT is pinned? Unexpected! */
@@ -389,10 +398,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
__i915_vma_unpin(vma);
 err_reset:
intel_gt_reset_unlock(ggtt->vm.gt, srcu);
+err_pages:
+   i915_gem_object_unpin_pages(obj);
 err_rpm:
+   if (ret == -EDEADLK) {
+   ret = i915_gem_ww_ctx_backoff(&ww);
+   if (!ret)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
intel_runtime_pm_put(rpm, wakeref);
-   i915_gem_object_unpin_pages(obj);
-err:
return i915_error_to_vmf_fault(ret);
 }
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 22/22] drm/i915: Ensure we hold the pin mutex

2020-03-30 Thread Maarten Lankhorst
Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_renderstate.c | 2 +-
 drivers/gpu/drm/i915/i915_vma.c | 9 -
 drivers/gpu/drm/i915/i915_vma.h | 1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c 
b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index c39d73142950..df42ba06711a 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -207,7 +207,7 @@ int intel_renderstate_init(struct intel_renderstate *so,
if (err)
goto err_context;
 
-   err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+   err = i915_vma_pin_ww(so->vma, &so->ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err_context;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e3d82be503dc..e22f287ba382 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -892,6 +892,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct 
i915_gem_ww_ctx *ww,
 #ifdef CONFIG_PROVE_LOCKING
if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex))
WARN_ON(!ww);
+   if (debug_locks && ww && vma->resv)
+   assert_vma_held(vma);
 #endif
 
BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
@@ -1032,8 +1034,13 @@ int i915_ggtt_pin(struct i915_vma *vma, struct 
i915_gem_ww_ctx *ww,
 
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 
+   WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
+
do {
-   err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
+   if (ww)
+   err = i915_vma_pin_ww(vma, ww, 0, align, flags | 
PIN_GLOBAL);
+   else
+   err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
if (err != -ENOSPC) {
if (!err) {
err = i915_vma_wait_for_bind(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index da577729931f..b730f86e54f4 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -242,6 +242,7 @@ i915_vma_pin_ww(struct i915_vma *vma, struct 
i915_gem_ww_ctx *ww,
 static inline int __must_check
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
+   WARN_ON_ONCE(vma->resv && dma_resv_held(vma->resv));
return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
 }
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 05/22] drm/i915: Parse command buffer earlier in eb_relocate(slow)

2020-03-30 Thread Maarten Lankhorst
We want to introduce backoff logic, but we need to lock the
pool object as well for command parsing. Because of this, we
will need backoff logic for the engine pool obj. Move the batch
validation up slightly to eb_lookup_vmas, and put the actual command
parsing in a separate function which can get called from both the
execbuf relocation fastpath and slowpath.

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 68 ++-
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cc2be6964037..55b06d7a1329 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -285,6 +285,8 @@ struct i915_execbuffer {
struct hlist_head *buckets; /** ht for relocation handles */
 };
 
+static int eb_parse(struct i915_execbuffer *eb);
+
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
return intel_engine_requires_cmd_parser(eb->engine) ||
@@ -814,6 +816,7 @@ static struct i915_vma *eb_lookup_vma(struct 
i915_execbuffer *eb, u32 handle)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
+   struct drm_i915_private *i915 = eb->i915;
unsigned int batch = eb_batch_index(eb);
unsigned int i;
int err = 0;
@@ -827,18 +830,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
vma = eb_lookup_vma(eb, eb->exec[i].handle);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
-   break;
+   goto err;
}
 
err = eb_validate_vma(eb, &eb->exec[i], vma);
if (unlikely(err)) {
i915_vma_put(vma);
-   break;
+   goto err;
}
 
eb_add_vma(eb, i, batch, vma);
}
 
+   if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
+   drm_dbg(&i915->drm,
+   "Attempting to use self-modifying batch buffer\n");
+   return -EINVAL;
+   }
+
+   if (range_overflows_t(u64,
+ eb->batch_start_offset, eb->batch_len,
+ eb->batch->vma->size)) {
+   drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+   return -EINVAL;
+   }
+
+   if (eb->batch_len == 0)
+   eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+
+   return 0;
+
+err:
eb->vma[i].vma = NULL;
return err;
 }
@@ -1688,7 +1710,7 @@ static int eb_prefault_relocations(const struct 
i915_execbuffer *eb)
return 0;
 }
 
-static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
 {
bool have_copy = false;
struct eb_vma *ev;
@@ -1739,6 +1761,11 @@ static noinline int eb_relocate_slow(struct 
i915_execbuffer *eb)
}
}
 
+   /* as last step, parse the command buffer */
+   err = eb_parse(eb);
+   if (err)
+   goto err;
+
/*
 * Leave the user relocations as are, this is the painfully slow path,
 * and we want to avoid the complication of dropping the lock whilst
@@ -1771,7 +1798,7 @@ static noinline int eb_relocate_slow(struct 
i915_execbuffer *eb)
return err;
 }
 
-static int eb_relocate(struct i915_execbuffer *eb)
+static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
int err;
 
@@ -1791,11 +1818,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
 
list_for_each_entry(ev, &eb->relocs, reloc_link) {
if (eb_relocate_vma(eb, ev))
-   return eb_relocate_slow(eb);
+   return eb_relocate_parse_slow(eb);
}
}
 
-   return 0;
+   return eb_parse(eb);
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -2731,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_context;
 
-   err = eb_relocate(&eb);
+   err = eb_relocate_parse(&eb);
if (err) {
/*
 * If the user expects the execobject.offset and
@@ -2744,33 +2771,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
 
-   if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
-   drm_dbg(&i915->drm,
-   "Attempting to use self-modifying batch buffer\n");
-   err = -EINVAL;
-   goto err_vma;
-   }
-
-   if (range_overflows_t(u64,
- eb.batch_start_offset, eb.batch_len,
- eb.batch->vma->size)) {
-   drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
-   

[Intel-gfx] [PATCH 15/22] drm/i915: Convert i915_perf to ww locking as well

2020-03-30 Thread Maarten Lankhorst
We have the ordering of timeline->mutex vs resv_lock wrong;
convert the i915_vma_pin and intel_context_pin calls as well to
future-proof this.

We may need to do future changes to do this more transaction-like,
and only get down to a single i915_gem_ww_ctx, but for now this
should work.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/i915_perf.c | 57 +++-
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c74ebac50015..718ea9a743c7 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1192,24 +1192,39 @@ static struct intel_context *oa_pin_context(struct 
i915_perf_stream *stream)
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx = stream->ctx;
struct intel_context *ce;
-   int err;
+   struct i915_gem_ww_ctx ww;
+   int err = -ENODEV;
 
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
if (ce->engine != stream->engine) /* first match! */
continue;
 
-   /*
-* As the ID is the gtt offset of the context's vma we
-* pin the vma to ensure the ID remains fixed.
-*/
-   err = intel_context_pin(ce);
-   if (err == 0) {
-   stream->pinned_ctx = ce;
-   break;
-   }
+   err = 0;
+   break;
}
i915_gem_context_unlock_engines(ctx);
 
+   if (err)
+   return ERR_PTR(err);
+
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   /*
+* As the ID is the gtt offset of the context's vma we
+* pin the vma to ensure the ID remains fixed.
+*/
+   err = intel_context_pin_ww(ce, &ww);
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+
+   if (err)
+   return ERR_PTR(err);
+
+   stream->pinned_ctx = ce;
return stream->pinned_ctx;
 }
 
@@ -1923,15 +1938,22 @@ emit_oa_config(struct i915_perf_stream *stream,
 {
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
int err;
 
vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
return ERR_CAST(vma);
 
-   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   err = i915_gem_object_lock(vma->obj, &ww);
if (err)
-   goto err_vma_put;
+   goto err;
+
+   err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
+   if (err)
+   goto err;
 
intel_engine_pm_get(ce->engine);
rq = i915_request_create(ce);
@@ -1941,11 +1963,9 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_vma_unpin;
}
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
-   i915_vma_unlock(vma);
if (err)
goto err_add_request;
 
@@ -1960,7 +1980,14 @@ emit_oa_config(struct i915_perf_stream *stream,
i915_request_add(rq);
 err_vma_unpin:
i915_vma_unpin(vma);
-err_vma_put:
+err:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+
+   i915_gem_ww_ctx_fini(&ww);
i915_vma_put(vma);
return err ? ERR_PTR(err) : rq;
 }
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 06/22] drm/i915: Use per object locking in execbuf, v7.

2020-03-30 Thread Maarten Lankhorst
Now that we changed execbuf submission slightly to allow us to do all
pinning in one place, we can now simply add ww versions on top of
struct_mutex. All we have to do is a separate path for -EDEADLK
handling, which needs to unpin all gem bo's before dropping the lock,
then starting over.

This finally allows us to do parallel submission, but because not
all of the pinning code uses the ww ctx yet, we cannot completely
drop struct_mutex yet.

Changes since v1:
- Keep struct_mutex for now. :(
Changes since v2:
- Make sure we always lock the ww context in slowpath.
Changes since v3:
- Don't call __eb_unreserve_vma in eb_move_to_gpu now; this can be
  done on normal unlock path.
- Unconditionally release vmas and context.
Changes since v4:
- Rebased on top of struct_mutex reduction.
Changes since v5:
- Remove training wheels.
Changes since v6:
- Fix accidentally broken -ENOSPC handling.

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 273 ++
 1 file changed, 148 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 55b06d7a1329..a337f3054ce3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -249,6 +249,8 @@ struct i915_execbuffer {
/** list of vma that have execobj.relocation_count */
struct list_head relocs;
 
+   struct i915_gem_ww_ctx ww;
+
/**
 * Track the most recently used object for relocations, as we
 * frequently have to perform multiple relocations within the same
@@ -404,24 +406,18 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
 }
 
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
-   GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
-   if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
-   __i915_vma_unpin_fence(vma);
-
-   __i915_vma_unpin(vma);
-}
-
 static inline void
 eb_unreserve_vma(struct eb_vma *ev)
 {
if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
return;
 
-   __eb_unreserve_vma(ev->vma, ev->flags);
ev->flags &= ~__EXEC_OBJECT_RESERVED;
+
+   if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+   __i915_vma_unpin_fence(ev->vma);
+
+   __i915_vma_unpin(ev->vma);
 }
 
 static int
@@ -515,16 +511,6 @@ eb_add_vma(struct i915_execbuffer *eb,
 
eb->batch = ev;
}
-
-   if (eb_pin_vma(eb, entry, ev)) {
-   if (entry->offset != vma->node.start) {
-   entry->offset = vma->node.start | UPDATE;
-   eb->args->flags |= __EXEC_HAS_RELOC;
-   }
-   } else {
-   eb_unreserve_vma(ev);
-   list_add_tail(&ev->bind_link, &eb->unbound);
-   }
 }
 
 static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -628,10 +614,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 * This avoid unnecessary unbinding of later objects in order to make
 * room for the earlier objects *unless* we need to defragment.
 */
-
-   if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
-   return -EINTR;
-
pass = 0;
do {
list_for_each_entry(ev, &eb->unbound, bind_link) {
@@ -639,8 +621,8 @@ static int eb_reserve(struct i915_execbuffer *eb)
if (err)
break;
}
-   if (!(err == -ENOSPC || err == -EAGAIN))
-   break;
+   if (err != -ENOSPC)
+   return err;
 
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
@@ -670,13 +652,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
}
list_splice_tail(&last, &eb->unbound);
 
-   if (err == -EAGAIN) {
-   mutex_unlock(&eb->i915->drm.struct_mutex);
-   flush_workqueue(eb->i915->mm.userptr_wq);
-   mutex_lock(&eb->i915->drm.struct_mutex);
-   continue;
-   }
-
switch (pass++) {
case 0:
break;
@@ -687,20 +662,15 @@ static int eb_reserve(struct i915_execbuffer *eb)
err = i915_gem_evict_vm(eb->context->vm);
mutex_unlock(&eb->context->vm->mutex);
if (err)
-   goto unlock;
+   return err;
break;
 
default:
-   err = -ENOSPC;
-   goto unlock;
+   return -ENOSPC;
}
 
pin_flags = PIN_USER;
} while (1);
-
-unlock:
-   mutex_unlock(&eb-

[Intel-gfx] [PATCH 19/22] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2.

2020-03-30 Thread Maarten Lankhorst
Make sure vma_lock is not used as inner lock when kernel context is used,
and add ww handling where appropriate.

Signed-off-by: Maarten Lankhorst 
---
 .../i915/gem/selftests/i915_gem_coherency.c   | 26 ++--
 .../drm/i915/gem/selftests/i915_gem_mman.c| 41 ++-
 drivers/gpu/drm/i915/selftests/i915_request.c | 18 +---
 3 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 99f8466a108a..d93b7d9ad174 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -199,25 +199,25 @@ static int gpu_set(struct context *ctx, unsigned long 
offset, u32 v)
 
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
-   i915_gem_object_unlock(ctx->obj);
if (err)
-   return err;
+   goto out_unlock;
 
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
-   if (IS_ERR(vma))
-   return PTR_ERR(vma);
+   if (IS_ERR(vma)) {
+   err = PTR_ERR(vma);
+   goto out_unlock;
+   }
 
rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
-   i915_vma_unpin(vma);
-   return PTR_ERR(rq);
+   err = PTR_ERR(rq);
+   goto out_unpin;
}
 
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) {
-   i915_request_add(rq);
-   i915_vma_unpin(vma);
-   return PTR_ERR(cs);
+   err = PTR_ERR(cs);
+   goto out_rq;
}
 
if (INTEL_GEN(ctx->engine->i915) >= 8) {
@@ -238,14 +238,16 @@ static int gpu_set(struct context *ctx, unsigned long 
offset, u32 v)
}
intel_ring_advance(rq, cs);
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-   i915_vma_unlock(vma);
-   i915_vma_unpin(vma);
 
+out_rq:
i915_request_add(rq);
+out_unpin:
+   i915_vma_unpin(vma);
+out_unlock:
+   i915_gem_object_unlock(ctx->obj);
 
return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index a67d9e59fe12..d4aaf603a78f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
int err;
 
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
 
-   err = i915_vma_pin(vma, 0, 0, PIN_USER);
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(obj, &ww);
+   if (!err)
+   err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
-   return err;
+   goto err;
 
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
-   i915_vma_unpin(vma);
-   return PTR_ERR(rq);
+   err = PTR_ERR(rq);
+   goto err_unpin;
}
 
-   i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq,
  EXEC_OBJECT_WRITE);
-   i915_vma_unlock(vma);
 
i915_request_add(rq);
+err_unpin:
i915_vma_unpin(vma);
+err:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
if (err)
return err;
}
@@ -1000,6 +1011,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+   struct i915_gem_ww_ctx ww;
 
vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
if (IS_ERR(vma)) {
@@ -1007,9 +1019,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unmap;
}
 
-   err = i915_vma_pin(vma, 0, 0, PIN_USER);
+   i915_gem

[Intel-gfx] [PATCH 04/22] drm/i915: Remove locking from i915_gem_object_prepare_read/write

2020-03-30 Thread Maarten Lankhorst
Execbuffer submission will perform its own WW locking, and we
cannot rely on the implicit lock there.

This also makes it clear that the GVT code will get a lockdep splat when
multiple batchbuffer shadows need to be performed in the same instance,
fix that up.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 20 ++-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 13 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  1 -
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  5 -
 .../i915/gem/selftests/i915_gem_coherency.c   | 14 +
 .../drm/i915/gem/selftests/i915_gem_context.c | 12 ---
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |  5 -
 drivers/gpu/drm/i915/gvt/cmd_parser.c |  9 -
 drivers/gpu/drm/i915/i915_gem.c   | 20 +--
 9 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index f4602faa8db9..e9d3b587f562 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -581,19 +581,17 @@ int i915_gem_object_prepare_read(struct 
drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
 
-   ret = i915_gem_object_lock_interruptible(obj, NULL);
-   if (ret)
-   return ret;
+   assert_object_held(obj);
 
ret = i915_gem_object_wait(obj,
   I915_WAIT_INTERRUPTIBLE,
   MAX_SCHEDULE_TIMEOUT);
if (ret)
-   goto err_unlock;
+   return ret;
 
ret = i915_gem_object_pin_pages(obj);
if (ret)
-   goto err_unlock;
+   return ret;
 
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -621,8 +619,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object 
*obj,
 
 err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
-   i915_gem_object_unlock(obj);
return ret;
 }
 
@@ -635,20 +631,18 @@ int i915_gem_object_prepare_write(struct 
drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
 
-   ret = i915_gem_object_lock_interruptible(obj, NULL);
-   if (ret)
-   return ret;
+   assert_object_held(obj);
 
ret = i915_gem_object_wait(obj,
   I915_WAIT_INTERRUPTIBLE |
   I915_WAIT_ALL,
   MAX_SCHEDULE_TIMEOUT);
if (ret)
-   goto err_unlock;
+   return ret;
 
ret = i915_gem_object_pin_pages(obj);
if (ret)
-   goto err_unlock;
+   return ret;
 
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -685,7 +679,5 @@ int i915_gem_object_prepare_write(struct 
drm_i915_gem_object *obj,
 
 err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
-   i915_gem_object_unlock(obj);
return ret;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index df931e84173c..cc2be6964037 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -960,11 +960,14 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 
vaddr = unmask_page(cache->vaddr);
if (cache->vaddr & KMAP) {
+   struct drm_i915_gem_object *obj =
+   (struct drm_i915_gem_object *)cache->node.mm;
if (cache->vaddr & CLFLUSH_AFTER)
mb();
 
kunmap_atomic(vaddr);
-   i915_gem_object_finish_access((struct drm_i915_gem_object 
*)cache->node.mm);
+   i915_gem_object_finish_access(obj);
+   i915_gem_object_unlock(obj);
} else {
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 
@@ -999,10 +1002,16 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
unsigned int flushes;
int err;
 
-   err = i915_gem_object_prepare_write(obj, &flushes);
+   err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
return ERR_PTR(err);
 
+   err = i915_gem_object_prepare_write(obj, &flushes);
+   if (err) {
+   i915_gem_object_unlock(obj);
+   return ERR_PTR(err);
+   }
+
BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 5103067269b0..11b8e27350

[Intel-gfx] [PATCH 13/22] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2.

2020-03-30 Thread Maarten Lankhorst
This is the last part outside of selftests that still doesn't use the
correct lock ordering of timeline->mutex vs resv_lock.

With gem fixed, there are a few places that still get locking wrong:
- gvt/scheduler.c
- i915_perf.c
- Most if not all selftests.

Changes since v1:
- Add intel_engine_pm_get/put() calls to fix use-after-free when using
  intel_engine_get_pool().

Signed-off-by: Maarten Lankhorst 
---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c|  80 +++--
 .../gpu/drm/i915/gem/i915_gem_object_blt.c| 156 +++---
 .../gpu/drm/i915/gem/i915_gem_object_blt.h|   3 +
 3 files changed, 165 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 5d94a77f9bdd..10df576e785f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -157,6 +157,7 @@ static void clear_pages_worker(struct work_struct *work)
struct clear_pages_work *w = container_of(work, typeof(*w), work);
struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
struct i915_vma *vma = w->sleeve->vma;
+   struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
int err = w->dma.error;
@@ -172,17 +173,20 @@ static void clear_pages_worker(struct work_struct *work)
obj->read_domains = I915_GEM_GPU_DOMAINS;
obj->write_domain = 0;
 
-   err = i915_vma_pin(vma, 0, 0, PIN_USER);
-   if (unlikely(err))
+   i915_gem_ww_ctx_init(&ww, false);
+   intel_engine_pm_get(w->ce->engine);
+retry:
+   err = intel_context_pin_ww(w->ce, &ww);
+   if (err)
goto out_signal;
 
-   batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
+   batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
-   goto out_unpin;
+   goto out_ctx;
}
 
-   rq = intel_context_create_request(w->ce);
+   rq = i915_request_create(w->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -224,9 +228,19 @@ static void clear_pages_worker(struct work_struct *work)
i915_request_add(rq);
 out_batch:
intel_emit_vma_release(w->ce, batch);
-out_unpin:
-   i915_vma_unpin(vma);
+out_ctx:
+   intel_context_unpin(w->ce);
 out_signal:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+
+   i915_vma_unpin(w->sleeve->vma);
+   intel_engine_pm_put(w->ce->engine);
+
if (unlikely(err)) {
dma_fence_set_error(&w->dma, err);
dma_fence_signal(&w->dma);
@@ -234,6 +248,45 @@ static void clear_pages_worker(struct work_struct *work)
}
 }
 
+static int pin_wait_clear_pages_work(struct clear_pages_work *w,
+struct intel_context *ce)
+{
+   struct i915_vma *vma = w->sleeve->vma;
+   struct i915_gem_ww_ctx ww;
+   int err;
+
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(vma->obj, &ww);
+   if (err)
+   goto out;
+
+   err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+   if (unlikely(err))
+   goto out;
+
+   err = i915_sw_fence_await_reservation(&w->wait,
+ vma->obj->base.resv, NULL,
+ true, I915_FENCE_TIMEOUT,
+ I915_FENCE_GFP);
+   if (err)
+   goto err_unpin_vma;
+
+   dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
+
+err_unpin_vma:
+   if (err)
+   i915_vma_unpin(vma);
+out:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
+   return err;
+}
+
 static int __i915_sw_fence_call
 clear_pages_work_notify(struct i915_sw_fence *fence,
enum i915_sw_fence_notify state)
@@ -287,18 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct 
drm_i915_gem_object *obj,
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
 
-   i915_gem_object_lock(obj, NULL);
-   err = i915_sw_fence_await_reservation(&work->wait,
- obj->base.resv, NULL,
- true, I915_FENCE_TIMEOUT,
- I915_FENCE_GFP);
-   if (err < 0) {
+   err = pin_wait_clear_pages_work(work, ce);
+   if (err < 0)
dma_fence_set_error(&work->dma, err);
-   } else {
-   dma_resv_add_e

[Intel-gfx] [PATCH 03/22] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.

2020-03-30 Thread Maarten Lankhorst
i915_gem_ww_ctx is used to lock all gem bo's for pinning and memory
eviction. We don't use it yet, but let's add the definition
first.

To use it, we have to pass a non-NULL ww to gem_object_lock, and must not
unlock directly; unlocking is done in i915_gem_ww_ctx_fini.

Changes since v1:
- Change ww_ctx and obj order in locking functions (Joonas Lahtinen)

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/display/intel_display.c  |  4 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 10 ++--
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h| 38 +++---
 .../gpu/drm/i915/gem/i915_gem_object_blt.c|  2 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  9 
 drivers/gpu/drm/i915/gem/i915_gem_pm.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c|  2 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
 .../i915/gem/selftests/i915_gem_coherency.c   | 10 ++--
 .../drm/i915/gem/selftests/i915_gem_context.c |  4 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  4 +-
 .../drm/i915/gem/selftests/i915_gem_phys.c|  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|  2 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c|  2 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c |  2 +-
 drivers/gpu/drm/i915/i915_gem.c   | 52 +--
 drivers/gpu/drm/i915/i915_gem.h   | 11 
 drivers/gpu/drm/i915/selftests/i915_gem.c | 41 +++
 drivers/gpu/drm/i915/selftests/i915_vma.c |  2 +-
 .../drm/i915/selftests/intel_memory_region.c  |  2 +-
 26 files changed, 175 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index e09a11b1e509..2e2e5ce82dc2 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2303,7 +2303,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-   i915_gem_object_lock(vma->obj);
+   i915_gem_object_lock(vma->obj, NULL);
if (flags & PLANE_HAS_FENCE)
i915_vma_unpin_fence(vma);
i915_gem_object_unpin_from_display_plane(vma);
@@ -17047,7 +17047,7 @@ static int intel_framebuffer_init(struct 
intel_framebuffer *intel_fb,
if (!intel_fb->frontbuffer)
return -ENOMEM;
 
-   i915_gem_object_lock(obj);
+   i915_gem_object_lock(obj, NULL);
tiling = i915_gem_object_get_tiling(obj);
stride = i915_gem_object_get_stride(obj);
i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 0598e5382a1d..5d94a77f9bdd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -287,7 +287,7 @@ int i915_gem_schedule_fill_pages_blt(struct 
drm_i915_gem_object *obj,
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
 
-   i915_gem_object_lock(obj);
+   i915_gem_object_lock(obj, NULL);
err = i915_sw_fence_await_reservation(&work->wait,
  obj->base.resv, NULL,
  true, I915_FENCE_TIMEOUT,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 50e7580f9337..ac2b88ca00ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -113,7 +113,7 @@ static void lut_close(struct i915_gem_context *ctx)
continue;
 
rcu_read_unlock();
-   i915_gem_object_lock(obj);
+   i915_gem_object_lock(obj, NULL);
list_for_each_entry(lut, &obj->lut_list, obj_link) {
if (lut->ctx != ctx)
continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 7db5a793739d..cfadccfc2990 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf 
*dma_buf, enum dma_data_dire
if (err)
return err;
 
-   err = i915_gem_object_lock_interruptible(obj);
+   err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
 
@@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, 
enum dma_data_direct
if (err)
 

[Intel-gfx] [PATCH 11/22] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex

2020-03-30 Thread Maarten Lankhorst
Instead of doing everything inside of pin_mutex, we move all pinning
outside. Because i915_active has its own reference counting, and
pinning has the same issues vs mutexes, we make sure
everything is pinned first, so the pinning in i915_active only needs
to bump refcounts. This allows us to take pin refcounts correctly
all the time.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 233 +++---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c   |  34 ++-
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |   1 -
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  13 +-
 drivers/gpu/drm/i915/gt/mock_engine.c |  13 +-
 6 files changed, 191 insertions(+), 107 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index e4aece20bc80..bc0ed268ccb8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,79 +93,6 @@ static void intel_context_active_release(struct 
intel_context *ce)
i915_active_release(&ce->active);
 }
 
-int __intel_context_do_pin(struct intel_context *ce)
-{
-   int err;
-
-   if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
-   err = intel_context_alloc_state(ce);
-   if (err)
-   return err;
-   }
-
-   err = i915_active_acquire(&ce->active);
-   if (err)
-   return err;
-
-   if (mutex_lock_interruptible(&ce->pin_mutex)) {
-   err = -EINTR;
-   goto out_release;
-   }
-
-   if (unlikely(intel_context_is_closed(ce))) {
-   err = -ENOENT;
-   goto out_unlock;
-   }
-
-   if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
-   err = intel_context_active_acquire(ce);
-   if (unlikely(err))
-   goto out_unlock;
-
-   err = ce->ops->pin(ce);
-   if (unlikely(err))
-   goto err_active;
-
-   CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
-i915_ggtt_offset(ce->ring->vma),
-ce->ring->head, ce->ring->tail);
-
-   smp_mb__before_atomic(); /* flush pin before it is visible */
-   atomic_inc(&ce->pin_count);
-   }
-
-   GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-   GEM_BUG_ON(i915_active_is_idle(&ce->active));
-   goto out_unlock;
-
-err_active:
-   intel_context_active_release(ce);
-out_unlock:
-   mutex_unlock(&ce->pin_mutex);
-out_release:
-   i915_active_release(&ce->active);
-   return err;
-}
-
-void intel_context_unpin(struct intel_context *ce)
-{
-   if (!atomic_dec_and_test(&ce->pin_count))
-   return;
-
-   CE_TRACE(ce, "unpin\n");
-   ce->ops->unpin(ce);
-
-   /*
-* Once released, we may asynchronously drop the active reference.
-* As that may be the only reference keeping the context alive,
-* take an extra now so that it is not freed before we finish
-* dereferencing it.
-*/
-   intel_context_get(ce);
-   intel_context_active_release(ce);
-   intel_context_put(ce);
-}
-
 static int __context_pin_state(struct i915_vma *vma)
 {
unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
@@ -225,6 +152,138 @@ static void __ring_retire(struct intel_ring *ring)
i915_active_release(&ring->vma->active);
 }
 
+static int intel_context_pre_pin(struct intel_context *ce)
+{
+   int err;
+
+   CE_TRACE(ce, "active\n");
+
+   err = __ring_active(ce->ring);
+   if (err)
+   return err;
+
+   err = intel_timeline_pin(ce->timeline);
+   if (err)
+   goto err_ring;
+
+   if (!ce->state)
+   return 0;
+
+   err = __context_pin_state(ce->state);
+   if (err)
+   goto err_timeline;
+
+
+   return 0;
+
+err_timeline:
+   intel_timeline_unpin(ce->timeline);
+err_ring:
+   __ring_retire(ce->ring);
+   return err;
+}
+
+static void intel_context_post_unpin(struct intel_context *ce)
+{
+   if (ce->state)
+   __context_unpin_state(ce->state);
+
+   intel_timeline_unpin(ce->timeline);
+   __ring_retire(ce->ring);
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+   bool handoff = false;
+   void *vaddr;
+   int err = 0;
+
+   if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+   err = intel_context_alloc_state(ce);
+   if (err)
+   return err;
+   }
+
+   /*
+* We always pin the context/ring/timeline here, to ensure a pin
+* refcount for __intel_context_active(), which prevent a lock
+* inversion of ce->pin_mutex vs dma_resv_lock().
+*/
+   err = intel_context_pre_pin

[Intel-gfx] [PATCH 07/22] drm/i915: Use ww locking in intel_renderstate.

2020-03-30 Thread Maarten Lankhorst
We want to start using ww locking in intel_context_pin, for this
we need to lock multiple objects, and the single i915_gem_object_lock
is not enough.

Convert to using ww-waiting, and make sure we always pin intel_context_state,
even if we don't have a renderstate object.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  | 21 +++---
 drivers/gpu/drm/i915/gt/intel_renderstate.c | 71 ++---
 drivers/gpu/drm/i915/gt/intel_renderstate.h |  9 ++-
 3 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 6eae4c791007..c11e89472ad8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt)
/* We must be able to switch to something! */
GEM_BUG_ON(!engine->kernel_context);
 
-   err = intel_renderstate_init(&so, engine);
-   if (err)
-   goto out;
-
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
}
 
-   rq = intel_context_create_request(ce);
+   err = intel_renderstate_init(&so, ce);
+   if (err)
+   goto err;
+
+   rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   intel_context_put(ce);
-   goto out;
+   goto err_fini;
}
 
err = intel_engine_emit_ctx_wa(rq);
@@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
 err_rq:
requests[id] = i915_request_get(rq);
i915_request_add(rq);
-   intel_renderstate_fini(&so);
-   if (err)
+err_fini:
+   intel_renderstate_fini(&so, ce);
+err:
+   if (err) {
+   intel_context_put(ce);
goto out;
+   }
}
 
/* Flush the default context image to memory, and enable powersaving. */
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c 
b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index ca533d98d14d..c65554c431f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_renderstate.h"
+#include "gt/intel_context.h"
 #include "intel_ring.h"
 
 static const struct intel_renderstate_rodata *
@@ -74,10 +75,9 @@ static int render_state_setup(struct intel_renderstate *so,
u32 *d;
int ret;
 
-   i915_gem_object_lock(so->vma->obj, NULL);
ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush);
if (ret)
-   goto out_unlock;
+   return ret;
 
d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0));
 
@@ -158,8 +158,6 @@ static int render_state_setup(struct intel_renderstate *so,
ret = 0;
 out:
i915_gem_object_finish_access(so->vma->obj);
-out_unlock:
-   i915_gem_object_unlock(so->vma->obj);
return ret;
 
 err:
@@ -171,33 +169,47 @@ static int render_state_setup(struct intel_renderstate 
*so,
 #undef OUT_BATCH
 
 int intel_renderstate_init(struct intel_renderstate *so,
-  struct intel_engine_cs *engine)
+  struct intel_context *ce)
 {
-   struct drm_i915_gem_object *obj;
+   struct intel_engine_cs *engine = ce->engine;
+   struct drm_i915_gem_object *obj = NULL;
int err;
 
memset(so, 0, sizeof(*so));
 
so->rodata = render_state_get_rodata(engine);
-   if (!so->rodata)
-   return 0;
+   if (so->rodata) {
+   if (so->rodata->batch_items * 4 > PAGE_SIZE)
+   return -EINVAL;
+
+   obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+   if (IS_ERR(obj))
+   return PTR_ERR(obj);
+
+   so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+   if (IS_ERR(so->vma)) {
+   err = PTR_ERR(so->vma);
+   goto err_obj;
+   }
+   }
 
-   if (so->rodata->batch_items * 4 > PAGE_SIZE)
-   return -EINVAL;
+   i915_gem_ww_ctx_init(&so->ww, true);
+retry:
+   err = intel_context_pin(ce);
+   if (err)
+   goto err_fini;
 
-   obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
-   if (IS_ERR(obj))
-   return PTR_ERR(obj);
+   /* return early if there's nothing to setup */
+   if (!err && !so->rodata)
+   return 0;
 
-   so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
-   if (

[Intel-gfx] [PATCH 17/22] drm/i915/selftests: Fix locking inversion in lrc selftest.

2020-03-30 Thread Maarten Lankhorst
This function does not use intel_context_create_request, so it has
to use the same locking order as normal code. This is required to
shut up lockdep in selftests.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f06ba750a0a..64959a0c68ce 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -4283,6 +4283,7 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
 {
struct intel_context *ce;
struct i915_request *rq;
+   struct i915_gem_ww_ctx ww;
enum {
RING_START_IDX = 0,
RING_TAIL_IDX,
@@ -4297,7 +4298,11 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
if (IS_ERR(ce))
return PTR_ERR(ce);
 
-   err = intel_context_pin(ce);
+   i915_gem_ww_ctx_init(&ww, false);
+retry:
+   err = i915_gem_object_lock(scratch->obj, &ww);
+   if (!err)
+   err = intel_context_pin_ww(ce, &ww);
if (err)
goto err_put;
 
@@ -4326,11 +4331,9 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
*cs++ = 0;
 
-   i915_vma_lock(scratch);
err = i915_request_await_object(rq, scratch->obj, true);
if (!err)
err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
-   i915_vma_unlock(scratch);
 
i915_request_get(rq);
i915_request_add(rq);
@@ -4367,6 +4370,12 @@ static int __live_lrc_state(struct intel_engine_cs 
*engine,
 err_unpin:
intel_context_unpin(ce);
 err_put:
+   if (err == -EDEADLK) {
+   err = i915_gem_ww_ctx_backoff(&ww);
+   if (!err)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
intel_context_put(ce);
return err;
 }
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 09/22] drm/i915: Nuke arguments to eb_pin_engine

2020-03-30 Thread Maarten Lankhorst
Those arguments are already set as eb.file and eb.args, so kill off
the extra arguments. This will allow us to move eb_pin_engine() to
after we have reserved all BOs.

Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 17 +++--
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index a337f3054ce3..74146623b8ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2412,11 +2412,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 }
 
 static unsigned int
-eb_select_legacy_ring(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_select_legacy_ring(struct i915_execbuffer *eb)
 {
struct drm_i915_private *i915 = eb->i915;
+   struct drm_i915_gem_execbuffer2 *args = eb->args;
unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 
if (user_ring_id != I915_EXEC_BSD &&
@@ -2431,7 +2430,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-   bsd_idx = gen8_dispatch_bsd_engine(i915, file);
+   bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
   bsd_idx <= I915_EXEC_BSD_RING2) {
bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2456,18 +2455,16 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_pin_engine(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb)
 {
struct intel_context *ce;
unsigned int idx;
int err;
 
if (i915_gem_context_user_engines(eb->gem_context))
-   idx = args->flags & I915_EXEC_RING_MASK;
+   idx = eb->args->flags & I915_EXEC_RING_MASK;
else
-   idx = eb_select_legacy_ring(eb, file, args);
+   idx = eb_select_legacy_ring(eb);
 
ce = i915_gem_context_get_engine(eb->gem_context, idx);
if (IS_ERR(ce))
@@ -2765,7 +2762,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
 
-   err = eb_pin_engine(&eb, file, args);
+   err = eb_pin_engine(&eb);
if (unlikely(err))
goto err_context;
 
-- 
2.25.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 21/22] drm/i915: Add ww locking to pin_to_display_plane

2020-03-30 Thread Maarten Lankhorst
Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 65 --
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  1 +
 2 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index e9d3b587f562..def8254b5fc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -37,6 +37,12 @@ void i915_gem_object_flush_if_display(struct 
drm_i915_gem_object *obj)
i915_gem_object_unlock(obj);
 }
 
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
+{
+   if (i915_gem_object_is_framebuffer(obj))
+   __i915_gem_object_flush_for_display(obj);
+}
+
 /**
  * Moves a single object to the WC read, and possibly write domain.
  * @obj: object to act on
@@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
if (ret)
return ret;
 
-   ret = i915_gem_object_lock_interruptible(obj, NULL);
-   if (ret)
-   return ret;
-
/* Always invalidate stale cachelines */
if (obj->cache_level != cache_level) {
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true;
}
 
-   i915_gem_object_unlock(obj);
-
/* The cache-level will be applied when each vma is rebound. */
return i915_gem_object_unbind(obj,
  I915_GEM_OBJECT_UNBIND_ACTIVE |
@@ -255,6 +255,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
struct drm_i915_gem_caching *args = data;
struct drm_i915_gem_object *obj;
enum i915_cache_level level;
+   struct i915_gem_ww_ctx ww;
int ret = 0;
 
switch (args->caching) {
@@ -293,7 +294,18 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, 
void *data,
goto out;
}
 
-   ret = i915_gem_object_set_cache_level(obj, level);
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   ret = i915_gem_object_lock(obj, &ww);
+   if (!ret)
+   ret = i915_gem_object_set_cache_level(obj, level);
+
+   if (ret == -EDEADLK) {
+   ret = i915_gem_ww_ctx_backoff(&ww);
+   if (!ret)
+   goto retry;
+   }
+   i915_gem_ww_ctx_fini(&ww);
 
 out:
i915_gem_object_put(obj);
@@ -313,6 +325,7 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
 unsigned int flags)
 {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_gem_ww_ctx ww;
struct i915_vma *vma;
int ret;
 
@@ -320,6 +333,11 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
return ERR_PTR(-EINVAL);
 
+   i915_gem_ww_ctx_init(&ww, true);
+retry:
+   ret = i915_gem_object_lock(obj, &ww);
+   if (ret)
+   goto err;
/*
 * The display engine is not coherent with the LLC cache on gen6.  As
 * a result, we make sure that the pinning that is about to occur is
@@ -334,7 +352,7 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
  HAS_WT(i915) ?
  I915_CACHE_WT : I915_CACHE_NONE);
if (ret)
-   return ERR_PTR(ret);
+   goto err;
 
/*
 * As the user may map the buffer once pinned in the display plane
@@ -347,18 +365,31 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
vma = ERR_PTR(-ENOSPC);
if ((flags & PIN_MAPPABLE) == 0 &&
(!view || view->type == I915_GGTT_VIEW_NORMAL))
-   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-  flags |
-  PIN_MAPPABLE |
-  PIN_NONBLOCK);
-   if (IS_ERR(vma))
-   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-   if (IS_ERR(vma))
-   return vma;
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
+ flags | PIN_MAPPABLE |
+ PIN_NONBLOCK);
+   if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
+   vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
+ alignment, flags);
+   if (IS_ERR(vma)) {
+   ret = PTR_ERR(vma);
+   goto err;
+   }
 
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-   i915_gem_object_flush_if_display(obj);
+   i915_gem_object_flush_if_display_loc

Re: [Intel-gfx] [PATCH] drm/i915/huc: Add more errors for I915_PARAM_HUC_STATUS

2020-03-30 Thread Michal Wajdeczko



On 30.03.2020 16:12, Chris Wilson wrote:
> Quoting Michal Wajdeczko (2020-03-30 15:02:53)
>>
>>
>> On 30.03.2020 14:28, Chris Wilson wrote:
>>> There's nothing else between us loading the fw and the huc rejecting
>>> it?
>>>
>>> FIRMWARE_FAIL? That's set as the opposite of FIRMWARE_TRANSFERRED in
>>> that we failed to upload the image to the HW. The firmware itself hasn't
>>> had a chance to run.
>>>
>>> case INTEL_UC_FIRMWARE_FAIL:
>>>   return -ENXIO;
>>>
>>> Or is that being overridden to FIRMWARE_ERROR?
>>
>> No, it's not overridden by FIRMWARE_ERROR (since we use FIRMWARE_ERROR
>> as final state, while with FIRMWARE_FAIL there is a chance for recovery
>> during reset)
>>
>> Also note that FIRMWARE_FAIL case is covered by the register check that
>> we have below, which provides HuC runtime status.
> 
> Yes, if it only reports on the auth failure.
> 
>> And if we decide to use FIRMWARE_FAIL to report -ENXIO, then it is
>> unlikely that we will ever report 0 again for any other fw error that
>> could prevent fw from successful load (now recall your and Joonas
>> position that this param shall stay as reflection of register read).
>>
>> Michal
>>
>> ps. on other hand, if we trust our uc_fw_status() then we can drop that
>> register read, finally decouple GET_PARAM from MMIO_READ and fully rely
>> on cached status:
> 
> imo, that register read is the icing on the cake. We can tell whether
> the FW got to the HW, but we can't tell if the HW was truly happy with
> the FW without asking it.
> 
> I look at it as exposing an interface for the final capability bits to
> userspace that the kernel does not have to understand, that go above and
> beyond the kernel loading the firmware and confirming execution.

note that kernel already asked HW in intel_huc_auth() for FW status and
based on that info changed our cached fw status to RUNNING if and only
if HW was happy with that FW (and that shall not change until reset)

Michal
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RESEND PATCH] drm/i915: do AUD_FREQ_CNTRL state save on all gen9+ platforms

2020-03-30 Thread Kai Vehmanen
Replace the TGL/ICL specific platform checks with a more generic check
using INTEL_GEN(). Fixes bug with broken audio after S3 resume on JSL
platforms.

An initial version of state save and restore of AUD_FREQ_CNTRL register
was added for subset of platforms in commit 87c1694533c9
("drm/i915: save AUD_FREQ_CNTRL state at audio domain suspend"). The state
save has proven to work well and it is needed in newer platforms, so needs
to be extended. Although the logic is not needed in practice on GEN9/10
systems, follow the hardware specification and apply state save and restore
on all gen9+ platforms.

Bspec: 49281
Link: https://github.com/thesofproject/linux/issues/1719
Signed-off-by: Kai Vehmanen 
---
 drivers/gpu/drm/i915/display/intel_audio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_audio.c 
b/drivers/gpu/drm/i915/display/intel_audio.c
index 19bf206037c2..f4ed3acddc07 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -883,7 +883,7 @@ static unsigned long i915_audio_component_get_power(struct 
device *kdev)
ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
 
if (dev_priv->audio_power_refcount++ == 0) {
-   if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) {
+   if (INTEL_GEN(dev_priv) >= 9) {
intel_de_write(dev_priv, AUD_FREQ_CNTRL,
   dev_priv->audio_freq_cntrl);
drm_dbg_kms(&dev_priv->drm,
@@ -1165,7 +1165,7 @@ static void i915_audio_component_init(struct 
drm_i915_private *dev_priv)
return;
}
 
-   if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) {
+   if (INTEL_GEN(dev_priv) >= 9) {
dev_priv->audio_freq_cntrl = intel_de_read(dev_priv,
   AUD_FREQ_CNTRL);
drm_dbg_kms(&dev_priv->drm,
-- 
2.17.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3] drm/i915/dp: Return the right vswing tables

2020-03-30 Thread Ville Syrjälä
On Fri, Mar 27, 2020 at 02:34:11PM -0700, José Roberto de Souza wrote:
> DDI ports have their encoders initialized with the INTEL_OUTPUT_DDI type,
> and later eDP ports have the type changed to INTEL_OUTPUT_EDP.
> All other DDI ports can drive HDMI or DP depending on what the
> user connects to the port.
> 
> ehl_get_combo_buf_trans() and tgl_get_combo_buf_trans() were checking
> for INTEL_OUTPUT_DP, which was never true, causing eDP vswing tables
> to be used.
> 
> So change the check to INTEL_OUTPUT_DDI here. HDMI cases will still be
> handled correctly, as HDMI does not use encoder->type; instead it calls the
> functions with INTEL_OUTPUT_HDMI as the type parameter, and HDMI doesn't
> have retraining.
> 
> Fixes: bd3cf6f7ce20 ("drm/i915/dp/tgl+: Update combo phy vswing tables")
> Cc: Clinton A Taylor 
> Cc: Matt Roper 
> Signed-off-by: José Roberto de Souza 
> ---
>  drivers/gpu/drm/i915/display/intel_ddi.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
> b/drivers/gpu/drm/i915/display/intel_ddi.c
> index 916a802af788..7af1572d4f1d 100644
> --- a/drivers/gpu/drm/i915/display/intel_ddi.c
> +++ b/drivers/gpu/drm/i915/display/intel_ddi.c
> @@ -947,7 +947,7 @@ static const struct cnl_ddi_buf_trans *
>  ehl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int 
> rate,
>   int *n_entries)
>  {
> - if (type == INTEL_OUTPUT_DP && rate > 27) {
> + if (type == INTEL_OUTPUT_DDI && rate > 27) {

Please no. I'd rather not see "DDI" here. We want to check which mode
we're driving the output in, and "DDI" isn't one of the valid choices.

The fact that we sometimes pass in encoder->type is a bit of a shortcut
to make the DP vs. EDP distinction. And so far every function knew to
only compare the value against EDP/HDMI and never against DP. Looks like
someone broke that (admittedly crappy) convention.

We should probably fix this a bit higher up and make sure we only ever
pass in EDP/DP/HDMI, never DDI.

>   *n_entries = 
> ARRAY_SIZE(ehl_combo_phy_ddi_translations_hbr2_hbr3);
>   return ehl_combo_phy_ddi_translations_hbr2_hbr3;
>   }
> @@ -959,7 +959,7 @@ static const struct cnl_ddi_buf_trans *
>  tgl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int 
> rate,
>   int *n_entries)
>  {
> - if (type != INTEL_OUTPUT_DP) {
> + if (type != INTEL_OUTPUT_DDI) {
>   return icl_get_combo_buf_trans(dev_priv, type, rate, n_entries);
>   } else if (rate > 27) {
>   *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2);
> -- 
> 2.26.0
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/icl+: Don't enable DDI IO power on a TypeC port in TBT mode

2020-03-30 Thread Imre Deak
The DDI IO power well must not be enabled for a TypeC port in TBT mode;
ensure this during driver loading/system resume.

This gets rid of error messages like
[drm] *ERROR* power well DDI E TC2 IO state mismatch (refcount 1/enabled 0)

and avoids leaking the power ref when disabling the output.

Cc:  # v5.4+
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
b/drivers/gpu/drm/i915/display/intel_ddi.c
index 916a802af788..654151d9a6db 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -1899,7 +1899,11 @@ static void intel_ddi_get_power_domains(struct 
intel_encoder *encoder,
return;
 
dig_port = enc_to_dig_port(encoder);
-   intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
+
+   if (!intel_phy_is_tc(dev_priv, phy) ||
+   dig_port->tc_mode != TC_PORT_TBT_ALT)
+   intel_display_power_get(dev_priv,
+   dig_port->ddi_io_power_domain);
 
/*
 * AUX power is only needed for (e)DP mode, and for HDMI mode on TC
-- 
2.23.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 1/5] drm/i915: Decouple cdclk calculation from modeset checks

2020-03-30 Thread Ville Syrjälä
On Mon, Mar 30, 2020 at 03:23:50PM +0300, Stanislav Lisovskiy wrote:
> We need to calculate cdclk after watermarks/ddb has been calculated
> as with recent hw CDCLK needs to be adjusted accordingly to DBuf
> requirements, which is not possible with current code organization.
> 
> Setting CDCLK according to DBuf BW requirements and not just rejecting
> if it doesn't satisfy BW requirements, will allow us to save power when
> it is possible and gain additional bandwidth when it's needed - i.e.
> boosting both our power management and performance capabilities.
> 
> This patch is preparation for that, first we now extract modeset
> calculation from modeset checks, in order to call it after wm/ddb
> has been calculated.
> 
> v2: - Extract only intel_modeset_calc_cdclk from intel_modeset_checks
>   (Ville Syrjälä)
> 
> Signed-off-by: Stanislav Lisovskiy 
> ---
>  drivers/gpu/drm/i915/display/intel_display.c | 18 ++
>  1 file changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
> b/drivers/gpu/drm/i915/display/intel_display.c
> index 7c45d676c9b7..17d83f37f49f 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -14545,10 +14545,6 @@ static int intel_modeset_checks(struct 
> intel_atomic_state *state)
>   return ret;
>   }
>  
> - ret = intel_modeset_calc_cdclk(state);
> - if (ret)
> - return ret;
> -
>   intel_modeset_clear_plls(state);
>  
>   if (IS_HASWELL(dev_priv))
> @@ -14882,10 +14878,6 @@ static int intel_atomic_check(struct drm_device *dev,
>   goto fail;
>   }
>  
> - ret = intel_atomic_check_crtcs(state);
> - if (ret)
> - goto fail;
> -
>   intel_fbc_choose_crtc(dev_priv, state);
>   ret = calc_watermark_data(state);
>   if (ret)
> @@ -14895,6 +14887,16 @@ static int intel_atomic_check(struct drm_device *dev,
>   if (ret)
>   goto fail;
>  
> + if (any_ms) {
> + ret = intel_modeset_calc_cdclk(state);
> + if (ret)
> + return ret;
> + }
> +
> + ret = intel_atomic_check_crtcs(state);
> + if (ret)
> + goto fail;

I was thinking we'd do this as two patches. One with just the
extraction, and another one with the bigger reordering. But I think I
convinced myself that it should be safe, so maybe a single patch is
fine.

Reviewed-by: Ville Syrjälä 

> +
>   for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
>   new_crtc_state, i) {
>   if (!needs_modeset(new_crtc_state) &&
> -- 
> 2.24.1.485.gad05a3d8e5

-- 
Ville Syrjälä
Intel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/execlists: Include priority info in trace_ports

2020-03-30 Thread Matthew Auld
On Mon, 30 Mar 2020 at 12:32, Chris Wilson  wrote:
>
> Add some extra information into trace_ports to help with reviewing
> correctness.
>
> Signed-off-by: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Tvrtko Ursulin 
Reviewed-by: Matthew Auld 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/perf: don't read head/tail pointers outside critical section

2020-03-30 Thread Dixit, Ashutosh
On Mon, 30 Mar 2020 03:09:20 -0700, Chris Wilson wrote:
>
> Quoting Lionel Landwerlin (2020-03-30 10:14:11)
> > Reading or writing those fields should only happen under
> > stream->oa_buffer.ptr_lock.
>
> Writing, yes. Reading as a pair, sure. There are other ways you can
> ensure that the tail/head are read as one, but fair enough.

Sorry, but I am trying to understand exactly what the purpose of
stream->oa_buffer.ptr_lock is. This is a classic ring buffer
producer/consumer situation where the producer updates tail and the
consumer updates head. Since both are u32s, can't those operations be done
without requiring a lock?

>
> > Signed-off-by: Lionel Landwerlin 
> > Fixes: d1df41eb72ef ("drm/i915/perf: rework aging tail workaround")
> > ---
> >  drivers/gpu/drm/i915/i915_perf.c | 8 ++--
> >  1 file changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> > b/drivers/gpu/drm/i915/i915_perf.c
> > index c74ebac50015..ec9421f02ebd 100644
> > --- a/drivers/gpu/drm/i915/i915_perf.c
> > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > @@ -463,6 +463,7 @@ static bool oa_buffer_check_unlocked(struct 
> > i915_perf_stream *stream)
> > u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> > int report_size = stream->oa_buffer.format_size;
> > unsigned long flags;
> > +   bool pollin;
> > u32 hw_tail;
> > u64 now;
> >
> > @@ -532,10 +533,13 @@ static bool oa_buffer_check_unlocked(struct 
> > i915_perf_stream *stream)
> > stream->oa_buffer.aging_timestamp = now;
> > }
> >
> > +   pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> > + stream->oa_buffer.head - gtt_offset) >= 
> > report_size;
> > +
> > +
>
> Bonus \n
>
> Reviewed-by: Chris Wilson 
>
> > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
> >
> > -   return OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> > -   stream->oa_buffer.head - gtt_offset) >= report_size;
> > +   return pollin;

In what way is the original code incorrect? As I mentioned, head is a u32 and
can be read atomically without requiring a lock. We had deliberately moved
this code outside the lock so as to pick up the latest value of head if
it had been updated in the consumer (read).
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 3/5] drm/i915: Introduce for_each_dbuf_slice_in_mask macro

2020-03-30 Thread Ville Syrjälä
On Mon, Mar 30, 2020 at 03:23:52PM +0300, Stanislav Lisovskiy wrote:
> We quite often need now to iterate only particular dbuf slices
> in mask, whether they are active or related to particular crtc.
> 
> Let's make our life a bit easier and use a macro for that.
> 
> Signed-off-by: Stanislav Lisovskiy 
> ---
>  drivers/gpu/drm/i915/display/intel_display.h   | 7 +++
>  drivers/gpu/drm/i915/display/intel_display_power.h | 3 +++
>  2 files changed, 10 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.h 
> b/drivers/gpu/drm/i915/display/intel_display.h
> index adb1225a3480..c898285f0dc3 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.h
> +++ b/drivers/gpu/drm/i915/display/intel_display.h
> @@ -187,6 +187,13 @@ enum plane_id {
>   for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \
>   for_each_if((__crtc)->plane_ids_mask & BIT(__p))
>  
> +#define for_each_dbuf_slice_in_mask(__slice, __mask) \

Please stick to established conventions.

> + for ((__slice) = 0; (__slice) < I915_MAX_DBUF_SLICES; (__slice)++) \
> + for_each_if((1 << (__slice)) & (__mask))
> +
> +#define for_each_dbuf_slice(__slice) \
> + for_each_dbuf_slice_in_mask(__slice, (1 << I915_MAX_DBUF_SLICES) - 1)
> +
>  enum port {
>   PORT_NONE = -1,
>  
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
> b/drivers/gpu/drm/i915/display/intel_display_power.h
> index da64a5edae7a..468e8fb0203a 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> @@ -311,8 +311,11 @@ intel_display_power_put_async(struct drm_i915_private 
> *i915,
>  enum dbuf_slice {
>   DBUF_S1,
>   DBUF_S2,
> + DBUF_SLICE_MAX
>  };
>  
> +#define I915_DBUF_MAX_SLICES DBUF_SLICE_MAX
> +

Huh?

>  #define with_intel_display_power(i915, domain, wf) \
>   for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
>intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0)
> -- 
> 2.24.1.485.gad05a3d8e5

-- 
Ville Syrjälä
Intel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


  1   2   3   >