[Bug 209159] New: AMD Vega 20 framebuffer switch fails on 5.9rc2+

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209159

Bug ID: 209159
   Summary: AMD Vega 20 framebuffer switch fails on 5.9rc2+
   Product: Drivers
   Version: 2.5
Kernel Version: 5.9-rc2/3
  Hardware: x86-64
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: Video(DRI - non Intel)
  Assignee: drivers_video-...@kernel-bugs.osdl.org
  Reporter: r...@testtoast.com
Regression: No

Created attachment 292349
  --> https://bugzilla.kernel.org/attachment.cgi?id=292349&action=edit
lspci

Vega 20 (Radeon VII) on X99 platform - boot freezes during FB switch. Booting
without quiet and with earlyprintk=efi,keep shows a stall immediately after

...
[1.941238] AMD-Vi: AMD IOMMUv2 driver by Joerg Roedel 
[1.941239] AMD-Vi: AMD IOMMUv2 functionality not available on this system
[1.948454] nvme nvme0: 20/0/0 default/read/poll queues
[1.953176]  nvme0n1: p1 p2 p3 p4 p5 p6 p7
[1.956313] usb 1-14: new full-speed USB device number 5 using xhci_hcd
[1.989790] [drm] amdgpu kernel modesetting enabled.
[1.989849] CRAT table not found
[1.989850] Virtual CRAT table created for CPU
[1.989857] amdgpu: Topology: Add CPU node
[2.006244] checking generic (c000 30) vs hw (c000 1000)
[2.006246] fb0: switching to amdgpudrmfb from EFI VGA


On -rc1 boot continues normally with
...
[2.006315] amdgpu :67:00.0: vgaarb: deactivate vga console
[2.006345] amdgpu :67:00.0: enabling device (0106 -> 0107)
[2.006408] [drm] initializing kernel modesetting (VEGA20 0x1002:0x66AF
0x1002:0x081E 0xC1).
...

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 15/17] drm/tegra: Add power_on/power_off engine callbacks

2020-09-05 Thread Mikko Perttunen
With the new UAPI implementation, engines are powered on and off
when there are active jobs, and the core code handles channel
allocation. To accommodate that, add the power_on and power_off
callbacks. The open_channel and close_channel callbacks are now only
used for the staging path.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.h |  11 +++-
 drivers/gpu/drm/tegra/vic.c | 127 
 2 files changed, 78 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index b25443255be6..b915a3946ad4 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -67,14 +67,19 @@ struct tegra_drm_context {
 };
 
 struct tegra_drm_client_ops {
-   int (*open_channel)(struct tegra_drm_client *client,
-   struct tegra_drm_context *context);
-   void (*close_channel)(struct tegra_drm_context *context);
+   int (*power_on)(struct tegra_drm_client *client);
+   void (*power_off)(struct tegra_drm_client *client);
+
int (*is_addr_reg)(struct device *dev, u32 class, u32 offset);
int (*is_valid_class)(u32 class);
int (*submit)(struct tegra_drm_context *context,
  struct drm_tegra_submit *args, struct drm_device *drm,
  struct drm_file *file);
+
+   /* Legacy UAPI callbacks */
+   int (*open_channel)(struct tegra_drm_client *client,
+   struct tegra_drm_context *context);
+   void (*close_channel)(struct tegra_drm_context *context);
 };
 
 int tegra_drm_submit(struct tegra_drm_context *context,
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index cb476da59adc..4783c7254de9 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -52,48 +52,6 @@ static void vic_writel(struct vic *vic, u32 value, unsigned 
int offset)
writel(value, vic->regs + offset);
 }
 
-static int vic_runtime_resume(struct device *dev)
-{
-   struct vic *vic = dev_get_drvdata(dev);
-   int err;
-
-   err = clk_prepare_enable(vic->clk);
-   if (err < 0)
-   return err;
-
-   usleep_range(10, 20);
-
-   err = reset_control_deassert(vic->rst);
-   if (err < 0)
-   goto disable;
-
-   usleep_range(10, 20);
-
-   return 0;
-
-disable:
-   clk_disable_unprepare(vic->clk);
-   return err;
-}
-
-static int vic_runtime_suspend(struct device *dev)
-{
-   struct vic *vic = dev_get_drvdata(dev);
-   int err;
-
-   err = reset_control_assert(vic->rst);
-   if (err < 0)
-   return err;
-
-   usleep_range(2000, 4000);
-
-   clk_disable_unprepare(vic->clk);
-
-   vic->booted = false;
-
-   return 0;
-}
-
 static int vic_boot(struct vic *vic)
 {
 #ifdef CONFIG_IOMMU_API
@@ -308,47 +266,102 @@ static int vic_load_firmware(struct vic *vic)
return err;
 }
 
-static int vic_open_channel(struct tegra_drm_client *client,
-   struct tegra_drm_context *context)
+
+static int vic_runtime_resume(struct device *dev)
 {
-   struct vic *vic = to_vic(client);
+   struct vic *vic = dev_get_drvdata(dev);
int err;
 
-   err = pm_runtime_get_sync(vic->dev);
+   err = clk_prepare_enable(vic->clk);
if (err < 0)
return err;
 
+   usleep_range(10, 20);
+
+   err = reset_control_deassert(vic->rst);
+   if (err < 0)
+   goto disable;
+
+   usleep_range(10, 20);
+
err = vic_load_firmware(vic);
if (err < 0)
-   goto rpm_put;
+   goto assert;
 
err = vic_boot(vic);
if (err < 0)
-   goto rpm_put;
+   goto assert;
+
+   return 0;
+
+assert:
+   reset_control_assert(vic->rst);
+disable:
+   clk_disable_unprepare(vic->clk);
+   return err;
+}
+
+static int vic_runtime_suspend(struct device *dev)
+{
+   struct vic *vic = dev_get_drvdata(dev);
+   int err;
+
+   err = reset_control_assert(vic->rst);
+   if (err < 0)
+   return err;
+
+   usleep_range(2000, 4000);
+
+   clk_disable_unprepare(vic->clk);
+
+   vic->booted = false;
+
+   return 0;
+}
+
+static int vic_power_on(struct tegra_drm_client *client)
+{
+   struct vic *vic = to_vic(client);
+
+   return pm_runtime_get_sync(vic->dev);
+}
+
+static void vic_power_off(struct tegra_drm_client *client)
+{
+   struct vic *vic = to_vic(client);
+
+   pm_runtime_put(vic->dev);
+}
+
+static int vic_open_channel(struct tegra_drm_client *client,
+   struct tegra_drm_context *context)
+{
+   struct vic *vic = to_vic(client);
+   int err;
+
+   err = vic_power_on(client);
+   if (err < 0)
+   return err;
 
context->channel = host1x_channel_get(vic->channel);
if (!context->channel) {
-   err = -ENOMEM;
-   g

[RFC PATCH v2 07/17] gpu: host1x: Introduce UAPI header

2020-09-05 Thread Mikko Perttunen
Add the userspace interface header, specifying interfaces
for allocating and accessing syncpoints from userspace,
and for creating sync_file based fences based on syncpoint
thresholds.

Signed-off-by: Mikko Perttunen 
---
 include/uapi/linux/host1x.h | 134 
 1 file changed, 134 insertions(+)
 create mode 100644 include/uapi/linux/host1x.h

diff --git a/include/uapi/linux/host1x.h b/include/uapi/linux/host1x.h
new file mode 100644
index ..9c8fb9425cb2
--- /dev/null
+++ b/include/uapi/linux/host1x.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _UAPI__LINUX_HOST1X_H
+#define _UAPI__LINUX_HOST1X_H
+
+#include 
+#include 
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct host1x_allocate_syncpoint {
+   /**
+* @fd: [out]
+*
+* New file descriptor representing the allocated syncpoint.
+*/
+   __s32 fd;
+
+   __u32 reserved[3];
+};
+
+struct host1x_syncpoint_info {
+   /**
+* @id: [out]
+*
+* System-global ID of the syncpoint.
+*/
+   __u32 id;
+
+   __u32 reserved[3];
+};
+
+struct host1x_syncpoint_increment {
+   /**
+* @count: [in]
+*
+* Number of times to increment the syncpoint. The syncpoint can
+* be observed at in-between values, but each increment is atomic.
+*/
+   __u32 count;
+};
+
+struct host1x_read_syncpoint {
+   /**
+* @id: [in]
+*
+* ID of the syncpoint to read.
+*/
+   __u32 id;
+
+   /**
+* @value: [out]
+*
+* Current value of the syncpoint.
+*/
+   __u32 value;
+};
+
+struct host1x_create_fence {
+   /**
+* @id: [in]
+*
+* ID of the syncpoint to create a fence for.
+*/
+   __u32 id;
+
+   /**
+* @threshold: [in]
+*
+* When the syncpoint reaches this value, the fence will be signaled.
+* The syncpoint is considered to have reached the threshold when the
+* following condition is true:
+*
+*  ((value - threshold) & 0x8000U) == 0U
+*
+*/
+   __u32 threshold;
+
+   /**
+* @fence_fd: [out]
+*
+* New sync_file file descriptor containing the created fence.
+*/
+   __s32 fence_fd;
+
+   __u32 reserved[1];
+};
+
+struct host1x_fence_extract_fence {
+   __u32 id;
+   __u32 threshold;
+};
+
+struct host1x_fence_extract {
+   /**
+* @fence_fd: [in]
+*
+* sync_file file descriptor
+*/
+   __s32 fence_fd;
+
+   /**
+* @num_fences: [in,out]
+*
+* In: size of the `fences_ptr` array counted in elements.
+* Out: required size of the `fences_ptr` array counted in elements.
+*/
+   __u32 num_fences;
+
+   /**
+* @fences_ptr: [in]
+*
+* Pointer to array of `struct host1x_fence_extract_fence`.
+*/
+   __u64 fences_ptr;
+
+   __u32 reserved[2];
+};
+
+#define HOST1X_IOCTL_ALLOCATE_SYNCPOINT  _IOWR('X', 0x00, struct 
host1x_allocate_syncpoint)
+#define HOST1X_IOCTL_READ_SYNCPOINT  _IOR ('X', 0x01, struct 
host1x_read_syncpoint)
+#define HOST1X_IOCTL_CREATE_FENCE_IOWR('X', 0x02, struct 
host1x_create_fence)
+#define HOST1X_IOCTL_SYNCPOINT_INFO  _IOWR('X', 0x03, struct 
host1x_syncpoint_info)
+#define HOST1X_IOCTL_SYNCPOINT_INCREMENT _IOWR('X', 0x04, struct 
host1x_syncpoint_increment)
+#define HOST1X_IOCTL_FENCE_EXTRACT   _IOWR('X', 0x05, struct 
host1x_fence_extract)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 09/17] gpu: host1x: DMA fences and userspace fence creation

2020-09-05 Thread Mikko Perttunen
Add an implementation of dma_fences based on syncpoints. Syncpoint
interrupts are used to signal fences. Additionally, after
software signaling has been enabled, a 30 second timeout is started.
If the syncpoint threshold is not reached within this period,
the fence is signalled with an -ETIMEDOUT error code. This is to
allow fences that would never reach their syncpoint threshold to
be cleaned up.

Additionally, add a new /dev/host1x IOCTL for creating sync_file
file descriptors backed by syncpoint fences.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/Makefile |   1 +
 drivers/gpu/host1x/fence.c  | 207 
 drivers/gpu/host1x/fence.h  |  15 +++
 drivers/gpu/host1x/intr.c   |   9 ++
 drivers/gpu/host1x/intr.h   |   2 +
 drivers/gpu/host1x/uapi.c   | 106 ++
 include/linux/host1x.h  |   3 +
 7 files changed, 343 insertions(+)
 create mode 100644 drivers/gpu/host1x/fence.c
 create mode 100644 drivers/gpu/host1x/fence.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 882f928d75e1..a48af2cefae1 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -10,6 +10,7 @@ host1x-y = \
debug.o \
mipi.o \
uapi.o \
+   fence.o \
hw/host1x01.o \
hw/host1x02.o \
hw/host1x04.o \
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
new file mode 100644
index ..400da6c1ab48
--- /dev/null
+++ b/drivers/gpu/host1x/fence.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Syncpoint dma_fence implementation
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "intr.h"
+#include "syncpt.h"
+
+DEFINE_SPINLOCK(lock);
+
+struct host1x_syncpt_fence {
+   struct dma_fence base;
+
+   atomic_t signaling;
+
+   struct host1x_syncpt *sp;
+   u32 threshold;
+
+   struct host1x_waitlist *waiter;
+   void *waiter_ref;
+
+   struct delayed_work timeout_work;
+};
+
+static const char *syncpt_fence_get_driver_name(struct dma_fence *f)
+{
+   return "host1x";
+}
+
+static const char *syncpt_fence_get_timeline_name(struct dma_fence *f)
+{
+   return "syncpoint";
+}
+
+static bool syncpt_fence_enable_signaling(struct dma_fence *f)
+{
+   struct host1x_syncpt_fence *sf =
+   container_of(f, struct host1x_syncpt_fence, base);
+   int err;
+
+   if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
+   return false;
+
+   dma_fence_get(f);
+
+   /*
+* The dma_fence framework requires the fence driver to keep a
+* reference to any fences for which 'enable_signaling' has been
+* called (and that have not been signalled).
+* 
+* We provide a userspace API to create arbitrary syncpoint fences,
+* so we cannot normally guarantee that all fences get signalled.
+* As such, setup a timeout, so that long-lasting fences will get
+* reaped eventually.
+*/
+   schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(3));
+
+   err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold,
+HOST1X_INTR_ACTION_SIGNAL_FENCE, f,
+sf->waiter, &sf->waiter_ref);
+   if (err) {
+   cancel_delayed_work_sync(&sf->timeout_work);
+   dma_fence_put(f);
+   return false;
+   }
+
+   /* intr framework takes ownership of waiter */
+   sf->waiter = NULL;
+
+   /*
+* The fence may get signalled at any time after the above call,
+* so we need to initialize all state used by signalling
+* before it.
+*/
+
+   return true;
+}
+
+static void syncpt_fence_release(struct dma_fence *f)
+{
+   struct host1x_syncpt_fence *sf =
+   container_of(f, struct host1x_syncpt_fence, base);
+
+   if (sf->waiter)
+   kfree(sf->waiter);
+
+   dma_fence_free(f);
+}
+
+const struct dma_fence_ops syncpt_fence_ops = {
+   .get_driver_name = syncpt_fence_get_driver_name,
+   .get_timeline_name = syncpt_fence_get_timeline_name,
+   .enable_signaling = syncpt_fence_enable_signaling,
+   .release = syncpt_fence_release,
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *f)
+{
+   if (atomic_xchg(&f->signaling, 1))
+   return;
+
+   /*
+* Cancel pending timeout work - if it races, it will
+* not get 'f->signaling' and return.
+*/
+   cancel_delayed_work_sync(&f->timeout_work);
+
+   host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref);
+
+   dma_fence_signal(&f->base);
+   dma_fence_put(&f->base);
+}
+
+static void do_fence_timeout(struct work_struct *work)
+{
+   struct delayed_work *dwork = (struct delayed_work *)work;
+   struct host1x_syncpt_fence *f =
+   cont

[RFC PATCH v2 04/17] gpu: host1x: Remove cancelled waiters immediately

2020-09-05 Thread Mikko Perttunen
Before this patch, cancelled waiters would only be cleaned up
once their threshold value was reached. Make host1x_intr_put_ref
process the cancellation immediately to fix this.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/intr.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 9245add23b5d..5d328d20ce6d 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -247,13 +247,17 @@ void host1x_intr_put_ref(struct host1x *host, unsigned 
int id, void *ref)
struct host1x_waitlist *waiter = ref;
struct host1x_syncpt *syncpt;
 
-   while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) ==
-  WLS_REMOVED)
-   schedule();
+   atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
 
syncpt = host->syncpt + id;
-   (void)process_wait_list(host, syncpt,
-   host1x_syncpt_load(host->syncpt + id));
+
+   spin_lock(&syncpt->intr.lock);
+   if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
+   WLS_CANCELLED) {
+   list_del(&waiter->list);
+   kref_put(&waiter->refcount, waiter_release);
+   }
+   spin_unlock(&syncpt->intr.lock);
 
kref_put(&waiter->refcount, waiter_release);
 }
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 11/17] gpu: host1x: Add job release callback

2020-09-05 Thread Mikko Perttunen
Add a callback field to the job structure, to be called just before
the job is to be freed. This allows the job's submitter to clean
up any of its own state, like decrement runtime PM refcounts.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/job.c | 3 +++
 include/linux/host1x.h   | 4 
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index e4f16fc899b0..acf322beb56c 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -79,6 +79,9 @@ static void job_free(struct kref *ref)
 {
struct host1x_job *job = container_of(ref, struct host1x_job, ref);
 
+   if (job->release)
+   job->release(job);
+
if (job->waiter)
host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
job->waiter);
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 3ffe16152ebc..cabc5bef5bae 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -261,6 +261,10 @@ struct host1x_job {
 
/* Fast-forward syncpoint increments on job timeout */
bool syncpt_recovery;
+
+   /* Callback called when job is freed */
+   void (*release)(struct host1x_job *job);
+   void *user_data;
 };
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 02/17] gpu: host1x: Allow syncpoints without associated client

2020-09-05 Thread Mikko Perttunen
Syncpoints don't need to be associated with any client,
so remove the property, and expose host1x_syncpt_alloc.
This will allow allocating syncpoints without prior knowledge
of the engine that it will be used with.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/syncpt.c | 8 +++-
 drivers/gpu/host1x/syncpt.h | 6 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index fce7892d5137..7cb80d4768b1 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -42,9 +42,9 @@ static void host1x_syncpt_base_free(struct host1x_syncpt_base 
*base)
base->requested = false;
 }
 
-static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
-struct host1x_client *client,
-unsigned long flags)
+struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
+ struct host1x_client *client,
+ unsigned long flags)
 {
struct host1x_syncpt *sp = host->syncpt;
unsigned int i;
@@ -69,7 +69,6 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct 
host1x *host,
if (!name)
goto free_base;
 
-   sp->client = client;
sp->name = name;
 
if (flags & HOST1X_SYNCPT_CLIENT_MANAGED)
@@ -447,7 +446,6 @@ void host1x_syncpt_free(struct host1x_syncpt *sp)
host1x_syncpt_base_free(sp->base);
kfree(sp->name);
sp->base = NULL;
-   sp->client = NULL;
sp->name = NULL;
sp->client_managed = false;
 
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 8e1d04dacaa0..77e7206cc316 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -33,7 +33,6 @@ struct host1x_syncpt {
const char *name;
bool client_managed;
struct host1x *host;
-   struct host1x_client *client;
struct host1x_syncpt_base *base;
 
/* interrupt data */
@@ -113,4 +112,9 @@ static inline int host1x_syncpt_is_valid(struct 
host1x_syncpt *sp)
return sp->id < host1x_syncpt_nb_pts(sp->host);
 }
 
+/* Allocate a syncpoint. */
+struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
+ struct host1x_client *client,
+ unsigned long flags);
+
 #endif
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 14/17] drm/tegra: Add new UAPI to header

2020-09-05 Thread Mikko Perttunen
Update the tegra_drm.h UAPI header, adding the new proposed UAPI.
The old staging UAPI is left in for now, with minor modification
to avoid name collisions.

Signed-off-by: Mikko Perttunen 
---
 include/uapi/drm/tegra_drm.h | 431 ---
 1 file changed, 404 insertions(+), 27 deletions(-)

diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index c4df3c3668b3..6db5fa242715 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -1,24 +1,5 @@
-/*
- * Copyright (c) 2012-2013, NVIDIA CORPORATION.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
+/* SPDX-License-Identifier: MIT */
+/* Copyright (c) 2012-2020 NVIDIA Corporation */
 
 #ifndef _UAPI_TEGRA_DRM_H_
 #define _UAPI_TEGRA_DRM_H_
@@ -29,6 +10,8 @@
 extern "C" {
 #endif
 
+/* TegraDRM legacy UAPI. Only enabled with STAGING */
+
 #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
 
@@ -644,13 +627,13 @@ struct drm_tegra_gem_get_flags {
__u32 flags;
 };
 
-#define DRM_TEGRA_GEM_CREATE   0x00
-#define DRM_TEGRA_GEM_MMAP 0x01
+#define DRM_TEGRA_GEM_CREATE_LEGACY0x00
+#define DRM_TEGRA_GEM_MMAP_LEGACY  0x01
 #define DRM_TEGRA_SYNCPT_READ  0x02
 #define DRM_TEGRA_SYNCPT_INCR  0x03
 #define DRM_TEGRA_SYNCPT_WAIT  0x04
-#define DRM_TEGRA_OPEN_CHANNEL 0x05
-#define DRM_TEGRA_CLOSE_CHANNEL0x06
+#define DRM_TEGRA_OPEN_CHANNEL 0x05
+#define DRM_TEGRA_CLOSE_CHANNEL0x06
 #define DRM_TEGRA_GET_SYNCPT   0x07
 #define DRM_TEGRA_SUBMIT   0x08
 #define DRM_TEGRA_GET_SYNCPT_BASE  0x09
@@ -659,8 +642,8 @@ struct drm_tegra_gem_get_flags {
 #define DRM_TEGRA_GEM_SET_FLAGS0x0c
 #define DRM_TEGRA_GEM_GET_FLAGS0x0d
 
-#define DRM_IOCTL_TEGRA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_GEM_CREATE, struct drm_tegra_gem_create)
-#define DRM_IOCTL_TEGRA_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_GEM_MMAP, struct drm_tegra_gem_mmap)
+#define DRM_IOCTL_TEGRA_GEM_CREATE_LEGACY DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_GEM_CREATE_LEGACY, struct drm_tegra_gem_create)
+#define DRM_IOCTL_TEGRA_GEM_MMAP_LEGACY DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_GEM_MMAP_LEGACY, struct drm_tegra_gem_mmap)
 #define DRM_IOCTL_TEGRA_SYNCPT_READ DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_SYNCPT_READ, struct drm_tegra_syncpt_read)
 #define DRM_IOCTL_TEGRA_SYNCPT_INCR DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_SYNCPT_INCR, struct drm_tegra_syncpt_incr)
 #define DRM_IOCTL_TEGRA_SYNCPT_WAIT DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_SYNCPT_WAIT, struct drm_tegra_syncpt_wait)
@@ -674,6 +657,400 @@ struct drm_tegra_gem_get_flags {
 #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags)
 #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + 
DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags)
 
+/* New TegraDRM UAPI */
+
+struct drm_tegra_channel_open {
+   /**
+* @host1x_class: [in]
+*
+* Host1x class of the engine that will be programmed using this
+* channel.
+*/
+   __u32 host1x_class;
+
+   /**
+* @flags: [in]
+*
+* Flags.
+*/
+   __u32 flags;
+
+   /**
+* @channel_ctx: [out]
+*
+* Opaque identifier corresponding to the opened channel.
+*/
+   __u32 channel_ctx;
+
+   /**
+* @hardware_version: [out]
+*
+* Version of the engine hardware. This can be used by userspace
+* to determine how the engine needs to be programmed.
+*/
+   __u32 hardware_version;
+
+   __u32 reserved[2];
+};
+
+struct drm_tegra_channel_close {
+   /**
+* @channel_ctx: [in]
+*
+* Identifier of the channel to close.
+   

[RFC PATCH v2 13/17] gpu: host1x: Reset max value when freeing a syncpoint

2020-09-05 Thread Mikko Perttunen
With job recovery becoming optional, syncpoints may have a mismatch
between their value and max value when freed. As such, when freeing,
set the max value to the current value of the syncpoint so that it
is in a sane state for the next user.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/syncpt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 2fad8b2a55cc..82ecb4ac387e 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -385,6 +385,7 @@ static void syncpt_release(struct kref *ref)
 {
struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref);
 
+   atomic_set(&sp->max_val, host1x_syncpt_read_min(sp));
sp->locked = false;
 
mutex_lock(&sp->host->syncpt_mutex);
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 01/17] gpu: host1x: Use different lock classes for each client

2020-09-05 Thread Mikko Perttunen
To avoid false lockdep warnings, give each client lock a different
lock class, passed from the initialization site by macro.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/bus.c | 7 ---
 include/linux/host1x.h   | 9 -
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index e201f62d62c0..4101f64bd545 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -714,13 +714,14 @@ EXPORT_SYMBOL(host1x_driver_unregister);
  * device and call host1x_device_init(), which will in turn call each client's
  * &host1x_client_ops.init implementation.
  */
-int host1x_client_register(struct host1x_client *client)
+int __host1x_client_register(struct host1x_client *client,
+  struct lock_class_key *key)
 {
struct host1x *host1x;
int err;
 
INIT_LIST_HEAD(&client->list);
-   mutex_init(&client->lock);
+   __mutex_init(&client->lock, "host1x client lock", key);
client->usecount = 0;
 
mutex_lock(&devices_lock);
@@ -741,7 +742,7 @@ int host1x_client_register(struct host1x_client *client)
 
return 0;
 }
-EXPORT_SYMBOL(host1x_client_register);
+EXPORT_SYMBOL(__host1x_client_register);
 
 /**
  * host1x_client_unregister() - unregister a host1x client
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 20c885d0bddc..f711fc0154f4 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -320,7 +320,14 @@ static inline struct host1x_device 
*to_host1x_device(struct device *dev)
 int host1x_device_init(struct host1x_device *device);
 int host1x_device_exit(struct host1x_device *device);
 
-int host1x_client_register(struct host1x_client *client);
+int __host1x_client_register(struct host1x_client *client,
+struct lock_class_key *key);
+#define host1x_client_register(class) \
+   ({ \
+   static struct lock_class_key __key; \
+   __host1x_client_register(class, &__key); \
+   })
+
 int host1x_client_unregister(struct host1x_client *client);
 
 int host1x_client_suspend(struct host1x_client *client);
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 08/17] gpu: host1x: Implement /dev/host1x device node

2020-09-05 Thread Mikko Perttunen
Add the /dev/host1x device node, implementing the following
functionality:

- Reading syncpoint values
- Allocating syncpoints (providing syncpoint FDs)
- Incrementing syncpoints (based on syncpoint FD)

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/Makefile |   1 +
 drivers/gpu/host1x/dev.c|   9 ++
 drivers/gpu/host1x/dev.h|   3 +
 drivers/gpu/host1x/uapi.c   | 275 
 drivers/gpu/host1x/uapi.h   |  22 +++
 include/linux/host1x.h  |   2 +
 6 files changed, 312 insertions(+)
 create mode 100644 drivers/gpu/host1x/uapi.c
 create mode 100644 drivers/gpu/host1x/uapi.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 096017b8789d..882f928d75e1 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -9,6 +9,7 @@ host1x-y = \
job.o \
debug.o \
mipi.o \
+   uapi.o \
hw/host1x01.o \
hw/host1x02.o \
hw/host1x04.o \
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index d0ebb70e2fdd..641317d23828 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -461,6 +461,12 @@ static int host1x_probe(struct platform_device *pdev)
goto deinit_syncpt;
}
 
+   err = host1x_uapi_init(&host->uapi, host);
+   if (err) {
+   dev_err(&pdev->dev, "failed to initialize uapi\n");
+   goto deinit_intr;
+   }
+
host1x_debug_init(host);
 
if (host->info->has_hypervisor)
@@ -480,6 +486,8 @@ static int host1x_probe(struct platform_device *pdev)
host1x_unregister(host);
 deinit_debugfs:
host1x_debug_deinit(host);
+   host1x_uapi_deinit(&host->uapi);
+deinit_intr:
host1x_intr_deinit(host);
 deinit_syncpt:
host1x_syncpt_deinit(host);
@@ -501,6 +509,7 @@ static int host1x_remove(struct platform_device *pdev)
 
host1x_unregister(host);
host1x_debug_deinit(host);
+   host1x_uapi_deinit(&host->uapi);
host1x_intr_deinit(host);
host1x_syncpt_deinit(host);
reset_control_assert(host->rst);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 63010ae37a97..7b8b7e20e32b 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -17,6 +17,7 @@
 #include "intr.h"
 #include "job.h"
 #include "syncpt.h"
+#include "uapi.h"
 
 struct host1x_syncpt;
 struct host1x_syncpt_base;
@@ -143,6 +144,8 @@ struct host1x {
struct list_head list;
 
struct device_dma_parameters dma_parms;
+
+   struct host1x_uapi uapi;
 };
 
 void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
diff --git a/drivers/gpu/host1x/uapi.c b/drivers/gpu/host1x/uapi.c
new file mode 100644
index ..bc10e5fc0813
--- /dev/null
+++ b/drivers/gpu/host1x/uapi.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * /dev/host1x syncpoint interface
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "dev.h"
+#include "syncpt.h"
+#include "uapi.h"
+
+#include 
+
+static int syncpt_file_release(struct inode *inode, struct file *file)
+{
+   struct host1x_syncpt *sp = file->private_data;
+
+   host1x_syncpt_put(sp);
+
+   return 0;
+}
+
+static int syncpt_file_ioctl_info(struct host1x_syncpt *sp, void __user *data)
+{
+   struct host1x_syncpoint_info args;
+   unsigned long copy_err;
+
+   copy_err = copy_from_user(&args, data, sizeof(args));
+   if (copy_err)
+   return -EFAULT;
+
+   if (args.reserved[0] || args.reserved[1] || args.reserved[2])
+   return -EINVAL;
+
+   args.id = sp->id;
+
+   copy_err = copy_to_user(data, &args, sizeof(args));
+   if (copy_err)
+   return -EFAULT;
+
+   return 0;
+}
+
+static int syncpt_file_ioctl_incr(struct host1x_syncpt *sp, void __user *data)
+{
+   struct host1x_syncpoint_increment args;
+   unsigned long copy_err;
+   u32 i;
+
+   copy_err = copy_from_user(&args, data, sizeof(args));
+   if (copy_err)
+   return -EFAULT;
+
+   for (i = 0; i < args.count; i++) {
+   host1x_syncpt_incr(sp);
+   if (signal_pending(current))
+   return -EINTR;
+   }
+
+   return 0;
+}
+
+static long syncpt_file_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+   void __user *data = (void __user *)arg;
+   long err;
+
+   switch (cmd) {
+   case HOST1X_IOCTL_SYNCPOINT_INFO:
+   err = syncpt_file_ioctl_info(file->private_data, data);
+   break;
+
+   case HOST1X_IOCTL_SYNCPOINT_INCREMENT:
+   err = syncpt_file_ioctl_incr(file->private_data, data);
+   break;
+
+   default:
+   err = -ENOTTY;
+   }
+
+   return err;
+}
+
+static const struct file_operations syncpt_file_fops =

[RFC PATCH v2 05/17] gpu: host1x: Use HW-equivalent syncpoint expiration check

2020-09-05 Thread Mikko Perttunen
Make syncpoint expiration checks always use the same logic used by
the hardware. This ensures that there are no race conditions that
could occur because of the hardware triggering a syncpoint interrupt
and then the driver disagreeing.

One situation where this could occur is if a job incremented a
syncpoint too many times -- then the hardware would trigger an
interrupt, but the driver would assume that a syncpoint value
greater than the syncpoint's max value is in the future, and not
clean up the job.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/syncpt.c | 51 ++---
 1 file changed, 2 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 7cb80d4768b1..5329a0886d29 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -306,59 +306,12 @@ EXPORT_SYMBOL(host1x_syncpt_wait);
 bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh)
 {
u32 current_val;
-   u32 future_val;
 
smp_rmb();
 
current_val = (u32)atomic_read(&sp->min_val);
-   future_val = (u32)atomic_read(&sp->max_val);
-
-   /* Note the use of unsigned arithmetic here (mod 1<<32).
-*
-* c = current_val = min_val= the current value of the syncpoint.
-* t = thresh   = the value we are checking
-* f = future_val  = max_val= the value c will reach when all
-*outstanding increments have completed.
-*
-* Note that c always chases f until it reaches f.
-*
-* Dtf = (f - t)
-* Dtc = (c - t)
-*
-*  Consider all cases:
-*
-*  A) .c..t..f.Dtf < Dtc   need to wait
-*  B) .c.f..t..Dtf > Dtc   expired
-*  C) ..t..c.f.Dtf > Dtc   expired(Dct very 
large)
-*
-*  Any case where f==c: always expired (for any t).Dtf == Dcf
-*  Any case where t==c: always expired (for any f).Dtf >= Dtc 
(because Dtc==0)
-*  Any case where t==f!=c: always wait.Dtf <  Dtc 
(because Dtf==0,
-*  Dtc!=0)
-*
-*  Other cases:
-*
-*  A) .t..f..c.Dtf < Dtc   need to wait
-*  A) .f..c..t.Dtf < Dtc   need to wait
-*  A) .f..t..c.Dtf > Dtc   expired
-*
-*   So:
-* Dtf >= Dtc implies EXPIRED   (return true)
-* Dtf <  Dtc implies WAIT  (return false)
-*
-* Note: If t is expired then we *cannot* wait on it. We would wait
-* forever (hang the system).
-*
-* Note: do NOT get clever and remove the -thresh from both sides. It
-* is NOT the same.
-*
-* If future valueis zero, we have a client managed sync point. In that
-* case we do a direct comparison.
-*/
-   if (!host1x_syncpt_client_managed(sp))
-   return future_val - thresh >= current_val - thresh;
-   else
-   return (s32)(current_val - thresh) >= 0;
+
+   return ((current_val - thresh) & 0x8000U) == 0U;
 }
 
 int host1x_syncpt_init(struct host1x *host)
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 16/17] drm/tegra: Allocate per-engine channel in core code

2020-09-05 Thread Mikko Perttunen
To avoid duplication, allocate the per-engine shared channel in the
core code instead. Once MLOCKs are implemented on Host1x side, we
can also update this to avoid allocating a shared channel when
MLOCKs are enabled.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c | 11 +++
 drivers/gpu/drm/tegra/drm.h |  4 
 2 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7437c67924aa..7124b0b0154b 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -887,6 +887,14 @@ static struct drm_driver tegra_drm_driver = {
 int tegra_drm_register_client(struct tegra_drm *tegra,
  struct tegra_drm_client *client)
 {
+   /*
+* When MLOCKs are implemented, change to allocate a shared channel
+* only when MLOCKs are disabled.
+*/
+   client->shared_channel = host1x_channel_request(&client->base);
+   if (!client->shared_channel)
+   return -EBUSY;
+
mutex_lock(&tegra->clients_lock);
list_add_tail(&client->list, &tegra->clients);
client->drm = tegra;
@@ -903,6 +911,9 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
client->drm = NULL;
mutex_unlock(&tegra->clients_lock);
 
+   if (client->shared_channel)
+   host1x_channel_put(client->shared_channel);
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index b915a3946ad4..984925d0ad3e 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -91,8 +91,12 @@ struct tegra_drm_client {
struct list_head list;
struct tegra_drm *drm;
 
+   /* Set by driver */
unsigned int version;
const struct tegra_drm_client_ops *ops;
+
+   /* Set by TegraDRM core */
+   struct host1x_channel *shared_channel;
 };
 
 static inline struct tegra_drm_client *
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 03/17] gpu: host1x: Show number of pending waiters in debugfs

2020-09-05 Thread Mikko Perttunen
Show the number of pending waiters in the debugfs status file.
This is useful for testing to verify that waiters do not leak
or accumulate incorrectly.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/debug.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 3eee4318b158..2d06a7406b3b 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -69,6 +69,7 @@ static int show_channel(struct host1x_channel *ch, void 
*data, bool show_fifo)
 
 static void show_syncpts(struct host1x *m, struct output *o)
 {
+   struct list_head *pos;
unsigned int i;
 
host1x_debug_output(o, " syncpts \n");
@@ -76,12 +77,19 @@ static void show_syncpts(struct host1x *m, struct output *o)
for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
u32 max = host1x_syncpt_read_max(m->syncpt + i);
u32 min = host1x_syncpt_load(m->syncpt + i);
+   unsigned int waiters = 0;
 
-   if (!min && !max)
+   spin_lock(&m->syncpt[i].intr.lock);
+   list_for_each(pos, &m->syncpt[i].intr.wait_head)
+   waiters++;
+   spin_unlock(&m->syncpt[i].intr.lock);
+
+   if (!min && !max && !waiters)
continue;
 
-   host1x_debug_output(o, "id %u (%s) min %d max %d\n",
-   i, m->syncpt[i].name, min, max);
+   host1x_debug_output(o,
+   "id %u (%s) min %d max %d (%d waiters)\n",
+   i, m->syncpt[i].name, min, max, waiters);
}
 
for (i = 0; i < host1x_syncpt_nb_bases(m); i++) {
-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 10/17] WIP: gpu: host1x: Add no-recovery mode

2020-09-05 Thread Mikko Perttunen
Add a new property for jobs to enable or disable recovery i.e.
CPU increments of syncpoints to max value on job timeout. This
allows for a more solid model for hanged jobs, where userspace
doesn't need to guess if a syncpoint increment happened because
the job completed, or because job timeout was triggered.

On job timeout, we stop the channel, NOP all future jobs on the
channel using the same syncpoint, mark the syncpoint as locked
and resume the channel from the next job, if any.

The future jobs are NOPed, since because we don't do the CPU
increments, the value of the syncpoint is no longer synchronized,
and any waiters would become confused if a future job incremented
the syncpoint. The syncpoint is marked locked to ensure that any
future jobs cannot increment the syncpoint either, until the
application has recognized the situation and reallocated the
syncpoint.

WIP: There is a race condition between the locking and submission:
* Submission passes locking check
* Concurrent existing job timeouts, locking the syncpoint
* Submission still goes ahead

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c|  1 +
 drivers/gpu/host1x/cdma.c  | 42 +-
 drivers/gpu/host1x/hw/channel_hw.c |  6 -
 drivers/gpu/host1x/job.c   |  4 +++
 drivers/gpu/host1x/syncpt.c|  2 ++
 drivers/gpu/host1x/syncpt.h| 12 +
 include/linux/host1x.h |  9 +++
 7 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ceea9db341f0..7437c67924aa 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -197,6 +197,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
job->client = client;
job->class = client->class;
job->serialize = true;
+   job->syncpt_recovery = true;
 
/*
 * Track referenced BOs so that they can be unreferenced after the
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 6e6ca774f68d..59ad4ca38292 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -312,10 +312,6 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
bool signal = false;
struct host1x_job *job, *n;
 
-   /* If CDMA is stopped, queue is cleared and we can return */
-   if (!cdma->running)
-   return;
-
/*
 * Walk the sync queue, reading the sync point registers as necessary,
 * to consume as many sync queue entries as possible without blocking
@@ -324,7 +320,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
struct host1x_syncpt *sp = job->syncpt;
 
/* Check whether this syncpt has completed, and bail if not */
-   if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
+   if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
+   !job->cancelled) {
/* Start timer on next pending syncpt */
if (job->timeout)
cdma_start_timer_locked(cdma, job);
@@ -413,8 +410,11 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma 
*cdma,
else
restart_addr = cdma->last_pos;
 
+   if (!job)
+   goto resume;
+
/* do CPU increments for the remaining syncpts */
-   if (job) {
+   if (job->syncpt_recovery) {
dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
__func__);
 
@@ -433,8 +433,38 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma 
*cdma,
 
dev_dbg(dev, "%s: finished sync_queue modification\n",
__func__);
+   } else {
+   struct host1x_job *failed_job = job;
+
+   host1x_job_dump(dev, job);
+
+   host1x_syncpt_set_locked(job->syncpt);
+   failed_job->cancelled = true;
+
+   list_for_each_entry_continue(job, &cdma->sync_queue, list) {
+   unsigned int i;
+
+   if (job->syncpt != failed_job->syncpt)
+   continue;
+
+   for (i = 0; i < job->num_slots; i++) {
+   unsigned int slot = (job->first_get/8 + i) %
+   HOST1X_PUSHBUFFER_SLOTS;
+   u32 *mapped = cdma->push_buffer.mapped;
+
+   mapped[2*slot+0] = 0x1bad;
+   mapped[2*slot+1] = 0x1bad;
+   }
+
+   job->cancelled = true;
+   }
+
+   wmb();
+
+   update_cdma_locked(cdma);
}
 
+resume:
/* roll back DMAGET and start up channel again */
host1x_hw_cdma_resume(host1x, cdma, restart_addr);
 }
diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu

[RFC PATCH v2 17/17] WIP: drm/tegra: Implement new UAPI

2020-09-05 Thread Mikko Perttunen
Implement the new UAPI, and bump the TegraDRM major version.

WIP:
- Wait DMA reservations
- Implement firewall on TegraDRM side

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/Makefile  |   2 +
 drivers/gpu/drm/tegra/drm.c |  46 +-
 drivers/gpu/drm/tegra/drm.h |   5 +
 drivers/gpu/drm/tegra/uapi.h|  59 +++
 drivers/gpu/drm/tegra/uapi/submit.c | 687 
 drivers/gpu/drm/tegra/uapi/uapi.c   | 328 +
 6 files changed, 1109 insertions(+), 18 deletions(-)
 create mode 100644 drivers/gpu/drm/tegra/uapi.h
 create mode 100644 drivers/gpu/drm/tegra/uapi/submit.c
 create mode 100644 drivers/gpu/drm/tegra/uapi/uapi.c

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index d6cf202414f0..d480491564b7 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -3,6 +3,8 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
 
 tegra-drm-y := \
drm.o \
+   uapi/uapi.o \
+   uapi/submit.o \
gem.o \
fb.o \
dp.o \
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7124b0b0154b..acd734104c9a 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -20,24 +20,20 @@
 #include 
 #include 
 
+#include "uapi.h"
 #include "drm.h"
 #include "gem.h"
 
 #define DRIVER_NAME "tegra"
 #define DRIVER_DESC "NVIDIA Tegra graphics"
 #define DRIVER_DATE "20120330"
-#define DRIVER_MAJOR 0
+#define DRIVER_MAJOR 1
 #define DRIVER_MINOR 0
 #define DRIVER_PATCHLEVEL 0
 
 #define CARVEOUT_SZ SZ_64M
 #define CDMA_GATHER_FETCHES_MAX_NB 16383
 
-struct tegra_drm_file {
-   struct idr contexts;
-   struct mutex lock;
-};
-
 static int tegra_atomic_check(struct drm_device *drm,
  struct drm_atomic_state *state)
 {
@@ -90,7 +86,8 @@ static int tegra_drm_open(struct drm_device *drm, struct 
drm_file *filp)
if (!fpriv)
return -ENOMEM;
 
-   idr_init(&fpriv->contexts);
+   idr_init(&fpriv->legacy_contexts);
+   xa_init_flags(&fpriv->contexts, XA_FLAGS_ALLOC);
mutex_init(&fpriv->lock);
filp->driver_priv = fpriv;
 
@@ -432,7 +429,7 @@ static int tegra_client_open(struct tegra_drm_file *fpriv,
if (err < 0)
return err;
 
-   err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL);
+   err = idr_alloc(&fpriv->legacy_contexts, context, 1, 0, GFP_KERNEL);
if (err < 0) {
client->ops->close_channel(context);
return err;
@@ -487,13 +484,13 @@ static int tegra_close_channel(struct drm_device *drm, 
void *data,
 
mutex_lock(&fpriv->lock);
 
-   context = idr_find(&fpriv->contexts, args->context);
+   context = idr_find(&fpriv->legacy_contexts, args->context);
if (!context) {
err = -EINVAL;
goto unlock;
}
 
-   idr_remove(&fpriv->contexts, context->id);
+   idr_remove(&fpriv->legacy_contexts, context->id);
tegra_drm_context_free(context);
 
 unlock:
@@ -512,7 +509,7 @@ static int tegra_get_syncpt(struct drm_device *drm, void 
*data,
 
mutex_lock(&fpriv->lock);
 
-   context = idr_find(&fpriv->contexts, args->context);
+   context = idr_find(&fpriv->legacy_contexts, args->context);
if (!context) {
err = -ENODEV;
goto unlock;
@@ -541,7 +538,7 @@ static int tegra_submit(struct drm_device *drm, void *data,
 
mutex_lock(&fpriv->lock);
 
-   context = idr_find(&fpriv->contexts, args->context);
+   context = idr_find(&fpriv->legacy_contexts, args->context);
if (!context) {
err = -ENODEV;
goto unlock;
@@ -566,7 +563,7 @@ static int tegra_get_syncpt_base(struct drm_device *drm, 
void *data,
 
mutex_lock(&fpriv->lock);
 
-   context = idr_find(&fpriv->contexts, args->context);
+   context = idr_find(&fpriv->legacy_contexts, args->context);
if (!context) {
err = -ENODEV;
goto unlock;
@@ -734,11 +731,23 @@ static int tegra_gem_get_flags(struct drm_device *drm, 
void *data,
 #endif
 
 static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
-#ifdef CONFIG_DRM_TEGRA_STAGING
-   DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create,
+   DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_OPEN, tegra_drm_ioctl_channel_open,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_CLOSE, tegra_drm_ioctl_channel_close,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_MAP, tegra_drm_ioctl_channel_map,
  DRM_RENDER_ALLOW),
-   DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap,
+   DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap,
  DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit,
+

[RFC PATCH v2 12/17] gpu: host1x: Add support for syncpoint waits in CDMA pushbuffer

2020-09-05 Thread Mikko Perttunen
Add support for inserting syncpoint waits in the CDMA pushbuffer.
These waits need to be done in HOST1X class, while gather submitted
by the application execute in engine class.

Support is added by converting the gather list of job into a command
list that can include both gathers and waits. When the job is
submitted, these commands are pushed as the appropriate opcodes
on the CDMA pushbuffer.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/channel_hw.c | 51 +++
 drivers/gpu/host1x/hw/debug_hw.c   |  9 +++-
 drivers/gpu/host1x/job.c   | 67 +-
 drivers/gpu/host1x/job.h   | 14 +++
 include/linux/host1x.h |  5 ++-
 5 files changed, 105 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 145746c6f6fb..57e99de528de 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -55,31 +55,46 @@ static void submit_gathers(struct host1x_job *job)
 #endif
unsigned int i;
 
-   for (i = 0; i < job->num_gathers; i++) {
-   struct host1x_job_gather *g = &job->gathers[i];
-   dma_addr_t addr = g->base + g->offset;
-   u32 op2, op3;
+   for (i = 0; i < job->num_cmds; i++) {
+   struct host1x_job_cmd *cmd = &job->cmds[i];
 
-   op2 = lower_32_bits(addr);
-   op3 = upper_32_bits(addr);
+   if (cmd->is_wait) {
+   /* TODO use modern wait */
+   host1x_cdma_push(cdma,
+host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+   host1x_uclass_wait_syncpt_r(), 1),
+host1x_class_host_wait_syncpt(cmd->wait.id,
+   cmd->wait.threshold));
+   host1x_cdma_push(
+   cdma, host1x_opcode_setclass(job->class, 0, 0),
+   HOST1X_OPCODE_NOP);
+   } else {
+   struct host1x_job_gather *g = &cmd->gather;
 
-   trace_write_gather(cdma, g->bo, g->offset, g->words);
+   dma_addr_t addr = g->base + g->offset;
+   u32 op2, op3;
 
-   if (op3 != 0) {
+   op2 = lower_32_bits(addr);
+   op3 = upper_32_bits(addr);
+
+   trace_write_gather(cdma, g->bo, g->offset, g->words);
+
+   if (op3 != 0) {
 #if HOST1X_HW >= 6
-   u32 op1 = host1x_opcode_gather_wide(g->words);
-   u32 op4 = HOST1X_OPCODE_NOP;
+   u32 op1 = host1x_opcode_gather_wide(g->words);
+   u32 op4 = HOST1X_OPCODE_NOP;
 
-   host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
+   host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
 #else
-   dev_err(dev, "invalid gather for push buffer %pad\n",
-   &addr);
-   continue;
+   dev_err(dev, "invalid gather for push buffer 
%pad\n",
+   &addr);
+   continue;
 #endif
-   } else {
-   u32 op1 = host1x_opcode_gather(g->words);
+   } else {
+   u32 op1 = host1x_opcode_gather(g->words);
 
-   host1x_cdma_push(cdma, op1, op2);
+   host1x_cdma_push(cdma, op1, op2);
+   }
}
}
 }
@@ -126,7 +141,7 @@ static int channel_submit(struct host1x_job *job)
struct host1x *host = dev_get_drvdata(ch->dev->parent);
 
trace_host1x_channel_submit(dev_name(ch->dev),
-   job->num_gathers, job->num_relocs,
+   job->num_cmds, job->num_relocs,
job->syncpt->id, job->syncpt_incrs);
 
/* TODO this is racy */
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index ceb48229d14b..35952fd5597e 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -208,10 +208,15 @@ static void show_channel_gathers(struct output *o, struct 
host1x_cdma *cdma)
job->first_get, job->timeout,
job->num_slots, job->num_unpins);
 
-   for (i = 0; i < job->num_gathers; i++) {
-   struct host1x_job_gather *g = &job->gathers[i];
+   for (i = 0; i < job->num_cmds; i++) {
+   struct host1x_job_gather *g;
u32 *mapped;
 
+   if (job->cmds[i].is_wait)
+   continue

[RFC PATCH v2 00/17] Host1x/TegraDRM UAPI

2020-09-05 Thread Mikko Perttunen
Hi all,

here's a second revision of the Host1x/TegraDRM UAPI proposal,
hopefully with most issues from v1 resolved, and also with
an implementation. There are still open issues with the
implementation:

* Relocs are now handled on TegraDRM side instead of Host1x,
  so the firewall is not aware of them, causing submission
  failure where the firewall is enabled. Proposed solution
  is to move the firewall to TegraDRM side, but this hasn't
  been done yet.
* For the new UAPI, syncpoint recovery on job timeout is
  disabled. What this means is that upon job timeout,
  all further jobs using that syncpoint are cancelled,
  and the syncpoint is marked unusable until it is freed.
  However, there is currently a race between the timeout
  handler and job submission, where submission can observe
  the syncpoint in non-locked state and yet the job
  cancellations won't cancel the new job.
* Waiting for DMA reservation fences is not implemented yet.
* I have only tested on Tegra186.

The series consists of three parts:

* The first part contains some fixes and improvements to
  the Host1x driver of more general nature,
* The second part adds the Host1x side UAPI, as well as
  Host1x-side changes needed for the new TegraDRM UAPI,
* The third part adds the new TegraDRM UAPI.

I have written some tests to test the new interface,
see https://github.com/cyndis/uapi-test. Porting of proper
userspace (e.g. opentegra, vdpau-tegra) will come once
there is some degree of conclusion on the UAPI definition.

The series can be also found in
https://github.com/cyndis/linux/commits/work/host1x-uapi.

Older versions:
v1: https://www.spinics.net/lists/linux-tegra/msg51000.html

Thank you,
Mikko

Mikko Perttunen (17):
  gpu: host1x: Use different lock classes for each client
  gpu: host1x: Allow syncpoints without associated client
  gpu: host1x: Show number of pending waiters in debugfs
  gpu: host1x: Remove cancelled waiters immediately
  gpu: host1x: Use HW-equivalent syncpoint expiration check
  gpu: host1x: Cleanup and refcounting for syncpoints
  gpu: host1x: Introduce UAPI header
  gpu: host1x: Implement /dev/host1x device node
  gpu: host1x: DMA fences and userspace fence creation
  WIP: gpu: host1x: Add no-recovery mode
  gpu: host1x: Add job release callback
  gpu: host1x: Add support for syncpoint waits in CDMA pushbuffer
  gpu: host1x: Reset max value when freeing a syncpoint
  drm/tegra: Add new UAPI to header
  drm/tegra: Add power_on/power_off engine callbacks
  drm/tegra: Allocate per-engine channel in core code
  WIP: drm/tegra: Implement new UAPI

 drivers/gpu/drm/tegra/Makefile  |   2 +
 drivers/gpu/drm/tegra/dc.c  |   4 +-
 drivers/gpu/drm/tegra/drm.c |  75 ++-
 drivers/gpu/drm/tegra/drm.h |  20 +-
 drivers/gpu/drm/tegra/gr2d.c|   4 +-
 drivers/gpu/drm/tegra/gr3d.c|   4 +-
 drivers/gpu/drm/tegra/uapi.h|  59 +++
 drivers/gpu/drm/tegra/uapi/submit.c | 687 
 drivers/gpu/drm/tegra/uapi/uapi.c   | 328 +
 drivers/gpu/drm/tegra/vic.c | 131 +++---
 drivers/gpu/host1x/Makefile |   2 +
 drivers/gpu/host1x/bus.c|   7 +-
 drivers/gpu/host1x/cdma.c   |  53 ++-
 drivers/gpu/host1x/debug.c  |  14 +-
 drivers/gpu/host1x/dev.c|   9 +
 drivers/gpu/host1x/dev.h|  10 +-
 drivers/gpu/host1x/fence.c  | 207 +
 drivers/gpu/host1x/fence.h  |  15 +
 drivers/gpu/host1x/hw/cdma_hw.c |   2 +-
 drivers/gpu/host1x/hw/channel_hw.c  |  67 ++-
 drivers/gpu/host1x/hw/debug_hw.c|  11 +-
 drivers/gpu/host1x/intr.c   |  23 +-
 drivers/gpu/host1x/intr.h   |   2 +
 drivers/gpu/host1x/job.c|  79 +++-
 drivers/gpu/host1x/job.h|  14 +
 drivers/gpu/host1x/syncpt.c | 137 +++---
 drivers/gpu/host1x/syncpt.h |  21 +-
 drivers/gpu/host1x/uapi.c   | 381 +++
 drivers/gpu/host1x/uapi.h   |  22 +
 include/linux/host1x.h  |  40 +-
 include/uapi/drm/tegra_drm.h| 431 +++--
 include/uapi/linux/host1x.h | 134 ++
 32 files changed, 2718 insertions(+), 277 deletions(-)
 create mode 100644 drivers/gpu/drm/tegra/uapi.h
 create mode 100644 drivers/gpu/drm/tegra/uapi/submit.c
 create mode 100644 drivers/gpu/drm/tegra/uapi/uapi.c
 create mode 100644 drivers/gpu/host1x/fence.c
 create mode 100644 drivers/gpu/host1x/fence.h
 create mode 100644 drivers/gpu/host1x/uapi.c
 create mode 100644 drivers/gpu/host1x/uapi.h
 create mode 100644 include/uapi/linux/host1x.h

-- 
2.28.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v2 06/17] gpu: host1x: Cleanup and refcounting for syncpoints

2020-09-05 Thread Mikko Perttunen
Add reference counting for allocated syncpoints to allow keeping
them allocated while jobs are referencing them. Additionally,
clean up various places using syncpoint IDs to use host1x_syncpt
pointers instead.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/dc.c |  4 +-
 drivers/gpu/drm/tegra/drm.c| 17 ---
 drivers/gpu/drm/tegra/gr2d.c   |  4 +-
 drivers/gpu/drm/tegra/gr3d.c   |  4 +-
 drivers/gpu/drm/tegra/vic.c|  4 +-
 drivers/gpu/host1x/cdma.c  | 11 ++---
 drivers/gpu/host1x/dev.h   |  7 ++-
 drivers/gpu/host1x/hw/cdma_hw.c|  2 +-
 drivers/gpu/host1x/hw/channel_hw.c | 10 ++--
 drivers/gpu/host1x/hw/debug_hw.c   |  2 +-
 drivers/gpu/host1x/job.c   |  5 +-
 drivers/gpu/host1x/syncpt.c| 75 +++---
 drivers/gpu/host1x/syncpt.h|  3 ++
 include/linux/host1x.h |  8 ++--
 14 files changed, 99 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 9a0b3240bc58..efb41c10dad4 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2127,7 +2127,7 @@ static int tegra_dc_init(struct host1x_client *client)
drm_plane_cleanup(primary);
 
host1x_client_iommu_detach(client);
-   host1x_syncpt_free(dc->syncpt);
+   host1x_syncpt_put(dc->syncpt);
 
return err;
 }
@@ -2152,7 +2152,7 @@ static int tegra_dc_exit(struct host1x_client *client)
}
 
host1x_client_iommu_detach(client);
-   host1x_syncpt_free(dc->syncpt);
+   host1x_syncpt_put(dc->syncpt);
 
return 0;
 }
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ba9d1c3e7cac..ceea9db341f0 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -171,7 +171,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
struct drm_tegra_syncpt syncpt;
struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
struct drm_gem_object **refs;
-   struct host1x_syncpt *sp;
+   struct host1x_syncpt *sp = NULL;
struct host1x_job *job;
unsigned int num_refs;
int err;
@@ -298,8 +298,8 @@ int tegra_drm_submit(struct tegra_drm_context *context,
goto fail;
}
 
-   /* check whether syncpoint ID is valid */
-   sp = host1x_syncpt_get(host1x, syncpt.id);
+   /* Syncpoint ref will be dropped on job release. */
+   sp = host1x_syncpt_get_by_id(host1x, syncpt.id);
if (!sp) {
err = -ENOENT;
goto fail;
@@ -308,7 +308,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
job->is_addr_reg = context->client->ops->is_addr_reg;
job->is_valid_class = context->client->ops->is_valid_class;
job->syncpt_incrs = syncpt.incrs;
-   job->syncpt_id = syncpt.id;
+   job->syncpt = sp;
job->timeout = 1;
 
if (args->timeout && args->timeout < 1)
@@ -327,6 +327,9 @@ int tegra_drm_submit(struct tegra_drm_context *context,
args->fence = job->syncpt_end;
 
 fail:
+   if (sp)
+   host1x_syncpt_put(sp);
+
while (num_refs--)
drm_gem_object_put(refs[num_refs]);
 
@@ -380,7 +383,7 @@ static int tegra_syncpt_read(struct drm_device *drm, void 
*data,
struct drm_tegra_syncpt_read *args = data;
struct host1x_syncpt *sp;
 
-   sp = host1x_syncpt_get(host, args->id);
+   sp = host1x_syncpt_get_by_id_noref(host, args->id);
if (!sp)
return -EINVAL;
 
@@ -395,7 +398,7 @@ static int tegra_syncpt_incr(struct drm_device *drm, void 
*data,
struct drm_tegra_syncpt_incr *args = data;
struct host1x_syncpt *sp;
 
-   sp = host1x_syncpt_get(host1x, args->id);
+   sp = host1x_syncpt_get_by_id_noref(host1x, args->id);
if (!sp)
return -EINVAL;
 
@@ -409,7 +412,7 @@ static int tegra_syncpt_wait(struct drm_device *drm, void 
*data,
struct drm_tegra_syncpt_wait *args = data;
struct host1x_syncpt *sp;
 
-   sp = host1x_syncpt_get(host1x, args->id);
+   sp = host1x_syncpt_get_by_id_noref(host1x, args->id);
if (!sp)
return -EINVAL;
 
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 1a0d3ba6e525..d857a99b21a7 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -67,7 +67,7 @@ static int gr2d_init(struct host1x_client *client)
 detach:
host1x_client_iommu_detach(client);
 free:
-   host1x_syncpt_free(client->syncpts[0]);
+   host1x_syncpt_put(client->syncpts[0]);
 put:
host1x_channel_put(gr2d->channel);
return err;
@@ -86,7 +86,7 @@ static int gr2d_exit(struct host1x_client *client)
return err;
 
host1x_client_iommu_detach(client);
-   host1x_syncpt_free(client->syncpts[0]);
+   host1x_syncpt_put(client->syncpts[0]);
host1x_channe

Re: [PATCH v9 0/5] Add support for iMX8MQ Display Controller Subsystem

2020-09-05 Thread Guido Günther
Hi Laurentiu,
On Fri, Jul 31, 2020 at 10:54:29AM +0200, Guido Günther wrote:
> Hi,
> On Fri, Jul 31, 2020 at 11:18:28AM +0300, Laurentiu Palcu wrote:
> > From: Laurentiu Palcu 
> > 
> > Hi,
> > 
> > This patchset adds initial DCSS support for iMX8MQ chip. Initial support
> > includes only graphics plane support (no video planes), no HDR10 
> > capabilities,
> > no graphics decompression (only linear, tiled and super-tiled buffers 
> > allowed).
> > 
> > Support for the rest of the features will be added incrementally, in 
> > subsequent
> > patches.
> > 
> > The patchset was tested with both HDP driver (in the downstream tree) and 
> > the upstream
> > MIPI-DSI driver (with a couple of patches on top, to make it work correctly 
> > with DCSS).
> > 
> > Thanks,
> > Laurentiu
> > 
> > Changes in v9:
> >  * Fixed a compilation issue found by Guido in his setup: 'select
> >VIDEOMODE_HELPERS' was missing from Kconfig;
> >  * Use imx8mq-clock.h in the bindings file so one can understand what
> >those clock values mean;
> >  * no other changes done. Couldn't address the hang Guido reported as
> >it's not happening in my setup. However, in my tree, there are some
> >extra NWL and ADV patches applied on top of upstream ones... Also,
> >removing them and testing only with upstream, even if there's no
> >image out, does not produce a hang... :/
> 
> I don't think this should hold up merging.

And i retested your v9 series on next-20200903 on a librem5 devkit and
it works. Looking back I spotted an error in my clock configuration, so

Tested-by: Guido Günther 

Cheers,
 -- Guido

> Cheers,
>  -- Guido
> 
> > 
> > Changes in v8:
> >  * Removed 'select RESET_CONTROLLER" from Kconfig as Philipp pointed
> >out. SRC is not used in DCSS driver;
> >  * Nothing else changed;
> > 
> > Changes in v7:
> >  * Added a patch to initialize the connector using the drm_bridge_connector
> >API as Sam suggested. Tested it using NWL_DSI and ADV7535 with
> >Guido's patch [1] applied and one fix for ADV [2]. Also, some extra
> >patches for ADV and NWL were needed, from our downstream tree, which
> >will be upstreamed soon by their author;
> >  * Rest of the patches are untouched;
> > 
> > [1] https://lists.freedesktop.org/archives/dri-devel/2020-July/273025.html
> > [2] https://lists.freedesktop.org/archives/dri-devel/2020-July/273132.html
> > 
> > Changes in v6:
> >  * Addressed Rob's comment and added "additionalProperties: false" at
> >the end of the bindings' properties. However, this change surfaced
> >an issue with the assigned-clock* properties not being documented in
> >the properties section. Added the descriptions and the bindings patch
> >will need another review;
> >  * Added an entry for DCSS driver in the MAINTAINERS file;
> >  * Removed the component framework patch altogether;
> > 
> > Changes in v5:
> >  * Rebased to latest;
> >  * Took out component framework support and made it a separate patch so
> >that people can still test with HDP driver, which makes use of it.
> >But the idea is to get rid of it once HDP driver's next versions
> >will remove component framework as well;
> >  * Slight improvement to modesetting: avoid cutting off the pixel clock
> >if the new mode and the old one are equal. Also, in this case, is
> >not necessary to wait for DTG to shut off. This would allow to switch
> >from 8b RGB to 12b YUV422, for example, with no interruptions (at least
> >from DCSS point of view);
> >  * Do not fire off CTXLD when going to suspend, unless it still has
> >entries that need to be committed to DCSS;
> >  * Addressed Rob's comments on bindings;
> > 
> > Changes in v4:
> >  * Addressed Lucas and Philipp's comments:
> >* Added DRM_KMS_CMA_HELPER dependency in Kconfig;
> >* Removed usage of devm_ functions since I'm already doing all the
> >  clean-up in the submodules_deinit();
> >* Moved the drm_crtc_arm_vblank_event() in dcss_crtc_atomic_flush();
> >* Removed en_completion variable from dcss_crtc since this was
> >  introduced mainly to avoid vblank timeout warnings which were fixed
> >  by arming the vblank event in flush() instead of begin();
> >* Removed clks_on and irq_enabled flags since all the calls to
> >  enabling/disabling clocks and interrupts were balanced;
> >* Removed the custom atomic_commit callback and used the DRM core
> >  helper and, in the process, got rid of a workqueue that wasn't
> >  necessary anymore;
> >* Fixed some minor DT binding issues flagged by Philipp;
> >* Some other minor changes suggested by Lucas;
> >  * Removed YUV formats from the supported formats as these cannot work
> >without the HDR10 module CSCs and LUTs. Will add them back when I
> >will add support for video planes;
> > 
> > Changes in v3:
> >  * rebased to latest linux-next and made it compile as drmP.h was
> >removed;
> >  * removed the patch adding the VIDE

[Bug 209163] New: amdgpu: The CS has been cancelled because the context is lost

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209163

Bug ID: 209163
   Summary: amdgpu: The CS has been cancelled because the context
is lost
   Product: Drivers
   Version: 2.5
Kernel Version: 4.9.118
  Hardware: x86-64
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: high
  Priority: P1
 Component: Video(DRI - non Intel)
  Assignee: drivers_video-...@kernel-bugs.osdl.org
  Reporter: satish...@outlook.in
Regression: No

Created attachment 292355
  --> https://bugzilla.kernel.org/attachment.cgi?id=292355&action=edit
dmesg log

I am getting error after playing application continuously .

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 209163] amdgpu: The CS has been cancelled because the context is lost

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209163

--- Comment #1 from Satish patel (satish...@outlook.in) ---
Created attachment 292357
  --> https://bugzilla.kernel.org/attachment.cgi?id=292357&action=edit
AMDGPU version information

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 209163] amdgpu: The CS has been cancelled because the context is lost

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209163

--- Comment #2 from Satish patel (satish...@outlook.in) ---
Created attachment 292359
  --> https://bugzilla.kernel.org/attachment.cgi?id=292359&action=edit
Mesa_opencl version information

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 209163] amdgpu: The CS has been cancelled because the context is lost

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209163

--- Comment #3 from Satish patel (satish...@outlook.in) ---
Created attachment 292361
  --> https://bugzilla.kernel.org/attachment.cgi?id=292361&action=edit
lspci information

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [RFC PATCH v2 06/17] gpu: host1x: Cleanup and refcounting for syncpoints

2020-09-05 Thread Mikko Perttunen

On 9/5/20 5:30 PM, Dmitry Osipenko wrote:

05.09.2020 13:34, Mikko Perttunen пишет:
...

+
+/**
+ * host1x_syncpt_put() - free a requested syncpoint
+ * @sp: host1x syncpoint
+ *
+ * Release a syncpoint previously allocated using host1x_syncpt_request(). A
+ * host1x client driver should call this when the syncpoint is no longer in
+ * use.
+ */
+void host1x_syncpt_put(struct host1x_syncpt *sp)
+{
+   if (!sp)
+   return;
+
+   kref_put(&sp->ref, syncpt_release);
+}
+EXPORT_SYMBOL(host1x_syncpt_put);
  
  void host1x_syncpt_deinit(struct host1x *host)

  {
@@ -471,16 +478,48 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
  }
  
  /**

- * host1x_syncpt_get() - obtain a syncpoint by ID
+ * host1x_syncpt_get_by_id() - obtain a syncpoint by ID
+ * @host: host1x controller
+ * @id: syncpoint ID
+ */
+struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host,
+ unsigned int id)
+{
+   if (id >= host->info->nb_pts)
+   return NULL;
+
+   if (kref_get_unless_zero(&host->syncpt[id].ref))
+   return &host->syncpt[id];
+   else
+   return NULL;
+}
+EXPORT_SYMBOL(host1x_syncpt_get_by_id);
+
+/**
+ * host1x_syncpt_get_by_id_noref() - obtain a syncpoint by ID but don't
+ * increase the refcount.
   * @host: host1x controller
   * @id: syncpoint ID
   */
-struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
+struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host,
+   unsigned int id)
  {
if (id >= host->info->nb_pts)
return NULL;
  
-	return host->syncpt + id;

+   return &host->syncpt[id];
+}
+EXPORT_SYMBOL(host1x_syncpt_get_by_id_noref);
+
+/**
+ * host1x_syncpt_get() - increment syncpoint refcount
+ * @sp: syncpoint
+ */
+struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp)
+{
+   kref_get(&sp->ref);
+
+   return sp;
  }
  EXPORT_SYMBOL(host1x_syncpt_get);


Hello, Mikko!

What do you think about to open-code all the host1x structs by moving
them all out into the public linux/host1x.h? Then we could inline all
these trivial single-line functions by having them defined in the public
header. This will avoid all the unnecessary overhead by allowing
compiler to optimize the code nicely.

Of course this could be a separate change and it could be done sometime
later, I just wanted to share this quick thought for the start of the
review.



Hi :)

I think for such micro-optimizations we should have a benchmark to 
evaluate against. I'm not sure we have all that many function calls into 
here overall that it would make a noticeable difference. In any case, as 
you said, I'd prefer to keep further refactoring to a separate series to 
avoid growing this series too much.


Mikko
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 209159] AMD Vega 20 framebuffer switch fails on 5.9rc2+

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209159

Alex Deucher (alexdeuc...@gmail.com) changed:

   What|Removed |Added

 CC||alexdeuc...@gmail.com

--- Comment #1 from Alex Deucher (alexdeuc...@gmail.com) ---
Please attach your full dmesg output.  Can you bisect?

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 1/1] drm/amdgpu: Convert to using devm_drm_dev_alloc()

2020-09-05 Thread Alex Deucher
On Thu, Sep 3, 2020 at 9:22 PM Luben Tuikov  wrote:
>
> Convert to using devm_drm_dev_alloc(),
> as drm_dev_init() is going away.
>
> Signed-off-by: Luben Tuikov 

I think we can drop the final drm_put in the error case?  I think the
unwinding in current devm code should take care of it.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++
>  1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 146a85c8df1c..06d994187c24 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -1142,18 +1142,13 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
> if (ret)
> return ret;
>
> -   adev = kzalloc(sizeof(*adev), GFP_KERNEL);
> -   if (!adev)
> -   return -ENOMEM;
> +   adev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*adev), 
> ddev);
> +   if (IS_ERR(adev))
> +   return PTR_ERR(adev);
>
> adev->dev  = &pdev->dev;
> adev->pdev = pdev;
> ddev = adev_to_drm(adev);
> -   ret = drm_dev_init(ddev, &kms_driver, &pdev->dev);
> -   if (ret)
> -   goto err_free;
> -
> -   drmm_add_final_kfree(ddev, adev);
>
> if (!supports_atomic)
> ddev->driver_features &= ~DRIVER_ATOMIC;
> --
> 2.28.0.394.ge197136389
>
> ___
> amd-gfx mailing list
> amd-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 4/5] dt-bindings: display/bridge: nwl-dsi: Document fsl,clock-drop-level property

2020-09-05 Thread Laurent Pinchart
Hi Robert,

Thank you for the patch.

On Fri, Aug 28, 2020 at 02:13:31PM +0300, Robert Chiras (OSS) wrote:
> From: Robert Chiras 
> 
> Add documentation for a new property: 'fsl,clock-drop-level'.
> 
> Signed-off-by: Robert Chiras 
> ---
>  Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml 
> b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
> index 8b5741b..b415f4e 100644
> --- a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
> +++ b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
> @@ -143,6 +143,10 @@ properties:
>  
>  additionalProperties: false
>  
> +  clock-drop-level:
> +description:
> +  Specifies the level at wich the crtc_clock should be dropped
> +

There's no "crtc_clock" defined in the bindings. As DT bindings
shouldn't be tied to a particular driver implementation, could you
document this property without referring to concepts specific to the
driver ? I think the documentation should also be extended, looking at
this patch I have no idea what this does and how to compute the value
that should be set.

>  patternProperties:
>"^panel@[0-9]+$":
>  type: object

-- 
Regards,

Laurent Pinchart
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[GIT PULL FOR v5.9] Fix Kconfig dependency issue with DMAENGINES selection

2020-09-05 Thread Laurent Pinchart
Hi Dave and Daniel,

This small pull request fixes a Kconfig dependency issue introduced in
v5.9-rc1. Among the three patches required to fix the issue, the ASoC
fix has been merged in Linus' tree already. I haven't been able to get
the RapidIO patch reviewed by the subsystem maintainers, so I've
included it here as it's a dependency for the DRM patch.

The following changes since commit f75aef392f869018f78cfedf3c320a6b3fcfda6b:

  Linux 5.9-rc3 (2020-08-30 16:01:54 -0700)

are available in the Git repository at:

  git://linuxtv.org/pinchartl/media.git tags/drm-xlnx-dpsub-fixes-20200905

for you to fetch changes up to 3e8b2403545efd46c6347002e27eae4708205fd4:

  drm: xlnx: dpsub: Fix DMADEVICES Kconfig dependency (2020-09-05 19:52:54 
+0300)


Kconfig fixes for DRM_ZYNQMP_DPSUB DMA engine dependency


Laurent Pinchart (2):
  rapidio: Replace 'select' DMAENGINES 'with depends on'
  drm: xlnx: dpsub: Fix DMADEVICES Kconfig dependency

 drivers/gpu/drm/xlnx/Kconfig | 1 +
 drivers/rapidio/Kconfig  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

-- 
Regards,

Laurent Pinchart
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v16 14/20] iommu/arm-smmu: Prepare for the adreno-smmu implementation

2020-09-05 Thread Rob Clark
On Fri, Sep 4, 2020 at 9:00 AM Bjorn Andersson
 wrote:
>
> On Tue 01 Sep 11:46 CDT 2020, Rob Clark wrote:
>
> > From: Jordan Crouse 
> >
> > Do a bit of prep work to add the upcoming adreno-smmu implementation.
> >
> > Add an hook to allow the implementation to choose which context banks
> > to allocate.
> >
> > Move some of the common structs to arm-smmu.h in anticipation of them
> > being used by the implementations and update some of the existing hooks
> > to pass more information that the implementation will need.
> >
> > These modifications will be used by the upcoming Adreno SMMU
> > implementation to identify the GPU device and properly configure it
> > for pagetable switching.
> >
> > Co-developed-by: Rob Clark 
> > Signed-off-by: Jordan Crouse 
> > Signed-off-by: Rob Clark 
>
> As I built the handoff support on top of this patch I ended up
> reworking the alloc_context_bank() prototype to something I found a
> little bit cleaner.
>
> So perhaps you would be interested in squashing
> https://lore.kernel.org/linux-arm-msm/20200904155513.282067-2-bjorn.anders...@linaro.org/
> into this patch?

Yeah, I think this looks nicer, thanks

BR,
-R

> Otherwise, feel free to add my:
>
> Reviewed-by: Bjorn Andersson 
>
> Regards,
> Bjorn
>
> > ---
> >  drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |  2 +-
> >  drivers/iommu/arm/arm-smmu/arm-smmu.c  | 69 ++
> >  drivers/iommu/arm/arm-smmu/arm-smmu.h  | 51 +++-
> >  3 files changed, 68 insertions(+), 54 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c 
> > b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
> > index a9861dcd0884..88f17cc33023 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
> > @@ -69,7 +69,7 @@ static int cavium_cfg_probe(struct arm_smmu_device *smmu)
> >  }
> >
> >  static int cavium_init_context(struct arm_smmu_domain *smmu_domain,
> > - struct io_pgtable_cfg *pgtbl_cfg)
> > + struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >  {
> >   struct cavium_smmu *cs = container_of(smmu_domain->smmu,
> > struct cavium_smmu, smmu);
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
> > b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > index 8e884e58f208..68b7b9e6140e 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > @@ -65,41 +65,10 @@ module_param(disable_bypass, bool, S_IRUGO);
> >  MODULE_PARM_DESC(disable_bypass,
> >   "Disable bypass streams such that incoming transactions from devices 
> > that are not attached to an iommu domain will report an abort back to the 
> > device and will not be allowed to pass through the SMMU.");
> >
> > -struct arm_smmu_s2cr {
> > - struct iommu_group  *group;
> > - int count;
> > - enum arm_smmu_s2cr_type type;
> > - enum arm_smmu_s2cr_privcfg  privcfg;
> > - u8  cbndx;
> > -};
> > -
> >  #define s2cr_init_val (struct arm_smmu_s2cr){  
> >   \
> >   .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,\
> >  }
> >
> > -struct arm_smmu_smr {
> > - u16 mask;
> > - u16 id;
> > - boolvalid;
> > -};
> > -
> > -struct arm_smmu_cb {
> > - u64 ttbr[2];
> > - u32 tcr[2];
> > - u32 mair[2];
> > - struct arm_smmu_cfg *cfg;
> > -};
> > -
> > -struct arm_smmu_master_cfg {
> > - struct arm_smmu_device  *smmu;
> > - s16 smendx[];
> > -};
> > -#define INVALID_SMENDX   -1
> > -#define cfg_smendx(cfg, fw, i) \
> > - (i >= fw->num_ids ? INVALID_SMENDX : cfg->smendx[i])
> > -#define for_each_cfg_sme(cfg, fw, i, idx) \
> > - for (i = 0; idx = cfg_smendx(cfg, fw, i), i < fw->num_ids; ++i)
> > -
> >  static bool using_legacy_binding, using_generic_binding;
> >
> >  static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
> > @@ -234,19 +203,6 @@ static int arm_smmu_register_legacy_master(struct 
> > device *dev,
> >  }
> >  #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
> >
> > -static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
> > -{
> > - int idx;
> > -
> > - do {
> > - idx = find_next_zero_bit(map, end, start);
> > - if (idx == end)
> > - return -ENOSPC;
> > - } while (test_and_set_bit(idx, map));
> > -
> > - return idx;
> > -}
> > -
> >  static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
> >  {
> >   clear_bit(idx, map);
> > @@ -578,7 +534,7 @@ static void arm_smmu_init_context_bank(struct 
> > arm_smmu_domain *smmu_domain,
> >   }
> >  }
> >
> > -s

[PATCH v17 03/20] drm/msm/gpu: Add dev_to_gpu() helper

2020-09-05 Thread Rob Clark
From: Rob Clark 

In a later patch, the drvdata will not directly be 'struct msm_gpu *',
so add a helper to reduce the churn.

Signed-off-by: Rob Clark 
Reviewed-by: Jordan Crouse 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/adreno/adreno_device.c | 10 --
 drivers/gpu/drm/msm/msm_gpu.c  |  6 +++---
 drivers/gpu/drm/msm/msm_gpu.h  |  5 +
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 9eeb46bf2a5d..26664e1b30c0 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -282,7 +282,7 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
int ret;
 
if (pdev)
-   gpu = platform_get_drvdata(pdev);
+   gpu = dev_to_gpu(&pdev->dev);
 
if (!gpu) {
dev_err_once(dev->dev, "no GPU device was found\n");
@@ -425,7 +425,7 @@ static int adreno_bind(struct device *dev, struct device 
*master, void *data)
 static void adreno_unbind(struct device *dev, struct device *master,
void *data)
 {
-   struct msm_gpu *gpu = dev_get_drvdata(dev);
+   struct msm_gpu *gpu = dev_to_gpu(dev);
 
pm_runtime_force_suspend(dev);
gpu->funcs->destroy(gpu);
@@ -490,16 +490,14 @@ static const struct of_device_id dt_match[] = {
 #ifdef CONFIG_PM
 static int adreno_resume(struct device *dev)
 {
-   struct platform_device *pdev = to_platform_device(dev);
-   struct msm_gpu *gpu = platform_get_drvdata(pdev);
+   struct msm_gpu *gpu = dev_to_gpu(dev);
 
return gpu->funcs->pm_resume(gpu);
 }
 
 static int adreno_suspend(struct device *dev)
 {
-   struct platform_device *pdev = to_platform_device(dev);
-   struct msm_gpu *gpu = platform_get_drvdata(pdev);
+   struct msm_gpu *gpu = dev_to_gpu(dev);
 
return gpu->funcs->pm_suspend(gpu);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 57ddc9438351..4c67aedc5c33 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -24,7 +24,7 @@
 static int msm_devfreq_target(struct device *dev, unsigned long *freq,
u32 flags)
 {
-   struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+   struct msm_gpu *gpu = dev_to_gpu(dev);
struct dev_pm_opp *opp;
 
opp = devfreq_recommended_opp(dev, freq, flags);
@@ -45,7 +45,7 @@ static int msm_devfreq_target(struct device *dev, unsigned 
long *freq,
 static int msm_devfreq_get_dev_status(struct device *dev,
struct devfreq_dev_status *status)
 {
-   struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+   struct msm_gpu *gpu = dev_to_gpu(dev);
ktime_t time;
 
if (gpu->funcs->gpu_get_freq)
@@ -64,7 +64,7 @@ static int msm_devfreq_get_dev_status(struct device *dev,
 
 static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
 {
-   struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+   struct msm_gpu *gpu = dev_to_gpu(dev);
 
if (gpu->funcs->gpu_get_freq)
*freq = gpu->funcs->gpu_get_freq(gpu);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 37cffac4cbe3..da1ae2263047 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -144,6 +144,11 @@ struct msm_gpu {
bool hw_apriv;
 };
 
+static inline struct msm_gpu *dev_to_gpu(struct device *dev)
+{
+   return dev_get_drvdata(dev);
+}
+
 /* It turns out that all targets use the same ringbuffer size */
 #define MSM_GPU_RINGBUFFER_SZ SZ_32K
 #define MSM_GPU_RINGBUFFER_BLKSIZE 32
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 00/20] iommu/arm-smmu + drm/msm: per-process GPU pgtables

2020-09-05 Thread Rob Clark
From: Rob Clark 

NOTE: I have re-ordered the series, and propose that we could merge this
  series in the following order:

   1) 01-11 - merge via drm / msm-next
   2) 12-15 - merge via iommu, no dependency on msm-next pull req
   3) 16-18 - patch 16 has a dependency on 02 and 04, so it would
  need to come post -rc1 or on following cycle, but I
  think it would be unlikely to conflict with other
  arm-smmu patches (other than Bjorn's smmu handover
  series?)
   4) 19-20 - dt bits should be safe to land in any order without
  breaking anything



This series adds an Adreno SMMU implementation to arm-smmu to allow GPU hardware
pagetable switching.

The Adreno GPU has built in capabilities to switch the TTBR0 pagetable during
runtime to allow each individual instance or application to have its own
pagetable.  In order to take advantage of the HW capabilities there are certain
requirements needed of the SMMU hardware.

This series adds support for an Adreno specific arm-smmu implementation. The new
implementation 1) ensures that the GPU domain is always assigned context bank 0,
2) enables split pagetable support (TTBR1) so that the instance specific
pagetable can be swapped while the global memory remains in place and 3) shares
the current pagetable configuration with the GPU driver to allow it to create
its own io-pgtable instances.

The series then adds the drm/msm code to enable these features. For targets that
support it allocate new pagetables using the io-pgtable configuration shared by
the arm-smmu driver and swap them in during runtime.

This version of the series merges the previous patchset(s) [1] and [2]
with the following improvements:

v17: (Respin by Rob)
  - Squash cleanup from Bjorn into 14/20
  - Small fix in 10/20 for issue found in testing
v16: (Respin by Rob)
  - Fix indentation
  - Re-order series to split drm and iommu parts
v15: (Respin by Rob)
  - Adjust dt bindings to keep SoC specific compatible (Doug)
  - Add dts workaround for cheza fw limitation
  - Add missing 'select IOMMU_IO_PGTABLE' (Guenter)
v14: (Respin by Rob)
  - Minor update to 16/20 (only force ASID to zero in one place)
  - Addition of sc7180 dtsi patch.
v13: (Respin by Rob)
  - Switch to a private interface between adreno-smmu and GPU driver,
dropping the custom domain attr (Will Deacon)
  - Rework the SCTLR.HUPCF patch to add new fields in smmu_domain->cfg
rather than adding new impl hook (Will Deacon)
  - Drop for_each_cfg_sme() in favor of plain for() loop (Will Deacon)
  - Fix context refcnt'ing issue which was causing problems with GPU
crash recover stress testing.
  - Spiff up $debugfs/gem to show process information associated with
VMAs
v12:
  - Nitpick cleanups in gpu/drm/msm/msm_iommu.c (Rob Clark)
  - Reorg in gpu/drm/msm/msm_gpu.c (Rob Clark)
  - Use the default asid for the context bank so that iommu_tlb_flush_all works
  - Flush the UCHE after a page switch
  - Add the SCTLR.HUPCF patch at the end of the series
v11:
  - Add implementation specific get_attr/set_attr functions (per Rob Clark)
  - Fix context bank allocation (per Bjorn Andersson)
v10:
  - arm-smmu: add implementation hook to allocate context banks
  - arm-smmu: Match the GPU domain by stream ID instead of compatible string
  - arm-smmu: Make DOMAIN_ATTR_PGTABLE_CFG bi-directional. The leaf driver
queries the configuration to create a pagetable and then sends the newly
created configuration back to the smmu-driver to enable TTBR0
  - drm/msm: Add context reference counting for submissions
  - drm/msm: Use dummy functions to skip TLB operations on per-instance
pagetables

[1] https://lists.linuxfoundation.org/pipermail/iommu/2020-June/045653.html
[2] https://lists.linuxfoundation.org/pipermail/iommu/2020-June/045659.html

Jordan Crouse (12):
  drm/msm: Add a context pointer to the submitqueue
  drm/msm: Drop context arg to gpu->submit()
  drm/msm: Set the global virtual address range from the IOMMU domain
  drm/msm: Add support to create a local pagetable
  drm/msm: Add support for private address space instances
  drm/msm/a6xx: Add support for per-instance pagetables
  iommu/arm-smmu: Pass io-pgtable config to implementation specific
function
  iommu/arm-smmu: Add support for split pagetables
  iommu/arm-smmu: Prepare for the adreno-smmu implementation
  iommu/arm-smmu-qcom: Add implementation for the adreno GPU SMMU
  dt-bindings: arm-smmu: Add compatible string for Adreno GPU SMMU
  arm: dts: qcom: sm845: Set the compatible string for the GPU SMMU

Rob Clark (8):
  drm/msm: Remove dangling submitqueue references
  drm/msm: Add private interface for adreno-smmu
  drm/msm/gpu: Add dev_to_gpu() helper
  drm/msm: Set adreno_smmu as gpu's drvdata
  drm/msm: Show process names in gem_describe
  iommu/arm-smmu: Constify some helpers
  iommu/arm-smmu: Add a way for implementations to influence SCTLR
  arm: dts: qc

[PATCH v17 02/20] drm/msm: Add private interface for adreno-smmu

2020-09-05 Thread Rob Clark
From: Rob Clark 

This interface will be used for drm/msm to coordinate with the
qcom_adreno_smmu_impl to enable/disable TTBR0 translation.

Once TTBR0 translation is enabled, the GPU's CP (Command Processor)
will directly switch TTBR0 pgtables (and do the necessary TLB inv)
synchronized to the GPU's operation.  But help from the SMMU driver
is needed to initially bootstrap TTBR0 translation, which cannot be
done from the GPU.

Since this is a very special case, a private interface is used to
avoid adding highly driver specific things to the public iommu
interface.

Signed-off-by: Rob Clark 
Reviewed-by: Jordan Crouse 
Reviewed-by: Bjorn Andersson 
---
 include/linux/adreno-smmu-priv.h | 36 
 1 file changed, 36 insertions(+)
 create mode 100644 include/linux/adreno-smmu-priv.h

diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
new file mode 100644
index ..a889f28afb42
--- /dev/null
+++ b/include/linux/adreno-smmu-priv.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google, Inc
+ */
+
+#ifndef __ADRENO_SMMU_PRIV_H
+#define __ADRENO_SMMU_PRIV_H
+
+#include 
+
+/**
+ * struct adreno_smmu_priv - private interface between adreno-smmu and GPU
+ *
+ * @cookie:An opque token provided by adreno-smmu and passed
+ * back into the callbacks
+ * @get_ttbr1_cfg: Get the TTBR1 config for the GPUs context-bank
+ * @set_ttbr0_cfg: Set the TTBR0 config for the GPUs context bank.  A
+ * NULL config disables TTBR0 translation, otherwise
+ * TTBR0 translation is enabled with the specified cfg
+ *
+ * The GPU driver (drm/msm) and adreno-smmu work together for controlling
+ * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
+ * updating the SMMU for context switches, while on the other hand we do
+ * not want to duplicate all of the initial setup logic from arm-smmu.
+ *
+ * This private interface is used for the two drivers to coordinate.  The
+ * cookie and callback functions are populated when the GPU driver attaches
+ * it's domain.
+ */
+struct adreno_smmu_priv {
+const void *cookie;
+const struct io_pgtable_cfg *(*get_ttbr1_cfg)(const void *cookie);
+int (*set_ttbr0_cfg)(const void *cookie, const struct io_pgtable_cfg *cfg);
+};
+
+#endif /* __ADRENO_SMMU_PRIV_H */
\ No newline at end of file
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 05/20] drm/msm: Add a context pointer to the submitqueue

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Each submitqueue is attached to a context. Add a pointer to the
context to the submitqueue at create time and refcount it so
that it stays around through the life of the queue.

Co-developed-by: Rob Clark 
Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/msm_drv.c |  3 ++-
 drivers/gpu/drm/msm/msm_drv.h | 20 
 drivers/gpu/drm/msm/msm_gem.h |  1 +
 drivers/gpu/drm/msm/msm_gem_submit.c  |  6 +++---
 drivers/gpu/drm/msm/msm_gpu.h |  1 +
 drivers/gpu/drm/msm/msm_submitqueue.c |  3 +++
 6 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 79333842f70a..75cd7639f560 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -594,6 +594,7 @@ static int context_init(struct drm_device *dev, struct 
drm_file *file)
if (!ctx)
return -ENOMEM;
 
+   kref_init(&ctx->ref);
msm_submitqueue_init(dev, ctx);
 
ctx->aspace = priv->gpu ? priv->gpu->aspace : NULL;
@@ -615,7 +616,7 @@ static int msm_open(struct drm_device *dev, struct drm_file 
*file)
 static void context_close(struct msm_file_private *ctx)
 {
msm_submitqueue_close(ctx);
-   kfree(ctx);
+   msm_file_private_put(ctx);
 }
 
 static void msm_postclose(struct drm_device *dev, struct drm_file *file)
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index af259b0573ea..4561bfb5e745 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -57,6 +57,7 @@ struct msm_file_private {
struct list_head submitqueues;
int queueid;
struct msm_gem_address_space *aspace;
+   struct kref ref;
 };
 
 enum msm_mdp_plane_property {
@@ -428,6 +429,25 @@ void msm_submitqueue_close(struct msm_file_private *ctx);
 
 void msm_submitqueue_destroy(struct kref *kref);
 
+static inline void __msm_file_private_destroy(struct kref *kref)
+{
+   struct msm_file_private *ctx = container_of(kref,
+   struct msm_file_private, ref);
+
+   kfree(ctx);
+}
+
+static inline void msm_file_private_put(struct msm_file_private *ctx)
+{
+   kref_put(&ctx->ref, __msm_file_private_destroy);
+}
+
+static inline struct msm_file_private *msm_file_private_get(
+   struct msm_file_private *ctx)
+{
+   kref_get(&ctx->ref);
+   return ctx;
+}
 
 #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__)
 #define VERB(fmt, ...) if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 972490b14ba5..9c573c4269cb 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -142,6 +142,7 @@ struct msm_gem_submit {
bool valid; /* true if no cmdstream patching needed */
bool in_rb; /* "sudo" mode, copy cmds into RB */
struct msm_ringbuffer *ring;
+   struct msm_file_private *ctx;
unsigned int nr_cmds;
unsigned int nr_bos;
u32 ident; /* A "identifier" for the submit for logging */
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
b/drivers/gpu/drm/msm/msm_gem_submit.c
index 8cb9aa15ff90..1464b04d25d3 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -27,7 +27,7 @@
 #define BO_PINNED   0x2000
 
 static struct msm_gem_submit *submit_create(struct drm_device *dev,
-   struct msm_gpu *gpu, struct msm_gem_address_space *aspace,
+   struct msm_gpu *gpu,
struct msm_gpu_submitqueue *queue, uint32_t nr_bos,
uint32_t nr_cmds)
 {
@@ -43,7 +43,7 @@ static struct msm_gem_submit *submit_create(struct drm_device 
*dev,
return NULL;
 
submit->dev = dev;
-   submit->aspace = aspace;
+   submit->aspace = queue->ctx->aspace;
submit->gpu = gpu;
submit->fence = NULL;
submit->cmd = (void *)&submit->bos[nr_bos];
@@ -677,7 +677,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
}
}
 
-   submit = submit_create(dev, gpu, ctx->aspace, queue, args->nr_bos,
+   submit = submit_create(dev, gpu, queue, args->nr_bos,
args->nr_cmds);
if (!submit) {
ret = -ENOMEM;
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 1f65aec57a8f..c4ce462c30c5 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -193,6 +193,7 @@ struct msm_gpu_submitqueue {
u32 flags;
u32 prio;
int faults;
+   struct msm_file_private *ctx;
struct list_head node;
struct kref ref;
 };
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c 
b/drivers/gpu/drm/msm/msm_submitqueue.c
index 90c9d84e6155..c3d206105d28 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/m

[PATCH v17 01/20] drm/msm: Remove dangling submitqueue references

2020-09-05 Thread Rob Clark
From: Rob Clark 

Currently it doesn't matter, since we free the ctx immediately.  But
when we start refcnt'ing the ctx, we don't want old dangling list
entries to hang around.

Signed-off-by: Rob Clark 
Reviewed-by: Jordan Crouse 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/msm_submitqueue.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c 
b/drivers/gpu/drm/msm/msm_submitqueue.c
index a1d94be7883a..90c9d84e6155 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -49,8 +49,10 @@ void msm_submitqueue_close(struct msm_file_private *ctx)
 * No lock needed in close and there won't
 * be any more user ioctls coming our way
 */
-   list_for_each_entry_safe(entry, tmp, &ctx->submitqueues, node)
+   list_for_each_entry_safe(entry, tmp, &ctx->submitqueues, node) {
+   list_del(&entry->node);
msm_submitqueue_put(entry);
+   }
 }
 
 int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private 
*ctx,
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 04/20] drm/msm: Set adreno_smmu as gpu's drvdata

2020-09-05 Thread Rob Clark
From: Rob Clark 

This will be populated by adreno-smmu, to provide a way for coordinating
enabling/disabling TTBR0 translation.

Signed-off-by: Rob Clark 
Reviewed-by: Jordan Crouse 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/adreno/adreno_device.c | 2 --
 drivers/gpu/drm/msm/msm_gpu.c  | 2 +-
 drivers/gpu/drm/msm/msm_gpu.h  | 6 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 26664e1b30c0..58e03b20e1c7 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -417,8 +417,6 @@ static int adreno_bind(struct device *dev, struct device 
*master, void *data)
return PTR_ERR(gpu);
}
 
-   dev_set_drvdata(dev, gpu);
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 4c67aedc5c33..144dd63e747e 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -892,7 +892,7 @@ int msm_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,
gpu->gpu_cx = NULL;
 
gpu->pdev = pdev;
-   platform_set_drvdata(pdev, gpu);
+   platform_set_drvdata(pdev, &gpu->adreno_smmu);
 
msm_devfreq_init(gpu);
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index da1ae2263047..1f65aec57a8f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -7,6 +7,7 @@
 #ifndef __MSM_GPU_H__
 #define __MSM_GPU_H__
 
+#include 
 #include 
 #include 
 #include 
@@ -74,6 +75,8 @@ struct msm_gpu {
struct platform_device *pdev;
const struct msm_gpu_funcs *funcs;
 
+   struct adreno_smmu_priv adreno_smmu;
+
/* performance counters (hw & sw): */
spinlock_t perf_lock;
bool perfcntr_active;
@@ -146,7 +149,8 @@ struct msm_gpu {
 
 static inline struct msm_gpu *dev_to_gpu(struct device *dev)
 {
-   return dev_get_drvdata(dev);
+   struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);
+   return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
 }
 
 /* It turns out that all targets use the same ringbuffer size */
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 10/20] drm/msm/a6xx: Add support for per-instance pagetables

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Add support for using per-instance pagetables if all the dependencies are
available.

Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Akhil P Oommen 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 62 +++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  1 +
 drivers/gpu/drm/msm/msm_ringbuffer.h  |  1 +
 3 files changed, 64 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index f6aad038d8b6..92ebc73f51e6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -81,6 +81,49 @@ static void get_stats_counter(struct msm_ringbuffer *ring, 
u32 counter,
OUT_RING(ring, upper_32_bits(iova));
 }
 
+static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
+   struct msm_ringbuffer *ring, struct msm_file_private *ctx)
+{
+   phys_addr_t ttbr;
+   u32 asid;
+   u64 memptr = rbmemptr(ring, ttbr0);
+
+   if (ctx == a6xx_gpu->cur_ctx)
+   return;
+
+   if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
+   return;
+
+   /* Execute the table update */
+   OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
+   OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
+
+   OUT_RING(ring,
+   CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
+   CP_SMMU_TABLE_UPDATE_1_ASID(asid));
+   OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
+   OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
+
+   /*
+* Write the new TTBR0 to the memstore. This is good for debugging.
+*/
+   OUT_PKT7(ring, CP_MEM_WRITE, 4);
+   OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
+   OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
+   OUT_RING(ring, lower_32_bits(ttbr));
+   OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
+
+   /*
+* And finally, trigger a uche flush to be sure there isn't anything
+* lingering in that part of the GPU
+*/
+
+   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+   OUT_RING(ring, 0x31);
+
+   a6xx_gpu->cur_ctx = ctx;
+}
+
 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
@@ -90,6 +133,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i;
 
+   a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
+
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
rbmemptr_stats(ring, index, cpcycles_start));
 
@@ -704,6 +749,8 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
/* Always come up on rb 0 */
a6xx_gpu->cur_ring = gpu->rb[0];
 
+   a6xx_gpu->cur_ctx = NULL;
+
/* Enable the SQE_to start the CP engine */
gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
 
@@ -1016,6 +1063,20 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
return (unsigned long)busy_time;
 }
 
+static struct msm_gem_address_space *
+a6xx_create_private_address_space(struct msm_gpu *gpu)
+{
+   struct msm_mmu *mmu;
+
+   mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
+
+   if (IS_ERR(mmu))
+   return ERR_CAST(mmu);
+
+   return msm_gem_address_space_create(mmu,
+   "gpu", 0x1ULL, 0x1ULL);
+}
+
 static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -1039,6 +1100,7 @@ static const struct adreno_gpu_funcs funcs = {
.gpu_state_put = a6xx_gpu_state_put,
 #endif
.create_address_space = adreno_iommu_create_address_space,
+   .create_private_address_space = 
a6xx_create_private_address_space,
},
.get_timestamp = a6xx_get_timestamp,
 };
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 03ba60d5b07f..da22d7549d9b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -19,6 +19,7 @@ struct a6xx_gpu {
uint64_t sqe_iova;
 
struct msm_ringbuffer *cur_ring;
+   struct msm_file_private *cur_ctx;
 
struct a6xx_gmu gmu;
 };
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h 
b/drivers/gpu/drm/msm/msm_ringbuffer.h
index 7764373d0ed2..0987d6bf848c 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -31,6 +31,7 @@ struct msm_rbmemptrs {
volatile uint32_t fence;
 
volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
+   volatile u64 ttbr0;
 };
 
 struct msm_ringbuffer {
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-deve

[PATCH v17 16/20] iommu/arm-smmu-qcom: Add implementation for the adreno GPU SMMU

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Add a special implementation for the SMMU attached to most Adreno GPU
target triggered from the qcom,adreno-smmu compatible string.

The new Adreno SMMU implementation will enable split pagetables
(TTBR1) for the domain attached to the GPU device (SID 0) and
hard code it context bank 0 so the GPU hardware can implement
per-instance pagetables.

Co-developed-by: Rob Clark 
Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |   3 +
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 151 -
 drivers/iommu/arm/arm-smmu/arm-smmu.h  |   1 +
 3 files changed, 153 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 88f17cc33023..d199b4bff15d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -223,6 +223,9 @@ struct arm_smmu_device *arm_smmu_impl_init(struct 
arm_smmu_device *smmu)
of_device_is_compatible(np, "qcom,sm8250-smmu-500"))
return qcom_smmu_impl_init(smmu);
 
+   if (of_device_is_compatible(smmu->dev->of_node, "qcom,adreno-smmu"))
+   return qcom_adreno_smmu_impl_init(smmu);
+
if (of_device_is_compatible(np, "marvell,ap806-smmu-500"))
smmu->impl = &mrvl_mmu500_impl;
 
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index be4318044f96..1e942eed2dfc 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2019, The Linux Foundation. All rights reserved.
  */
 
+#include 
 #include 
 #include 
 
@@ -12,6 +13,134 @@ struct qcom_smmu {
struct arm_smmu_device smmu;
 };
 
+#define QCOM_ADRENO_SMMU_GPU_SID 0
+
+static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
+{
+   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+   int i;
+
+   /*
+* The GPU will always use SID 0 so that is a handy way to uniquely
+* identify it and configure it for per-instance pagetables
+*/
+   for (i = 0; i < fwspec->num_ids; i++) {
+   u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
+
+   if (sid == QCOM_ADRENO_SMMU_GPU_SID)
+   return true;
+   }
+
+   return false;
+}
+
+static const struct io_pgtable_cfg *qcom_adreno_smmu_get_ttbr1_cfg(
+   const void *cookie)
+{
+   struct arm_smmu_domain *smmu_domain = (void *)cookie;
+   struct io_pgtable *pgtable =
+   io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
+   return &pgtable->cfg;
+}
+
+/*
+ * Local implementation to configure TTBR0 with the specified pagetable config.
+ * The GPU driver will call this to enable TTBR0 when per-instance pagetables
+ * are active
+ */
+
+static int qcom_adreno_smmu_set_ttbr0_cfg(const void *cookie,
+   const struct io_pgtable_cfg *pgtbl_cfg)
+{
+   struct arm_smmu_domain *smmu_domain = (void *)cookie;
+   struct io_pgtable *pgtable = 
io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+
+   /* The domain must have split pagetables already enabled */
+   if (cb->tcr[0] & ARM_SMMU_TCR_EPD1)
+   return -EINVAL;
+
+   /* If the pagetable config is NULL, disable TTBR0 */
+   if (!pgtbl_cfg) {
+   /* Do nothing if it is already disabled */
+   if ((cb->tcr[0] & ARM_SMMU_TCR_EPD0))
+   return -EINVAL;
+
+   /* Set TCR to the original configuration */
+   cb->tcr[0] = arm_smmu_lpae_tcr(&pgtable->cfg);
+   cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, cb->cfg->asid);
+   } else {
+   u32 tcr = cb->tcr[0];
+
+   /* Don't call this again if TTBR0 is already enabled */
+   if (!(cb->tcr[0] & ARM_SMMU_TCR_EPD0))
+   return -EINVAL;
+
+   tcr |= arm_smmu_lpae_tcr(pgtbl_cfg);
+   tcr &= ~(ARM_SMMU_TCR_EPD0 | ARM_SMMU_TCR_EPD1);
+
+   cb->tcr[0] = tcr;
+   cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
+   cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, cb->cfg->asid);
+   }
+
+   arm_smmu_write_context_bank(smmu_domain->smmu, cb->cfg->cbndx);
+
+   return 0;
+}
+
+static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain 
*smmu_domain,
+  struct arm_smmu_device *smmu,
+  struct device *dev, int start)
+{
+   int count;
+
+   /*
+* Assign context bank 0 to the GPU device so the GPU hardware can
+* switch pagetables
+*/
+   if (qcom_adreno

[PATCH v17 09/20] drm/msm: Add support for private address space instances

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Add support for allocating private address space instances. Targets that
support per-context pagetables should implement their own function to
allocate private address spaces.

The default will return a pointer to the global address space.

Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/msm_drv.c | 13 +++--
 drivers/gpu/drm/msm/msm_drv.h |  5 +
 drivers/gpu/drm/msm/msm_gem_vma.c |  9 +
 drivers/gpu/drm/msm/msm_gpu.c | 22 ++
 drivers/gpu/drm/msm/msm_gpu.h |  5 +
 5 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 75cd7639f560..7e963f707852 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -597,7 +597,7 @@ static int context_init(struct drm_device *dev, struct 
drm_file *file)
kref_init(&ctx->ref);
msm_submitqueue_init(dev, ctx);
 
-   ctx->aspace = priv->gpu ? priv->gpu->aspace : NULL;
+   ctx->aspace = msm_gpu_create_private_address_space(priv->gpu);
file->driver_priv = ctx;
 
return 0;
@@ -780,18 +780,19 @@ static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, 
void *data,
 }
 
 static int msm_ioctl_gem_info_iova(struct drm_device *dev,
-   struct drm_gem_object *obj, uint64_t *iova)
+   struct drm_file *file, struct drm_gem_object *obj,
+   uint64_t *iova)
 {
-   struct msm_drm_private *priv = dev->dev_private;
+   struct msm_file_private *ctx = file->driver_priv;
 
-   if (!priv->gpu)
+   if (!ctx->aspace)
return -EINVAL;
 
/*
 * Don't pin the memory here - just get an address so that userspace can
 * be productive
 */
-   return msm_gem_get_iova(obj, priv->gpu->aspace, iova);
+   return msm_gem_get_iova(obj, ctx->aspace, iova);
 }
 
 static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
@@ -830,7 +831,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void 
*data,
args->value = msm_gem_mmap_offset(obj);
break;
case MSM_INFO_GET_IOVA:
-   ret = msm_ioctl_gem_info_iova(dev, obj, &args->value);
+   ret = msm_ioctl_gem_info_iova(dev, file, obj, &args->value);
break;
case MSM_INFO_SET_NAME:
/* length check should leave room for terminating null: */
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 4561bfb5e745..2ca9c3c03845 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -249,6 +249,10 @@ int msm_gem_map_vma(struct msm_gem_address_space *aspace,
 void msm_gem_close_vma(struct msm_gem_address_space *aspace,
struct msm_gem_vma *vma);
 
+
+struct msm_gem_address_space *
+msm_gem_address_space_get(struct msm_gem_address_space *aspace);
+
 void msm_gem_address_space_put(struct msm_gem_address_space *aspace);
 
 struct msm_gem_address_space *
@@ -434,6 +438,7 @@ static inline void __msm_file_private_destroy(struct kref 
*kref)
struct msm_file_private *ctx = container_of(kref,
struct msm_file_private, ref);
 
+   msm_gem_address_space_put(ctx->aspace);
kfree(ctx);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c 
b/drivers/gpu/drm/msm/msm_gem_vma.c
index 5f6a11211b64..29cc1305cf37 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -27,6 +27,15 @@ void msm_gem_address_space_put(struct msm_gem_address_space 
*aspace)
kref_put(&aspace->kref, msm_gem_address_space_destroy);
 }
 
+struct msm_gem_address_space *
+msm_gem_address_space_get(struct msm_gem_address_space *aspace)
+{
+   if (!IS_ERR_OR_NULL(aspace))
+   kref_get(&aspace->kref);
+
+   return aspace;
+}
+
 /* Actually unmap memory for the vma */
 void msm_gem_purge_vma(struct msm_gem_address_space *aspace,
struct msm_gem_vma *vma)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 57532b6b4702..9f1bd17dfa47 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -823,6 +823,28 @@ static int get_clocks(struct platform_device *pdev, struct 
msm_gpu *gpu)
return 0;
 }
 
+/* Return a new address space for a msm_drm_private instance */
+struct msm_gem_address_space *
+msm_gpu_create_private_address_space(struct msm_gpu *gpu)
+{
+   struct msm_gem_address_space *aspace = NULL;
+
+   if (!gpu)
+   return NULL;
+
+   /*
+* If the target doesn't support private address spaces then return
+* the global one
+*/
+   if (gpu->funcs->create_private_address_space)
+   aspace = gpu->funcs->create_private_address_space(gpu);
+
+   if (IS_ERR_OR_NULL(aspace))
+   aspace = msm_gem_address_space_get(gpu->as

[PATCH v17 13/20] iommu/arm-smmu: Add support for split pagetables

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Enable TTBR1 for a context bank if IO_PGTABLE_QUIRK_ARM_TTBR1 is selected
by the io-pgtable configuration.

Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.h | 25 +++--
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 37d8d49299b4..8e884e58f208 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -552,11 +552,15 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,
cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
cb->ttbr[1] = 0;
} else {
-   cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
-   cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID,
- cfg->asid);
+   cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
+cfg->asid);
cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
 cfg->asid);
+
+   if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+   cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
+   else
+   cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
}
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
@@ -822,7 +826,14 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
 
/* Update the domain's page sizes to reflect the page table format */
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
-   domain->geometry.aperture_end = (1UL << ias) - 1;
+
+   if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
+   domain->geometry.aperture_start = ~0UL << ias;
+   domain->geometry.aperture_end = ~0UL;
+   } else {
+   domain->geometry.aperture_end = (1UL << ias) - 1;
+   }
+
domain->geometry.force_aperture = true;
 
/* Initialise the context bank with our page table cfg */
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 83294516ac08..f3e456893f28 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -169,10 +169,12 @@ enum arm_smmu_cbar_type {
 #define ARM_SMMU_CB_TCR0x30
 #define ARM_SMMU_TCR_EAE   BIT(31)
 #define ARM_SMMU_TCR_EPD1  BIT(23)
+#define ARM_SMMU_TCR_A1BIT(22)
 #define ARM_SMMU_TCR_TG0   GENMASK(15, 14)
 #define ARM_SMMU_TCR_SH0   GENMASK(13, 12)
 #define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10)
 #define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8)
+#define ARM_SMMU_TCR_EPD0  BIT(7)
 #define ARM_SMMU_TCR_T0SZ  GENMASK(5, 0)
 
 #define ARM_SMMU_VTCR_RES1 BIT(31)
@@ -350,12 +352,23 @@ struct arm_smmu_domain {
 
 static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg)
 {
-   return ARM_SMMU_TCR_EPD1 |
-  FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) |
-  FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) |
-  FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) |
-  FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) |
-  FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz);
+   u32 tcr = FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) |
+   FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) |
+   FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) |
+   FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) |
+   FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz);
+
+   /*
+   * When TTBR1 is selected shift the TCR fields by 16 bits and disable
+   * translation in TTBR0
+   */
+   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
+   tcr = (tcr << 16) & ~ARM_SMMU_TCR_A1;
+   tcr |= ARM_SMMU_TCR_EPD0;
+   } else
+   tcr |= ARM_SMMU_TCR_EPD1;
+
+   return tcr;
 }
 
 static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg)
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 12/20] iommu/arm-smmu: Pass io-pgtable config to implementation specific function

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Construct the io-pgtable config before calling the implementation specific
init_context function and pass it so the implementation specific function
can get a chance to change it before the io-pgtable is created.

Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |  3 ++-
 drivers/iommu/arm/arm-smmu/arm-smmu.c  | 11 ++-
 drivers/iommu/arm/arm-smmu/arm-smmu.h  |  3 ++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index f4ff124a1967..a9861dcd0884 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -68,7 +68,8 @@ static int cavium_cfg_probe(struct arm_smmu_device *smmu)
return 0;
 }
 
-static int cavium_init_context(struct arm_smmu_domain *smmu_domain)
+static int cavium_init_context(struct arm_smmu_domain *smmu_domain,
+   struct io_pgtable_cfg *pgtbl_cfg)
 {
struct cavium_smmu *cs = container_of(smmu_domain->smmu,
  struct cavium_smmu, smmu);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 09c42af9f31e..37d8d49299b4 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -795,11 +795,6 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
cfg->asid = cfg->cbndx;
 
smmu_domain->smmu = smmu;
-   if (smmu->impl && smmu->impl->init_context) {
-   ret = smmu->impl->init_context(smmu_domain);
-   if (ret)
-   goto out_unlock;
-   }
 
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap  = smmu->pgsize_bitmap,
@@ -810,6 +805,12 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
.iommu_dev  = smmu->dev,
};
 
+   if (smmu->impl && smmu->impl->init_context) {
+   ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg);
+   if (ret)
+   goto out_clear_smmu;
+   }
+
if (smmu_domain->non_strict)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index d890a4a968e8..83294516ac08 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -386,7 +386,8 @@ struct arm_smmu_impl {
u64 val);
int (*cfg_probe)(struct arm_smmu_device *smmu);
int (*reset)(struct arm_smmu_device *smmu);
-   int (*init_context)(struct arm_smmu_domain *smmu_domain);
+   int (*init_context)(struct arm_smmu_domain *smmu_domain,
+   struct io_pgtable_cfg *cfg);
void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync,
 int status);
int (*def_domain_type)(struct device *dev);
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 14/20] iommu/arm-smmu: Prepare for the adreno-smmu implementation

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Do a bit of prep work to add the upcoming adreno-smmu implementation.

Add an hook to allow the implementation to choose which context banks
to allocate.

Move some of the common structs to arm-smmu.h in anticipation of them
being used by the implementations and update some of the existing hooks
to pass more information that the implementation will need.

These modifications will be used by the upcoming Adreno SMMU
implementation to identify the GPU device and properly configure it
for pagetable switching.

Co-developed-by: Rob Clark 
Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |  2 +-
 drivers/iommu/arm/arm-smmu/arm-smmu.c  | 74 ++
 drivers/iommu/arm/arm-smmu/arm-smmu.h  | 52 ++-
 3 files changed, 73 insertions(+), 55 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index a9861dcd0884..88f17cc33023 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -69,7 +69,7 @@ static int cavium_cfg_probe(struct arm_smmu_device *smmu)
 }
 
 static int cavium_init_context(struct arm_smmu_domain *smmu_domain,
-   struct io_pgtable_cfg *pgtbl_cfg)
+   struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
 {
struct cavium_smmu *cs = container_of(smmu_domain->smmu,
  struct cavium_smmu, smmu);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 8e884e58f208..dad7fa86fbd4 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -65,41 +65,10 @@ module_param(disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
"Disable bypass streams such that incoming transactions from devices 
that are not attached to an iommu domain will report an abort back to the 
device and will not be allowed to pass through the SMMU.");
 
-struct arm_smmu_s2cr {
-   struct iommu_group  *group;
-   int count;
-   enum arm_smmu_s2cr_type type;
-   enum arm_smmu_s2cr_privcfg  privcfg;
-   u8  cbndx;
-};
-
 #define s2cr_init_val (struct arm_smmu_s2cr){  \
.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,\
 }
 
-struct arm_smmu_smr {
-   u16 mask;
-   u16 id;
-   boolvalid;
-};
-
-struct arm_smmu_cb {
-   u64 ttbr[2];
-   u32 tcr[2];
-   u32 mair[2];
-   struct arm_smmu_cfg *cfg;
-};
-
-struct arm_smmu_master_cfg {
-   struct arm_smmu_device  *smmu;
-   s16 smendx[];
-};
-#define INVALID_SMENDX -1
-#define cfg_smendx(cfg, fw, i) \
-   (i >= fw->num_ids ? INVALID_SMENDX : cfg->smendx[i])
-#define for_each_cfg_sme(cfg, fw, i, idx) \
-   for (i = 0; idx = cfg_smendx(cfg, fw, i), i < fw->num_ids; ++i)
-
 static bool using_legacy_binding, using_generic_binding;
 
 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
@@ -234,19 +203,6 @@ static int arm_smmu_register_legacy_master(struct device 
*dev,
 }
 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
 
-static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
-{
-   int idx;
-
-   do {
-   idx = find_next_zero_bit(map, end, start);
-   if (idx == end)
-   return -ENOSPC;
-   } while (test_and_set_bit(idx, map));
-
-   return idx;
-}
-
 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 {
clear_bit(idx, map);
@@ -578,7 +534,7 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,
}
 }
 
-static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
+void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
 {
u32 reg;
bool stage1;
@@ -664,8 +620,19 @@ static void arm_smmu_write_context_bank(struct 
arm_smmu_device *smmu, int idx)
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
 }
 
+static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
+  struct arm_smmu_device *smmu,
+  struct device *dev, unsigned int start)
+{
+   if (smmu->impl && smmu->impl->alloc_context_bank)
+   return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, 
start);
+
+   return __arm_smmu_alloc_bitmap(smmu->context_map, start, 
smmu->num_context_banks);
+}
+
 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
-   

[PATCH v17 18/20] dt-bindings: arm-smmu: Add compatible string for Adreno GPU SMMU

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Every Qcom Adreno GPU has an embedded SMMU for its own use. These
devices depend on unique features such as split pagetables,
different stall/halt requirements and other settings. Identify them
with a compatible string so that they can be identified in the
arm-smmu implementation specific code.

Signed-off-by: Jordan Crouse 
Reviewed-by: Rob Herring 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 503160a7b9a0..3b63f2ae24db 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,8 +28,6 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
-  - qcom,sc7180-smmu-v2
-  - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2
 
   - description: Qcom SoCs implementing "arm,mmu-500"
@@ -40,6 +38,13 @@ properties:
   - qcom,sm8150-smmu-500
   - qcom,sm8250-smmu-500
   - const: arm,mmu-500
+  - description: Qcom Adreno GPUs implementing "arm,smmu-v2"
+items:
+  - enum:
+  - qcom,sc7180-smmu-v2
+  - qcom,sdm845-smmu-v2
+  - const: qcom,adreno-smmu
+  - const: qcom,smmu-v2
   - description: Marvell SoCs implementing "arm,mmu-500"
 items:
   - const: marvell,ap806-smmu-500
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 07/20] drm/msm: Set the global virtual address range from the IOMMU domain

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Use the aperture settings from the IOMMU domain to set up the virtual
address range for the GPU. This allows us to transparently deal with
IOMMU side features (like split pagetables).

Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 13 +++--
 drivers/gpu/drm/msm/msm_iommu.c |  7 +++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index a712e1cfcba8..b703e5308b01 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -192,9 +192,18 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, iommu);
struct msm_gem_address_space *aspace;
+   u64 start, size;
 
-   aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M,
-   0x - SZ_16M);
+   /*
+* Use the aperture start or SZ_16M, whichever is greater. This will
+* ensure that we align with the allocated pagetable range while still
+* allowing room in the lower 32 bits for GMEM and whatnot
+*/
+   start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
+   size = iommu->geometry.aperture_end - start + 1;
+
+   aspace = msm_gem_address_space_create(mmu, "gpu",
+   start & GENMASK(48, 0), size);
 
if (IS_ERR(aspace) && !IS_ERR(mmu))
mmu->funcs->destroy(mmu);
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 3a381a9674c9..1b6635504069 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -36,6 +36,10 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
struct msm_iommu *iommu = to_msm_iommu(mmu);
size_t ret;
 
+   /* The arm-smmu driver expects the addresses to be sign extended */
+   if (iova & BIT_ULL(48))
+   iova |= GENMASK_ULL(63, 49);
+
ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);
WARN_ON(!ret);
 
@@ -46,6 +50,9 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint64_t 
iova, size_t len)
 {
struct msm_iommu *iommu = to_msm_iommu(mmu);
 
+   if (iova & BIT_ULL(48))
+   iova |= GENMASK_ULL(63, 49);
+
iommu_unmap(iommu->domain, iova, len);
 
return 0;
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 08/20] drm/msm: Add support to create a local pagetable

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Add support to create a io-pgtable for use by targets that support
per-instance pagetables. In order to support per-instance pagetables the
GPU SMMU device needs to have the qcom,adreno-smmu compatible string and
split pagetables enabled.

Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/Kconfig  |   1 +
 drivers/gpu/drm/msm/msm_gpummu.c |   2 +-
 drivers/gpu/drm/msm/msm_iommu.c  | 199 ++-
 drivers/gpu/drm/msm/msm_mmu.h|  16 ++-
 4 files changed, 215 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 6deaa7d01654..5102a58830b9 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -8,6 +8,7 @@ config DRM_MSM
depends on MMU
depends on INTERCONNECT || !INTERCONNECT
depends on QCOM_OCMEM || QCOM_OCMEM=n
+   select IOMMU_IO_PGTABLE
select QCOM_MDT_LOADER if ARCH_QCOM
select REGULATOR
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c
index 310a31b05faa..aab121f4beb7 100644
--- a/drivers/gpu/drm/msm/msm_gpummu.c
+++ b/drivers/gpu/drm/msm/msm_gpummu.c
@@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct 
msm_gpu *gpu)
}
 
gpummu->gpu = gpu;
-   msm_mmu_init(&gpummu->base, dev, &funcs);
+   msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU);
 
return &gpummu->base;
 }
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 1b6635504069..697cc0a059d6 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -4,15 +4,210 @@
  * Author: Rob Clark 
  */
 
+#include 
+#include 
 #include "msm_drv.h"
 #include "msm_mmu.h"
 
 struct msm_iommu {
struct msm_mmu base;
struct iommu_domain *domain;
+   atomic_t pagetables;
 };
+
 #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
 
+struct msm_iommu_pagetable {
+   struct msm_mmu base;
+   struct msm_mmu *parent;
+   struct io_pgtable_ops *pgtbl_ops;
+   phys_addr_t ttbr;
+   u32 asid;
+};
+static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)
+{
+   return container_of(mmu, struct msm_iommu_pagetable, base);
+}
+
+static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
+   size_t size)
+{
+   struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
+   struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
+   size_t unmapped = 0;
+
+   /* Unmap the block one page at a time */
+   while (size) {
+   unmapped += ops->unmap(ops, iova, 4096, NULL);
+   iova += 4096;
+   size -= 4096;
+   }
+
+   iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain);
+
+   return (unmapped == size) ? 0 : -EINVAL;
+}
+
+static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
+   struct sg_table *sgt, size_t len, int prot)
+{
+   struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
+   struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
+   struct scatterlist *sg;
+   size_t mapped = 0;
+   u64 addr = iova;
+   unsigned int i;
+
+   for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+   size_t size = sg->length;
+   phys_addr_t phys = sg_phys(sg);
+
+   /* Map the block one page at a time */
+   while (size) {
+   if (ops->map(ops, addr, phys, 4096, prot, GFP_KERNEL)) {
+   msm_iommu_pagetable_unmap(mmu, iova, mapped);
+   return -EINVAL;
+   }
+
+   phys += 4096;
+   addr += 4096;
+   size -= 4096;
+   mapped += 4096;
+   }
+   }
+
+   return 0;
+}
+
+static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu)
+{
+   struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
+   struct msm_iommu *iommu = to_msm_iommu(pagetable->parent);
+   struct adreno_smmu_priv *adreno_smmu =
+   dev_get_drvdata(pagetable->parent->dev);
+
+   /*
+* If this is the last attached pagetable for the parent,
+* disable TTBR0 in the arm-smmu driver
+*/
+   if (atomic_dec_return(&iommu->pagetables) == 0)
+   adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL);
+
+   free_io_pgtable_ops(pagetable->pgtbl_ops);
+   kfree(pagetable);
+}
+
+int msm_iommu_pagetable_params(struct msm_mmu *mmu,
+   phys_addr_t *ttbr, int *asid)
+{
+   struct msm_iommu_pagetable *pagetable;
+
+   if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)
+   return -EINVAL;
+
+   pagetable = to_pagetable(mmu);
+
+   if (ttbr)
+   *ttbr = pagetable->ttbr;
+
+  

[PATCH v17 11/20] drm/msm: Show process names in gem_describe

2020-09-05 Thread Rob Clark
From: Rob Clark 

In $debugfs/gem we already show any vma(s) associated with an object.
Also show process names if the vma's address space is a per-process
address space.

Signed-off-by: Rob Clark 
Reviewed-by: Jordan Crouse 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/msm_drv.c |  2 +-
 drivers/gpu/drm/msm/msm_gem.c | 25 +
 drivers/gpu/drm/msm/msm_gem.h |  5 +
 drivers/gpu/drm/msm/msm_gem_vma.c |  1 +
 drivers/gpu/drm/msm/msm_gpu.c |  8 +---
 drivers/gpu/drm/msm/msm_gpu.h |  2 +-
 6 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 7e963f707852..7143756b7e83 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -597,7 +597,7 @@ static int context_init(struct drm_device *dev, struct 
drm_file *file)
kref_init(&ctx->ref);
msm_submitqueue_init(dev, ctx);
 
-   ctx->aspace = msm_gpu_create_private_address_space(priv->gpu);
+   ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current);
file->driver_priv = ctx;
 
return 0;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 3cb7aeb93fd3..76a6c5271e57 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -842,11 +842,28 @@ void msm_gem_describe(struct drm_gem_object *obj, struct 
seq_file *m)
 
seq_puts(m, "  vmas:");
 
-   list_for_each_entry(vma, &msm_obj->vmas, list)
-   seq_printf(m, " [%s: %08llx,%s,inuse=%d]",
-   vma->aspace != NULL ? vma->aspace->name : NULL,
-   vma->iova, vma->mapped ? "mapped" : "unmapped",
+   list_for_each_entry(vma, &msm_obj->vmas, list) {
+   const char *name, *comm;
+   if (vma->aspace) {
+   struct msm_gem_address_space *aspace = 
vma->aspace;
+   struct task_struct *task =
+   get_pid_task(aspace->pid, PIDTYPE_PID);
+   if (task) {
+   comm = kstrdup(task->comm, GFP_KERNEL);
+   } else {
+   comm = NULL;
+   }
+   name = aspace->name;
+   } else {
+   name = comm = NULL;
+   }
+   seq_printf(m, " [%s%s%s: aspace=%p, 
%08llx,%s,inuse=%d]",
+   name, comm ? ":" : "", comm ? comm : "",
+   vma->aspace, vma->iova,
+   vma->mapped ? "mapped" : "unmapped",
vma->inuse);
+   kfree(comm);
+   }
 
seq_puts(m, "\n");
}
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 9c573c4269cb..7b1c7a5f8eef 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -24,6 +24,11 @@ struct msm_gem_address_space {
spinlock_t lock; /* Protects drm_mm node allocation/removal */
struct msm_mmu *mmu;
struct kref kref;
+
+   /* For address spaces associated with a specific process, this
+* will be non-NULL:
+*/
+   struct pid *pid;
 };
 
 struct msm_gem_vma {
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c 
b/drivers/gpu/drm/msm/msm_gem_vma.c
index 29cc1305cf37..80a8a266d68f 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -17,6 +17,7 @@ msm_gem_address_space_destroy(struct kref *kref)
drm_mm_takedown(&aspace->mm);
if (aspace->mmu)
aspace->mmu->funcs->destroy(aspace->mmu);
+   put_pid(aspace->pid);
kfree(aspace);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 9f1bd17dfa47..59eed0fb12fc 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -825,10 +825,9 @@ static int get_clocks(struct platform_device *pdev, struct 
msm_gpu *gpu)
 
 /* Return a new address space for a msm_drm_private instance */
 struct msm_gem_address_space *
-msm_gpu_create_private_address_space(struct msm_gpu *gpu)
+msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct 
*task)
 {
struct msm_gem_address_space *aspace = NULL;
-
if (!gpu)
return NULL;
 
@@ -836,8 +835,11 @@ msm_gpu_create_private_address_space(struct msm_gpu *gpu)
 * If the target doesn't support private address spaces then return
 * the global one
 */
-   if (gpu->funcs->create_private_address_space)
+   if (gpu->funcs->create_private_address_space) {
aspace = gpu->funcs->create_private_address_space(gpu);
+   

[PATCH v17 06/20] drm/msm: Drop context arg to gpu->submit()

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Now that we can get the ctx from the submitqueue, the extra arg is
redundant.

Signed-off-by: Jordan Crouse 
[split out of previous patch to reduce churny noise]
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   | 12 +---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   |  5 ++---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  5 ++---
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  3 +--
 drivers/gpu/drm/msm/msm_gem_submit.c|  2 +-
 drivers/gpu/drm/msm/msm_gpu.c   |  9 -
 drivers/gpu/drm/msm/msm_gpu.h   |  6 ++
 7 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index ce3c0b5c167b..616d9e798058 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -43,8 +43,7 @@ static void a5xx_flush(struct msm_gpu *gpu, struct 
msm_ringbuffer *ring)
gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 }
 
-static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit 
*submit,
-   struct msm_file_private *ctx)
+static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit 
*submit)
 {
struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = submit->ring;
@@ -57,7 +56,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct 
msm_gem_submit *submit
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-   if (priv->lastctx == ctx)
+   if (priv->lastctx == submit->queue->ctx)
break;
/* fall-thru */
case MSM_SUBMIT_CMD_BUF:
@@ -103,8 +102,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct 
msm_gem_submit *submit
msm_gpu_retire(gpu);
 }
 
-static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-   struct msm_file_private *ctx)
+static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
@@ -114,7 +112,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit,
 
if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
priv->lastctx = NULL;
-   a5xx_submit_in_rb(gpu, submit, ctx);
+   a5xx_submit_in_rb(gpu, submit);
return;
}
 
@@ -148,7 +146,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit,
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-   if (priv->lastctx == ctx)
+   if (priv->lastctx == submit->queue->ctx)
break;
/* fall-thru */
case MSM_SUBMIT_CMD_BUF:
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 74bc27eb4203..f6aad038d8b6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -81,8 +81,7 @@ static void get_stats_counter(struct msm_ringbuffer *ring, 
u32 counter,
OUT_RING(ring, upper_32_bits(iova));
 }
 
-static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-   struct msm_file_private *ctx)
+static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
struct msm_drm_private *priv = gpu->dev->dev_private;
@@ -115,7 +114,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit,
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-   if (priv->lastctx == ctx)
+   if (priv->lastctx == submit->queue->ctx)
break;
/* fall-thru */
case MSM_SUBMIT_CMD_BUF:
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 459f10a3710b..a712e1cfcba8 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -434,8 +434,7 @@ void adreno_recover(struct msm_gpu *gpu)
}
 }
 
-void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-   struct msm_file_private *ctx)
+void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct msm_drm_private *priv = gpu->dev->dev_private;
@@ -449,7 +448,7 @@ void adreno_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit,
break;
 

[PATCH v17 15/20] iommu/arm-smmu: Constify some helpers

2020-09-05 Thread Rob Clark
From: Rob Clark 

Sprinkle a few `const`s where helpers don't need write access.

Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 9aaacc906597..1a746476927c 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -377,7 +377,7 @@ struct arm_smmu_master_cfg {
s16 smendx[];
 };
 
-static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg)
+static inline u32 arm_smmu_lpae_tcr(const struct io_pgtable_cfg *cfg)
 {
u32 tcr = FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) |
FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) |
@@ -398,13 +398,13 @@ static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg 
*cfg)
return tcr;
 }
 
-static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg)
+static inline u32 arm_smmu_lpae_tcr2(const struct io_pgtable_cfg *cfg)
 {
return FIELD_PREP(ARM_SMMU_TCR2_PASIZE, cfg->arm_lpae_s1_cfg.tcr.ips) |
   FIELD_PREP(ARM_SMMU_TCR2_SEP, ARM_SMMU_TCR2_SEP_UPSTREAM);
 }
 
-static inline u32 arm_smmu_lpae_vtcr(struct io_pgtable_cfg *cfg)
+static inline u32 arm_smmu_lpae_vtcr(const struct io_pgtable_cfg *cfg)
 {
return ARM_SMMU_VTCR_RES1 |
   FIELD_PREP(ARM_SMMU_VTCR_PS, cfg->arm_lpae_s2_cfg.vtcr.ps) |
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 17/20] iommu/arm-smmu: Add a way for implementations to influence SCTLR

2020-09-05 Thread Rob Clark
From: Rob Clark 

For the Adreno GPU's SMMU, we want SCTLR.HUPCF set to ensure that
pending translations are not terminated on iova fault.  Otherwise
a terminated CP read could hang the GPU by returning invalid
command-stream data.

Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 6 ++
 drivers/iommu/arm/arm-smmu/arm-smmu.c  | 3 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.h  | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 1e942eed2dfc..0663d7d26908 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -129,6 +129,12 @@ static int qcom_adreno_smmu_init_context(struct 
arm_smmu_domain *smmu_domain,
(smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64))
pgtbl_cfg->quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
 
+   /*
+* On the GPU device we want to process subsequent transactions after a
+* fault to keep the GPU from hanging
+*/
+   smmu_domain->cfg.sctlr_set |= ARM_SMMU_SCTLR_HUPCF;
+
/*
 * Initialize private interface with GPU:
 */
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index dad7fa86fbd4..1f06ab219819 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -617,6 +617,9 @@ void arm_smmu_write_context_bank(struct arm_smmu_device 
*smmu, int idx)
if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
reg |= ARM_SMMU_SCTLR_E;
 
+   reg |= cfg->sctlr_set;
+   reg &= ~cfg->sctlr_clr;
+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
 }
 
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 6c5ffeae..ddf2ca4c923d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -144,6 +144,7 @@ enum arm_smmu_cbar_type {
 #define ARM_SMMU_CB_SCTLR  0x0
 #define ARM_SMMU_SCTLR_S1_ASIDPNE  BIT(12)
 #define ARM_SMMU_SCTLR_CFCFG   BIT(7)
+#define ARM_SMMU_SCTLR_HUPCF   BIT(8)
 #define ARM_SMMU_SCTLR_CFIEBIT(6)
 #define ARM_SMMU_SCTLR_CFREBIT(5)
 #define ARM_SMMU_SCTLR_E   BIT(4)
@@ -341,6 +342,8 @@ struct arm_smmu_cfg {
u16 asid;
u16 vmid;
};
+   u32 sctlr_set;/* extra bits to set in 
SCTLR */
+   u32 sctlr_clr;/* bits to mask in SCTLR 
*/
enum arm_smmu_cbar_type cbar;
enum arm_smmu_context_fmt   fmt;
 };
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 20/20] arm: dts: qcom: sc7180: Set the compatible string for the GPU SMMU

2020-09-05 Thread Rob Clark
From: Rob Clark 

Set the qcom,adreno-smmu compatible string for the GPU SMMU to enable
split pagetables and per-instance pagetables for drm/msm.

Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index d46b3833e52f..f3bef1cad889 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1937,7 +1937,7 @@ opp-18000 {
};
 
adreno_smmu: iommu@504 {
-   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   compatible = "qcom,sc7180-smmu-v2", "qcom,adreno-smmu", 
"qcom,smmu-v2";
reg = <0 0x0504 0 0x1>;
#iommu-cells = <1>;
#global-interrupts = <2>;
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v17 19/20] arm: dts: qcom: sm845: Set the compatible string for the GPU SMMU

2020-09-05 Thread Rob Clark
From: Jordan Crouse 

Set the qcom,adreno-smmu compatible string for the GPU SMMU to enable
split pagetables and per-instance pagetables for drm/msm.

Signed-off-by: Jordan Crouse 
Signed-off-by: Rob Clark 
Reviewed-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi | 9 +
 arch/arm64/boot/dts/qcom/sdm845.dtsi   | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
index 64fc1bfd66fa..39f23cdcbd02 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
@@ -633,6 +633,15 @@ &mdss_mdp {
status = "okay";
 };
 
+/*
+ * Cheza fw does not properly program the GPU aperture to allow the
+ * GPU to update the SMMU pagetables for context switches.  Work
+ * around this by dropping the "qcom,adreno-smmu" compat string.
+ */
+&adreno_smmu {
+   compatible = "qcom,sdm845-smmu-v2", "qcom,smmu-v2";
+};
+
 &mss_pil {
iommus = <&apps_smmu 0x781 0x0>,
 <&apps_smmu 0x724 0x3>;
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 2884577dcb77..76a8a34640ae 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -4058,7 +4058,7 @@ opp-25700 {
};
 
adreno_smmu: iommu@504 {
-   compatible = "qcom,sdm845-smmu-v2", "qcom,smmu-v2";
+   compatible = "qcom,sdm845-smmu-v2", "qcom,adreno-smmu", 
"qcom,smmu-v2";
reg = <0 0x504 0 0x1>;
#iommu-cells = <1>;
#global-interrupts = <2>;
-- 
2.26.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [GIT PULL FOR v5.9] Fix Kconfig dependency issue with DMAENGINES selection

2020-09-05 Thread Laurent Pinchart
With Dave and Daniel on the recipients' list this time.

On Sat, Sep 05, 2020 at 08:27:51PM +0300, Laurent Pinchart wrote:
> Hi Dave and Daniel,
> 
> This small pull request fixes a Kconfig dependency issue introduced in
> v5.9-rc1. Among the three patches required to fix the issue, the ASoC
> fix has been merged in Linus' tree already. I haven't been able to get
> the RapidIO patch reviewed by the subsystem maintainers, so I've
> included it here as it's a dependency for the DRM patch.
> 
> The following changes since commit f75aef392f869018f78cfedf3c320a6b3fcfda6b:
> 
>   Linux 5.9-rc3 (2020-08-30 16:01:54 -0700)
> 
> are available in the Git repository at:
> 
>   git://linuxtv.org/pinchartl/media.git tags/drm-xlnx-dpsub-fixes-20200905
> 
> for you to fetch changes up to 3e8b2403545efd46c6347002e27eae4708205fd4:
> 
>   drm: xlnx: dpsub: Fix DMADEVICES Kconfig dependency (2020-09-05 19:52:54 
> +0300)
> 
> 
> Kconfig fixes for DRM_ZYNQMP_DPSUB DMA engine dependency
> 
> 
> Laurent Pinchart (2):
>   rapidio: Replace 'select' DMAENGINES 'with depends on'
>   drm: xlnx: dpsub: Fix DMADEVICES Kconfig dependency
> 
>  drivers/gpu/drm/xlnx/Kconfig | 1 +
>  drivers/rapidio/Kconfig  | 2 +-
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 

-- 
Regards,

Laurent Pinchart
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 208981] trace with B550I AORUS PRO AX and AMD Ryzen 5 PRO 4650G

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=208981

ahren (lili...@163.com) changed:

   What|Removed |Added

 CC||lili...@163.com

--- Comment #4 from ahren (lili...@163.com) ---
Hi, have you solved your problem? I have the same problem as you.
Here is my environment:


asrock A520M-ITX/AC + AMD 4750G
Ubuntu 20.04.1 LTS + AMDGPU-pro-20.30-1109583 - Ubuntu-20.04.tar.xz

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 209159] AMD Vega 20 framebuffer switch fails on 5.9rc2+

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209159

--- Comment #2 from r...@testtoast.com ---
Created attachment 292367
  --> https://bugzilla.kernel.org/attachment.cgi?id=292367&action=edit
dmesg

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 209159] AMD Vega 20 framebuffer switch fails on 5.9rc2+

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209159

--- Comment #3 from r...@testtoast.com ---
Thanks, sorry had to bypass LUKS to get the whole thing. This looks bad, looks
like the firmware upload to the card is failing. It is also happening on all my
installed kernels sorry, including the Fedora provided 5.8 series. Bad
hardware?

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[Bug 209159] AMD Vega 20 framebuffer switch fails on 5.9rc2+

2020-09-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209159

r...@testtoast.com changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |INVALID

--- Comment #4 from r...@testtoast.com ---
Whoops seems this was PEBKAC, I'd previously overridden (and forgot about
overriding) my dracut.conf to load a specific firmware version and when that
fell off the bottom of the kernel rotation, obviously it didn't load into my
initrd and failed. Cleared that and all well, sorry for the noise.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel