The debugger needs access to the client's vm for reading and writing, for
example when inspecting ISA/ELF or setting up breakpoints.

Add an ioctl to open a target vm, identified by debugger client and
vm_handle, and hook up pread/pwrite support.

Open takes a timeout argument so that standard fsync
can be used for explicit flushing between cpu/gpu for
the target vm.

Implement this for bo-backed storage. userptr will
be done in a following patch.

v2: - checkpatch (Maciej)
    - 32bit fixes (Andrzej)
    - bo_vmap (Mika)
    - fix vm leak if can't allocate k_buffer (Mika)
    - assert vm write held for vma (Matthew)

v3: - fw ref, ttm_bo_access
    - timeout boundary check (Dominik)
    - don't try to copy to user on zero bytes (Mika)

Cc: Matthew Brost <matthew.br...@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuopp...@linux.intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  24 ++
 drivers/gpu/drm/xe/xe_eudebug.c      | 442 +++++++++++++++++++++++++++
 include/uapi/drm/xe_drm_eudebug.h    |  19 ++
 3 files changed, 485 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h 
b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5fcf06835ef0..4c620f95b466 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -551,6 +551,30 @@
 #define   CCS_MODE_CSLICE(cslice, ccs) \
        ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))
 
+#define RCU_ASYNC_FLUSH                                XE_REG(0x149fc)
+#define   RCU_ASYNC_FLUSH_IN_PROGRESS  REG_BIT(31)
+#define   RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT      28
+#define   RCU_ASYNC_FLUSH_ENGINE_ID_DECODE1 REG_BIT(26)
+#define   RCU_ASYNC_FLUSH_AMFS         REG_BIT(8)
+#define   RCU_ASYNC_FLUSH_PREFETCH     REG_BIT(7)
+#define   RCU_ASYNC_FLUSH_DATA_PORT    REG_BIT(6)
+#define   RCU_ASYNC_FLUSH_DATA_CACHE   REG_BIT(5)
+#define   RCU_ASYNC_FLUSH_HDC_PIPELINE REG_BIT(4)
+#define   RCU_ASYNC_INVALIDATE_HDC_PIPELINE REG_BIT(3)
+#define   RCU_ASYNC_INVALIDATE_CONSTANT_CACHE REG_BIT(2)
+#define   RCU_ASYNC_INVALIDATE_TEXTURE_CACHE REG_BIT(1)
+#define   RCU_ASYNC_INVALIDATE_INSTRUCTION_CACHE REG_BIT(0)
+#define   RCU_ASYNC_FLUSH_AND_INVALIDATE_ALL ( \
+       RCU_ASYNC_FLUSH_AMFS | \
+       RCU_ASYNC_FLUSH_PREFETCH | \
+       RCU_ASYNC_FLUSH_DATA_PORT | \
+       RCU_ASYNC_FLUSH_DATA_CACHE | \
+       RCU_ASYNC_FLUSH_HDC_PIPELINE | \
+       RCU_ASYNC_INVALIDATE_HDC_PIPELINE | \
+       RCU_ASYNC_INVALIDATE_CONSTANT_CACHE | \
+       RCU_ASYNC_INVALIDATE_TEXTURE_CACHE | \
+       RCU_ASYNC_INVALIDATE_INSTRUCTION_CACHE)
+
 #define RCU_DEBUG_1                            XE_REG(0x14a00)
 #define   RCU_DEBUG_1_ENGINE_STATUS            REG_GENMASK(2, 0)
 #define   RCU_DEBUG_1_RUNALONE_ACTIVE          REG_BIT(2)
diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
index 3cf3616e546d..9d87df75348b 100644
--- a/drivers/gpu/drm/xe/xe_eudebug.c
+++ b/drivers/gpu/drm/xe/xe_eudebug.c
@@ -5,9 +5,12 @@
 
 #include <linux/anon_inodes.h>
 #include <linux/delay.h>
+#include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 
+#include <drm/drm_drv.h>
 #include <drm/drm_managed.h>
 
 #include <generated/xe_wa_oob.h>
@@ -16,6 +19,7 @@
 #include "regs/xe_engine_regs.h"
 
 #include "xe_assert.h"
+#include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_eudebug.h"
 #include "xe_eudebug_types.h"
@@ -1222,6 +1226,8 @@ static long xe_eudebug_eu_control(struct xe_eudebug *d, 
const u64 arg)
        return ret;
 }
 
+static long xe_eudebug_vm_open_ioctl(struct xe_eudebug *d, unsigned long arg);
+
 static long xe_eudebug_ioctl(struct file *file,
                             unsigned int cmd,
                             unsigned long arg)
@@ -1246,6 +1252,11 @@ static long xe_eudebug_ioctl(struct file *file,
                ret = xe_eudebug_ack_event_ioctl(d, cmd, arg);
                eu_dbg(d, "ioctl cmd=EVENT_ACK ret=%ld\n", ret);
                break;
+       case DRM_XE_EUDEBUG_IOCTL_VM_OPEN:
+               ret = xe_eudebug_vm_open_ioctl(d, arg);
+               eu_dbg(d, "ioctl cmd=VM_OPEN ret=%ld\n", ret);
+               break;
+
        default:
                ret = -EINVAL;
        }
@@ -3038,3 +3049,434 @@ void xe_eudebug_ufence_fini(struct xe_user_fence 
*ufence)
        xe_eudebug_put(ufence->eudebug.debugger);
        ufence->eudebug.debugger = NULL;
 }
+
+static int xe_eudebug_vma_access(struct xe_vma *vma, u64 offset_in_vma,
+                                void *buf, u64 len, bool write)
+{
+       struct xe_bo *bo;
+       u64 bytes;
+
+       lockdep_assert_held_write(&xe_vma_vm(vma)->lock);
+
+       if (XE_WARN_ON(offset_in_vma >= xe_vma_size(vma)))
+               return -EINVAL;
+
+       bytes = min_t(u64, len, xe_vma_size(vma) - offset_in_vma);
+       if (!bytes)
+               return 0;
+
+       bo = xe_bo_get(xe_vma_bo(vma));
+       if (bo) {
+               int ret;
+
+               ret = ttm_bo_access(&bo->ttm, offset_in_vma, buf, bytes, write);
+
+               xe_bo_put(bo);
+
+               return ret;
+       }
+
+       return -EINVAL;
+}
+
+static int xe_eudebug_vm_access(struct xe_vm *vm, u64 offset,
+                               void *buf, u64 len, bool write)
+{
+       struct xe_vma *vma;
+       int ret;
+
+       down_write(&vm->lock);
+
+       vma = xe_vm_find_overlapping_vma(vm, offset, len);
+       if (vma) {
+               /* XXX: why find overlapping returns below start? */
+               if (offset < xe_vma_start(vma) ||
+                   offset >= (xe_vma_start(vma) + xe_vma_size(vma))) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               /* Offset into vma */
+               offset -= xe_vma_start(vma);
+               ret = xe_eudebug_vma_access(vma, offset, buf, len, write);
+       } else {
+               ret = -EINVAL;
+       }
+
+out:
+       up_write(&vm->lock);
+
+       return ret;
+}
+
+struct vm_file {
+       struct xe_eudebug *debugger;
+       struct xe_file *xef;
+       struct xe_vm *vm;
+       u64 flags;
+       u64 client_id;
+       u64 vm_handle;
+       unsigned int timeout_us;
+};
+
+static ssize_t __vm_read_write(struct xe_vm *vm,
+                              void *bb,
+                              char __user *r_buffer,
+                              const char __user *w_buffer,
+                              unsigned long offset,
+                              unsigned long len,
+                              const bool write)
+{
+       ssize_t ret;
+
+       if (!len)
+               return 0;
+
+       if (write) {
+               ret = copy_from_user(bb, w_buffer, len);
+               if (ret)
+                       return -EFAULT;
+
+               ret = xe_eudebug_vm_access(vm, offset, bb, len, true);
+               if (ret <= 0)
+                       return ret;
+
+               len = ret;
+       } else {
+               ret = xe_eudebug_vm_access(vm, offset, bb, len, false);
+               if (ret <= 0)
+                       return ret;
+
+               len = ret;
+
+               ret = copy_to_user(r_buffer, bb, len);
+               if (ret)
+                       return -EFAULT;
+       }
+
+       return len;
+}
+
+static struct xe_vm *find_vm_get(struct xe_eudebug *d, const u32 id)
+{
+       struct xe_vm *vm;
+
+       mutex_lock(&d->res->lock);
+       vm = find_resource__unlocked(d->res, XE_EUDEBUG_RES_TYPE_VM, id);
+       if (vm)
+               xe_vm_get(vm);
+
+       mutex_unlock(&d->res->lock);
+
+       return vm;
+}
+
+static ssize_t __xe_eudebug_vm_access(struct file *file,
+                                     char __user *r_buffer,
+                                     const char __user *w_buffer,
+                                     size_t count, loff_t *__pos)
+{
+       struct vm_file *vmf = file->private_data;
+       struct xe_eudebug * const d = vmf->debugger;
+       struct xe_device * const xe = d->xe;
+       const bool write = !!w_buffer;
+       struct xe_vm *vm;
+       ssize_t copied = 0;
+       ssize_t bytes_left = count;
+       ssize_t ret;
+       unsigned long alloc_len;
+       loff_t pos = *__pos;
+       void *k_buffer;
+
+       if (XE_IOCTL_DBG(xe, write && r_buffer))
+               return -EINVAL;
+
+       vm = find_vm_get(d, vmf->vm_handle);
+       if (XE_IOCTL_DBG(xe, !vm))
+               return -EINVAL;
+
+       if (XE_IOCTL_DBG(xe, vm != vmf->vm)) {
+               eu_warn(d, "vm_access(%s): vm handle mismatch 
client_handle=%llu, vm_handle=%llu, flags=0x%llx, pos=%llu, count=%zu\n",
+                       write ? "write" : "read",
+                       vmf->client_id, vmf->vm_handle, vmf->flags, pos, count);
+               xe_vm_put(vm);
+               return -EINVAL;
+       }
+
+       if (!count) {
+               xe_vm_put(vm);
+               return 0;
+       }
+
+       alloc_len = min_t(unsigned long, ALIGN(count, PAGE_SIZE), 64 * SZ_1M);
+       do  {
+               k_buffer = vmalloc(alloc_len);
+               if (k_buffer)
+                       break;
+
+               alloc_len >>= 1;
+       } while (alloc_len > PAGE_SIZE);
+
+       if (XE_IOCTL_DBG(xe, !k_buffer)) {
+               xe_vm_put(vm);
+               return -ENOMEM;
+       }
+
+       do {
+               const ssize_t len = min_t(ssize_t, bytes_left, alloc_len);
+
+               ret = __vm_read_write(vm, k_buffer,
+                                     write ? NULL : r_buffer + copied,
+                                     write ? w_buffer + copied : NULL,
+                                     pos + copied,
+                                     len,
+                                     write);
+               if (ret <= 0)
+                       break;
+
+               bytes_left -= ret;
+               copied += ret;
+       } while (bytes_left > 0);
+
+       vfree(k_buffer);
+       xe_vm_put(vm);
+
+       if (XE_WARN_ON(copied < 0))
+               copied = 0;
+
+       *__pos += copied;
+
+       return copied ?: ret;
+}
+
+static ssize_t xe_eudebug_vm_read(struct file *file,
+                                 char __user *buffer,
+                                 size_t count, loff_t *pos)
+{
+       return __xe_eudebug_vm_access(file, buffer, NULL, count, pos);
+}
+
+static ssize_t xe_eudebug_vm_write(struct file *file,
+                                  const char __user *buffer,
+                                  size_t count, loff_t *pos)
+{
+       return __xe_eudebug_vm_access(file, NULL, buffer, count, pos);
+}
+
+static int engine_rcu_flush(struct xe_eudebug *d,
+                           struct xe_hw_engine *hwe,
+                           unsigned int timeout_us)
+{
+       const struct xe_reg psmi_addr = RING_PSMI_CTL(hwe->mmio_base);
+       struct xe_gt *gt = hwe->gt;
+       unsigned int fw_ref;
+       u32 mask = RCU_ASYNC_FLUSH_AND_INVALIDATE_ALL;
+       u32 psmi_ctrl;
+       u32 id;
+       int ret;
+
+       if (hwe->class == XE_ENGINE_CLASS_RENDER)
+               id = 0;
+       else if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+               id = hwe->instance + 1;
+       else
+               return -EINVAL;
+
+       if (id < 8)
+               mask |= id << RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT;
+       else
+               mask |= (id - 8) << RCU_ASYNC_FLUSH_ENGINE_ID_SHIFT |
+                       RCU_ASYNC_FLUSH_ENGINE_ID_DECODE1;
+
+       fw_ref = xe_force_wake_get(gt_to_fw(gt), hwe->domain);
+       if (!fw_ref)
+               return -ETIMEDOUT;
+
+       /* Prevent concurrent flushes */
+       mutex_lock(&d->eu_lock);
+       psmi_ctrl = xe_mmio_read32(&gt->mmio, psmi_addr);
+       if (!(psmi_ctrl & IDLE_MSG_DISABLE))
+               xe_mmio_write32(&gt->mmio, psmi_addr, 
_MASKED_BIT_ENABLE(IDLE_MSG_DISABLE));
+
+       /* XXX: Timeout is per operation but in here we flush previous */
+       ret = xe_mmio_wait32(&gt->mmio, RCU_ASYNC_FLUSH,
+                            RCU_ASYNC_FLUSH_IN_PROGRESS, 0,
+                            timeout_us, NULL, false);
+       if (ret)
+               goto out;
+
+       xe_mmio_write32(&gt->mmio, RCU_ASYNC_FLUSH, mask);
+
+       ret = xe_mmio_wait32(&gt->mmio, RCU_ASYNC_FLUSH,
+                            RCU_ASYNC_FLUSH_IN_PROGRESS, 0,
+                            timeout_us, NULL, false);
+out:
+       if (!(psmi_ctrl & IDLE_MSG_DISABLE))
+               xe_mmio_write32(&gt->mmio, psmi_addr, 
_MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
+
+       mutex_unlock(&d->eu_lock);
+       xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+       return ret;
+}
+
+static int xe_eudebug_vm_fsync(struct file *file, loff_t start, loff_t end, 
int datasync)
+{
+       struct vm_file *vmf = file->private_data;
+       struct xe_eudebug *d = vmf->debugger;
+       struct xe_gt *gt;
+       int gt_id;
+       int ret = -EINVAL;
+
+       eu_dbg(d, "vm_fsync: client_handle=%llu, vm_handle=%llu, flags=0x%llx, 
start=%llu, end=%llu datasync=%d\n",
+              vmf->client_id, vmf->vm_handle, vmf->flags, start, end, 
datasync);
+
+       for_each_gt(gt, d->xe, gt_id) {
+               struct xe_hw_engine *hwe;
+               enum xe_hw_engine_id id;
+
+               /* XXX: vm open per engine? */
+               for_each_hw_engine(hwe, gt, id) {
+                       if (hwe->class != XE_ENGINE_CLASS_RENDER &&
+                           hwe->class != XE_ENGINE_CLASS_COMPUTE)
+                               continue;
+
+                       ret = engine_rcu_flush(d, hwe, vmf->timeout_us);
+                       if (ret)
+                               break;
+               }
+       }
+
+       return ret;
+}
+
+static int xe_eudebug_vm_release(struct inode *inode, struct file *file)
+{
+       struct vm_file *vmf = file->private_data;
+       struct xe_eudebug *d = vmf->debugger;
+
+       eu_dbg(d, "vm_release: client_handle=%llu, vm_handle=%llu, 
flags=0x%llx",
+              vmf->client_id, vmf->vm_handle, vmf->flags);
+
+       xe_vm_put(vmf->vm);
+       xe_file_put(vmf->xef);
+       xe_eudebug_put(d);
+       drm_dev_put(&d->xe->drm);
+
+       kfree(vmf);
+
+       return 0;
+}
+
+static const struct file_operations vm_fops = {
+       .owner   = THIS_MODULE,
+       .llseek  = generic_file_llseek,
+       .read    = xe_eudebug_vm_read,
+       .write   = xe_eudebug_vm_write,
+       .fsync   = xe_eudebug_vm_fsync,
+       .mmap    = NULL,
+       .release = xe_eudebug_vm_release,
+};
+
+static long
+xe_eudebug_vm_open_ioctl(struct xe_eudebug *d, unsigned long arg)
+{
+       const u64 max_timeout_ns = DRM_XE_EUDEBUG_VM_SYNC_MAX_TIMEOUT_NSECS;
+       struct drm_xe_eudebug_vm_open param;
+       struct xe_device * const xe = d->xe;
+       struct vm_file *vmf = NULL;
+       struct xe_file *xef;
+       struct xe_vm *vm;
+       struct file *file;
+       long ret = 0;
+       int fd;
+
+       if (XE_IOCTL_DBG(xe, _IOC_SIZE(DRM_XE_EUDEBUG_IOCTL_VM_OPEN) != 
sizeof(param)))
+               return -EINVAL;
+
+       if (XE_IOCTL_DBG(xe, !(_IOC_DIR(DRM_XE_EUDEBUG_IOCTL_VM_OPEN) & 
_IOC_WRITE)))
+               return -EINVAL;
+
+       if (XE_IOCTL_DBG(xe, copy_from_user(&param, (void __user *)arg, 
sizeof(param))))
+               return -EFAULT;
+
+       if (XE_IOCTL_DBG(xe, param.flags))
+               return -EINVAL;
+
+       if (XE_IOCTL_DBG(xe, param.timeout_ns > max_timeout_ns))
+               return -EINVAL;
+
+       if (XE_IOCTL_DBG(xe, xe_eudebug_detached(d)))
+               return -ENOTCONN;
+
+       xef = find_client_get(d, param.client_handle);
+       if (xef)
+               vm = find_vm_get(d, param.vm_handle);
+       else
+               vm = NULL;
+
+       if (XE_IOCTL_DBG(xe, !xef))
+               return -EINVAL;
+
+       if (XE_IOCTL_DBG(xe, !vm)) {
+               ret = -EINVAL;
+               goto out_file_put;
+       }
+
+       vmf = kzalloc(sizeof(*vmf), GFP_KERNEL);
+       if (XE_IOCTL_DBG(xe, !vmf)) {
+               ret = -ENOMEM;
+               goto out_vm_put;
+       }
+
+       fd = get_unused_fd_flags(O_CLOEXEC);
+       if (XE_IOCTL_DBG(xe, fd < 0)) {
+               ret = fd;
+               goto out_free;
+       }
+
+       kref_get(&d->ref);
+       vmf->debugger = d;
+       vmf->vm = vm;
+       vmf->xef = xef;
+       vmf->flags = param.flags;
+       vmf->client_id = param.client_handle;
+       vmf->vm_handle = param.vm_handle;
+       vmf->timeout_us = div64_u64(param.timeout_ns, 1000ull);
+
+       file = anon_inode_getfile("[xe_eudebug.vm]", &vm_fops, vmf, O_RDWR);
+       if (IS_ERR(file)) {
+               ret = PTR_ERR(file);
+               XE_IOCTL_DBG(xe, ret);
+               file = NULL;
+               goto out_fd_put;
+       }
+
+       file->f_mode |= FMODE_PREAD | FMODE_PWRITE |
+               FMODE_READ | FMODE_WRITE | FMODE_LSEEK;
+
+       fd_install(fd, file);
+
+       eu_dbg(d, "vm_open: client_handle=%llu, handle=%llu, flags=0x%llx, 
fd=%d",
+              vmf->client_id, vmf->vm_handle, vmf->flags, fd);
+
+       XE_WARN_ON(ret);
+
+       drm_dev_get(&xe->drm);
+
+       return fd;
+
+out_fd_put:
+       put_unused_fd(fd);
+       xe_eudebug_put(d);
+out_free:
+       kfree(vmf);
+out_vm_put:
+       xe_vm_put(vm);
+out_file_put:
+       xe_file_put(xef);
+
+       XE_WARN_ON(ret >= 0);
+
+       return ret;
+}
diff --git a/include/uapi/drm/xe_drm_eudebug.h 
b/include/uapi/drm/xe_drm_eudebug.h
index 1d5f1411c9a8..a5f13563b3b9 100644
--- a/include/uapi/drm/xe_drm_eudebug.h
+++ b/include/uapi/drm/xe_drm_eudebug.h
@@ -18,6 +18,7 @@ extern "C" {
 #define DRM_XE_EUDEBUG_IOCTL_READ_EVENT                _IO('j', 0x0)
 #define DRM_XE_EUDEBUG_IOCTL_EU_CONTROL                _IOWR('j', 0x2, struct 
drm_xe_eudebug_eu_control)
 #define DRM_XE_EUDEBUG_IOCTL_ACK_EVENT         _IOW('j', 0x4, struct 
drm_xe_eudebug_ack_event)
+#define DRM_XE_EUDEBUG_IOCTL_VM_OPEN           _IOW('j', 0x1, struct 
drm_xe_eudebug_vm_open)
 
 /* XXX: Document events to match their internal counterparts when moved to 
xe_drm.h */
 struct drm_xe_eudebug_event {
@@ -187,6 +188,24 @@ struct drm_xe_eudebug_ack_event {
        __u64 seqno;
 };
 
+struct drm_xe_eudebug_vm_open {
+       /** @extensions: Pointer to the first extension struct, if any */
+       __u64 extensions;
+
+       /** @client_handle: id of client */
+       __u64 client_handle;
+
+       /** @vm_handle: id of vm */
+       __u64 vm_handle;
+
+       /** @flags: flags */
+       __u64 flags;
+
+#define DRM_XE_EUDEBUG_VM_SYNC_MAX_TIMEOUT_NSECS (10ULL * NSEC_PER_SEC)
+       /** @timeout_ns: Timeout value in nanoseconds operations (fsync) */
+       __u64 timeout_ns;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.43.0

Reply via email to