Add iopf-capable hw page table attach/detach/replace helpers. The pointer
to the iommufd_device is stored in the domain attachment handle so that it
can be echoed back in the iopf_group.
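
For reference, a minimal sketch of how a fault consumer can get back to
the device (the function below is hypothetical; group->attach_handle,
to_iommufd_handle() and handle->idev are the pieces this patch provides):

	static void example_route_group(struct iopf_group *group)
	{
		struct iommufd_attach_handle *handle =
			to_iommufd_handle(group->attach_handle);
		struct iommufd_device *idev = handle->idev;

		/* deliver the fault to this device's context */
	}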

An iopf-capable hw page table can only be attached to a device that
supports the IOMMU_DEV_FEAT_IOPF feature. On the first attachment of an
iopf-capable hw_pagetable to a device, the IOPF feature is enabled on
that device; after the last iopf-capable hwpt is detached, the feature
is disabled again.
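
Enabling is reference counted under idev->iopf_lock. An illustrative call
sequence for two iopf-capable attachments sharing one device (both helpers
are internal to fault.c; the sequence only shows the counting semantics):

	iommufd_fault_iopf_enable(idev);   /* 0 -> 1, FEAT_IOPF enabled  */
	iommufd_fault_iopf_enable(idev);   /* 1 -> 2, no hardware change */
	iommufd_fault_iopf_disable(idev);  /* 2 -> 1, no hardware change */
	iommufd_fault_iopf_disable(idev);  /* 1 -> 0, FEAT_IOPF disabled */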

The current implementation allows a replacement between iopf-capable and
non-iopf-capable hw page tables. This matches the nested translation use
case, where a parent domain is attached by default and can then be
replaced with a nested user domain with iopf support.
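
Sketch of that flow (the hwpt variable names are illustrative; the
->fault test and the replace helper are what this patch adds):

	/* default: parent domain, parent_hwpt->fault == NULL */
	iommufd_hw_pagetable_attach(parent_hwpt, idev);

	/* later: replace with an iopf-capable nested user domain */
	iommufd_fault_domain_replace_dev(idev, nested_hwpt, parent_hwpt);
	/* iopf_on is true here, so FEAT_IOPF is enabled before the replace */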

Signed-off-by: Lu Baolu <baolu...@linux.intel.com>
---
 drivers/iommu/iommufd/iommufd_private.h |  12 ++
 drivers/iommu/iommufd/device.c          |  16 +-
 drivers/iommu/iommufd/fault.c           | 191 ++++++++++++++++++++++++
 3 files changed, 216 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index c8a4519f1405..ba89c86e1af7 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -293,6 +293,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt,
 struct iommufd_hw_pagetable {
        struct iommufd_object obj;
        struct iommu_domain *domain;
+       struct iommufd_fault *fault;
 };
 
 struct iommufd_hwpt_paging {
@@ -396,6 +397,9 @@ struct iommufd_device {
        /* always the physical device */
        struct device *dev;
        bool enforce_cache_coherency;
+       /* protect iopf_enabled counter */
+       struct mutex iopf_lock;
+       unsigned int iopf_enabled;
 };
 
 static inline struct iommufd_device *
@@ -456,6 +460,14 @@ struct iommufd_attach_handle {
 int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
 void iommufd_fault_destroy(struct iommufd_object *obj);
 
+int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
+                                   struct iommufd_device *idev);
+void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
+                                    struct iommufd_device *idev);
+int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
+                                    struct iommufd_hw_pagetable *hwpt,
+                                    struct iommufd_hw_pagetable *old);
+
 #ifdef CONFIG_IOMMUFD_TEST
 int iommufd_test(struct iommufd_ucmd *ucmd);
 void iommufd_selftest_destroy(struct iommufd_object *obj);
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 873630c111c1..63681d79b72d 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
        refcount_inc(&idev->obj.users);
        /* igroup refcount moves into iommufd_device */
        idev->igroup = igroup;
+       mutex_init(&idev->iopf_lock);
 
        /*
         * If the caller fails after this success it must call
@@ -376,7 +377,10 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
         * attachment.
         */
        if (list_empty(&idev->igroup->device_list)) {
-               rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
+               if (hwpt->fault)
+                       rc = iommufd_fault_domain_attach_dev(hwpt, idev);
+               else
+               rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
                if (rc)
                        goto err_unresv;
                idev->igroup->hwpt = hwpt;
@@ -402,7 +406,10 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
        mutex_lock(&idev->igroup->lock);
        list_del(&idev->group_item);
        if (list_empty(&idev->igroup->device_list)) {
-               iommu_detach_group(hwpt->domain, idev->igroup->group);
+               if (hwpt->fault)
+                       iommufd_fault_domain_detach_dev(hwpt, idev);
+               else
+                       iommu_detach_group(hwpt->domain, idev->igroup->group);
                idev->igroup->hwpt = NULL;
        }
        if (hwpt_is_paging(hwpt))
@@ -497,7 +504,10 @@ iommufd_device_do_replace(struct iommufd_device *idev,
                        goto err_unlock;
        }
 
-       rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
+       if (old_hwpt->fault || hwpt->fault)
+               rc = iommufd_fault_domain_replace_dev(idev, hwpt, old_hwpt);
+       else
+               rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
        if (rc)
                goto err_unresv;
 
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index d0dafe761075..94dde1f57cfc 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/iommufd.h>
+#include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/anon_inodes.h>
 #include <uapi/linux/iommufd.h>
@@ -15,6 +16,196 @@
 #include "../iommu-priv.h"
 #include "iommufd_private.h"
 
+static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
+{
+       struct device *dev = idev->dev;
+       int ret;
+
+       /*
+        * Once we turn on PCI/PRI support for VF, the response failure code
+        * cannot be forwarded to the hardware because PRI is a shared
+        * resource between the PF and VFs, and there is no coordination
+        * for this shared capability. This waits for a vPRI reset to
+        * recover.
+        */
+       if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
+               return -EINVAL;
+
+       mutex_lock(&idev->iopf_lock);
+       /* Device iopf is already enabled. */
+       if (++idev->iopf_enabled > 1) {
+               mutex_unlock(&idev->iopf_lock);
+               return 0;
+       }
+
+       ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
+       if (ret)
+               --idev->iopf_enabled;
+       mutex_unlock(&idev->iopf_lock);
+
+       return ret;
+}
+
+static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
+{
+       mutex_lock(&idev->iopf_lock);
+       if (!WARN_ON(idev->iopf_enabled == 0)) {
+               if (--idev->iopf_enabled == 0)
+                       iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
+       }
+       mutex_unlock(&idev->iopf_lock);
+}
+
+static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
+                                    struct iommufd_device *idev)
+{
+       struct iommufd_attach_handle *handle;
+       int ret;
+
+       handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+       if (!handle)
+               return -ENOMEM;
+
+       handle->handle.domain = hwpt->domain;
+       handle->idev = idev;
+       ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
+                                       &handle->handle);
+       if (ret)
+               kfree(handle);
+
+       return ret;
+}
+
+int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
+                                   struct iommufd_device *idev)
+{
+       int ret;
+
+       if (!hwpt->fault)
+               return -EINVAL;
+
+       ret = iommufd_fault_iopf_enable(idev);
+       if (ret)
+               return ret;
+
+       ret = __fault_domain_attach_dev(hwpt, idev);
+       if (ret)
+               iommufd_fault_iopf_disable(idev);
+
+       return ret;
+}
+
+static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+                                        struct iommufd_attach_handle *handle)
+{
+       struct iommufd_fault *fault = hwpt->fault;
+       struct iopf_group *group, *next;
+       unsigned long index;
+
+       if (!fault)
+               return;
+
+       mutex_lock(&fault->mutex);
+       list_for_each_entry_safe(group, next, &fault->deliver, node) {
+               if (group->attach_handle != &handle->handle)
+                       continue;
+               list_del(&group->node);
+               iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+               iopf_free_group(group);
+       }
+
+       xa_for_each(&fault->response, index, group) {
+               if (group->attach_handle != &handle->handle)
+                       continue;
+               xa_erase(&fault->response, index);
+               iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+               iopf_free_group(group);
+       }
+       mutex_unlock(&fault->mutex);
+}
+
+static struct iommufd_attach_handle *
+iommufd_device_get_attach_handle(struct iommufd_device *idev)
+{
+       struct iommu_attach_handle *handle;
+
+       handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
+       if (!handle)
+               return NULL;
+
+       return to_iommufd_handle(handle);
+}
+
+void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
+                                    struct iommufd_device *idev)
+{
+       struct iommufd_attach_handle *handle;
+
+       handle = iommufd_device_get_attach_handle(idev);
+       iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+       iommufd_auto_response_faults(hwpt, handle);
+       iommufd_fault_iopf_disable(idev);
+       kfree(handle);
+}
+
+static int __fault_domain_replace_dev(struct iommufd_device *idev,
+                                     struct iommufd_hw_pagetable *hwpt,
+                                     struct iommufd_hw_pagetable *old)
+{
+       struct iommufd_attach_handle *handle, *curr = NULL;
+       int ret;
+
+       if (old->fault)
+               curr = iommufd_device_get_attach_handle(idev);
+
+       if (hwpt->fault) {
+               handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+               if (!handle)
+                       return -ENOMEM;
+
+               handle->handle.domain = hwpt->domain;
+               handle->idev = idev;
+               ret = iommu_replace_group_handle(idev->igroup->group,
+                                                hwpt->domain, &handle->handle);
+       } else {
+               ret = iommu_replace_group_handle(idev->igroup->group,
+                                                hwpt->domain, NULL);
+       }
+
+       if (!ret && curr) {
+               iommufd_auto_response_faults(old, curr);
+               kfree(curr);
+       }
+
+       return ret;
+}
+
+int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
+                                    struct iommufd_hw_pagetable *hwpt,
+                                    struct iommufd_hw_pagetable *old)
+{
+       bool iopf_off = !hwpt->fault && old->fault;
+       bool iopf_on = hwpt->fault && !old->fault;
+       int ret;
+
+       if (iopf_on) {
+               ret = iommufd_fault_iopf_enable(idev);
+               if (ret)
+                       return ret;
+       }
+
+       ret = __fault_domain_replace_dev(idev, hwpt, old);
+       if (ret) {
+               if (iopf_on)
+                       iommufd_fault_iopf_disable(idev);
+               return ret;
+       }
+
+       if (iopf_off)
+               iommufd_fault_iopf_disable(idev);
+
+       return 0;
+}
+
 void iommufd_fault_destroy(struct iommufd_object *obj)
 {
        struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
-- 
2.34.1

