[PATCH v2 03/12] iommu/vt-d: Change flags type to unsigned int in binding mm

2020-06-13 Thread Fenghua Yu
"flags" passed to intel_svm_bind_mm() is a bit mask and should be
defined as "unsigned int" instead of "int".

Change its type to "unsigned int".

Suggested-by: Thomas Gleixner 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Add this new patch per Thomas' comment.

 drivers/iommu/intel/svm.c   | 7 ---
 include/linux/intel-iommu.h | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index b5618341b4b1..4e775e12ae52 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -427,7 +427,8 @@ int intel_svm_unbind_gpasid(struct device *dev, unsigned 
int pasid)
 
 /* Caller must hold pasid_mutex, mm reference */
 static int
-intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
+intel_svm_bind_mm(struct device *dev, unsigned int flags,
+ struct svm_dev_ops *ops,
  struct mm_struct *mm, struct intel_svm_dev **sd)
 {
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
@@ -954,7 +955,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, 
void *drvdata)
 {
struct iommu_sva *sva = ERR_PTR(-EINVAL);
struct intel_svm_dev *sdev = NULL;
-   int flags = 0;
+   unsigned int flags = 0;
int ret;
 
/*
@@ -963,7 +964,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, 
void *drvdata)
 * and intel_svm etc.
 */
if (drvdata)
-   flags = *(int *)drvdata;
+   flags = *(unsigned int *)drvdata;
mutex_lock(&pasid_mutex);
ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
if (ret)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 44fa8879f829..9abc30cf10fc 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -759,7 +759,7 @@ struct intel_svm {
struct mm_struct *mm;
 
struct intel_iommu *iommu;
-   int flags;
+   unsigned int flags;
unsigned int pasid;
int gpasid; /* In case that guest PASID is different from host PASID */
struct list_head devs;
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 07/12] x86/msr-index: Define IA32_PASID MSR

2020-06-13 Thread Fenghua Yu
The IA32_PASID MSR (0xd93) contains the Process Address Space Identifier
(PASID), a 20-bit value. Bit 31 must be set to indicate the value
programmed in the MSR is valid. Hardware uses PASID to identify process
address space and direct responses to the right address space.

Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Change "identify process" to "identify process address space" in the
  commit message (Thomas)

 arch/x86/include/asm/msr-index.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e8370e64a155..e5f699ff1dd6 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -237,6 +237,9 @@
 #define MSR_IA32_LASTINTFROMIP 0x01dd
 #define MSR_IA32_LASTINTTOIP   0x01de
 
+#define MSR_IA32_PASID 0x0d93
+#define MSR_IA32_PASID_VALID   BIT_ULL(31)
+
 /* DEBUGCTLMSR bits (others vary by model): */
 #define DEBUGCTLMSR_LBR(1UL <<  0) /* last branch 
recording */
 #define DEBUGCTLMSR_BTF_SHIFT  1
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 02/12] ocxl: Change type of pasid to unsigned int

2020-06-13 Thread Fenghua Yu
PASID is defined as "int" although it's a 20-bit value and shouldn't be
negative int. To be consistent with type defined in iommu, define PASID
as "unsigned int".

Suggested-by: Thomas Gleixner 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Create this new patch to define PASID as "unsigned int" consistently in
  ocxl (Thomas)

 drivers/misc/ocxl/config.c|  3 ++-
 drivers/misc/ocxl/link.c  |  6 +++---
 drivers/misc/ocxl/ocxl_internal.h |  6 +++---
 drivers/misc/ocxl/pasid.c |  2 +-
 drivers/misc/ocxl/trace.h | 20 ++--
 include/misc/ocxl.h   |  6 +++---
 6 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index c8e19bfb5ef9..22d034caed3d 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -806,7 +806,8 @@ int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
 }
 EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
 
-int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int 
pasid)
+int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control,
+   unsigned int pasid)
 {
u32 val;
unsigned long timeout;
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 58d111afd9f6..931f6ae022db 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -492,7 +492,7 @@ static u64 calculate_cfg_state(bool kernel)
return state;
 }
 
-int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
+int ocxl_link_add_pe(void *link_handle, unsigned int pasid, u32 pidr, u32 tidr,
u64 amr, struct mm_struct *mm,
void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
void *xsl_err_data)
@@ -572,7 +572,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 
pidr, u32 tidr,
 }
 EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
 
-int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
+int ocxl_link_update_pe(void *link_handle, unsigned int pasid, __u16 tid)
 {
struct ocxl_link *link = (struct ocxl_link *) link_handle;
struct spa *spa = link->spa;
@@ -608,7 +608,7 @@ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 
tid)
return rc;
 }
 
-int ocxl_link_remove_pe(void *link_handle, int pasid)
+int ocxl_link_remove_pe(void *link_handle, unsigned int pasid)
 {
struct ocxl_link *link = (struct ocxl_link *) link_handle;
struct spa *spa = link->spa;
diff --git a/drivers/misc/ocxl/ocxl_internal.h 
b/drivers/misc/ocxl/ocxl_internal.h
index 345bf843a38e..3ca982ba7472 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -41,7 +41,7 @@ struct ocxl_afu {
struct ocxl_afu_config config;
int pasid_base;
int pasid_count; /* opened contexts */
-   int pasid_max; /* maximum number of contexts */
+   unsigned int pasid_max; /* maximum number of contexts */
int actag_base;
int actag_enabled;
struct mutex contexts_lock;
@@ -69,7 +69,7 @@ struct ocxl_xsl_error {
 
 struct ocxl_context {
struct ocxl_afu *afu;
-   int pasid;
+   unsigned int pasid;
struct mutex status_mutex;
enum ocxl_context_status status;
struct address_space *mapping;
@@ -128,7 +128,7 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
  * pasid: the PASID for the AFU context
  * tid: the new thread id for the process element
  */
-int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
+int ocxl_link_update_pe(void *link_handle, unsigned int pasid, __u16 tid);
 
 int ocxl_context_mmap(struct ocxl_context *ctx,
struct vm_area_struct *vma);
diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
index d14cb56e6920..a151fc8f0bec 100644
--- a/drivers/misc/ocxl/pasid.c
+++ b/drivers/misc/ocxl/pasid.c
@@ -80,7 +80,7 @@ static void range_free(struct list_head *head, u32 start, u32 
size,
 
 int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
 {
-   int max_pasid;
+   unsigned int max_pasid;
 
if (fn->config.max_pasid_log < 0)
return -ENOSPC;
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
index 17e21cb2addd..019e2fc63b1d 100644
--- a/drivers/misc/ocxl/trace.h
+++ b/drivers/misc/ocxl/trace.h
@@ -9,13 +9,13 @@
 #include 
 
 DECLARE_EVENT_CLASS(ocxl_context,
-   TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+   TP_PROTO(pid_t pid, void *spa, unsigned int pasid, u32 pidr, u32 tidr),
TP_ARGS(pid, spa, pasid, pidr, tidr),
 
TP_STRUCT__entry(
__field(pid_t, pid)
__field(void*, spa)
-   __field(int, pasid)
+   __field(unsigned int, pasid)
__field(u32, pidr)
__field(u32, tidr)
),
@@ -38,21 +38,21 @@ DECLARE_EVENT_CLASS(ocxl_context,
 );
 
 DEFINE_EVENT(ocxl_context, ocxl_co

[PATCH v2 10/12] x86/process: Clear PASID state for a newly forked/cloned thread

2020-06-13 Thread Fenghua Yu
The PASID state has to be cleared on forks, since the child has a
different address space. The PASID is also cleared for thread clone. While
it would be correct to inherit the PASID in this case, it is unknown
whether the new task will use ENQCMD. Giving it the PASID "just in case"
would have the downside of increased context switch overhead to setting
the PASID MSR.

Since #GP faults have to be handled on any threads that were created before
the PASID was assigned to the mm of the process, newly created threads
might as well be treated in a consistent way.

Suggested-by: Thomas Gleixner 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Modify init_task_pasid().

 arch/x86/kernel/process.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f362ce0d5ac0..1b1492e337a6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -121,6 +121,21 @@ static int set_new_tls(struct task_struct *p, unsigned 
long tls)
return do_set_thread_area_64(p, ARCH_SET_FS, tls);
 }
 
+/* Initialize the PASID state for the forked/cloned thread. */
+static void init_task_pasid(struct task_struct *task)
+{
+   struct ia32_pasid_state *ppasid;
+
+   /*
+* Initialize the PASID state so that the PASID MSR will be
+* initialized to its initial state (0) by XRSTORS when the task is
+* scheduled for the first time.
+*/
+   ppasid = get_xsave_addr(&task->thread.fpu.state.xsave, XFEATURE_PASID);
+   if (ppasid)
+   ppasid->pasid = INIT_PASID;
+}
+
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
 {
@@ -174,6 +189,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
task_user_gs(p) = get_user_gs(current_pt_regs());
 #endif
 
+   if (static_cpu_has(X86_FEATURE_ENQCMD))
+   init_task_pasid(p);
+
/* Set a new TLS for the child thread? */
if (clone_flags & CLONE_SETTLS)
ret = set_new_tls(p, tls);
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 05/12] x86/cpufeatures: Enumerate ENQCMD and ENQCMDS instructions

2020-06-13 Thread Fenghua Yu
Work submission instruction comes in two flavors. ENQCMD can be called
both in ring 3 and ring 0 and always uses the contents of PASID MSR when
shipping the command to the device. ENQCMDS allows a kernel driver to
submit commands on behalf of a user process. The driver supplies the
PASID value in ENQCMDS. There isn't any usage of ENQCMD in the kernel
as of now.

The CPU feature flag is shown as "enqcmd" in /proc/cpuinfo.

Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Re-write commit message (Thomas)

 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kernel/cpu/cpuid-deps.c   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 02dabc9e77b0..4469618c410f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -351,6 +351,7 @@
 #define X86_FEATURE_CLDEMOTE   (16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI(16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B  (16*32+28) /* MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS 
instructions */
 
 /* AMD-defined CPU features, CPUID level 0x8007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery 
support */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 3cbe24ca80ab..3a02707c1f4d 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_CQM_MBM_TOTAL,X86_FEATURE_CQM_LLC   },
{ X86_FEATURE_CQM_MBM_LOCAL,X86_FEATURE_CQM_LLC   },
{ X86_FEATURE_AVX512_BF16,  X86_FEATURE_AVX512VL  },
+   { X86_FEATURE_ENQCMD,   X86_FEATURE_XSAVES},
{}
 };
 
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 11/12] x86/mmu: Allocate/free PASID

2020-06-13 Thread Fenghua Yu
A PASID is allocated for an "mm" the first time any thread attaches
to an SVM capable device. Later device attachments (whether to the same
device or another SVM device) will re-use the same PASID.

The PASID is freed when the process exits (so no need to keep
reference counts on how many SVM devices are sharing the PASID).

Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Define a helper free_bind() to simplify error exit code in bind_mm()
  (Thomas)
- Fix a ret error code in bind_mm() (Thomas)
- Change pasid's type from "int" to "unsigned int" to have consistent
  pasid type in iommu (Thomas)
- Simplify alloc_pasid() a bit.

 arch/x86/include/asm/iommu.h   |   2 +
 arch/x86/include/asm/mmu_context.h |  14 
 drivers/iommu/intel/svm.c  | 101 +
 3 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index bf1ed2ddc74b..ed41259fe7ac 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -26,4 +26,6 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
return -EINVAL;
 }
 
+void __free_pasid(struct mm_struct *mm);
+
 #endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h 
b/arch/x86/include/asm/mmu_context.h
index 47562147e70b..f8c91ce8c451 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 extern atomic64_t last_mm_ctx_id;
 
@@ -117,9 +118,22 @@ static inline int init_new_context(struct task_struct *tsk,
init_new_context_ldt(mm);
return 0;
 }
+
+static inline void free_pasid(struct mm_struct *mm)
+{
+   if (!IS_ENABLED(CONFIG_INTEL_IOMMU_SVM))
+   return;
+
+   if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+   return;
+
+   __free_pasid(mm);
+}
+
 static inline void destroy_context(struct mm_struct *mm)
 {
destroy_context_ldt(mm);
+   free_pasid(mm);
 }
 
 extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 4e775e12ae52..27dc866b8461 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -425,6 +425,53 @@ int intel_svm_unbind_gpasid(struct device *dev, unsigned 
int pasid)
return ret;
 }
 
+static void free_bind(struct intel_svm *svm, struct intel_svm_dev *sdev,
+ bool new_pasid)
+{
+   if (new_pasid)
+   ioasid_free(svm->pasid);
+   kfree(svm);
+   kfree(sdev);
+}
+
+/*
+ * If this mm already has a PASID, use it. Otherwise allocate a new one.
+ * Let the caller know if a new PASID is allocated via 'new_pasid'.
+ */
+static int alloc_pasid(struct intel_svm *svm, struct mm_struct *mm,
+  unsigned int pasid_max, bool *new_pasid,
+  unsigned int flags)
+{
+   unsigned int pasid;
+
+   *new_pasid = false;
+
+   /*
+* Reuse the PASID if the mm already has a PASID and not a private
+* PASID is requested.
+*/
+   if (mm && mm->pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+   /*
+* Once a PASID is allocated for this mm, the PASID
+* stays with the mm until the mm is dropped. Reuse
+* the PASID which has been already allocated for the
+* mm instead of allocating a new one.
+*/
+   ioasid_set_data(mm->pasid, svm);
+
+   return mm->pasid;
+   }
+
+   /* Allocate a new pasid. Do not use PASID 0, reserved for init PASID. */
+   pasid = ioasid_alloc(NULL, PASID_MIN, pasid_max - 1, svm);
+   if (pasid != INVALID_IOASID) {
+   /* A new pasid is allocated. */
+   *new_pasid = true;
+   }
+
+   return pasid;
+}
+
 /* Caller must hold pasid_mutex, mm reference */
 static int
 intel_svm_bind_mm(struct device *dev, unsigned int flags,
@@ -518,6 +565,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
init_rcu_head(&sdev->rcu);
 
if (!svm) {
+   bool new_pasid;
+
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
ret = -ENOMEM;
@@ -529,12 +578,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
if (pasid_max > intel_pasid_max_id)
pasid_max = intel_pasid_max_id;
 
-   /* Do not use PASID 0, reserved for RID to PASID */
-   svm->pasid = ioasid_alloc(NULL, PASID_MIN,
- pasid_max - 1, svm);
+   svm->pasid = alloc_pasid(svm, mm, pasid_max, &new_pasid, flags);
if (svm->pasid == INVALID_IOASID) {
-   kfree(svm);
-   kfree(sdev);
+   free_bind(svm, sdev, new_pasid);
   

[PATCH v2 01/12] iommu: Change type of pasid to unsigned int

2020-06-13 Thread Fenghua Yu
PASID is defined as a few different types in iommu including "int",
"u32", and "unsigned int". To be consistent and to match with ioasid's
type, define PASID and its variations (e.g. max PASID) as "unsigned int".

No PASID type change in uapi.

Suggested-by: Thomas Gleixner 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Create this new patch to define PASID as "unsigned int" consistently in
  iommu (Thomas)

 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c |  5 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  2 +-
 drivers/iommu/amd/amd_iommu.h  | 13 
 drivers/iommu/amd/amd_iommu_types.h| 12 
 drivers/iommu/amd/init.c   |  4 +--
 drivers/iommu/amd/iommu.c  | 41 ++
 drivers/iommu/amd/iommu_v2.c   | 22 +++---
 drivers/iommu/intel/debugfs.c  |  2 +-
 drivers/iommu/intel/dmar.c | 13 
 drivers/iommu/intel/intel-pasid.h  | 21 ++---
 drivers/iommu/intel/iommu.c|  4 +--
 drivers/iommu/intel/pasid.c| 36 +++---
 drivers/iommu/intel/svm.c  | 12 
 drivers/iommu/iommu.c  |  2 +-
 drivers/misc/uacce/uacce.c |  2 +-
 include/linux/amd-iommu.h  |  9 +++---
 include/linux/intel-iommu.h| 18 +--
 include/linux/intel-svm.h  |  2 +-
 include/linux/iommu.h  |  8 ++---
 include/linux/uacce.h  |  2 +-
 20 files changed, 121 insertions(+), 109 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 7c8786b9eb0a..703d23deca76 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -139,7 +139,8 @@ void kfd_iommu_unbind_process(struct kfd_process *p)
 }
 
 /* Callback for process shutdown invoked by the IOMMU driver */
-static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
+static void iommu_pasid_shutdown_callback(struct pci_dev *pdev,
+ unsigned int pasid)
 {
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
struct kfd_process *p;
@@ -185,7 +186,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev 
*pdev, int pasid)
 }
 
 /* This function called by IOMMU driver on PPR failure */
-static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
+static int iommu_invalid_ppr_cb(struct pci_dev *pdev, unsigned int pasid,
unsigned long address, u16 flags)
 {
struct kfd_dev *dev;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f0587d94294d..3c7d1f774afe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -714,7 +714,7 @@ struct kfd_process {
/* We want to receive a notification when the mm_struct is destroyed */
struct mmu_notifier mmu_notifier;
 
-   uint16_t pasid;
+   unsigned int pasid;
unsigned int doorbell_index;
 
/*
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index f892992c8744..0914b5b6f879 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -45,12 +45,13 @@ extern int amd_iommu_register_ppr_notifier(struct 
notifier_block *nb);
 extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
 extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
 extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
-extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+extern int amd_iommu_flush_page(struct iommu_domain *dom, unsigned int pasid,
u64 address);
-extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
-extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
-unsigned long cr3);
-extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, unsigned int pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom,
+unsigned int pasid, unsigned long cr3);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom,
+  unsigned int pasid);
 extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
 
 #ifdef CONFIG_IRQ_REMAP
@@ -66,7 +67,7 @@ static inline int amd_iommu_create_irq_domain(struct 
amd_iommu *iommu)
 #define PPR_INVALID0x1
 #define PPR_FAILURE0xf
 
-extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, unsigned int pasid,
  int status, int tag);
 
 static inline bool is_rd890_iommu(struct pci_dev *pdev)
diff --git a/drivers/iommu/amd/amd_iommu_types.h 

[PATCH v2 08/12] mm: Define pasid in mm

2020-06-13 Thread Fenghua Yu
PASID is shared by all threads in a process. So the logical place to keep
track of it is in the "mm". Both ARM and X86 need to use the PASID in the
"mm".

Suggested-by: Christoph Hellwig 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- This new patch moves "pasid" from x86 specific mm_context_t to generic
  struct mm_struct per Christopher's comment: 
https://lore.kernel.org/linux-iommu/20200414170252.714402-1-jean-phili...@linaro.org/T/#mb57110ffe1aaa24750eeea4f93b611f0d1913911
- Jean-Philippe Brucker released a virtually same patch. I still put this
  patch in the series for better review. The upstream kernel only needs one
  of the two patches eventually.
https://lore.kernel.org/linux-iommu/20200519175502.2504091-2-jean-phili...@linaro.org/
- Change CONFIG_IOASID to CONFIG_PCI_PASID (Ashok)

 include/linux/mm_types.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 64ede5f150dc..5778db3aa42d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -538,6 +538,10 @@ struct mm_struct {
atomic_long_t hugetlb_usage;
 #endif
struct work_struct async_put_work;
+
+#ifdef CONFIG_PCI_PASID
+   unsigned int pasid;
+#endif
} __randomize_layout;
 
/*
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 04/12] docs: x86: Add documentation for SVA (Shared Virtual Addressing)

2020-06-13 Thread Fenghua Yu
From: Ashok Raj 

ENQCMD and Data Streaming Accelerator (DSA) and all of their associated
features are a complicated stack with lots of interconnected pieces.
This documentation provides a big picture overview for all of the
features.

Signed-off-by: Ashok Raj 
Co-developed-by: Fenghua Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Fix the doc format and add the doc in toctree (Thomas)
- Modify the doc for better description (Thomas, Tony, Dave)

 Documentation/x86/index.rst |   1 +
 Documentation/x86/sva.rst   | 287 
 2 files changed, 288 insertions(+)
 create mode 100644 Documentation/x86/sva.rst

diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 265d9e9a093b..e5d5ff096685 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -30,3 +30,4 @@ x86-specific Documentation
usb-legacy-support
i386/index
x86_64/index
+   sva
diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
new file mode 100644
index ..1e52208c7dda
--- /dev/null
+++ b/Documentation/x86/sva.rst
@@ -0,0 +1,287 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+Shared Virtual Addressing (SVA) with ENQCMD
+===
+
+Background
+==
+
+Shared Virtual Addressing (SVA) allows the processor and device to use the
+same virtual addresses avoiding the need for software to translate virtual
+addresses to physical addresses. SVA is what PCIe calls Shared Virtual
+Memory (SVM)
+
+In addition to the convenience of using application virtual addresses
+by the device, it also doesn't require pinning pages for DMA.
+PCIe Address Translation Services (ATS) along with Page Request Interface
+(PRI) allow devices to function much the same way as the CPU handling
+application page-faults. For more information please refer to PCIe
+specification Chapter 10: ATS Specification.
+
+Use of SVA requires IOMMU support in the platform. IOMMU also is required
+to support PCIe features ATS and PRI. ATS allows devices to cache
+translations for the virtual address. IOMMU driver uses the mmu_notifier()
+support to keep the device tlb cache and the CPU cache in sync. PRI allows
+the device to request paging the virtual address before using if they are
+not paged in the CPU page tables.
+
+
+Shared Hardware Workqueues
+==
+
+Unlike Single Root I/O Virtualization (SRIOV), Scalable IOV (SIOV) permits
+the use of Shared Work Queues (SWQ) by both applications and Virtual
+Machines (VM's). This allows better hardware utilization vs. hard
+partitioning resources that could result in under utilization. In order to
+allow the hardware to distinguish the context for which work is being
+executed in the hardware by SWQ interface, SIOV uses Process Address Space
+ID (PASID), which is a 20bit number defined by the PCIe SIG.
+
+PASID value is encoded in all transactions from the device. This allows the
+IOMMU to track I/O on a per-PASID granularity in addition to using the PCIe
+Resource Identifier (RID) which is the Bus/Device/Function.
+
+
+ENQCMD
+==
+
+ENQCMD is a new instruction on Intel platforms that atomically submits a
+work descriptor to a device. The descriptor includes the operation to be
+performed, virtual addresses of all parameters, virtual address of a completion
+record, and the PASID (process address space ID) of the current process.
+
+ENQCMD works with non-posted semantics and carries a status back if the
+command was accepted by hardware. This allows the submitter to know if the
+submission needs to be retried or other device specific mechanisms to
+implement implement fairness or ensure forward progress can be made.
+
+ENQCMD is the glue that ensures applications can directly submit commands
+to the hardware and also permit hardware to be aware of application context
+to perform I/O operations via use of PASID.
+
+Process Address Space Tagging
+=
+
+A new thread scoped MSR (IA32_PASID) provides the connection between
+user processes and the rest of the hardware. When an application first
+accesses an SVA capable device this MSR is initialized with a newly
+allocated PASID. The driver for the device calls an IOMMU specific api
+that sets up the routing for DMA and page-requests.
+
+For example, the Intel Data Streaming Accelerator (DSA) uses
+intel_svm_bind_mm(), which will do the following.
+
+- Allocate the PASID, and program the process page-table (cr3) in the PASID
+  context entries.
+- Register for mmu_notifier() to track any page-table invalidations to keep
+  the device tlb in sync. For example, when a page-table entry is invalidated,
+  IOMMU propagates the invalidation to device tlb. This will force any
+  future access by the device to this virtual address to participate in
+  ATS. If the IOMMU responds with proper response that a page is not
+  present, the device would request the pa

[PATCH v2 09/12] fork: Clear PASID for new mm

2020-06-13 Thread Fenghua Yu
When a new mm is created, its PASID should be cleared, i.e. the PASID is
initialized to its init state 0 on both ARM and X86.

Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Add this patch to initialize PASID value for a new mm.

 include/linux/mm_types.h | 2 ++
 kernel/fork.c| 8 
 2 files changed, 10 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5778db3aa42d..904bc07411a9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -22,6 +22,8 @@
 #endif
 #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
 
+/* Initial PASID value is 0. */
+#define INIT_PASID 0
 
 struct address_space;
 struct mem_cgroup;
diff --git a/kernel/fork.c b/kernel/fork.c
index 142b23645d82..085e72d3e9eb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1007,6 +1007,13 @@ static void mm_init_owner(struct mm_struct *mm, struct 
task_struct *p)
 #endif
 }
 
+static void mm_init_pasid(struct mm_struct *mm)
+{
+#ifdef CONFIG_PCI_PASID
+   mm->pasid = INIT_PASID;
+#endif
+}
+
 static void mm_init_uprobes_state(struct mm_struct *mm)
 {
 #ifdef CONFIG_UPROBES
@@ -1035,6 +1042,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, 
struct task_struct *p,
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
+   mm_init_pasid(mm);
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 06/12] x86/fpu/xstate: Add supervisor PASID state for ENQCMD feature

2020-06-13 Thread Fenghua Yu
From: Yu-cheng Yu 

ENQCMD instruction reads PASID from IA32_PASID MSR. The MSR is stored
in the task's supervisor FPU PASID state and is context switched by
XSAVES/XRSTORS.

Signed-off-by: Yu-cheng Yu 
Co-developed-by: Fenghua Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Modify the commit message (Thomas)

 arch/x86/include/asm/fpu/types.h  | 10 ++
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/xstate.c  |  4 
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index f098f6cab94b..00f8efd4c07d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -114,6 +114,7 @@ enum xfeature {
XFEATURE_Hi16_ZMM,
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
XFEATURE_PKRU,
+   XFEATURE_PASID,
 
XFEATURE_MAX,
 };
@@ -128,6 +129,7 @@ enum xfeature {
 #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
 #define XFEATURE_MASK_PT   (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
 #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_PASID(1 << XFEATURE_PASID)
 
 #define XFEATURE_MASK_FPSSE(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
 #define XFEATURE_MASK_AVX512   (XFEATURE_MASK_OPMASK \
@@ -229,6 +231,14 @@ struct pkru_state {
u32 pad;
 } __packed;
 
+/*
+ * State component 10 is supervisor state used for context-switching the
+ * PASID state.
+ */
+struct ia32_pasid_state {
+   u64 pasid;
+} __packed;
+
 struct xstate_header {
u64 xfeatures;
u64 xcomp_bv;
diff --git a/arch/x86/include/asm/fpu/xstate.h 
b/arch/x86/include/asm/fpu/xstate.h
index 422d8369012a..ab9833c57aaa 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -33,7 +33,7 @@
  XFEATURE_MASK_BNDCSR)
 
 /* All currently supported supervisor features */
-#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
 
 /*
  * Unsupported supervisor features. When a supervisor feature in this mask is
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index bda2e5eaca0e..31629e43383c 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -37,6 +37,7 @@ static const char *xfeature_names[] =
"AVX-512 ZMM_Hi256" ,
"Processor Trace (unused)"  ,
"Protection Keys User registers",
+   "PASID state",
"unknown xstate feature",
 };
 
@@ -51,6 +52,7 @@ static short xsave_cpuid_features[] __initdata = {
X86_FEATURE_AVX512F,
X86_FEATURE_INTEL_PT,
X86_FEATURE_PKU,
+   X86_FEATURE_ENQCMD,
 };
 
 /*
@@ -316,6 +318,7 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
print_xstate_feature(XFEATURE_MASK_PKRU);
+   print_xstate_feature(XFEATURE_MASK_PASID);
 }
 
 /*
@@ -590,6 +593,7 @@ static void check_xstate_against_struct(int nr)
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
XCHECK_SZ(sz, nr, XFEATURE_PKRU,  struct pkru_state);
+   XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
 
/*
 * Make *SURE* to add any feature numbers in below if
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 00/12] x86: tag application address space for devices

2020-06-13 Thread Fenghua Yu
Typical hardware devices require a driver stack to translate application
buffers to hardware addresses, and a kernel-user transition to notify the
hardware of new work. What if both the translation and transition overhead
could be eliminated? This is what Shared Virtual Address (SVA) and ENQCMD
enabled hardware like Data Streaming Accelerator (DSA) aims to achieve.
Applications map portals in their local-address-space and directly submit
work to them using a new instruction.

This series enables ENQCMD and associated management of the new MSR
(MSR_IA32_PASID). This new MSR allows an application address space to be
associated with what the PCIe spec calls a Process Address Space ID (PASID).
This PASID tag is carried along with all requests between applications and
devices and allows devices to interact with the process address space.

SVA and ENQCMD enabled device drivers need this series. The phase 2 DSA
patches with SVA and ENQCMD support was released on the top of this series:
https://lore.kernel.org/patchwork/cover/1244060/

This series only provides simple and basic support for ENQCMD and the MSR:
1. Clean up type definitions (patch 1-3). These patches can be in a
   separate series.
   - Define "pasid" as "unsigned int" consistently (patch 1 and 2).
   - Define "flags" as "unsigned int"
2. Explain different various technical terms used in the series (patch 4).
3. Enumerate support for ENQCMD in the processor (patch 5).
4. Handle FPU PASID state and the MSR during context switch (patches 6-7).
5. Define "pasid" in mm_struct (patch 8).
5. Clear PASID state for new mm and forked and cloned thread (patch 9-10).
6. Allocate and free PASID for a process (patch 11).
7. Fix up the PASID MSR in #GP handler when one thread in a process
   executes ENQCMD for the first time (patches 12).

This patch series and the DSA phase 2 series are in
https://github.com/intel/idxd-driver/tree/idxd-stage2

References:
1. Detailed information on the ENQCMD/ENQCMDS instructions and the
IA32_PASID MSR can be found in Intel Architecture Instruction Set
Extensions and Future Features Programming Reference:
https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf

2. Detailed information on DSA can be found in DSA specification:
https://software.intel.com/en-us/download/intel-data-streaming-accelerator-preliminary-architecture-specification

Chang log:
v2:
- Add patches 1-3 to define "pasid" and "flags" as "unsigned int"
  consistently (Thomas)
  (these 3 patches could be in a separate patch set)
- Add patch 8 to move "pasid" to generic mm_struct (Christoph).
  Jean-Philippe Brucker released a virtually same patch. Upstream only
  needs one of the two.
- Add patch 9 to initialize PASID in a new mm.
- Plus other changes described in each patch (Thomas)

Ashok Raj (1):
  docs: x86: Add documentation for SVA (Shared Virtual Addressing)

Fenghua Yu (10):
  iommu: Change type of pasid to unsigned int
  ocxl: Change type of pasid to unsigned int
  iommu/vt-d: Change flags type to unsigned int in binding mm
  x86/cpufeatures: Enumerate ENQCMD and ENQCMDS instructions
  x86/msr-index: Define IA32_PASID MSR
  mm: Define pasid in mm
  fork: Clear PASID for new mm
  x86/process: Clear PASID state for a newly forked/cloned thread
  x86/mmu: Allocate/free PASID
  x86/traps: Fix up invalid PASID

Yu-cheng Yu (1):
  x86/fpu/xstate: Add supervisor PASID state for ENQCMD feature

 Documentation/x86/index.rst|   1 +
 Documentation/x86/sva.rst  | 287 +
 arch/x86/include/asm/cpufeatures.h |   1 +
 arch/x86/include/asm/fpu/types.h   |  10 +
 arch/x86/include/asm/fpu/xstate.h  |   2 +-
 arch/x86/include/asm/iommu.h   |   3 +
 arch/x86/include/asm/mmu_context.h |  14 ++
 arch/x86/include/asm/msr-index.h   |   3 +
 arch/x86/kernel/cpu/cpuid-deps.c   |   1 +
 arch/x86/kernel/fpu/xstate.c   |   4 +
 arch/x86/kernel/process.c  |  18 ++
 arch/x86/kernel/traps.c|  23 ++
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c |   5 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |   2 +-
 drivers/iommu/amd/amd_iommu.h  |  13 +-
 drivers/iommu/amd/amd_iommu_types.h|  12 +-
 drivers/iommu/amd/init.c   |   4 +-
 drivers/iommu/amd/iommu.c  |  41 ++--
 drivers/iommu/amd/iommu_v2.c   |  22 +-
 drivers/iommu/intel/debugfs.c  |   2 +-
 drivers/iommu/intel/dmar.c |  13 +-
 drivers/iommu/intel/intel-pasid.h  |  21 +-
 drivers/iommu/intel/iommu.c|   4 +-
 drivers/iommu/intel/pasid.c|  36 ++--
 drivers/iommu/intel/svm.c  | 159 --
 drivers/iommu/iommu.c  |   2 +-
 drivers/misc/ocxl/config.c |   3 +-
 drivers/misc/ocxl/link.c   |   6 +-
 drivers/misc/ocxl/ocxl_internal.h  |   6 +-
 drivers/misc/ocxl/pasid.c  |   2 +-
 drivers/misc/ocxl/trace.h

[PATCH v2 12/12] x86/traps: Fix up invalid PASID

2020-06-13 Thread Fenghua Yu
A #GP fault is generated when ENQCMD instruction is executed without
a valid PASID value programmed in the current thread's PASID MSR. The
#GP fault handler will initialize the MSR if a PASID has been allocated
for this process.

Decoding the user instruction is ugly and sets a bad architecture
precedent. It may not function if the faulting instruction is modified
after #GP.

Thomas suggested to provide a reason for the #GP caused by executing ENQCMD
without a valid PASID value programmed. #GP error codes are 16 bits and all
16 bits are taken. Refer to SDM Vol 3, Chapter 16.13 for details. The other
choice was to reflect the error code in an MSR. ENQCMD can also cause #GP
when loading from the source operand, so its not fully comprehending all
the reasons. Rather than special case the ENQCMD, in future Intel may
choose a different fault mechanism for such cases if recovery is needed on
#GP.

The following heuristic is used to avoid decoding the user instructions
to determine the precise reason for the #GP fault:
1) If the mm for the process has not been allocated a PASID, this #GP
   cannot be fixed.
2) If the PASID MSR is already initialized, then the #GP was for some
   other reason
3) Try initializing the PASID MSR and returning. If the #GP was from
   an ENQCMD this will fix it. If not, the #GP fault will be repeated
   and will hit case "2".

Suggested-by: Thomas Gleixner 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Update the first paragraph of the commit message (Thomas)
- Add reasons why don't decode the user instruction and don't use
  #GP error code (Thomas)
- Change get_task_mm() to current->mm (Thomas)
- Add comments on why IRQ is disabled during PASID fixup (Thomas)
- Add comment in fixup() that the function is called when #GP is from
  user (so mm is not NULL) (Dave Hansen)

 arch/x86/include/asm/iommu.h |  1 +
 arch/x86/kernel/traps.c  | 23 +
 drivers/iommu/intel/svm.c| 39 
 3 files changed, 63 insertions(+)

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index ed41259fe7ac..e9365a5d6f7d 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -27,5 +27,6 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
 }
 
 void __free_pasid(struct mm_struct *mm);
+bool __fixup_pasid_exception(void);
 
 #endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4cc541051994..0f78d5cdddfe 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,6 +59,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_X86_64
 #include 
@@ -436,6 +437,16 @@ static enum kernel_gp_hint get_kernel_gp_address(struct 
pt_regs *regs,
return GP_CANONICAL;
 }
 
+static bool fixup_pasid_exception(void)
+{
+   if (!IS_ENABLED(CONFIG_INTEL_IOMMU_SVM))
+   return false;
+   if (!static_cpu_has(X86_FEATURE_ENQCMD))
+   return false;
+
+   return __fixup_pasid_exception();
+}
+
 #define GPFSTR "general protection fault"
 
 dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
@@ -447,6 +458,18 @@ dotraplinkage void do_general_protection(struct pt_regs 
*regs, long error_code)
int ret;
 
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+
+   /*
+* Perform the check for a user mode PASID exception before enable
+* interrupts. Doing this here ensures that the PASID MSR can be simply
+* accessed because the contents are known to be still associated
+* with the current process.
+*/
+   if (user_mode(regs) && fixup_pasid_exception()) {
+   cond_local_irq_enable(regs);
+   return;
+   }
+
cond_local_irq_enable(regs);
 
if (static_cpu_has(X86_FEATURE_UMIP)) {
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 27dc866b8461..81fd2380c0f9 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -1078,3 +1078,42 @@ void __free_pasid(struct mm_struct *mm)
 */
ioasid_free(pasid);
 }
+
+/*
+ * Apply some heuristics to see if the #GP fault was caused by a thread
+ * that hasn't had the IA32_PASID MSR initialized.  If it looks like that
+ * is the problem, try initializing the IA32_PASID MSR. If the heuristic
+ * guesses incorrectly, take one more #GP fault.
+ */
+bool __fixup_pasid_exception(void)
+{
+   u64 pasid_msr;
+   unsigned int pasid;
+
+   /*
+* This function is called only when this #GP was triggered from user
+* space. So the mm cannot be NULL.
+*/
+   pasid = current->mm->pasid;
+   /* If the mm doesn't have a valid PASID, then can't help. */
+   if (invalid_pasid(pasid))
+   return false;
+
+   /*
+* Since IRQ is disabled now, the current task still owns the FPU on
+* this CPU and the PASID MSR can be 

Re: [PATCH v2 11/12] x86/mmu: Allocate/free PASID

2020-06-13 Thread Lu Baolu

Hi Fenghua,

On 2020/6/13 8:41, Fenghua Yu wrote:

A PASID is allocated for an "mm" the first time any thread attaches
to an SVM capable device. Later device attachments (whether to the same
device or another SVM device) will re-use the same PASID.

The PASID is freed when the process exits (so no need to keep
reference counts on how many SVM devices are sharing the PASID).


FYI.

Jean-Philippe Brucker has a patch for mm->pasid management in the vendor
agnostic manner.

https://www.spinics.net/lists/iommu/msg44459.html

Best regards,
baolu



Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Define a helper free_bind() to simplify error exit code in bind_mm()
   (Thomas)
- Fix a ret error code in bind_mm() (Thomas)
- Change pasid's type from "int" to "unsigned int" to have consistent
   pasid type in iommu (Thomas)
- Simplify alloc_pasid() a bit.

  arch/x86/include/asm/iommu.h   |   2 +
  arch/x86/include/asm/mmu_context.h |  14 
  drivers/iommu/intel/svm.c  | 101 +
  3 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index bf1ed2ddc74b..ed41259fe7ac 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -26,4 +26,6 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
return -EINVAL;
  }
  
+void __free_pasid(struct mm_struct *mm);

+
  #endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h 
b/arch/x86/include/asm/mmu_context.h
index 47562147e70b..f8c91ce8c451 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
  #include 
  #include 
  #include 
+#include 
  
  extern atomic64_t last_mm_ctx_id;
  
@@ -117,9 +118,22 @@ static inline int init_new_context(struct task_struct *tsk,

init_new_context_ldt(mm);
return 0;
  }
+
+static inline void free_pasid(struct mm_struct *mm)
+{
+   if (!IS_ENABLED(CONFIG_INTEL_IOMMU_SVM))
+   return;
+
+   if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+   return;
+
+   __free_pasid(mm);
+}
+
  static inline void destroy_context(struct mm_struct *mm)
  {
destroy_context_ldt(mm);
+   free_pasid(mm);
  }
  
  extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 4e775e12ae52..27dc866b8461 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -425,6 +425,53 @@ int intel_svm_unbind_gpasid(struct device *dev, unsigned 
int pasid)
return ret;
  }
  
+static void free_bind(struct intel_svm *svm, struct intel_svm_dev *sdev,

+ bool new_pasid)
+{
+   if (new_pasid)
+   ioasid_free(svm->pasid);
+   kfree(svm);
+   kfree(sdev);
+}
+
+/*
+ * If this mm already has a PASID, use it. Otherwise allocate a new one.
+ * Let the caller know if a new PASID is allocated via 'new_pasid'.
+ */
+static int alloc_pasid(struct intel_svm *svm, struct mm_struct *mm,
+  unsigned int pasid_max, bool *new_pasid,
+  unsigned int flags)
+{
+   unsigned int pasid;
+
+   *new_pasid = false;
+
+   /*
+* Reuse the PASID if the mm already has a PASID and not a private
+* PASID is requested.
+*/
+   if (mm && mm->pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+   /*
+* Once a PASID is allocated for this mm, the PASID
+* stays with the mm until the mm is dropped. Reuse
+* the PASID which has been already allocated for the
+* mm instead of allocating a new one.
+*/
+   ioasid_set_data(mm->pasid, svm);
+
+   return mm->pasid;
+   }
+
+   /* Allocate a new pasid. Do not use PASID 0, reserved for init PASID. */
+   pasid = ioasid_alloc(NULL, PASID_MIN, pasid_max - 1, svm);
+   if (pasid != INVALID_IOASID) {
+   /* A new pasid is allocated. */
+   *new_pasid = true;
+   }
+
+   return pasid;
+}
+
  /* Caller must hold pasid_mutex, mm reference */
  static int
  intel_svm_bind_mm(struct device *dev, unsigned int flags,
@@ -518,6 +565,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
init_rcu_head(&sdev->rcu);
  
  	if (!svm) {

+   bool new_pasid;
+
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
ret = -ENOMEM;
@@ -529,12 +578,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
if (pasid_max > intel_pasid_max_id)
pasid_max = intel_pasid_max_id;
  
-		/* Do not use PASID 0, reserved for RID to PASID */

-   svm->pasid = ioasid_alloc(NULL, PASID_MIN,
- pasid_max - 1, svm);
+   svm->pasid = alloc_pasid(svm, 

Re: [PATCH v2 04/12] docs: x86: Add documentation for SVA (Shared Virtual Addressing)

2020-06-13 Thread Lu Baolu

Hi Fenghua,

On 2020/6/13 8:41, Fenghua Yu wrote:

From: Ashok Raj 

ENQCMD and Data Streaming Accelerator (DSA) and all of their associated
features are a complicated stack with lots of interconnected pieces.
This documentation provides a big picture overview for all of the
features.

Signed-off-by: Ashok Raj 
Co-developed-by: Fenghua Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Fix the doc format and add the doc in toctree (Thomas)
- Modify the doc for better description (Thomas, Tony, Dave)

  Documentation/x86/index.rst |   1 +
  Documentation/x86/sva.rst   | 287 
  2 files changed, 288 insertions(+)
  create mode 100644 Documentation/x86/sva.rst

diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 265d9e9a093b..e5d5ff096685 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -30,3 +30,4 @@ x86-specific Documentation
 usb-legacy-support
 i386/index
 x86_64/index
+   sva
diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
new file mode 100644
index ..1e52208c7dda
--- /dev/null
+++ b/Documentation/x86/sva.rst
@@ -0,0 +1,287 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+Shared Virtual Addressing (SVA) with ENQCMD
+===
+
+Background
+==
+
+Shared Virtual Addressing (SVA) allows the processor and device to use the
+same virtual addresses avoiding the need for software to translate virtual
+addresses to physical addresses. SVA is what PCIe calls Shared Virtual
+Memory (SVM)
+
+In addition to the convenience of using application virtual addresses
+by the device, it also doesn't require pinning pages for DMA.
+PCIe Address Translation Services (ATS) along with Page Request Interface
+(PRI) allow devices to function much the same way as the CPU handling
+application page-faults. For more information please refer to PCIe
+specification Chapter 10: ATS Specification.
+
+Use of SVA requires IOMMU support in the platform. IOMMU also is required
+to support PCIe features ATS and PRI. ATS allows devices to cache
+translations for the virtual address. IOMMU driver uses the mmu_notifier()
+support to keep the device tlb cache and the CPU cache in sync. PRI allows
+the device to request paging the virtual address before using if they are
+not paged in the CPU page tables.
+
+
+Shared Hardware Workqueues
+==
+
+Unlike Single Root I/O Virtualization (SRIOV), Scalable IOV (SIOV) permits
+the use of Shared Work Queues (SWQ) by both applications and Virtual
+Machines (VM's). This allows better hardware utilization vs. hard
+partitioning resources that could result in under utilization. In order to
+allow the hardware to distinguish the context for which work is being
+executed in the hardware by SWQ interface, SIOV uses Process Address Space
+ID (PASID), which is a 20bit number defined by the PCIe SIG.
+
+PASID value is encoded in all transactions from the device. This allows the
+IOMMU to track I/O on a per-PASID granularity in addition to using the PCIe
+Resource Identifier (RID) which is the Bus/Device/Function.
+
+
+ENQCMD
+==
+
+ENQCMD is a new instruction on Intel platforms that atomically submits a
+work descriptor to a device. The descriptor includes the operation to be
+performed, virtual addresses of all parameters, virtual address of a completion
+record, and the PASID (process address space ID) of the current process.
+
+ENQCMD works with non-posted semantics and carries a status back if the
+command was accepted by hardware. This allows the submitter to know if the
+submission needs to be retried or other device specific mechanisms to
+implement implement fairness or ensure forward progress can be made.


Repeated "implement".


+
+ENQCMD is the glue that ensures applications can directly submit commands
+to the hardware and also permit hardware to be aware of application context
+to perform I/O operations via use of PASID.
+
+Process Address Space Tagging
+=
+
+A new thread scoped MSR (IA32_PASID) provides the connection between
+user processes and the rest of the hardware. When an application first
+accesses an SVA capable device this MSR is initialized with a newly
+allocated PASID. The driver for the device calls an IOMMU specific api
+that sets up the routing for DMA and page-requests.
+
+For example, the Intel Data Streaming Accelerator (DSA) uses
+intel_svm_bind_mm(), which will do the following.


The Intel SVM APIs have been deprecated. Drivers should use
iommu_sva_bind_device() instead. Please also update other places in
this document.


+
+- Allocate the PASID, and program the process page-table (cr3) in the PASID
+  context entries.
+- Register for mmu_notifier() to track any page-table invalidations to keep
+  the device tlb in sync. For example, when a page-table entry is invalidated,
+  IOMMU propagates the in