[PATCH intel_iommu 1/7] intel_iommu: fix FRCD construction macro.

2024-04-22 Thread CLEMENT MATHIEU--DRIF
The constant must be unsigned; otherwise the shifted value is a negative
int, and its sign extension (two's complement) to 64 bits overwrites the
other fields when a PASID is present.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f8cf99bddf..cbc4030031 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -267,7 +267,7 @@
 /* For the low 64-bit of 128-bit */
 #define VTD_FRCD_FI(val)((val) & ~0xfffULL)
 #define VTD_FRCD_PV(val)(((val) & 0xULL) << 40)
-#define VTD_FRCD_PP(val)(((val) & 0x1) << 31)
+#define VTD_FRCD_PP(val)(((val) & 0x1ULL) << 31)
 #define VTD_FRCD_IR_IDX(val)(((val) & 0xULL) << 48)
 
 /* DMA Remapping Fault Conditions */
-- 
2.44.0


[PATCH intel_iommu 3/7] intel_iommu: make types match

2024-04-22 Thread CLEMENT MATHIEU--DRIF
The 'level' field in vtd_iotlb_key is an uint8_t.
We don't need to store level as an int in vtd_lookup_iotlb

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 6f1364b3fd..ba545590b1 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -333,7 +333,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, 
uint16_t source_id,
 {
 struct vtd_iotlb_key key;
 VTDIOTLBEntry *entry;
-int level;
+uint8_t level;
 
 for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
 key.gfn = vtd_get_iotlb_gfn(addr, level);
-- 
2.44.0


[PATCH intel_iommu 5/7] intel_iommu: extract device IOTLB invalidation logic

2024-04-22 Thread CLEMENT MATHIEU--DRIF
This piece of code can be shared by both IOTLB invalidation and
PASID-based IOTLB invalidation

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 57 +--
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3b9f120dec..aaac61bf6a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2890,13 +2890,42 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
 return true;
 }
 
+static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
+ bool size, hwaddr addr)
+{
+/*
+ * According to ATS spec table 2.4:
+ * S = 0, bits 15:12 = xxxx range size: 4K
+ * S = 1, bits 15:12 = xxx0 range size: 8K
+ * S = 1, bits 15:12 = xx01 range size: 16K
+ * S = 1, bits 15:12 = x011 range size: 32K
+ * S = 1, bits 15:12 = 0111 range size: 64K
+ * ...
+ */
+
+IOMMUTLBEvent event;
+uint64_t sz;
+
+if (size) {
+sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
+addr &= ~(sz - 1);
+} else {
+sz = VTD_PAGE_SIZE;
+}
+
+event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP;
+event.entry.target_as = &vtd_dev_as->as;
+event.entry.addr_mask = sz - 1;
+event.entry.iova = addr;
+event.entry.perm = IOMMU_NONE;
+event.entry.translated_addr = 0;
+memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
+}
 static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
   VTDInvDesc *inv_desc)
 {
 VTDAddressSpace *vtd_dev_as;
-IOMMUTLBEvent event;
 hwaddr addr;
-uint64_t sz;
 uint16_t sid;
 bool size;
 
@@ -2912,6 +2941,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState 
*s,
 return false;
 }
 
+
 /*
  * Using sid is OK since the guest should have finished the
  * initialization of both the bus and device.
@@ -2921,28 +2951,7 @@ static bool 
vtd_process_device_iotlb_desc(IntelIOMMUState *s,
 goto done;
 }
 
-/* According to ATS spec table 2.4:
- * S = 0, bits 15:12 = xxxx range size: 4K
- * S = 1, bits 15:12 = xxx0 range size: 8K
- * S = 1, bits 15:12 = xx01 range size: 16K
- * S = 1, bits 15:12 = x011 range size: 32K
- * S = 1, bits 15:12 = 0111 range size: 64K
- * ...
- */
-if (size) {
-sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
-addr &= ~(sz - 1);
-} else {
-sz = VTD_PAGE_SIZE;
-}
-
-event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP;
-event.entry.target_as = &vtd_dev_as->as;
-event.entry.addr_mask = sz - 1;
-event.entry.iova = addr;
-event.entry.perm = IOMMU_NONE;
-event.entry.translated_addr = 0;
-memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
+do_invalidate_device_tlb(vtd_dev_as, size, addr);
 
 done:
 return true;
-- 
2.44.0


[PATCH intel_iommu 6/7] intel_iommu: add PASID-based IOTLB invalidation

2024-04-22 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 130 ++---
 hw/i386/intel_iommu_internal.h |  51 +++--
 2 files changed, 150 insertions(+), 31 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index aaac61bf6a..4b54a45107 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -277,9 +277,22 @@ static gboolean vtd_hash_remove_by_page(gpointer key, 
gpointer value,
 VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
 uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
 uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
-return (entry->domain_id == info->domain_id) &&
-(((entry->gfn & info->mask) == gfn) ||
- (entry->gfn == gfn_tlb));
+return (
+(entry->domain_id == info->domain_id) &&
+(info->pasid == entry->pasid)
+) && (
+((entry->gfn & info->mask) == gfn) ||
+(entry->gfn == gfn_tlb)
+);
+}
+
+static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
+gpointer user_data)
+{
+VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+VTDIOTLBPasidEntryInvInfo *info = (VTDIOTLBPasidEntryInvInfo *)user_data;
+return ((entry->domain_id == info->domain_id) &&
+(info->pasid == entry->pasid));
 }
 
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
@@ -1287,8 +1300,10 @@ static int vtd_iova_to_pte_sl(IntelIOMMUState *s,  
VTDContextEntry *ce,
 if (ret != 0) {
 return ret;
 }
+
 *reads = (*reads) && (slpte & VTD_SL_R);
 *writes = (*writes) && (slpte & VTD_SL_W);
+
 if ((slpte & access_right_check) != access_right_check) {
 error_report_once("%s: detected slpte permission error "
   "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
@@ -2484,23 +2499,61 @@ static void 
vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
 }
 }
 
+static VTDIOTLBPageInvInfo vtd_build_tlb_page_inv_info(uint16_t domain_id,
+   hwaddr addr, uint8_t am,
+   uint32_t pasid)
+{
+assert(am <= VTD_MAMV);
+VTDIOTLBPageInvInfo info = {
+.domain_id = domain_id,
+.addr = addr,
+.mask = ~((1ULL << am) - 1),
+.pasid = pasid
+};
+return info;
+}
+
 static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
   hwaddr addr, uint8_t am)
 {
-VTDIOTLBPageInvInfo info;
+VTDIOTLBPageInvInfo info = vtd_build_tlb_page_inv_info(domain_id, addr,
+   am, PCI_NO_PASID);
 
 trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
 
-assert(am <= VTD_MAMV);
-info.domain_id = domain_id;
-info.addr = addr;
-info.mask = ~((1 << am) - 1);
 vtd_iommu_lock(s);
 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
 vtd_iommu_unlock(s);
+
 vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID);
 }
 
+static void vtd_pasid_based_iotlb_page_invalidate(IntelIOMMUState *s,
+  uint16_t domain_id,
+  hwaddr addr,
+  uint8_t am, uint32_t pasid)
+{
+VTDIOTLBPageInvInfo info = vtd_build_tlb_page_inv_info(domain_id, addr,
+   am, pasid);
+vtd_iommu_lock(s);
+g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+vtd_iommu_unlock(s);
+}
+
+static void vtd_pasid_based_iotlb_invalidate(IntelIOMMUState *s,
+ uint16_t domain_id,
+ uint32_t pasid)
+{
+assert(pasid != PCI_NO_PASID);
+VTDIOTLBPasidEntryInvInfo info = {
+.domain_id = domain_id,
+.pasid = pasid
+};
+vtd_iommu_lock(s);
+g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid, &info);
+vtd_iommu_unlock(s);
+}
+
 /* Flush IOTLB
  * Returns the IOTLB Actual Invalidation Granularity.
  * @val: the content of the IOTLB_REG
@@ -2759,7 +2812,7 @@ static bool vtd_get_inv_desc(IntelIOMMUState *s,
 static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
 {
 if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
-(inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
+(inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO(s->ecap))) {
 error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
   " (reserved nonzero)", __func__, inv_desc->hi,
   inv_desc->lo);
@@ -2785,6 +2838,11 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, 
VTDInvDesc *inv_desc)

[PATCH intel_iommu 0/7] FLTS for VT-d

2024-04-22 Thread CLEMENT MATHIEU--DRIF
This series is the first of a list that adds support for SVM in the Intel IOMMU.

Here, we implement support for first-stage translation in VT-d.
The PASID-based IOTLB invalidation is also added in this series as it is a
requirement of FLTS.

The last patch introduces the 'flts' option to enable the feature from
the command line.
Once enabled, several drivers of the Linux kernel use this feature.

This work is based on the VT-d specification version 4.1 (March 2023)

Here is a link to a GitHub repository where you can find the following
elements:
- Qemu with all the patches for SVM
- ATS
- PRI
- PASID based IOTLB invalidation
- Device IOTLB invalidations
- First-stage translations
- Requests with already translated addresses
- A demo device
- A simple driver for the demo device
- A userspace program (for testing and demonstration purposes)

https://github.com/BullSequana/Qemu-in-guest-SVM-demo

Clément Mathieu--Drif (7):
  intel_iommu: fix FRCD construction macro.
  intel_iommu: rename slpte to pte before adding FLTS
  intel_iommu: make types match
  intel_iommu: add support for first-stage translation
  intel_iommu: extract device IOTLB invalidation logic
  intel_iommu: add PASID-based IOTLB invalidation
  intel_iommu: add a CLI option to enable FLTS

 hw/i386/intel_iommu.c  | 655 ++---
 hw/i386/intel_iommu_internal.h | 114 --
 include/hw/i386/intel_iommu.h  |   3 +-
 3 files changed, 609 insertions(+), 163 deletions(-)

-- 
2.44.0


[PATCH intel_iommu 2/7] intel_iommu: rename slpte to pte before adding FLTS

2024-04-22 Thread CLEMENT MATHIEU--DRIF
Some variables, struct fields and functions can be used for both
slpte and flpte. We can modify certain identifiers to make them
more generic.

- slpte in IOMMUTLBEntry becomes pte and will be used for both FL and SL
- VTD_SL_PT_LEVEL, VTD_SL_PT_PAGE_SIZE_MASK and VTD_SL_LEVEL_BITS can be
  renamed and treated as common constants
- vtd_iova_range_check becomes vtd_iova_sl_range_check because the range
  check depends on the translation type
- vtd_do_iommu_translate now handles both FL and SL so we can rename
  slpte to pte
- VTD_SL_PT_BASE_ADDR_MASK becomes VTD_PT_BASE_ADDR_MASK because the
  address offset within a 64bits word of a Scalable-Mode PASID Table
  Entry is the same for FL and SL. As a consequence, vtd_get_slpte_addr
  is also renamed to vtd_get_pte_addr.
- vtd_is_last_slpte becomes vtd_is_last_pte because the same bit is
  used for FL and SL.
- vtd_slpt_level_page_mask becomes vtd_pt_level_page_mask
- vtd_get_slpte becomes vtd_get_pte

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 106 -
 hw/i386/intel_iommu_internal.h |  10 ++--
 include/hw/i386/intel_iommu.h  |   2 +-
 3 files changed, 60 insertions(+), 58 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index cc8e59674e..6f1364b3fd 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -259,15 +259,15 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, 
gpointer value,
 }
 
 /* The shift of an addr for a certain level of paging structure */
-static inline uint32_t vtd_slpt_level_shift(uint32_t level)
+static inline uint32_t vtd_pt_level_shift(uint32_t level)
 {
 assert(level != 0);
-return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
+return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_LEVEL_BITS;
 }
 
-static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
+static inline uint64_t vtd_pt_level_page_mask(uint32_t level)
 {
-return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
+return ~((1ULL << vtd_pt_level_shift(level)) - 1);
 }
 
 static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
@@ -324,7 +324,7 @@ static void vtd_reset_caches(IntelIOMMUState *s)
 
 static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
 {
-return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
+return (addr & vtd_pt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
 }
 
 /* Must be called with IOMMU lock held */
@@ -352,7 +352,7 @@ out:
 
 /* Must be with IOMMU lock held */
 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
- uint16_t domain_id, hwaddr addr, uint64_t slpte,
+ uint16_t domain_id, hwaddr addr, uint64_t pte,
  uint8_t access_flags, uint32_t level,
  uint32_t pasid)
 {
@@ -360,7 +360,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t 
source_id,
 struct vtd_iotlb_key *key = g_malloc(sizeof(*key));
 uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
 
-trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
+trace_vtd_iotlb_page_update(source_id, addr, pte, domain_id);
 if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
 trace_vtd_iotlb_reset("iotlb exceeds size limit");
 vtd_reset_iotlb_locked(s);
@@ -368,9 +368,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t 
source_id,
 
 entry->gfn = gfn;
 entry->domain_id = domain_id;
-entry->slpte = slpte;
+entry->pte = pte;
 entry->access_flags = access_flags;
-entry->mask = vtd_slpt_level_page_mask(level);
+entry->mask = vtd_pt_level_page_mask(level);
 entry->pasid = pasid;
 
 key->gfn = gfn;
@@ -685,32 +685,32 @@ static inline dma_addr_t 
vtd_ce_get_slpt_base(VTDContextEntry *ce)
 return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
 }
 
-static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
+static inline uint64_t vtd_get_pte_addr(uint64_t pte, uint8_t aw)
 {
-return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
+return pte & VTD_PT_BASE_ADDR_MASK(aw);
 }
 
 /* Whether the pte indicates the address of the page frame */
-static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
+static inline bool vtd_is_last_pte(uint64_t pte, uint32_t level)
 {
-return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
+return level == VTD_COMMON_PT_LEVEL || (pte & VTD_PT_PAGE_SIZE_MASK);
 }
 
-/* Get the content of a spte located in @base_addr[@index] */
-static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
+/* Get the content of a pte located in @base_addr[@index] */
+static uint64_t vtd_get_pte(dma_addr_t base_addr, uint32_t index)
 {
-uint64_t slpte;
+uint64_t pte;
 
-assert(index < VTD_SL_PT_ENTRY_NR);
+assert(index < VTD_PT_ENTRY_NR);
 
 if (dma_memory_read(&address_space_memory,
-base_addr + index * sizeof(sl

[PATCH intel_iommu 7/7] intel_iommu: add a CLI option to enable FLTS

2024-04-22 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 6 ++
 include/hw/i386/intel_iommu.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4b54a45107..c35ccc3a98 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3704,6 +3704,7 @@ static Property vtd_properties[] = {
 DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
 DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
 DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
+DEFINE_PROP_BOOL("flts", IntelIOMMUState, flts, false),
 DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
 DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, 
true),
 DEFINE_PROP_END_OF_LIST(),
@@ -4413,6 +4414,11 @@ static void vtd_init(IntelIOMMUState *s)
 s->ecap |= VTD_ECAP_PASID;
 }
 
+if (s->flts) {
+s->ecap |= VTD_ECAP_FLTS;
+s->cap |= VTD_CAP_FS1GP;
+}
+
 vtd_reset_caches(s);
 
 /* Define registers with default values and bit semantics */
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index b9a01556ec..6ecc8bb8a9 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -263,6 +263,7 @@ struct IntelIOMMUState {
 bool caching_mode;  /* RO - is cap CM enabled? */
 bool scalable_mode; /* RO - is Scalable Mode supported? */
 bool snoop_control; /* RO - is SNP filed supported? */
+bool flts;  /* RO - is FS translation supported? */
 
 dma_addr_t root;/* Current root table pointer */
 bool root_scalable; /* Type of root table (scalable or not) */
-- 
2.44.0


[PATCH intel_iommu 4/7] intel_iommu: add support for first-stage translation

2024-04-22 Thread CLEMENT MATHIEU--DRIF
This translation mode will only be made available in scalable mode

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 364 -
 hw/i386/intel_iommu_internal.h |  51 -
 2 files changed, 362 insertions(+), 53 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index ba545590b1..3b9f120dec 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -713,6 +713,21 @@ static uint64_t vtd_get_pte(dma_addr_t base_addr, uint32_t 
index)
 return pte;
 }
 
+static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, uint32_t index,
+   uint64_t pte, uint64_t flag)
+{
+assert(index < VTD_PT_ENTRY_NR);
+if (pte & flag) {
+return MEMTX_OK;
+}
+pte |= flag;
+pte = cpu_to_le64(pte);
+return dma_memory_write(&address_space_memory,
+base_addr + index * sizeof(pte),
+&pte, sizeof(pte),
+MEMTXATTRS_UNSPECIFIED);
+}
+
 /* Given an iova and the level of paging structure, return the offset
  * of current level.
  */
@@ -730,11 +745,17 @@ static inline bool vtd_is_level_supported(IntelIOMMUState 
*s, uint32_t level)
 }
 
 /* Return true if check passed, otherwise false */
-static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
+static inline bool vtd_pe_type_check(IntelIOMMUState *s,
  VTDPASIDEntry *pe)
 {
+X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
 switch (VTD_PE_GET_TYPE(pe)) {
 case VTD_SM_PASID_ENTRY_FLT:
+if (!(s->ecap & VTD_ECAP_FLTS)) {
+return false;
+}
+break;
 case VTD_SM_PASID_ENTRY_SLT:
 case VTD_SM_PASID_ENTRY_NESTED:
 break;
@@ -784,6 +805,11 @@ static inline bool vtd_pe_present(VTDPASIDEntry *pe)
 return pe->val[0] & VTD_PASID_ENTRY_P;
 }
 
+static inline bool vtd_fl_pte_present(uint64_t pte)
+{
+return pte & VTD_FL_PTE_P;
+}
+
 static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
   uint32_t pasid,
   dma_addr_t addr,
@@ -791,7 +817,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState 
*s,
 {
 uint32_t index;
 dma_addr_t entry_size;
-X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
 
 index = VTD_PASID_TABLE_INDEX(pasid);
 entry_size = VTD_PASID_ENTRY_SIZE;
@@ -805,7 +830,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState 
*s,
 }
 
 /* Do translation type check */
-if (!vtd_pe_type_check(x86_iommu, pe)) {
+if (!vtd_pe_type_check(s, pe)) {
 return -VTD_FR_PASID_TABLE_INV;
 }
 
@@ -1027,6 +1052,34 @@ static inline bool 
vtd_iova_sl_range_check(IntelIOMMUState *s,
 return !(iova & ~(vtd_iova_limit(s, ce, aw, pasid) - 1));
 }
 
+/* Return true if IOVA is canonical, otherwise false. */
+static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s,
+uint64_t iova, VTDContextEntry *ce,
+uint8_t aw, uint32_t pasid)
+{
+uint64_t iova_limit = vtd_iova_limit(s, ce, aw, pasid);
+uint64_t upper_bits_mask = ~(iova_limit - 1);
+uint64_t upper_bits = iova & upper_bits_mask;
+bool msb = ((iova & (iova_limit >> 1)) != 0);
+return !(
+ (!msb && (upper_bits != 0)) ||
+ (msb && (upper_bits != upper_bits_mask))
+);
+}
+
+/* Return the page table base address corresponding to the translation type. */
+static dma_addr_t vtd_pe_get_pgtbl_base(VTDPASIDEntry *pe)
+{
+uint16_t pgtt = VTD_PE_GET_TYPE(pe);
+if (pgtt == VTD_SM_PASID_ENTRY_FLT) {
+return pe->val[2] & VTD_SM_PASID_ENTRY_PTPTR;
+} else if (pgtt == VTD_SM_PASID_ENTRY_SLT) {
+return pe->val[0] & VTD_SM_PASID_ENTRY_PTPTR;
+}
+
+return 0; /* Not supported */
+}
+
 static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
   VTDContextEntry *ce,
   uint32_t pasid)
@@ -1035,7 +1088,7 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState 
*s,
 
 if (s->root_scalable) {
 vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
-return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+return vtd_pe_get_pgtbl_base(&pe);
 }
 
 return vtd_ce_get_slpt_base(ce);
@@ -1053,6 +1106,10 @@ static dma_addr_t 
vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
 static uint64_t vtd_spte_rsvd[VTD_SPTE_RSVD_LEN];
 static uint64_t vtd_spte_rsvd_large[VTD_SPTE_RSVD_LEN];
 
+#define VTD_FPTE_RSVD_LEN 5
+static uint64_t vtd_fpte_rsvd[VTD_FPTE_RSVD_LEN];
+static uint64_t vtd_fpte_rsvd_large[VTD_FPTE_RSVD_LEN];
+
 static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
 {
 uint64_t rsvd_mask;
@@ -1079,21 +1136,140 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, 
uint32_t level)
 return slpte & rsvd_

Re: [PATCH intel_iommu 3/7] intel_iommu: make types match

2024-04-22 Thread CLEMENT MATHIEU--DRIF

On 22/04/2024 19:03, Philippe Mathieu-Daudé wrote:
> On 22/4/24 17:52, CLEMENT MATHIEU--DRIF wrote:
>> The 'level' field in vtd_iotlb_key is an uint8_t.
>> We don't need to store level as an int in vtd_lookup_iotlb
>>
>> Signed-off-by: Clément Mathieu--Drif 
>> ---
>>   hw/i386/intel_iommu.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 6f1364b3fd..ba545590b1 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -333,7 +333,7 @@ static VTDIOTLBEntry 
>> *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
>>   {
>>   struct vtd_iotlb_key key;
>>   VTDIOTLBEntry *entry;
>> -    int level;
>> +    uint8_t level;
>
> Or simply 'unsigned' up to vtd_slpt_level_shift()?
vtd_iotlb_key.level is an uint8_t, just avoiding a warning here

Re: [PATCH intel_iommu 5/7] intel_iommu: extract device IOTLB invalidation logic

2024-04-22 Thread CLEMENT MATHIEU--DRIF

On 22/04/2024 18:59, Philippe Mathieu-Daudé wrote:
> On 22/4/24 17:52, CLEMENT MATHIEU--DRIF wrote:
>> This piece of code can be shared by both IOTLB invalidation and
>> PASID-based IOTLB invalidation
>>
>> Signed-off-by: Clément Mathieu--Drif 
>> ---
>>   hw/i386/intel_iommu.c | 57 +--
>>   1 file changed, 33 insertions(+), 24 deletions(-)
>
>
>>   static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>>     VTDInvDesc *inv_desc)
>>   {
>>   VTDAddressSpace *vtd_dev_as;
>> -    IOMMUTLBEvent event;
>>   hwaddr addr;
>> -    uint64_t sz;
>>   uint16_t sid;
>>   bool size;
>>
>> @@ -2912,6 +2941,7 @@ static bool 
>> vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>>   return false;
>>   }
>>
>> +
>
> Spurious newline ;)
Oups, sorry, it's fixed
>
> Reviewed-by: Philippe Mathieu-Daudé 
>
>>   /*
>

Re: [PATCH intel_iommu 3/7] intel_iommu: make types match

2024-04-23 Thread CLEMENT MATHIEU--DRIF

On 23/04/2024 10:19, Philippe Mathieu-Daudé wrote:
>
> On 23/4/24 07:05, CLEMENT MATHIEU--DRIF wrote:
>>
>> On 22/04/2024 19:03, Philippe Mathieu-Daudé wrote:
>>> On 22/4/24 17:52, CLEMENT MATHIEU--DRIF wrote:
>>>> The 'level' field in vtd_iotlb_key is an uint8_t.
>>>> We don't need to store level as an int in vtd_lookup_iotlb
>>>>
>>>> Signed-off-by: Clément Mathieu--Drif 
>>>> 
>>>> ---
>>>>    hw/i386/intel_iommu.c | 2 +-
>>>>    1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index 6f1364b3fd..ba545590b1 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -333,7 +333,7 @@ static VTDIOTLBEntry
>>>> *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
>>>>    {
>>>>    struct vtd_iotlb_key key;
>>>>    VTDIOTLBEntry *entry;
>>>> -    int level;
>>>> +    uint8_t level;
>>>
>>> Or simply 'unsigned' up to vtd_slpt_level_shift()?
>> vtd_iotlb_key.level is an uint8_t, just avoiding a warning here
>
> What warning?
A linter warning, but it's not a big deal in our case because we know 
the value is lower than 5

Re: [PATCH intel_iommu 0/7] FLTS for VT-d

2024-05-01 Thread CLEMENT MATHIEU--DRIF
Hi Zhenzhong,

I will rebase,

thanks

On 01/05/2024 14:40, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> Ah, this is a duplicate effort on stage-1 translation.
>
> Hi Clement,
>
> We had ever sent a rfcv1 series "intel_iommu: Enable stage-1 translation"
> for both emulated and passthrough device, link:
> https://lists.gnu.org/archive/html/qemu-devel/2024-01/msg02740.html
> which now evolves to rfcv2, link:
> https://github.com/yiliu1765/qemu/commits/zhenzhong/iommufd_nesting_rfcv2/
>
> It had addressed recent community comments, also the comments in old history 
> series:
> https://patchwork.kernel.org/project/kvm/cover/20210302203827.437645-1-yi.l@intel.com/
>
> Would you mind rebasing your remaining part, i.e., ATS, PRI emulation, etc on 
> to our rfcv2?
>
> Thanks
> Zhenzhong
>
>> -Original Message-
>> From: Cédric Le Goater 
>> Subject: Re: [PATCH intel_iommu 0/7] FLTS for VT-d
>>
>> Hello,
>>
>> Adding a few people in Cc: who are familiar with the Intel IOMMU.
>>
>> Thanks,
>>
>> C.
>>
>>
>>
>>
>> On 4/22/24 17:52, CLEMENT MATHIEU--DRIF wrote:
>>> This series is the first of a list that add support for SVM in the Intel 
>>> IOMMU.
>>>
>>> Here, we implement support for first-stage translation in VT-d.
>>> The PASID-based IOTLB invalidation is also added in this series as it is a
>>> requirement of FLTS.
>>>
>>> The last patch introduces the 'flts' option to enable the feature from
>>> the command line.
>>> Once enabled, several drivers of the Linux kernel use this feature.
>>>
>>> This work is based on the VT-d specification version 4.1 (March 2023)
>>>
>>> Here is a link to a GitHub repository where you can find the following
>> elements :
>>>   - Qemu with all the patches for SVM
>>>   - ATS
>>>   - PRI
>>>   - PASID based IOTLB invalidation
>>>   - Device IOTLB invalidations
>>>   - First-stage translations
>>>   - Requests with already translated addresses
>>>   - A demo device
>>>   - A simple driver for the demo device
>>>   - A userspace program (for testing and demonstration purposes)
>>>
>>> https://github.com/BullSequana/Qemu-in-guest-SVM-demo
>>>
>>> Clément Mathieu--Drif (7):
>>> intel_iommu: fix FRCD construction macro.
>>> intel_iommu: rename slpte to pte before adding FLTS
>>> intel_iommu: make types match
>>> intel_iommu: add support for first-stage translation
>>> intel_iommu: extract device IOTLB invalidation logic
>>> intel_iommu: add PASID-based IOTLB invalidation
>>> intel_iommu: add a CLI option to enable FLTS
>>>
>>>hw/i386/intel_iommu.c  | 655 ++--
>> -
>>>hw/i386/intel_iommu_internal.h | 114 --
>>>include/hw/i386/intel_iommu.h  |   3 +-
>>>3 files changed, 609 insertions(+), 163 deletions(-)
>>>


[PATCH ats_vtd v1 15/24] pci: add a pci-level initialization function for iommu notifiers

2024-05-02 Thread CLEMENT MATHIEU--DRIF
We add a convenient way to initialize a device-iotlb notifier.
This is meant to be used by ATS-capable devices.

pci_device_iommu_memory_region_pasid is introduced in this commit and
will be used in several other SVM-related functions exposed in
the PCI API.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c | 39 +++
 include/hw/pci/pci.h | 13 +
 2 files changed, 52 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 9ed788c95d..d10cdb3d75 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2747,6 +2747,45 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 return &address_space_memory;
 }
 
+static IOMMUMemoryRegion *pci_device_iommu_memory_region_pasid(PCIDevice *dev,
+   uint32_t pasid)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+int devfn;
+
+/*
+ * This function is for internal use in the module,
+ * we can call it with PCI_NO_PASID
+ */
+if (!dev->is_master ||
+((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+return NULL;
+}
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops &&
+iommu_bus->iommu_ops->get_memory_region_pasid) {
+return iommu_bus->iommu_ops->get_memory_region_pasid(bus,
+ iommu_bus->iommu_opaque, devfn, pasid);
+}
+return NULL;
+}
+
+bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+   IOMMUNotifier *n, IOMMUNotify fn)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return false;
+}
+iommu_notifier_init(n, fn, IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0, HWADDR_MAX,
+memory_region_iommu_attrs_to_index(iommu_mr,
+   
MEMTXATTRS_UNSPECIFIED));
+return true;
+}
+
 AddressSpace *pci_device_iommu_address_space_pasid(PCIDevice *dev,
uint32_t pasid)
 {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 0c532c563c..1587c18cd9 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -458,6 +458,19 @@ int pci_device_set_iommu_device(PCIDevice *dev, 
HostIOMMUDevice *hiod,
 Error **errp);
 void pci_device_unset_iommu_device(PCIDevice *dev);
 
+/**
+ * pci_iommu_init_iotlb_notifier: initialize an IOMMU notifier
+ *
+ * This function is used by devices before registering an IOTLB notifier
+ *
+ * @dev: the device
+ * @pasid: the pasid of the address space to watch
+ * @n: the notifier to initialize
+ * @fn: the callback to be installed
+ */
+bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+   IOMMUNotifier *n, IOMMUNotify fn);
+
 /**
  * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
  *
-- 
2.44.0


[PATCH ats_vtd v1 17/24] intel_iommu: implement the get_memory_region_pasid iommu operation

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e9fa48b378..a62cbf303d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -6000,9 +6000,24 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)
 return vtd_host_dma_iommu_pasid(bus, opaque, devfn, PCI_NO_PASID);
 }
 
+static IOMMUMemoryRegion *vtd_get_memory_region_pasid(PCIBus *bus,
+  void *opaque,
+  int devfn,
+  uint32_t pasid)
+{
+IntelIOMMUState *s = opaque;
+VTDAddressSpace *vtd_as;
+
+assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+return &vtd_as->iommu;
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
 .get_address_space_pasid = vtd_host_dma_iommu_pasid,
+.get_memory_region_pasid = vtd_get_memory_region_pasid,
 .set_iommu_device = vtd_dev_set_iommu_device,
 .unset_iommu_device = vtd_dev_unset_iommu_device,
 };
-- 
2.44.0


[PATCH ats_vtd v1 12/24] intel_iommu: add support for PASID-based device IOTLB invalidation

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 42 ++
 hw/i386/intel_iommu_internal.h | 10 
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index fe97930774..e7c1a5582a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4344,11 +4344,43 @@ static void do_invalidate_device_tlb(VTDAddressSpace 
*vtd_dev_as,
 static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
 {
-/*
- * no need to handle it for passthru device, for emulated
- * devices with device tlb, it may be required, but for now,
- * return is enough
- */
+uint16_t sid;
+VTDAddressSpace *vtd_dev_as;
+bool size;
+bool global;
+hwaddr addr;
+uint32_t pasid;
+
+if ((inv_desc->hi & VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_HI) ||
+ (inv_desc->lo & VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_LO)) {
+error_report_once("%s: invalid pasid-based dev iotlb inv desc:"
+  "hi=%"PRIx64 "(reserved nonzero)",
+  __func__, inv_desc->hi);
+return false;
+}
+
+global = VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(inv_desc->hi);
+size = VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(inv_desc->hi);
+addr = VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(inv_desc->hi);
+sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo);
+if (global) {
+QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) {
+if ((vtd_dev_as->pasid != PCI_NO_PASID) &&
+(PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus),
+   vtd_dev_as->devfn) == sid)) {
+do_invalidate_device_tlb(vtd_dev_as, size, addr);
+}
+}
+} else {
+pasid = VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(inv_desc->lo);
+vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, sid, pasid);
+if (!vtd_dev_as) {
+return true;
+}
+
+do_invalidate_device_tlb(vtd_dev_as, size, addr);
+}
+
 return true;
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d63ff049a7..3d59e10488 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -424,6 +424,16 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
 #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffe0fff8
 
+/* Mask for PASID Device IOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(val) ((val) & \
+   0xf000ULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(val) ((val >> 11) & 0x1)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(val) ((val) & 0x1)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(val) (((val) >> 16) & 0xULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(val) ((val >> 32) & 0xfULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_HI 0x7feULL
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_LO 0xfff0f000ULL
+
 /* Rsvd field masks for spte */
 #define VTD_SPTE_SNP 0x800ULL
 
-- 
2.44.0


[PATCH ats_vtd v1 20/24] atc: generic ATC that can be used by PCIe devices that support SVM

2024-05-02 Thread CLEMENT MATHIEU--DRIF
As the SVM-capable devices will need to cache translations, we provide
a first implementation.

This cache uses a two-level design based on hash tables.
The first level is indexed by a PASID and the second by a virtual address.

Signed-off-by: Clément Mathieu--Drif 
---
 tests/unit/meson.build |   1 +
 tests/unit/test-atc.c  | 502 +
 util/atc.c | 211 +
 util/atc.h | 117 ++
 util/meson.build   |   1 +
 5 files changed, 832 insertions(+)
 create mode 100644 tests/unit/test-atc.c
 create mode 100644 util/atc.c
 create mode 100644 util/atc.h

diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 228a21d03c..5c9a6fe9f4 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -52,6 +52,7 @@ tests = {
   'test-interval-tree': [],
   'test-xs-node': [qom],
   'test-virtio-dmabuf': [meson.project_source_root() / 
'hw/display/virtio-dmabuf.c'],
+  'test-atc': []
 }
 
 if have_system or have_tools
diff --git a/tests/unit/test-atc.c b/tests/unit/test-atc.c
new file mode 100644
index 00..60fa60924a
--- /dev/null
+++ b/tests/unit/test-atc.c
@@ -0,0 +1,502 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "util/atc.h"
+
+static inline bool tlb_entry_equal(IOMMUTLBEntry *e1, IOMMUTLBEntry *e2)
+{
+if (!e1 || !e2) {
+return !e1 && !e2;
+}
+return e1->iova == e2->iova &&
+e1->addr_mask == e2->addr_mask &&
+e1->pasid == e2->pasid &&
+e1->perm == e2->perm &&
+e1->target_as == e2->target_as &&
+e1->translated_addr == e2->translated_addr;
+}
+
+static void assert_lookup_equals(ATC *atc, IOMMUTLBEntry *target,
+ uint32_t pasid, hwaddr iova)
+{
+IOMMUTLBEntry *result;
+result = atc_lookup(atc, pasid, iova);
+g_assert(tlb_entry_equal(result, target));
+}
+
+static void check_creation(uint64_t page_size, uint8_t address_width,
+   uint8_t levels, uint8_t level_offset,
+   bool should_work) {
+ATC *atc = atc_new(page_size, address_width);
+if (atc) {
+if (atc->levels != levels || atc->level_offset != level_offset) {
+g_assert(false); /* ATC created but invalid configuration : fail */
+}
+atc_destroy(atc);
+g_assert(should_work);
+} else {
+g_assert(!should_work);
+}
+}
+
+static void test_creation_parameters(void)
+{
+check_creation(8, 39, 3, 9, false);
+check_creation(4095, 39, 3, 9, false);
+check_creation(4097, 39, 3, 9, false);
+check_creation(8192, 48, 0, 0, false);
+
+check_creation(4096, 38, 0, 0, false);
+check_creation(4096, 39, 3, 9, true);
+check_creation(4096, 40, 0, 0, false);
+check_creation(4096, 47, 0, 0, false);
+check_creation(4096, 48, 4, 9, true);
+check_creation(4096, 49, 0, 0, false);
+check_creation(4096, 56, 0, 0, false);
+check_creation(4096, 57, 5, 9, true);
+check_creation(4096, 58, 0, 0, false);
+
+check_creation(16384, 35, 0, 0, false);
+check_creation(16384, 36, 2, 11, true);
+check_creation(16384, 37, 0, 0, false);
+check_creation(16384, 46, 0, 0, false);
+check_creation(16384, 47, 3, 11, true);
+check_creation(16384, 48, 0, 0, false);
+check_creation(16384, 57, 0, 0, false);
+check_creation(16384, 58, 4, 11, true);
+check_creation(16384, 59, 0, 0, false);
+}
+
+static void test_single_entry(void)
+{
+IOMMUTLBEntry entry = {
+.iova = 0x123456789000ULL,
+.addr_mask = 0xfffULL,
+.pasid = 5,
+.perm = IOMMU_RW,
+.translated_addr = 0xdeadbeefULL,
+};
+
+ATC *atc = atc_new(4096, 48);
+g_assert(atc);
+
+assert_lookup_equals(atc, NULL, entry.pasid,
+ entry.iova + (entry.addr_mask / 2));
+
+atc_create_address_space_cache(atc, entry.pasid);
+g_assert(atc_update(atc, &entry) == 0);
+
+assert_lookup_equals(atc, NULL, entry.pasid + 1,
+ entry.iova + (entry.addr_mask / 2));
+assert_lookup_equals(atc, &entry, entry.pasid,
+ entry.iova + (entry.addr_mask / 2));
+
+atc_destroy(atc);
+}
+
+static void test_page_boundaries(void)
+{
+static const uint32_t pasid = 5;
+static const hwaddr page_size = 4096;

[PATCH ats_vtd v1 21/24] memory: add an API for ATS support

2024-05-02 Thread CLEMENT MATHIEU--DRIF
IOMMUs have to implement iommu_ats_request_translation to support ATS.

Devices can use IOMMU_TLB_ENTRY_TRANSLATION_ERROR to check the tlb
entries returned by a translation request.

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 26 ++
 system/memory.c   | 20 
 2 files changed, 46 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index f4b33415d7..5b157b9711 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -148,6 +148,10 @@ struct IOMMUTLBEntry {
 uint32_tpasid;
 };
 
+/* Check if an IOMMU TLB entry indicates a translation error */
+#define IOMMU_TLB_ENTRY_TRANSLATION_ERROR(entry) ((((entry)->perm) & IOMMU_RW) \
+== IOMMU_NONE)
+
 /*
  * Bitmap for different IOMMUNotifier capabilities. Each notifier can
  * register with one or multiple IOMMU Notifier capability bit(s).
@@ -567,6 +571,20 @@ struct IOMMUMemoryRegionClass {
  int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
   GList *iova_ranges,
   Error **errp);
+
+/**
+ * @iommu_ats_request_translation:
+ * This method must be implemented if the IOMMU has ATS enabled
+ *
+ * @see pci_ats_request_translation_pasid
+ */
+ssize_t (*iommu_ats_request_translation)(IOMMUMemoryRegion *iommu,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write,
+ IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count);
 };
 
 typedef struct RamDiscardListener RamDiscardListener;
@@ -1870,6 +1888,14 @@ void memory_region_iommu_replay(IOMMUMemoryRegion 
*iommu_mr, IOMMUNotifier *n);
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
  IOMMUNotifier *n);
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req, bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count);
+
 /**
  * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is
  * defined on the IOMMU.
diff --git a/system/memory.c b/system/memory.c
index a229a79988..9c9418c5ee 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2000,6 +2000,26 @@ void 
memory_region_unregister_iommu_notifier(MemoryRegion *mr,
 memory_region_update_iommu_notify_flags(iommu_mr, NULL);
 }
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req,
+bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count)
+{
+IOMMUMemoryRegionClass *imrc = 
memory_region_get_iommu_class_nocheck(iommu_mr);
+
+if (!imrc->iommu_ats_request_translation) {
+return -ENODEV;
+}
+
+return imrc->iommu_ats_request_translation(iommu_mr, priv_req, exec_req,
+   addr, length, no_write, result,
+   result_length, err_count);
+}
+
 void memory_region_notify_iommu_one(IOMMUNotifier *notifier,
 IOMMUTLBEvent *event)
 {
-- 
2.44.0


[PATCH ats_vtd v1 23/24] intel_iommu: set the address mask even when a translation fails

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Implements the behavior defined in section 10.2.3.5 of PCIe spec rev 5.
This is needed by devices that support ATS.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 02c5f0fa4f..aac7677063 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2167,7 +2167,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
*vtd_as, PCIBus *bus,
 uint8_t bus_num = pci_bus_num(bus);
 VTDContextCacheEntry *cc_entry;
 uint64_t pte, page_mask;
-uint32_t level, pasid = vtd_as->pasid;
+uint32_t level = UINT32_MAX;
+uint32_t pasid = vtd_as->pasid;
 uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
 int ret_fr;
 bool is_fpd_set = false;
@@ -2309,7 +2310,12 @@ error:
 vtd_iommu_unlock(s);
 entry->iova = 0;
 entry->translated_addr = 0;
-entry->addr_mask = 0;
+/*
+ * Set the mask for ATS (the range must be present even when the
+ * translation fails : PCIe rev 5 10.2.3.5)
+ */
+entry->addr_mask = (level != UINT32_MAX) ?
+   (~vtd_slpt_level_page_mask(level)) : 
(~VTD_PAGE_MASK_4K);
 entry->perm = IOMMU_NONE;
 entry->pasid = PCI_NO_PASID;
 return false;
-- 
2.44.0


[PATCH ats_vtd v1 13/24] pci: cache the bus mastering status in the device

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c| 24 ++--
 include/hw/pci/pci_device.h |  1 +
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 045d69f4c1..e5f72f9f1d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -116,6 +116,12 @@ static GSequence *pci_acpi_index_list(void)
 return used_acpi_index_list;
 }
 
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+memory_region_set_enabled(&d->bus_master_enable_region, enable);
+d->is_master = enable; /* cache the status */
+}
+
 static void pci_init_bus_master(PCIDevice *pci_dev)
 {
 AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -123,7 +129,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
 memory_region_init_alias(&pci_dev->bus_master_enable_region,
  OBJECT(pci_dev), "bus master",
  dma_as->root, 0, 
memory_region_size(dma_as->root));
-memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+pci_set_master(pci_dev, false);
 memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
 &pci_dev->bus_master_enable_region);
 }
@@ -657,9 +663,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, 
size_t size,
 pci_bridge_update_mappings(PCI_BRIDGE(s));
 }
 
-memory_region_set_enabled(&s->bus_master_enable_region,
-  pci_get_word(s->config + PCI_COMMAND)
-  & PCI_COMMAND_MASTER);
+pci_set_master(s,
+   pci_get_word(s->config + PCI_COMMAND) & PCI_COMMAND_MASTER);
 
 g_free(config);
 return 0;
@@ -1611,9 +1616,9 @@ void pci_default_write_config(PCIDevice *d, uint32_t 
addr, uint32_t val_in, int
 
 if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
 pci_update_irq_disabled(d, was_irq_disabled);
-memory_region_set_enabled(&d->bus_master_enable_region,
-  (pci_get_word(d->config + PCI_COMMAND)
-   & PCI_COMMAND_MASTER) && d->has_power);
+pci_set_master(d,
+  (pci_get_word(d->config + PCI_COMMAND) &
+PCI_COMMAND_MASTER) && d->has_power);
 }
 
 msi_write_config(d, addr, val_in, l);
@@ -2888,9 +2893,8 @@ void pci_set_power(PCIDevice *d, bool state)
 
 d->has_power = state;
 pci_update_mappings(d);
-memory_region_set_enabled(&d->bus_master_enable_region,
-  (pci_get_word(d->config + PCI_COMMAND)
-   & PCI_COMMAND_MASTER) && d->has_power);
+pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+& PCI_COMMAND_MASTER) && d->has_power);
 if (!d->has_power) {
 pci_device_reset(d);
 }
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index d3dd0f64b2..7fa501569a 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -87,6 +87,7 @@ struct PCIDevice {
 char name[64];
 PCIIORegion io_regions[PCI_NUM_REGIONS];
 AddressSpace bus_master_as;
+bool is_master;
 MemoryRegion bus_master_container_region;
 MemoryRegion bus_master_enable_region;
 
-- 
2.44.0


[PATCH ats_vtd v1 16/24] intel_iommu: implement the get_address_space_pasid iommu operation

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 13 ++---
 include/hw/i386/intel_iommu.h |  2 +-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e7c1a5582a..e9fa48b378 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -5432,7 +5432,7 @@ static const MemoryRegionOps vtd_mem_ir_fault_ops = {
 };
 
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
- int devfn, unsigned int pasid)
+ int devfn, uint32_t pasid)
 {
 /*
  * We can't simply use sid here since the bus number might not be
@@ -5983,19 +5983,26 @@ static void vtd_reset(DeviceState *dev)
 vtd_refresh_pasid_bind(s);
 }
 
-static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+static AddressSpace *vtd_host_dma_iommu_pasid(PCIBus *bus, void *opaque,
+  int devfn, uint32_t pasid)
 {
 IntelIOMMUState *s = opaque;
 VTDAddressSpace *vtd_as;
 
 assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
 
-vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID);
+vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
 return &vtd_as->as;
 }
 
+static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+return vtd_host_dma_iommu_pasid(bus, opaque, devfn, PCI_NO_PASID);
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
+.get_address_space_pasid = vtd_host_dma_iommu_pasid,
 .set_iommu_device = vtd_dev_set_iommu_device,
 .unset_iommu_device = vtd_dev_unset_iommu_device,
 };
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 0d5b933159..bac40e4d40 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -325,6 +325,6 @@ struct IntelIOMMUState {
  * create a new one if none exists
  */
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
- int devfn, unsigned int pasid);
+ int devfn, uint32_t pasid);
 
 #endif
-- 
2.44.0


[PATCH ats_vtd v1 03/24] intel_iommu: check if the input address is canonical

2024-05-02 Thread CLEMENT MATHIEU--DRIF
First stage translation must fail if the address to translate is
not canonical.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 22 ++
 hw/i386/intel_iommu_internal.h |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 80cdf37870..240ecb8f72 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1912,6 +1912,7 @@ static const bool vtd_qualified_faults[] = {
 [VTD_FR_PASID_ENTRY_P] = true,
 [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
 [VTD_FR_SM_INTERRUPT_ADDR] = true,
+[VTD_FR_FS_NON_CANONICAL] = true,
 [VTD_FR_MAX] = false,
 };
 
@@ -2023,6 +2024,21 @@ static inline uint64_t vtd_get_flpte_addr(uint64_t 
flpte, uint8_t aw)
 return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw);
 }
 
+/* Return true if IOVA is canonical, otherwise false. */
+static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s,
+uint64_t iova, VTDContextEntry *ce,
+uint8_t aw, uint32_t pasid)
+{
+uint64_t iova_limit = vtd_iova_limit(s, ce, aw, pasid);
+uint64_t upper_bits_mask = ~(iova_limit - 1);
+uint64_t upper_bits = iova & upper_bits_mask;
+bool msb = ((iova & (iova_limit >> 1)) != 0);
+return !(
+ (!msb && (upper_bits != 0)) ||
+ (msb && (upper_bits != upper_bits_mask))
+);
+}
+
 /*
  * Given the @iova, get relevant @flptep. @flpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
@@ -2038,6 +2054,12 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 uint32_t offset;
 uint64_t flpte;
 
+if (!vtd_iova_fl_check_canonical(s, iova, ce, aw_bits, pasid)) {
+error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 
","
+  "pasid=0x%" PRIx32 ")", __func__, iova, pasid);
+return -VTD_FR_FS_NON_CANONICAL;
+}
+
 while (true) {
 offset = vtd_iova_fl_level_offset(iova, level);
 flpte = vtd_get_flpte(addr, offset);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 901691afb9..e9448291a4 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -324,6 +324,8 @@ typedef enum VTDFaultReason {
 VTD_FR_PASID_ENTRY_P = 0x59, /* The Present(P) field of pasidt-entry is 0 
*/
 VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
 
+VTD_FR_FS_NON_CANONICAL = 0x80, /* SNG.1 : Address for FS not canonical.*/
+
 /* Output address in the interrupt address range for scalable mode */
 VTD_FR_SM_INTERRUPT_ADDR = 0x87,
 VTD_FR_MAX, /* Guard */
-- 
2.44.0


[PATCH ats_vtd v1 00/24] ATS support for VT-d

2024-05-02 Thread CLEMENT MATHIEU--DRIF
This series belongs to a list of series that add SVM support for VT-d.

As a starting point, we use the series called 'intel_iommu: Enable stage-1 
translation' (rfc2) by Zhenzhong Duan and Yi Liu.

Here we focus on the implementation of ATS support in the IOMMU and on a
PCI-level API for ATS to be used by virtual devices.

This work is based on the VT-d specification version 4.1 (March 2023).
Here is a link to a GitHub repository where you can find the following elements:
- Qemu with all the patches for SVM
- ATS
- PRI
- Device IOTLB invalidations
- Requests with already translated addresses
- A demo device
- A simple driver for the demo device
- A userspace program (for testing and demonstration purposes)

https://github.com/BullSequana/Qemu-in-guest-SVM-demo

Clément Mathieu--Drif (24):
  intel_iommu: fix FRCD construction macro.
  intel_iommu: make types match
  intel_iommu: check if the input address is canonical
  intel_iommu: set accessed and dirty bits during first stage
translation
  intel_iommu: extract device IOTLB invalidation logic
  intel_iommu: do not consider wait_desc as an invalid descriptor
  memory: add permissions in IOMMUAccessFlags
  pcie: add helper to declare PASID capability for a pcie device
  pcie: helper functions to check if PASID and ATS are enabled
  intel_iommu: declare supported PASID size
  intel_iommu: add an internal API to find an address space with PASID
  intel_iommu: add support for PASID-based device IOTLB invalidation
  pci: cache the bus mastering status in the device
  pci: add IOMMU operations to get address spaces and memory regions
with PASID
  pci: add a pci-level initialization function for iommu notifiers
  intel_iommu: implement the get_address_space_pasid iommu operation
  intel_iommu: implement the get_memory_region_pasid iommu operation
  memory: Allow to store the PASID in IOMMUTLBEntry
  intel_iommu: fill the PASID field when creating an instance of
IOMMUTLBEntry
  atc: generic ATC that can be used by PCIe devices that support SVM
  memory: add an API for ATS support
  pci: add a pci-level API for ATS
  intel_iommu: set the address mask even when a translation fails
  intel_iommu: add support for ATS

 hw/i386/intel_iommu.c  | 313 
 hw/i386/intel_iommu_internal.h |  21 +-
 hw/pci/pci.c   | 127 -
 hw/pci/pcie.c  |  42 +++
 include/exec/memory.h  |  60 +++-
 include/hw/i386/intel_iommu.h  |   2 +-
 include/hw/pci/pci.h   |  99 +++
 include/hw/pci/pci_device.h|   1 +
 include/hw/pci/pcie.h  |   9 +-
 include/hw/pci/pcie_regs.h |   3 +
 system/memory.c|  20 ++
 tests/unit/meson.build |   1 +
 tests/unit/test-atc.c  | 502 +
 util/atc.c | 211 ++
 util/atc.h | 117 
 util/meson.build   |   1 +
 16 files changed, 1454 insertions(+), 75 deletions(-)
 create mode 100644 tests/unit/test-atc.c
 create mode 100644 util/atc.c
 create mode 100644 util/atc.h

-- 
2.44.0


[PATCH ats_vtd v1 24/24] intel_iommu: add support for ATS

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 75 --
 hw/i386/intel_iommu_internal.h |  1 +
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index aac7677063..400b27fc95 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -5395,12 +5395,10 @@ static void 
vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
 bool is_fpd_set = false;
 VTDContextEntry ce;
 
-assert(vtd_as->pasid != PCI_NO_PASID);
-
 /* Try out best to fetch FPD, we can't do anything more */
 if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
 is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
-if (!is_fpd_set && s->root_scalable) {
+if (!is_fpd_set && s->root_scalable && vtd_as->pasid != PCI_NO_PASID) {
 vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
 }
 }
@@ -6025,6 +6023,75 @@ static IOMMUMemoryRegion 
*vtd_get_memory_region_pasid(PCIBus *bus,
 return &vtd_as->iommu;
 }
 
+static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu,
+hwaddr addr,
+IOMMUAccessFlags flags,
+int iommu_idx)
+{
+IOMMUTLBEntry entry;
+VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+
+if (vtd_is_interrupt_addr(addr)) {
+vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO);
+entry.iova = 0;
+entry.translated_addr = 0;
+entry.addr_mask = ~VTD_PAGE_MASK_4K;
+entry.perm = IOMMU_NONE;
+entry.pasid = PCI_NO_PASID;
+} else {
+entry = vtd_iommu_translate(iommu, addr, flags, iommu_idx);
+}
+return entry;
+}
+
+static ssize_t vtd_iommu_ats_request_translation(IOMMUMemoryRegion *iommu,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write,
+ IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req,
+priv_req, false, false);
+ssize_t res_index = 0;
+hwaddr target_address = addr + length;
+IOMMUTLBEntry entry;
+
+*err_count = 0;
+
+while ((addr < target_address) && (res_index < result_length)) {
+entry = vtd_iommu_ats_do_translate(iommu, addr, flags, 0);
+if (!IOMMU_TLB_ENTRY_TRANSLATION_ERROR(&entry)) { /* Translation done 
*/
+if (no_write) {
+/* The device should not use this entry for a write access */
+entry.perm &= ~IOMMU_WO;
+}
+/*
+ * 4.1.2 : Global Mapping (G) : Remapping hardware provides a value
+ * of 0 in this field
+ */
+entry.perm &= ~IOMMU_GLOBAL;
+} else {
+*err_count += 1;
+}
+result[res_index] = entry;
+res_index += 1;
+addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1);
+}
+
+/* Buffer too small */
+if (addr < target_address) {
+return -ENOMEM;
+}
+return res_index;
+}
+
+static uint64_t vtd_get_min_page_size(IOMMUMemoryRegion *iommu)
+{
+return VTD_PAGE_SIZE;
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
 .get_address_space_pasid = vtd_host_dma_iommu_pasid,
@@ -6231,6 +6298,8 @@ static void 
vtd_iommu_memory_region_class_init(ObjectClass *klass,
 imrc->translate = vtd_iommu_translate;
 imrc->notify_flag_changed = vtd_iommu_notify_flag_changed;
 imrc->replay = vtd_iommu_replay;
+imrc->iommu_ats_request_translation = vtd_iommu_ats_request_translation;
+imrc->get_min_page_size = vtd_get_min_page_size;
 }
 
 static const TypeInfo vtd_iommu_memory_region_info = {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 3d59e10488..aa4d0d5f16 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -193,6 +193,7 @@
 #define VTD_ECAP_MHMV   (15ULL << 20)
 #define VTD_ECAP_NEST   (1ULL << 26)
 #define VTD_ECAP_SRS(1ULL << 31)
+#define VTD_ECAP_NWFS   (1ULL << 33)
 #define VTD_ECAP_PSS(19ULL << 35)
 #define VTD_ECAP_PASID  (1ULL << 40)
 #define VTD_ECAP_SMTS   (1ULL << 43)
-- 
2.44.0


[PATCH ats_vtd v1 05/24] intel_iommu: extract device IOTLB invalidation logic

2024-05-02 Thread CLEMENT MATHIEU--DRIF
This piece of code can be shared by both IOTLB invalidation and
PASID-based IOTLB invalidation

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 57 +--
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index cad70e0d05..85a7ebac67 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4295,6 +4295,38 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
 return true;
 }
 
+static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
+ bool size, hwaddr addr)
+{
+/*
+ * According to ATS spec table 2.4:
+ * S = 0, bits 15:12 =  range size: 4K
+ * S = 1, bits 15:12 = xxx0 range size: 8K
+ * S = 1, bits 15:12 = xx01 range size: 16K
+ * S = 1, bits 15:12 = x011 range size: 32K
+ * S = 1, bits 15:12 = 0111 range size: 64K
+ * ...
+ */
+
+IOMMUTLBEvent event;
+uint64_t sz;
+
+if (size) {
+sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
+addr &= ~(sz - 1);
+} else {
+sz = VTD_PAGE_SIZE;
+}
+
+event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP;
+event.entry.target_as = &vtd_dev_as->as;
+event.entry.addr_mask = sz - 1;
+event.entry.iova = addr;
+event.entry.perm = IOMMU_NONE;
+event.entry.translated_addr = 0;
+memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
+}
+
 static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
 {
@@ -4310,9 +4342,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState 
*s,
   VTDInvDesc *inv_desc)
 {
 VTDAddressSpace *vtd_dev_as;
-IOMMUTLBEvent event;
 hwaddr addr;
-uint64_t sz;
 uint16_t sid;
 bool size;
 
@@ -4337,28 +4367,7 @@ static bool 
vtd_process_device_iotlb_desc(IntelIOMMUState *s,
 goto done;
 }
 
-/* According to ATS spec table 2.4:
- * S = 0, bits 15:12 =  range size: 4K
- * S = 1, bits 15:12 = xxx0 range size: 8K
- * S = 1, bits 15:12 = xx01 range size: 16K
- * S = 1, bits 15:12 = x011 range size: 32K
- * S = 1, bits 15:12 = 0111 range size: 64K
- * ...
- */
-if (size) {
-sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
-addr &= ~(sz - 1);
-} else {
-sz = VTD_PAGE_SIZE;
-}
-
-event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP;
-event.entry.target_as = &vtd_dev_as->as;
-event.entry.addr_mask = sz - 1;
-event.entry.iova = addr;
-event.entry.perm = IOMMU_NONE;
-event.entry.translated_addr = 0;
-memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
+do_invalidate_device_tlb(vtd_dev_as, size, addr);
 
 done:
 return true;
-- 
2.44.0


[PATCH ats_vtd v1 09/24] pcie: helper functions to check if PASID and ATS are enabled

2024-05-02 Thread CLEMENT MATHIEU--DRIF
ats_enabled and pasid_enabled check whether the capabilities are
present or not. If so, we read the configuration space to get
the status of the feature (enabled or not).

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pcie.c | 18 ++
 include/hw/pci/pcie.h |  3 +++
 2 files changed, 21 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index c8e9d4c0f7..2a638a9c3f 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1201,3 +1201,21 @@ void pcie_pasid_init(PCIDevice *dev, uint16_t offset, 
uint8_t pasid_width,
 
 dev->exp.pasid_cap = offset;
 }
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+return false;
+}
+return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+PCI_PASID_CTRL_ENABLE) != 0;
+}
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+return false;
+}
+return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index c59627d556..8c222f09da 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -151,4 +151,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 
 void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
  bool exec_perm, bool priv_mod);
+
+bool pcie_pasid_enabled(const PCIDevice *dev);
+bool pcie_ats_enabled(const PCIDevice *dev);
 #endif /* QEMU_PCIE_H */
-- 
2.44.0


[PATCH ats_vtd v1 01/24] intel_iommu: fix FRCD construction macro.

2024-05-02 Thread CLEMENT MATHIEU--DRIF
The constant must be an unsigned 64-bit value; otherwise the result of
the shift can be a negative int, and its sign extension overwrites the
other fields when a PASID is present

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index c5efcff9fd..4f6b0154b5 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -271,7 +271,7 @@
 /* For the low 64-bit of 128-bit */
 #define VTD_FRCD_FI(val)((val) & ~0xfffULL)
 #define VTD_FRCD_PV(val)(((val) & 0xULL) << 40)
-#define VTD_FRCD_PP(val)(((val) & 0x1) << 31)
+#define VTD_FRCD_PP(val)(((val) & 0x1ULL) << 31)
 #define VTD_FRCD_IR_IDX(val)(((val) & 0xULL) << 48)
 
 /* DMA Remapping Fault Conditions */
-- 
2.44.0


[PATCH ats_vtd v1 11/24] intel_iommu: add an internal API to find an address space with PASID

2024-05-02 Thread CLEMENT MATHIEU--DRIF
This will be used to implement the device IOTLB invalidation

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 39 ---
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 98c4a70fe0..fe97930774 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -65,6 +65,11 @@ struct vtd_as_key {
 uint32_t pasid;
 };
 
+struct vtd_as_raw_key {
+uint16_t sid;
+uint32_t pasid;
+};
+
 struct vtd_iotlb_key {
 uint64_t gfn;
 uint32_t pasid;
@@ -1931,29 +1936,33 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
 return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
 }
 
-static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
-   gpointer user_data)
+static gboolean vtd_find_as_by_sid_and_pasid(gpointer key, gpointer value,
+ gpointer user_data)
 {
 struct vtd_as_key *as_key = (struct vtd_as_key *)key;
-uint16_t target_sid = *(uint16_t *)user_data;
+struct vtd_as_raw_key target = *(struct vtd_as_raw_key *)user_data;
 uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn);
-return sid == target_sid;
+
+return (as_key->pasid == target.pasid) &&
+   (sid == target.sid);
 }
 
-static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
+uint16_t sid,
+uint32_t pasid)
 {
-uint8_t bus_num = PCI_BUS_NUM(sid);
-VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
-
-if (vtd_as &&
-(sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) {
-return vtd_as;
-}
+struct vtd_as_raw_key key = {
+.sid = sid,
+.pasid = pasid
+};
 
-vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, 
&sid);
-s->vtd_as_cache[bus_num] = vtd_as;
+return g_hash_table_find(s->vtd_address_spaces,
+ vtd_find_as_by_sid_and_pasid, &key);
+}
 
-return vtd_as;
+static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+{
+return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
 }
 
 static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
-- 
2.44.0


[PATCH ats_vtd v1 14/24] pci: add IOMMU operations to get address spaces and memory regions with PASID

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c | 20 
 include/hw/pci/pci.h | 34 ++
 2 files changed, 54 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e5f72f9f1d..9ed788c95d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2747,6 +2747,26 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 return &address_space_memory;
 }
 
+AddressSpace *pci_device_iommu_address_space_pasid(PCIDevice *dev,
+   uint32_t pasid)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+int devfn;
+
+if (!dev->is_master || !pcie_pasid_enabled(dev) || pasid == PCI_NO_PASID) {
+return NULL;
+}
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops &&
+iommu_bus->iommu_ops->get_address_space_pasid) {
+return iommu_bus->iommu_ops->get_address_space_pasid(bus,
+iommu_bus->iommu_opaque, devfn, pasid);
+}
+return NULL;
+}
+
 int pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
 Error **errp)
 {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 849e391813..0c532c563c 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -385,6 +385,38 @@ typedef struct PCIIOMMUOps {
  * @devfn: device and function number
  */
 AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+/**
+ * @get_address_space_pasid: same as get_address_space but returns an
+ * address space with the requested PASID
+ *
+ * This callback is required for PASID-based operations
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ *
+ * @pasid: the pasid associated with the requested memory region
+ */
+AddressSpace * (*get_address_space_pasid)(PCIBus *bus, void *opaque,
+  int devfn, uint32_t pasid);
+/**
+ * @get_memory_region_pasid: get the iommu memory region for a given
+ * device and pasid
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ *
+ * @pasid: the pasid associated with the requested memory region
+ */
+IOMMUMemoryRegion * (*get_memory_region_pasid)(PCIBus *bus,
+   void *opaque,
+   int devfn,
+   uint32_t pasid);
 /**
  * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU
  *
@@ -420,6 +452,8 @@ typedef struct PCIIOMMUOps {
 } PCIIOMMUOps;
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+AddressSpace *pci_device_iommu_address_space_pasid(PCIDevice *dev,
+   uint32_t pasid);
 int pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
 Error **errp);
 void pci_device_unset_iommu_device(PCIDevice *dev);
-- 
2.44.0


[PATCH ats_vtd v1 08/24] pcie: add helper to declare PASID capability for a pcie device

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pcie.c  | 24 
 include/hw/pci/pcie.h  |  6 +-
 include/hw/pci/pcie_regs.h |  3 +++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 4b2f0805c6..c8e9d4c0f7 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1177,3 +1177,27 @@ void pcie_acs_reset(PCIDevice *dev)
 pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
 }
 }
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+static const uint16_t control_reg_rw_mask = 0x07;
+uint16_t capability_reg = pasid_width;
+
+pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+PCI_EXT_CAP_PASID_SIZEOF);
+
+capability_reg <<= PCI_EXT_CAP_PASID_SIZEOF;
+capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+capability_reg |= priv_mod  ? PCI_PASID_CAP_PRIV : 0;
+pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+/* Everything is disabled by default */
+pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+dev->exp.pasid_cap = offset;
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 11f5a91bbb..c59627d556 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -69,8 +69,9 @@ struct PCIExpressDevice {
 uint16_t aer_cap;
 PCIEAERLog aer_log;
 
-/* Offset of ATS capability in config space */
+/* Offset of ATS and PASID capabilities in config space */
 uint16_t ats_cap;
+uint16_t pasid_cap;
 
 /* ACS */
 uint16_t acs_cap;
@@ -147,4 +148,7 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, 
DeviceState *dev,
  Error **errp);
 void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
  DeviceState *dev, Error **errp);
+
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod);
 #endif /* QEMU_PCIE_H */
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index 9d3b6868dc..0a86598f80 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -86,6 +86,9 @@ typedef enum PCIExpLinkWidth {
 #define PCI_ARI_VER 1
 #define PCI_ARI_SIZEOF  8
 
+/* PASID */
+#define PCI_PASID_VER   1
+#define PCI_EXT_CAP_PASID_MAX_WIDTH 20
 /* AER */
 #define PCI_ERR_VER 2
 #define PCI_ERR_SIZEOF  0x48
-- 
2.44.0


[PATCH ats_vtd v1 06/24] intel_iommu: do not consider wait_desc as an invalid descriptor

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 85a7ebac67..c475a354a0 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3365,6 +3365,11 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, 
VTDInvDesc *inv_desc)
 } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
 /* Interrupt flag */
 vtd_generate_completion_event(s);
+} else if (inv_desc->lo & VTD_INV_DESC_WAIT_FN) {
+/*
+ * SW = 0, IF = 0, FN = 1
+ * Nothing to do as we process the events sequentially
+ */
 } else {
 error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
   " (unknown type)", __func__, inv_desc->hi,
-- 
2.44.0


[PATCH ats_vtd v1 19/24] intel_iommu: fill the PASID field when creating an instance of IOMMUTLBEntry

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a62cbf303d..02c5f0fa4f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2302,6 +2302,7 @@ out:
 entry->translated_addr = vtd_get_slpte_addr(pte, s->aw_bits) & page_mask;
 entry->addr_mask = ~page_mask;
 entry->perm = access_flags;
+entry->pasid = pasid;
 return true;
 
 error:
@@ -2310,6 +2311,7 @@ error:
 entry->translated_addr = 0;
 entry->addr_mask = 0;
 entry->perm = IOMMU_NONE;
+entry->pasid = PCI_NO_PASID;
 return false;
 }
 
@@ -4338,6 +4340,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace 
*vtd_dev_as,
 event.entry.iova = addr;
 event.entry.perm = IOMMU_NONE;
 event.entry.translated_addr = 0;
+event.entry.pasid = vtd_dev_as->pasid;
 memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
 }
 
@@ -4914,6 +4917,7 @@ static IOMMUTLBEntry 
vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
 IOMMUTLBEntry iotlb = {
 /* We'll fill in the rest later. */
 .target_as = &address_space_memory,
+.pasid = vtd_as->pasid,
 };
 bool success;
 
@@ -4926,6 +4930,7 @@ static IOMMUTLBEntry 
vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
 iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
 iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
 iotlb.perm = IOMMU_RW;
+iotlb.pasid = PCI_NO_PASID;
 success = true;
 }
 
-- 
2.44.0


[PATCH ats_vtd v1 10/24] intel_iommu: declare supported PASID size

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 2 +-
 hw/i386/intel_iommu_internal.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c475a354a0..98c4a70fe0 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -5822,7 +5822,7 @@ static void vtd_cap_init(IntelIOMMUState *s)
 }
 
 if (s->pasid) {
-s->ecap |= VTD_ECAP_PASID;
+s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
 }
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 14879d3a58..d63ff049a7 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -193,6 +193,7 @@
 #define VTD_ECAP_MHMV   (15ULL << 20)
 #define VTD_ECAP_NEST   (1ULL << 26)
 #define VTD_ECAP_SRS(1ULL << 31)
+#define VTD_ECAP_PSS(19ULL << 35)
 #define VTD_ECAP_PASID  (1ULL << 40)
 #define VTD_ECAP_SMTS   (1ULL << 43)
 #define VTD_ECAP_SLTS   (1ULL << 46)
-- 
2.44.0


[PATCH ats_vtd v1 04/24] intel_iommu: set accessed and dirty bits during first stage translation

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 26 ++
 hw/i386/intel_iommu_internal.h |  3 +++
 2 files changed, 29 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 240ecb8f72..cad70e0d05 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1913,6 +1913,7 @@ static const bool vtd_qualified_faults[] = {
 [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
 [VTD_FR_SM_INTERRUPT_ADDR] = true,
 [VTD_FR_FS_NON_CANONICAL] = true,
+[VTD_FR_FS_BIT_UPDATE_FAILED] = true,
 [VTD_FR_MAX] = false,
 };
 
@@ -2039,6 +2040,20 @@ static bool vtd_iova_fl_check_canonical(IntelIOMMUState 
*s,
 );
 }
 
+static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, uint32_t index,
+   uint64_t pte, uint64_t flag)
+{
+if (pte & flag) {
+return MEMTX_OK;
+}
+pte |= flag;
+pte = cpu_to_le64(pte);
+return dma_memory_write(&address_space_memory,
+base_addr + index * sizeof(pte),
+&pte, sizeof(pte),
+MEMTXATTRS_UNSPECIFIED);
+}
+
 /*
  * Given the @iova, get relevant @flptep. @flpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
@@ -2080,11 +2095,22 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 
 *reads = true;
 *writes = (*writes) && (flpte & VTD_FL_RW_MASK);
+
+if (vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_PTE_A)
+!= MEMTX_OK) {
+return -VTD_FR_FS_BIT_UPDATE_FAILED;
+}
+
 if (is_write && !(flpte & VTD_FL_RW_MASK)) {
 return -VTD_FR_WRITE;
 }
 
 if (vtd_is_last_flpte(flpte, level)) {
+if (is_write &&
+(vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_PTE_D) !=
+MEMTX_OK)) 
{
+return -VTD_FR_FS_BIT_UPDATE_FAILED;
+}
 *flptep = flpte;
 *flpte_level = level;
 return 0;
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e9448291a4..14879d3a58 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -328,6 +328,7 @@ typedef enum VTDFaultReason {
 
 /* Output address in the interrupt address range for scalable mode */
 VTD_FR_SM_INTERRUPT_ADDR = 0x87,
+VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */
 VTD_FR_MAX, /* Guard */
 } VTDFaultReason;
 
@@ -649,6 +650,8 @@ typedef struct VTDPIOTLBInvInfo {
 /* First Level Paging Structure */
 #define VTD_FL_PT_LEVEL 1
 #define VTD_FL_PT_ENTRY_NR  512
+#define VTD_FL_PTE_A0x20
+#define VTD_FL_PTE_D0x40
 
 /* Masks for First Level Paging Entry */
 #define VTD_FL_RW_MASK  (1ULL << 1)
-- 
2.44.0


[PATCH ats_vtd v1 07/24] memory: add permissions in IOMMUAccessFlags

2024-05-02 Thread CLEMENT MATHIEU--DRIF
This will be necessary for devices implementing ATS.
We also define a new macro IOMMU_ACCESS_FLAG_FULL in addition to
IOMMU_ACCESS_FLAG to support more access flags.
IOMMU_ACCESS_FLAG is kept for convenience and backward compatibility.

Here are the flags added (defined by the PCIe 5 specification) :
- Execute Requested
- Privileged Mode Requested
- Global
- Untranslated Only

IOMMU_ACCESS_FLAG sets the additional flags to 0

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 8626a355b3..304504de02 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -110,22 +110,41 @@ struct MemoryRegionSection {
 
 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
 
-/* See address_space_translate: bit 0 is read, bit 1 is write.  */
+/*
+ * See address_space_translate:
+ *  - bit 0 : read
+ *  - bit 1 : write
+ *  - bit 2 : exec
+ *  - bit 3 : priv
+ *  - bit 4 : global
+ *  - bit 5 : untranslated only
+ */
 typedef enum {
 IOMMU_NONE = 0,
 IOMMU_RO   = 1,
 IOMMU_WO   = 2,
 IOMMU_RW   = 3,
+IOMMU_EXEC = 4,
+IOMMU_PRIV = 8,
+IOMMU_GLOBAL = 16,
+IOMMU_UNTRANSLATED_ONLY = 32,
 } IOMMUAccessFlags;
 
-#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))
+#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | \
+((w) ? IOMMU_WO : 0))
+#define IOMMU_ACCESS_FLAG_FULL(r, w, x, p, g, uo) \
+(IOMMU_ACCESS_FLAG(r, w) | \
+((x) ? IOMMU_EXEC : 0) | \
+((p) ? IOMMU_PRIV : 0) | \
+((g) ? IOMMU_GLOBAL : 0) | \
+((uo) ? IOMMU_UNTRANSLATED_ONLY : 0))
 
 struct IOMMUTLBEntry {
-AddressSpace*target_as;
-hwaddr   iova;
-hwaddr   translated_addr;
-hwaddr   addr_mask;  /* 0xfff = 4k translation */
-IOMMUAccessFlags perm;
+AddressSpace*target_as;
+hwaddr  iova;
+hwaddr  translated_addr;
+hwaddr  addr_mask;  /* 0xfff = 4k translation */
+IOMMUAccessFlagsperm;
 };
 
 /*
-- 
2.44.0


[PATCH ats_vtd v1 02/24] intel_iommu: make types match

2024-05-02 Thread CLEMENT MATHIEU--DRIF
The 'level' field in vtd_iotlb_key is an uint8_t.
We don't need to store level as an int in vtd_lookup_iotlb (avoids a
'losing precision' warning).

VTDIOTLBPageInvInfo.mask is used in binary operations with addresses.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 2 +-
 hw/i386/intel_iommu_internal.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 70735e2379..80cdf37870 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -386,7 +386,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, 
uint16_t source_id,
 {
 struct vtd_iotlb_key key;
 VTDIOTLBEntry *entry;
-int level;
+uint8_t level;
 
 for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
 key.gfn = vtd_get_iotlb_gfn(addr, level);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 4f6b0154b5..901691afb9 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -473,7 +473,7 @@ struct VTDIOTLBPageInvInfo {
 uint16_t domain_id;
 uint32_t pasid;
 uint64_t addr;
-uint8_t mask;
+uint64_t mask;
 };
 typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
 
-- 
2.44.0


[PATCH ats_vtd v1 22/24] pci: add a pci-level API for ATS

2024-05-02 Thread CLEMENT MATHIEU--DRIF
Devices implementing ATS can send translation requests using
pci_ats_request_translation_pasid.

The invalidation events are sent back to the device using the iommu
notifier managed with pci_register_iommu_tlb_event_notifier and
pci_unregister_iommu_tlb_event_notifier

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c | 44 +
 include/hw/pci/pci.h | 52 
 2 files changed, 96 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index d10cdb3d75..fea9b57006 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2833,6 +2833,50 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
 }
 }
 
+ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
+  bool priv_req, bool exec_req, hwaddr 
addr,
+  size_t length, bool no_write,
+  IOMMUTLBEntry *result,
+  size_t result_length,
+  uint32_t *err_count)
+{
+assert(result_length);
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr || !pcie_ats_enabled(dev)) {
+return -EPERM;
+}
+return memory_region_iommu_ats_request_translation(iommu_mr, priv_req,
+   exec_req, addr, length,
+   no_write, result,
+   result_length,
+   err_count);
+}
+
+int pci_register_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+  IOMMUNotifier *n)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return -EPERM;
+}
+return memory_region_register_iommu_notifier(MEMORY_REGION(iommu_mr), n,
+ &error_fatal);
+}
+
+int pci_unregister_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return -EPERM;
+}
+memory_region_unregister_iommu_notifier(MEMORY_REGION(iommu_mr), n);
+return 0;
+}
+
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
 {
 /*
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 1587c18cd9..dc247d24bd 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -471,6 +471,58 @@ void pci_device_unset_iommu_device(PCIDevice *dev);
 bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
IOMMUNotifier *n, IOMMUNotify fn);
 
+/**
+ * pci_ats_request_translation_pasid: perform an ATS request
+ *
+ * Return the number of translations stored in @result in case of success,
+ * a negative error code otherwise.
+ * -ENOMEM is returned when the result buffer is not large enough to store
+ * all the translations
+ *
+ * @dev: the ATS-capable PCI device
+ * @pasid: the pasid of the address space in which the translation will be made
+ * @priv_req: privileged mode bit (PASID TLP)
+ * @exec_req: execute request bit (PASID TLP)
+ * @addr: start address of the memory range to be translated
+ * @length: length of the memory range in bytes
+ * @no_write: request a read-only access translation (if supported by the 
IOMMU)
+ * @result: buffer in which the TLB entries will be stored
+ * @result_length: result buffer length
+ * @err_count: number of untranslated subregions
+ */
+ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
+  bool priv_req, bool exec_req, hwaddr 
addr,
+  size_t length, bool no_write,
+  IOMMUTLBEntry *result,
+  size_t result_length,
+  uint32_t *err_count);
+
+/**
+ * pci_register_iommu_tlb_event_notifier: register a notifier for changes to
+ * IOMMU translation entries in a specific address space.
+ *
+ * Returns 0 on success, or a negative errno otherwise.
+ *
+ * @dev: the device that wants to get notified
+ * @pasid: the pasid of the address space to track
+ * @n: the notifier to register
+ */
+int pci_register_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+  IOMMUNotifier *n);
+
+/**
+ * pci_unregister_iommu_tlb_event_notifier: unregister a notifier that has 

[PATCH ats_vtd v1 18/24] memory: Allow to store the PASID in IOMMUTLBEntry

2024-05-02 Thread CLEMENT MATHIEU--DRIF
This will be useful for devices that support ATS

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 304504de02..f4b33415d7 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -145,6 +145,7 @@ struct IOMMUTLBEntry {
 hwaddr  translated_addr;
 hwaddr  addr_mask;  /* 0xfff = 4k translation */
 IOMMUAccessFlagsperm;
+uint32_tpasid;
 };
 
 /*
-- 
2.44.0


Re: [PATCH v4 19/19] intel_iommu: Check compatibility with host IOMMU capabilities

2024-05-07 Thread CLEMENT MATHIEU--DRIF
Hi Zhenzhong,

On 07/05/2024 11:20, Zhenzhong Duan wrote:

Caution: External email. Do not open attachments or click links, unless this 
email comes from a known sender and you know the content is safe.


If check fails, host device (either VFIO or VDPA device) is not
compatible with current vIOMMU config and should not be passed to
guest.

Only aw_bits is checked for now; we don't care about other capabilities
before scalable modern mode is introduced.

Signed-off-by: Yi Liu 
Signed-off-by: Zhenzhong Duan 

---
 hw/i386/intel_iommu.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 747c988bc4..146fde23fc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
 #include "sysemu/kvm.h"
 #include "sysemu/dma.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/host_iommu_device.h"
 #include "hw/i386/apic_internal.h"
 #include "kvm/kvm_i386.h"
 #include "migration/vmstate.h"
@@ -3819,6 +3820,25 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, 
PCIBus *bus,
 return vtd_dev_as;
 }

+static bool vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hdev,
+   Error **errp)
+{
+HostIOMMUDevice *hiod = vtd_hdev->dev;

Why not pass the hiod pointer directly as a parameter? Maybe you have
something in mind for a future patch?

It would allow us to allocate the VTDHostIOMMUDevice later in 
vtd_dev_set_iommu_device.


+int ret;
+
+/* Common checks */
+ret = host_iommu_device_get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
+if (ret < 0) {
+return false;
+}
+if (s->aw_bits > ret) {
+error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret);
+return false;
+}
+
+return true;
+}
+
 static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
  HostIOMMUDevice *hiod, Error **errp)
 {
@@ -3848,6 +3868,12 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void 
*opaque, int devfn,
 vtd_hdev->iommu_state = s;
 vtd_hdev->dev = hiod;

+if (!vtd_check_hdev(s, vtd_hdev, errp)) {
+g_free(vtd_hdev);
+vtd_iommu_unlock(s);
+return false;
+}
+
 new_key = g_malloc(sizeof(*new_key));
 new_key->bus = bus;
 new_key->devfn = devfn;
--
2.34.1




[PATCH ats_vtd v5 00/22] ATS support for VT-d

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clement Mathieu--Drif 

This series belongs to a list of series that add SVM support for VT-d.

As a starting point, we use the series called 'intel_iommu: Enable stage-1 
translation' (rfc2) by Zhenzhong Duan and Yi Liu.

Here we focus on the implementation of ATS support in the IOMMU and on a 
PCI-level
API for ATS to be used by virtual devices.

This work is based on the VT-d specification version 4.1 (March 2023).
Here is a link to a GitHub repository where you can find the following elements:
- Qemu with all the patches for SVM
- ATS
- PRI
- Device IOTLB invalidations
- Requests with already translated addresses
- A demo device
- A simple driver for the demo device
- A userspace program (for testing and demonstration purposes)

https://github.com/BullSequana/Qemu-in-guest-SVM-demo

v2
- handle huge pages better by detecting the page table level at which the 
translation errors occur
- Changes after review by Zhenzhong Duan:
- Set the access bit after checking permissions
- helper for PASID and ATS : make the commit message more accurate 
('present' replaced with 'enabled')
- pcie_pasid_init: add PCI_PASID_CAP_WIDTH_SHIFT and use it instead of 
PCI_EXT_CAP_PASID_SIZEOF for shifting the pasid width when preparing the 
capability register
- pci: do not check pci_bus_bypass_iommu after calling 
pci_device_get_iommu_bus_devfn
- do not alter formatting of IOMMUTLBEntry declaration
- vtd_iova_fl_check_canonical : directly use s->aw_bits instead of aw 
for the sake of clarity

v3
- rebase on new version of Zhenzhong's flts implementation
- fix the atc lookup operation (check the mask before returning an entry)
- add a unit test for the ATC
- store a user pointer in the iommu notifiers to simplify the 
implementation of svm devices
Changes after review by Zhenzhong :
- store the input pasid instead of rid2pasid when returning an entry 
after a translation
- split the ATC implementation and its unit tests

v4
Changes after internal review
- Fix the nowrite optimization, an ATS translation without the nowrite 
flag should not fail when the write permission is not set

v5
Changes after review by Philippe :
- change the type of 'level' to unsigned in vtd_lookup_iotlb

Clément Mathieu--Drif (22):
  intel_iommu: fix FRCD construction macro.
  intel_iommu: make types match
  intel_iommu: return page walk level even when the translation fails
  intel_iommu: do not consider wait_desc as an invalid descriptor
  memory: add permissions in IOMMUAccessFlags
  pcie: add helper to declare PASID capability for a pcie device
  pcie: helper functions to check if PASID and ATS are enabled
  intel_iommu: declare supported PASID size
  pci: cache the bus mastering status in the device
  pci: add IOMMU operations to get address spaces and memory regions
with PASID
  memory: store user data pointer in the IOMMU notifiers
  pci: add a pci-level initialization function for iommu notifiers
  intel_iommu: implement the get_address_space_pasid iommu operation
  intel_iommu: implement the get_memory_region_pasid iommu operation
  memory: Allow to store the PASID in IOMMUTLBEntry
  intel_iommu: fill the PASID field when creating an instance of
IOMMUTLBEntry
  atc: generic ATC that can be used by PCIe devices that support SVM
  atc: add unit tests
  memory: add an API for ATS support
  pci: add a pci-level API for ATS
  intel_iommu: set the address mask even when a translation fails
  intel_iommu: add support for ATS

 hw/i386/intel_iommu.c | 146 +-
 hw/i386/intel_iommu_internal.h|   6 +-
 hw/pci/pci.c  | 127 +-
 hw/pci/pcie.c |  42 ++
 include/exec/memory.h |  51 ++-
 include/hw/i386/intel_iommu.h |   2 +-
 include/hw/pci/pci.h  | 101 +
 include/hw/pci/pci_device.h   |   1 +
 include/hw/pci/pcie.h |   9 +-
 include/hw/pci/pcie_regs.h|   3 +
 include/standard-headers/linux/pci_regs.h |   1 +
 system/memory.c   |  20 +
 tests/unit/meson.build|   1 +
 tests/unit/test-atc.c | 527 ++
 util/atc.c| 211 +
 util/atc.h| 117 +
 util/meson.build  |   1 +
 17 files changed, 1332 insertions(+), 34 deletions(-)
 create mode 100644 tests/unit/test-atc.c
 create mode 100644 util/atc.c
 create mode 100644 util/atc.h

-- 
2.45.2

[PATCH ats_vtd v5 03/22] intel_iommu: return page walk level even when the translation fails

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

We use this information in vtd_do_iommu_translate to populate the
IOMMUTLBEntry and indicate the correct page mask. This prevents ATS
devices from sending many useless translation requests when a megapage
or gigapage iova is not mapped to a physical address.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c6474ae735..98996ededc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2096,9 +2096,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
  uint32_t pasid)
 {
 dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
-uint32_t level = vtd_get_iova_level(s, ce, pasid);
 uint32_t offset;
 uint64_t flpte;
+*flpte_level = vtd_get_iova_level(s, ce, pasid);
 
 if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
 error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 
","
@@ -2107,11 +2107,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 }
 
 while (true) {
-offset = vtd_iova_level_offset(iova, level);
+offset = vtd_iova_level_offset(iova, *flpte_level);
 flpte = vtd_get_pte(addr, offset);
 
 if (flpte == (uint64_t)-1) {
-if (level == vtd_get_iova_level(s, ce, pasid)) {
+if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) {
 /* Invalid programming of context-entry */
 return -VTD_FR_CONTEXT_ENTRY_INV;
 } else {
@@ -2128,11 +2128,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 if (is_write && !(flpte & VTD_FL_RW_MASK)) {
 return -VTD_FR_WRITE;
 }
-if (vtd_flpte_nonzero_rsvd(flpte, level)) {
+if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) {
 error_report_once("%s: detected flpte reserved non-zero "
   "iova=0x%" PRIx64 ", level=0x%" PRIx32
   "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
-  __func__, iova, level, flpte, pasid);
+  __func__, iova, *flpte_level, flpte, pasid);
 return -VTD_FR_PAGING_ENTRY_RSVD;
 }
 
@@ -2140,19 +2140,18 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
VTDContextEntry *ce,
 return -VTD_FR_FS_BIT_UPDATE_FAILED;
 }
 
-if (vtd_is_last_pte(flpte, level)) {
+if (vtd_is_last_pte(flpte, *flpte_level)) {
 if (is_write &&
 (vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_D) !=
 MEMTX_OK)) 
{
 return -VTD_FR_FS_BIT_UPDATE_FAILED;
 }
 *flptep = flpte;
-*flpte_level = level;
 return 0;
 }
 
 addr = vtd_get_pte_addr(flpte, aw_bits);
-level--;
+(*flpte_level)--;
 }
 }
 
-- 
2.45.2

[PATCH ats_vtd v5 16/22] intel_iommu: fill the PASID field when creating an instance of IOMMUTLBEntry

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 2e4f535dd1..f77972130f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2210,6 +2210,9 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
*vtd_as, PCIBus *bus,
 
 vtd_iommu_lock(s);
 
+/* fill the pasid before getting rid2pasid */
+entry->pasid = pasid;
+
 cc_entry = &vtd_as->context_cache_entry;
 
 /* Try to fetch pte form IOTLB, we don't need RID2PASID logic */
@@ -2328,6 +2331,7 @@ out:
 entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask;
 entry->addr_mask = ~page_mask;
 entry->perm = access_flags;
+/* pasid already set */
 return true;
 
 error:
@@ -2336,6 +2340,7 @@ error:
 entry->translated_addr = 0;
 entry->addr_mask = 0;
 entry->perm = IOMMU_NONE;
+entry->pasid = PCI_NO_PASID;
 return false;
 }
 
@@ -3697,6 +3702,7 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState 
*s, uint16_t domain_id,
 event.entry.target_as = &address_space_memory;
 event.entry.iova = addr;
 event.entry.perm = IOMMU_NONE;
+event.entry.pasid = pasid;
 event.entry.addr_mask = size - 1;
 event.entry.translated_addr = 0;
 memory_region_notify_iommu(&vtd_as->iommu, 0, event);
@@ -4344,6 +4350,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace 
*vtd_dev_as,
 event.entry.iova = addr;
 event.entry.perm = IOMMU_NONE;
 event.entry.translated_addr = 0;
+event.entry.pasid = vtd_dev_as->pasid;
 memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
 }
 
@@ -4920,6 +4927,7 @@ static IOMMUTLBEntry 
vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
 IOMMUTLBEntry iotlb = {
 /* We'll fill in the rest later. */
 .target_as = &address_space_memory,
+.pasid = vtd_as->pasid,
 };
 bool success;
 
@@ -4932,6 +4940,7 @@ static IOMMUTLBEntry 
vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
 iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
 iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
 iotlb.perm = IOMMU_RW;
+iotlb.pasid = PCI_NO_PASID;
 success = true;
 }
 
-- 
2.45.2

[PATCH ats_vtd v5 22/22] intel_iommu: add support for ATS

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 75 --
 hw/i386/intel_iommu_internal.h |  1 +
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 9a1bce9ae2..191d7cf0a9 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -5405,12 +5405,10 @@ static void 
vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
 bool is_fpd_set = false;
 VTDContextEntry ce;
 
-assert(vtd_as->pasid != PCI_NO_PASID);
-
 /* Try out best to fetch FPD, we can't do anything more */
 if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
 is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
-if (!is_fpd_set && s->root_scalable) {
+if (!is_fpd_set && s->root_scalable && vtd_as->pasid != PCI_NO_PASID) {
 vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
 }
 }
@@ -6041,6 +6039,75 @@ static IOMMUMemoryRegion 
*vtd_get_memory_region_pasid(PCIBus *bus,
 return &vtd_as->iommu;
 }
 
+static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu,
+hwaddr addr,
+IOMMUAccessFlags flags,
+int iommu_idx)
+{
+IOMMUTLBEntry entry;
+VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+
+if (vtd_is_interrupt_addr(addr)) {
+vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO);
+entry.iova = 0;
+entry.translated_addr = 0;
+entry.addr_mask = ~VTD_PAGE_MASK_4K;
+entry.perm = IOMMU_NONE;
+entry.pasid = PCI_NO_PASID;
+} else {
+entry = vtd_iommu_translate(iommu, addr, flags, iommu_idx);
+}
+return entry;
+}
+
+static ssize_t vtd_iommu_ats_request_translation(IOMMUMemoryRegion *iommu,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write,
+ IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req,
+priv_req, false, false);
+ssize_t res_index = 0;
+hwaddr target_address = addr + length;
+IOMMUTLBEntry entry;
+
+*err_count = 0;
+
+while ((addr < target_address) && (res_index < result_length)) {
+entry = vtd_iommu_ats_do_translate(iommu, addr, flags, 0);
+if (!IOMMU_TLB_ENTRY_TRANSLATION_ERROR(&entry)) { /* Translation done 
*/
+if (no_write) {
+/* The device should not use this entry for a write access */
+entry.perm &= ~IOMMU_WO;
+}
+/*
+ * 4.1.2 : Global Mapping (G) : Remapping hardware provides a value
+ * of 0 in this field
+ */
+entry.perm &= ~IOMMU_GLOBAL;
+} else {
+*err_count += 1;
+}
+result[res_index] = entry;
+res_index += 1;
+addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1);
+}
+
+/* Buffer too small */
+if (addr < target_address) {
+return -ENOMEM;
+}
+return res_index;
+}
+
+static uint64_t vtd_get_min_page_size(IOMMUMemoryRegion *iommu)
+{
+return VTD_PAGE_SIZE;
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
 .get_address_space_pasid = vtd_host_dma_iommu_pasid,
@@ -6246,6 +6313,8 @@ static void 
vtd_iommu_memory_region_class_init(ObjectClass *klass,
 imrc->translate = vtd_iommu_translate;
 imrc->notify_flag_changed = vtd_iommu_notify_flag_changed;
 imrc->replay = vtd_iommu_replay;
+imrc->iommu_ats_request_translation = vtd_iommu_ats_request_translation;
+imrc->get_min_page_size = vtd_get_min_page_size;
 }
 
 static const TypeInfo vtd_iommu_memory_region_info = {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 117dc96d22..d4831522ed 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -194,6 +194,7 @@
 #define VTD_ECAP_MHMV   (15ULL << 20)
 #define VTD_ECAP_NEST   (1ULL << 26)
 #define VTD_ECAP_SRS    (1ULL << 31)
+#define VTD_ECAP_NWFS   (1ULL << 33)
 #define VTD_ECAP_PSS    (19ULL << 35)
 #define VTD_ECAP_PASID  (1ULL << 40)
 #define VTD_ECAP_SMTS   (1ULL << 43)
-- 
2.45.2

[PATCH ats_vtd v5 01/22] intel_iommu: fix FRCD construction macro.

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

The constant must be unsigned, otherwise the two's complement
overrides the other fields when a PASID is present

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e8396575eb..b19f14ef63 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -272,7 +272,7 @@
 /* For the low 64-bit of 128-bit */
 #define VTD_FRCD_FI(val)((val) & ~0xfffULL)
 #define VTD_FRCD_PV(val)(((val) & 0xffffULL) << 40)
-#define VTD_FRCD_PP(val)(((val) & 0x1) << 31)
+#define VTD_FRCD_PP(val)(((val) & 0x1ULL) << 31)
 #define VTD_FRCD_IR_IDX(val)(((val) & 0xffffULL) << 48)
 
 /* DMA Remapping Fault Conditions */
-- 
2.45.2

[PATCH ats_vtd v5 02/22] intel_iommu: make types match

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

The 'level' field in vtd_iotlb_key is an unsigned integer.
We don't need to store level as an int in vtd_lookup_iotlb.

VTDIOTLBPageInvInfo.mask is used in binary operations with addresses.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 2 +-
 hw/i386/intel_iommu_internal.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c3c0ecca71..c6474ae735 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -417,7 +417,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, 
uint16_t source_id,
 {
 struct vtd_iotlb_key key;
 VTDIOTLBEntry *entry;
-int level;
+unsigned level;
 
 for (level = VTD_PT_LEVEL; level < VTD_PML4_LEVEL; level++) {
 key.gfn = vtd_get_iotlb_gfn(addr, level);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index b19f14ef63..bd20746318 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -506,7 +506,7 @@ struct VTDIOTLBPageInvInfo {
 uint16_t domain_id;
 uint32_t pasid;
 uint64_t addr;
-uint8_t mask;
+uint64_t mask;
 };
 typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
 
-- 
2.45.2

[PATCH ats_vtd v5 10/22] pci: add IOMMU operations to get address spaces and memory regions with PASID

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c | 19 +++
 include/hw/pci/pci.h | 34 ++
 2 files changed, 53 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 51feede3cf..3fe47d4002 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2747,6 +2747,25 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 return &address_space_memory;
 }
 
+AddressSpace *pci_device_iommu_address_space_pasid(PCIDevice *dev,
+   uint32_t pasid)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+int devfn;
+
+if (!dev->is_master || !pcie_pasid_enabled(dev) || pasid == PCI_NO_PASID) {
+return NULL;
+}
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+if (iommu_bus && iommu_bus->iommu_ops->get_address_space_pasid) {
+return iommu_bus->iommu_ops->get_address_space_pasid(bus,
+iommu_bus->iommu_opaque, devfn, pasid);
+}
+return NULL;
+}
+
 bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
  Error **errp)
 {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index eb26cac810..ad7bd2ade5 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -385,6 +385,38 @@ typedef struct PCIIOMMUOps {
  * @devfn: device and function number
  */
 AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+/**
+ * @get_address_space_pasid: same as get_address_space but returns an
+ * address space with the requested PASID
+ *
+ * This callback is required for PASID-based operations
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ *
+ * @pasid: the pasid associated with the requested memory region
+ */
+AddressSpace * (*get_address_space_pasid)(PCIBus *bus, void *opaque,
+  int devfn, uint32_t pasid);
+/**
+ * @get_memory_region_pasid: get the iommu memory region for a given
+ * device and pasid
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ *
+ * @pasid: the pasid associated with the requested memory region
+ */
+IOMMUMemoryRegion * (*get_memory_region_pasid)(PCIBus *bus,
+   void *opaque,
+   int devfn,
+   uint32_t pasid);
 /**
  * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU
  *
@@ -420,6 +452,8 @@ typedef struct PCIIOMMUOps {
 } PCIIOMMUOps;
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+AddressSpace *pci_device_iommu_address_space_pasid(PCIDevice *dev,
+   uint32_t pasid);
 bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
  Error **errp);
 void pci_device_unset_iommu_device(PCIDevice *dev);
-- 
2.45.2

[PATCH ats_vtd v5 05/22] memory: add permissions in IOMMUAccessFlags

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

This will be necessary for devices implementing ATS.
We also define a new macro IOMMU_ACCESS_FLAG_FULL in addition to
IOMMU_ACCESS_FLAG to support more access flags.
IOMMU_ACCESS_FLAG is kept for convenience and backward compatibility.

Here are the flags added (defined by the PCIe 5 specification) :
- Execute Requested
- Privileged Mode Requested
- Global
- Untranslated Only

IOMMU_ACCESS_FLAG sets the additional flags to 0

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1be58f694c..aa8e114e77 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -110,15 +110,34 @@ struct MemoryRegionSection {
 
 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
 
-/* See address_space_translate: bit 0 is read, bit 1 is write.  */
+/*
+ * See address_space_translate:
+ *  - bit 0 : read
+ *  - bit 1 : write
+ *  - bit 2 : exec
+ *  - bit 3 : priv
+ *  - bit 4 : global
+ *  - bit 5 : untranslated only
+ */
 typedef enum {
 IOMMU_NONE = 0,
 IOMMU_RO   = 1,
 IOMMU_WO   = 2,
 IOMMU_RW   = 3,
+IOMMU_EXEC = 4,
+IOMMU_PRIV = 8,
+IOMMU_GLOBAL = 16,
+IOMMU_UNTRANSLATED_ONLY = 32,
 } IOMMUAccessFlags;
 
-#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))
+#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | \
+((w) ? IOMMU_WO : 0))
+#define IOMMU_ACCESS_FLAG_FULL(r, w, x, p, g, uo) \
+(IOMMU_ACCESS_FLAG(r, w) | \
+((x) ? IOMMU_EXEC : 0) | \
+((p) ? IOMMU_PRIV : 0) | \
+((g) ? IOMMU_GLOBAL : 0) | \
+((uo) ? IOMMU_UNTRANSLATED_ONLY : 0))
 
 struct IOMMUTLBEntry {
 AddressSpace*target_as;
-- 
2.45.2

[PATCH ats_vtd v5 15/22] memory: Allow to store the PASID in IOMMUTLBEntry

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

This will be useful for devices that support ATS

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index bf91c4bed7..003ee06610 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -145,6 +145,7 @@ struct IOMMUTLBEntry {
 hwaddr   translated_addr;
 hwaddr   addr_mask;  /* 0xfff = 4k translation */
 IOMMUAccessFlags perm;
+uint32_t pasid;
 };
 
 /*
-- 
2.45.2

[PATCH ats_vtd v5 20/22] pci: add a pci-level API for ATS

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Devices implementing ATS can send translation requests using
pci_ats_request_translation_pasid.

The invalidation events are sent back to the device using the iommu
notifier managed with pci_register_iommu_tlb_event_notifier and
pci_unregister_iommu_tlb_event_notifier

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c | 44 +
 include/hw/pci/pci.h | 52 
 2 files changed, 96 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 7a483dd05d..93b816aff2 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2833,6 +2833,50 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
 }
 }
 
+ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
+  bool priv_req, bool exec_req,
+  hwaddr addr, size_t length,
+  bool no_write, IOMMUTLBEntry *result,
+  size_t result_length,
+  uint32_t *err_count)
+{
+assert(result_length);
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr || !pcie_ats_enabled(dev)) {
+return -EPERM;
+}
+return memory_region_iommu_ats_request_translation(iommu_mr, priv_req,
+   exec_req, addr, length,
+   no_write, result,
+   result_length,
+   err_count);
+}
+
+int pci_register_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+  IOMMUNotifier *n)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return -EPERM;
+}
+return memory_region_register_iommu_notifier(MEMORY_REGION(iommu_mr), n,
+ &error_fatal);
+}
+
+int pci_unregister_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return -EPERM;
+}
+memory_region_unregister_iommu_notifier(MEMORY_REGION(iommu_mr), n);
+return 0;
+}
+
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
 {
 /*
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b2a9ed7782..d656f2656a 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -473,6 +473,58 @@ bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, 
uint32_t pasid,
IOMMUNotifier *n, IOMMUNotify fn,
void *opaque);
 
+/**
+ * pci_ats_request_translation_pasid: perform an ATS request
+ *
+ * Return the number of translations stored in @result in case of success,
+ * a negative error code otherwise.
+ * -ENOMEM is returned when the result buffer is not large enough to store
+ * all the translations
+ *
+ * @dev: the ATS-capable PCI device
+ * @pasid: the pasid of the address space in which the translation will be made
+ * @priv_req: privileged mode bit (PASID TLP)
+ * @exec_req: execute request bit (PASID TLP)
+ * @addr: start address of the memory range to be translated
+ * @length: length of the memory range in bytes
+ * @no_write: request a read-only access translation (if supported by the 
IOMMU)
+ * @result: buffer in which the TLB entries will be stored
+ * @result_length: result buffer length
+ * @err_count: number of untranslated subregions
+ */
+ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
+  bool priv_req, bool exec_req,
+  hwaddr addr, size_t length,
+  bool no_write, IOMMUTLBEntry *result,
+  size_t result_length,
+  uint32_t *err_count);
+
+/**
+ * pci_register_iommu_tlb_event_notifier: register a notifier for changes to
+ * IOMMU translation entries in a specific address space.
+ *
+ * Returns 0 on success, or a negative errno otherwise.
+ *
+ * @dev: the device that wants to get notified
+ * @pasid: the pasid of the address space to track
+ * @n: the notifier to register
+ */
+int pci_register_iommu_tlb_event_notifier(PCIDevice *dev, uint32_t pasid,
+  IOMMUNotifier *n);
+
+/**
+ * pci_unregister_iommu_tlb_event_notifier: unreg

[PATCH ats_vtd v5 04/22] intel_iommu: do not consider wait_desc as an invalid descriptor

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
Reviewed-by: Zhenzhong Duan 
---
 hw/i386/intel_iommu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 98996ededc..71cebe2fd3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3500,6 +3500,11 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, 
VTDInvDesc *inv_desc)
 } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
 /* Interrupt flag */
 vtd_generate_completion_event(s);
+} else if (inv_desc->lo & VTD_INV_DESC_WAIT_FN) {
+/*
+ * SW = 0, IF = 0, FN = 1
+ * Nothing to do as we process the events sequentially
+ */
 } else {
 error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
   " (unknown type)", __func__, inv_desc->hi,
-- 
2.45.2

[PATCH ats_vtd v5 21/22] intel_iommu: set the address mask even when a translation fails

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Implements the behavior defined in section 10.2.3.5 of PCIe spec rev 5.
This is needed by devices that support ATS.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f77972130f..9a1bce9ae2 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2192,7 +2192,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
*vtd_as, PCIBus *bus,
 uint8_t bus_num = pci_bus_num(bus);
 VTDContextCacheEntry *cc_entry;
 uint64_t pte, page_mask;
-uint32_t level, pasid = vtd_as->pasid;
+uint32_t level = UINT32_MAX;
+uint32_t pasid = vtd_as->pasid;
 uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
 int ret_fr;
 bool is_fpd_set = false;
@@ -2338,7 +2339,12 @@ error:
 vtd_iommu_unlock(s);
 entry->iova = 0;
 entry->translated_addr = 0;
-entry->addr_mask = 0;
+/*
+ * Set the mask for ATS (the range must be present even when the
+ * translation fails : PCIe rev 5 10.2.3.5)
+ */
+entry->addr_mask = (level != UINT32_MAX) ?
+   (~vtd_pt_level_page_mask(level)) : (~VTD_PAGE_MASK_4K);
 entry->perm = IOMMU_NONE;
 entry->pasid = PCI_NO_PASID;
 return false;
-- 
2.45.2

[PATCH ats_vtd v5 12/22] pci: add a pci-level initialization function for iommu notifiers

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

We add a convenient way to initialize a device-iotlb notifier.
This is meant to be used by ATS-capable devices.

pci_device_iommu_memory_region_pasid is introduced in this commit and
will be used in several other SVM-related functions exposed in
the PCI API.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c | 40 
 include/hw/pci/pci.h | 15 +++
 2 files changed, 55 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 3fe47d4002..7a483dd05d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2747,6 +2747,46 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 return &address_space_memory;
 }
 
+static IOMMUMemoryRegion *pci_device_iommu_memory_region_pasid(PCIDevice *dev,
+   uint32_t pasid)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+int devfn;
+
+/*
+ * This function is for internal use in the module,
+ * we can call it with PCI_NO_PASID
+ */
+if (!dev->is_master ||
+((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+return NULL;
+}
+
+pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+if (iommu_bus && iommu_bus->iommu_ops->get_memory_region_pasid) {
+return iommu_bus->iommu_ops->get_memory_region_pasid(bus,
+ iommu_bus->iommu_opaque, devfn, pasid);
+}
+return NULL;
+}
+
+bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+   IOMMUNotifier *n, IOMMUNotify fn,
+   void *opaque)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr) {
+return false;
+}
+iommu_notifier_init(n, fn, IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0, HWADDR_MAX,
+memory_region_iommu_attrs_to_index(iommu_mr,
+   
MEMTXATTRS_UNSPECIFIED));
+n->opaque = opaque;
+return true;
+}
+
 AddressSpace *pci_device_iommu_address_space_pasid(PCIDevice *dev,
uint32_t pasid)
 {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index ad7bd2ade5..b2a9ed7782 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -458,6 +458,21 @@ bool pci_device_set_iommu_device(PCIDevice *dev, 
HostIOMMUDevice *hiod,
  Error **errp);
 void pci_device_unset_iommu_device(PCIDevice *dev);
 
+/**
+ * pci_iommu_init_iotlb_notifier: initialize an IOMMU notifier
+ *
+ * This function is used by devices before registering an IOTLB notifier
+ *
+ * @dev: the device
+ * @pasid: the pasid of the address space to watch
+ * @n: the notifier to initialize
+ * @fn: the callback to be installed
+ * @opaque: user pointer that can be used to store a state
+ */
+bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+   IOMMUNotifier *n, IOMMUNotify fn,
+   void *opaque);
+
 /**
  * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
  *
-- 
2.45.2

[PATCH ats_vtd v5 17/22] atc: generic ATC that can be used by PCIe devices that support SVM

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

As the SVM-capable devices will need to cache translations, we provide
a first implementation.

This cache uses a two-level design based on hash tables.
The first level is indexed by a PASID and the second by a virtual address.

Signed-off-by: Clément Mathieu--Drif 
---
 util/atc.c   | 211 +++
 util/atc.h   | 117 ++
 util/meson.build |   1 +
 3 files changed, 329 insertions(+)
 create mode 100644 util/atc.c
 create mode 100644 util/atc.h

diff --git a/util/atc.c b/util/atc.c
new file mode 100644
index 0000000000..584ce045db
--- /dev/null
+++ b/util/atc.c
@@ -0,0 +1,211 @@
+/*
+ * QEMU emulation of an ATC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "util/atc.h"
+
+
+#define PAGE_TABLE_ENTRY_SIZE 8
+
+/* a pasid is hashed using the identity function */
+static guint atc_pasid_key_hash(gconstpointer v)
+{
+return (guint)(uintptr_t)v; /* pasid */
+}
+
+/* pasid equality */
+static gboolean atc_pasid_key_equal(gconstpointer v1, gconstpointer v2)
+{
+return v1 == v2;
+}
+
+/* Hash function for IOTLB entries */
+static guint atc_addr_key_hash(gconstpointer v)
+{
+hwaddr addr = (hwaddr)v;
+return (guint)((addr >> 32) ^ (addr & 0xffffffffU));
+}
+
+/* Equality test for IOTLB entries */
+static gboolean atc_addr_key_equal(gconstpointer v1, gconstpointer v2)
+{
+return (hwaddr)v1 == (hwaddr)v2;
+}
+
+static void atc_address_space_free(void *as)
+{
+g_hash_table_unref(as);
+}
+
+/* return log2(val), or UINT8_MAX if val is not a power of 2 */
+static uint8_t ilog2(uint64_t val)
+{
+uint8_t result = 0;
+while (val != 1) {
+if (val & 1) {
+return UINT8_MAX;
+}
+
+val >>= 1;
+result += 1;
+}
+return result;
+}
+
+ATC *atc_new(uint64_t page_size, uint8_t address_width)
+{
+ATC *atc;
+uint8_t log_page_size = ilog2(page_size);
+/* number of bits each used to store all the intermediate indexes */
+uint64_t addr_lookup_indexes_size;
+
+if (log_page_size == UINT8_MAX) {
+return NULL;
+}
+/*
+ * We only support page table entries of 8 (PAGE_TABLE_ENTRY_SIZE) bytes
+ * log2(page_size / 8) = log2(page_size) - 3
+ * is the level offset
+ */
+if (log_page_size <= 3) {
+return NULL;
+}
+
+atc = g_new0(ATC, 1);
+atc->address_spaces = g_hash_table_new_full(atc_pasid_key_hash,
+atc_pasid_key_equal,
+NULL, atc_address_space_free);
+atc->level_offset = log_page_size - 3;
+/* at this point, we know that page_size is a power of 2 */
+atc->min_addr_mask = page_size - 1;
+addr_lookup_indexes_size = address_width - log_page_size;
+if ((addr_lookup_indexes_size % atc->level_offset) != 0) {
+goto error;
+}
+atc->levels = addr_lookup_indexes_size / atc->level_offset;
+atc->page_size = page_size;
+return atc;
+
+error:
+g_free(atc);
+return NULL;
+}
+
+static inline GHashTable *atc_get_address_space_cache(ATC *atc, uint32_t pasid)
+{
+return g_hash_table_lookup(atc->address_spaces,
+   (gconstpointer)(uintptr_t)pasid);
+}
+
+void atc_create_address_space_cache(ATC *atc, uint32_t pasid)
+{
+GHashTable *as_cache;
+
+as_cache = atc_get_address_space_cache(atc, pasid);
+if (!as_cache) {
+as_cache = g_hash_table_new_full(atc_addr_key_hash,
+ atc_addr_key_equal,
+ NULL, g_free);
+g_hash_table_replace(atc->address_spaces,
+ (gpointer)(uintptr_t)pasid, as_cache);
+}
+}
+
+void atc_delete_address_space_cache(ATC *atc, uint32_t pasid)
+{
+g_hash_table_remove(atc->address_spaces, (gpointer)(uintptr_t)pasid);
+}
+
+int atc_update(ATC *atc, IOMMUTLBEntry *entry)
+{
+IOMMUTLBEntry *value;
+GHashTable *as_cache = atc_get_address_space_cache(atc, entry->pasid);
+if (!as_cache) {
+return -ENODEV;
+}
+value = g_memdup2(entry, sizeof(*value));
+g_hash_table_replace(as_cache, (gpointer)(entry->iova), value);
+return 0;
+}
+
+IOMMUTLBEntry *atc_lookup(ATC *atc, uint32_t pasid, hwaddr addr)
+{
+IOMMUTLBEntry *entry;
+hwaddr mas

[PATCH ats_vtd v5 18/22] atc: add unit tests

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 tests/unit/meson.build |   1 +
 tests/unit/test-atc.c  | 527 +
 2 files changed, 528 insertions(+)
 create mode 100644 tests/unit/test-atc.c

diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 26c109c968..d6c6c574de 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -47,6 +47,7 @@ tests = {
   'test-logging': [],
   'test-qapi-util': [],
   'test-interval-tree': [],
+  'test-atc': []
 }
 
 if have_system or have_tools
diff --git a/tests/unit/test-atc.c b/tests/unit/test-atc.c
new file mode 100644
index 0000000000..89378f7f63
--- /dev/null
+++ b/tests/unit/test-atc.c
@@ -0,0 +1,527 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "util/atc.h"
+
+static inline bool tlb_entry_equal(IOMMUTLBEntry *e1, IOMMUTLBEntry *e2)
+{
+if (!e1 || !e2) {
+return !e1 && !e2;
+}
+return e1->iova == e2->iova &&
+e1->addr_mask == e2->addr_mask &&
+e1->pasid == e2->pasid &&
+e1->perm == e2->perm &&
+e1->target_as == e2->target_as &&
+e1->translated_addr == e2->translated_addr;
+}
+
+static void assert_lookup_equals(ATC *atc, IOMMUTLBEntry *target,
+ uint32_t pasid, hwaddr iova)
+{
+IOMMUTLBEntry *result;
+result = atc_lookup(atc, pasid, iova);
+g_assert(tlb_entry_equal(result, target));
+}
+
+static void check_creation(uint64_t page_size, uint8_t address_width,
+   uint8_t levels, uint8_t level_offset,
+   bool should_work) {
+ATC *atc = atc_new(page_size, address_width);
+if (atc) {
+if (atc->levels != levels || atc->level_offset != level_offset) {
+g_assert(false); /* ATC created but invalid configuration : fail */
+}
+atc_destroy(atc);
+g_assert(should_work);
+} else {
+g_assert(!should_work);
+}
+}
+
+static void test_creation_parameters(void)
+{
+check_creation(8, 39, 3, 9, false);
+check_creation(4095, 39, 3, 9, false);
+check_creation(4097, 39, 3, 9, false);
+check_creation(8192, 48, 0, 0, false);
+
+check_creation(4096, 38, 0, 0, false);
+check_creation(4096, 39, 3, 9, true);
+check_creation(4096, 40, 0, 0, false);
+check_creation(4096, 47, 0, 0, false);
+check_creation(4096, 48, 4, 9, true);
+check_creation(4096, 49, 0, 0, false);
+check_creation(4096, 56, 0, 0, false);
+check_creation(4096, 57, 5, 9, true);
+check_creation(4096, 58, 0, 0, false);
+
+check_creation(16384, 35, 0, 0, false);
+check_creation(16384, 36, 2, 11, true);
+check_creation(16384, 37, 0, 0, false);
+check_creation(16384, 46, 0, 0, false);
+check_creation(16384, 47, 3, 11, true);
+check_creation(16384, 48, 0, 0, false);
+check_creation(16384, 57, 0, 0, false);
+check_creation(16384, 58, 4, 11, true);
+check_creation(16384, 59, 0, 0, false);
+}
+
+static void test_single_entry(void)
+{
+IOMMUTLBEntry entry = {
+.iova = 0x123456789000ULL,
+.addr_mask = 0xfffULL,
+.pasid = 5,
+.perm = IOMMU_RW,
+.translated_addr = 0xdeadbeefULL,
+};
+
+ATC *atc = atc_new(4096, 48);
+g_assert(atc);
+
+assert_lookup_equals(atc, NULL, entry.pasid,
+ entry.iova + (entry.addr_mask / 2));
+
+atc_create_address_space_cache(atc, entry.pasid);
+g_assert(atc_update(atc, &entry) == 0);
+
+assert_lookup_equals(atc, NULL, entry.pasid + 1,
+ entry.iova + (entry.addr_mask / 2));
+assert_lookup_equals(atc, &entry, entry.pasid,
+ entry.iova + (entry.addr_mask / 2));
+
+atc_destroy(atc);
+}
+
+static void test_single_entry_2(void)
+{
+static uint64_t page_size = 4096;
+IOMMUTLBEntry e1 = {
+.iova = 0xabcdef20ULL,
+.addr_mask = 0xfffULL,
+.pasid = 1,
+.perm = IOMMU_RW,
+.translated_addr = 0x5eedULL,
+};
+
+ATC *atc = atc_new(page_size , 48);
+atc_create_address_space_cache(atc, e1.pasid);
+atc_update(atc, &e1);
+
+assert_lookup_equals(atc, NULL, e1.pasid, 0xabcdef201000ULL);
+
+atc_destroy(atc);
+}
+
+static void test_page_boundaries(void)
+{
+static const uint32_t pasid = 5;
+ 

[PATCH ats_vtd v5 11/22] memory: store user data pointer in the IOMMU notifiers

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

This will help developers of svm devices to track a state

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index aa8e114e77..bf91c4bed7 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -203,6 +203,7 @@ struct IOMMUNotifier {
 hwaddr start;
 hwaddr end;
 int iommu_idx;
+void *opaque;
 QLIST_ENTRY(IOMMUNotifier) node;
 };
 typedef struct IOMMUNotifier IOMMUNotifier;
-- 
2.45.2

[PATCH ats_vtd v5 07/22] pcie: helper functions to check if PASID and ATS are enabled

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

ats_enabled and pasid_enabled check whether the capabilities are
present or not. If so, we read the configuration space to get
the status of the feature (enabled or not).

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pcie.c | 18 ++
 include/hw/pci/pcie.h |  3 +++
 2 files changed, 21 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index d6a052b616..4efd84fed5 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1201,3 +1201,21 @@ void pcie_pasid_init(PCIDevice *dev, uint16_t offset, 
uint8_t pasid_width,
 
 dev->exp.pasid_cap = offset;
 }
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+return false;
+}
+return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+PCI_PASID_CTRL_ENABLE) != 0;
+}
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+return false;
+}
+return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index b870958c99..0c127b29dc 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -154,4 +154,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 
 void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
  bool exec_perm, bool priv_mod);
+
+bool pcie_pasid_enabled(const PCIDevice *dev);
+bool pcie_ats_enabled(const PCIDevice *dev);
 #endif /* QEMU_PCIE_H */
-- 
2.45.2

[PATCH ats_vtd v5 13/22] intel_iommu: implement the get_address_space_pasid iommu operation

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 13 ++---
 include/hw/i386/intel_iommu.h |  2 +-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 2a78fc823f..e047d2ca83 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -5438,7 +5438,7 @@ static const MemoryRegionOps vtd_mem_ir_fault_ops = {
 };
 
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
- int devfn, unsigned int pasid)
+ int devfn, uint32_t pasid)
 {
 /*
  * We can't simply use sid here since the bus number might not be
@@ -5995,19 +5995,26 @@ static void vtd_reset(DeviceState *dev)
 vtd_refresh_pasid_bind(s);
 }
 
-static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+static AddressSpace *vtd_host_dma_iommu_pasid(PCIBus *bus, void *opaque,
+  int devfn, uint32_t pasid)
 {
 IntelIOMMUState *s = opaque;
 VTDAddressSpace *vtd_as;
 
 assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
 
-vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID);
+vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
 return &vtd_as->as;
 }
 
+static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+return vtd_host_dma_iommu_pasid(bus, opaque, devfn, PCI_NO_PASID);
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
+.get_address_space_pasid = vtd_host_dma_iommu_pasid,
 .set_iommu_device = vtd_dev_set_iommu_device,
 .unset_iommu_device = vtd_dev_unset_iommu_device,
 };
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index b32d711802..e334a3de6d 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -325,6 +325,6 @@ struct IntelIOMMUState {
  * create a new one if none exists
  */
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
- int devfn, unsigned int pasid);
+ int devfn, uint32_t pasid);
 
 #endif
-- 
2.45.2

[PATCH ats_vtd v5 19/22] memory: add an API for ATS support

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

IOMMUs have to implement iommu_ats_request_translation to support ATS.

Devices can use IOMMU_TLB_ENTRY_TRANSLATION_ERROR to check the tlb
entries returned by a translation request.

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 26 ++
 system/memory.c   | 20 
 2 files changed, 46 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 003ee06610..48555c87c6 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -148,6 +148,10 @@ struct IOMMUTLBEntry {
 uint32_t pasid;
 };
 
+/* Check if an IOMMU TLB entry indicates a translation error */
+#define IOMMU_TLB_ENTRY_TRANSLATION_ERROR(entry) ((((entry)->perm) & IOMMU_RW) \
+                                                    == IOMMU_NONE)
+
 /*
  * Bitmap for different IOMMUNotifier capabilities. Each notifier can
  * register with one or multiple IOMMU Notifier capability bit(s).
@@ -571,6 +575,20 @@ struct IOMMUMemoryRegionClass {
  int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
   GList *iova_ranges,
   Error **errp);
+
+/**
+ * @iommu_ats_request_translation:
+ * This method must be implemented if the IOMMU has ATS enabled
+ *
+ * @see pci_ats_request_translation_pasid
+ */
+ssize_t (*iommu_ats_request_translation)(IOMMUMemoryRegion *iommu,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write,
+ IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count);
 };
 
 typedef struct RamDiscardListener RamDiscardListener;
@@ -1926,6 +1944,14 @@ void memory_region_iommu_replay(IOMMUMemoryRegion 
*iommu_mr, IOMMUNotifier *n);
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
  IOMMUNotifier *n);
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req, bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count);
+
 /**
  * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is
  * defined on the IOMMU.
diff --git a/system/memory.c b/system/memory.c
index 74cd73ebc7..8268df7bf5 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2005,6 +2005,26 @@ void 
memory_region_unregister_iommu_notifier(MemoryRegion *mr,
 memory_region_update_iommu_notify_flags(iommu_mr, NULL);
 }
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req,
+bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count)
+{
+IOMMUMemoryRegionClass *imrc = 
memory_region_get_iommu_class_nocheck(iommu_mr);
+
+if (!imrc->iommu_ats_request_translation) {
+return -ENODEV;
+}
+
+return imrc->iommu_ats_request_translation(iommu_mr, priv_req, exec_req,
+   addr, length, no_write, result,
+   result_length, err_count);
+}
+
 void memory_region_notify_iommu_one(IOMMUNotifier *notifier,
 IOMMUTLBEvent *event)
 {
-- 
2.45.2

[PATCH ats_vtd v5 08/22] intel_iommu: declare supported PASID size

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 2 +-
 hw/i386/intel_iommu_internal.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 71cebe2fd3..2a78fc823f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -5860,7 +5860,7 @@ static void vtd_cap_init(IntelIOMMUState *s)
 }
 
 if (s->pasid) {
-s->ecap |= VTD_ECAP_PASID;
+s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
 }
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index bd20746318..117dc96d22 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -194,6 +194,7 @@
 #define VTD_ECAP_MHMV               (15ULL << 20)
 #define VTD_ECAP_NEST               (1ULL << 26)
 #define VTD_ECAP_SRS                (1ULL << 31)
+#define VTD_ECAP_PSS                (19ULL << 35)
 #define VTD_ECAP_PASID              (1ULL << 40)
 #define VTD_ECAP_SMTS               (1ULL << 43)
 #define VTD_ECAP_SLTS               (1ULL << 46)
-- 
2.45.2

[PATCH ats_vtd v5 14/22] intel_iommu: implement the get_memory_region_pasid iommu operation

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e047d2ca83..2e4f535dd1 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -6012,9 +6012,24 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)
 return vtd_host_dma_iommu_pasid(bus, opaque, devfn, PCI_NO_PASID);
 }
 
+static IOMMUMemoryRegion *vtd_get_memory_region_pasid(PCIBus *bus,
+  void *opaque,
+  int devfn,
+  uint32_t pasid)
+{
+IntelIOMMUState *s = opaque;
+VTDAddressSpace *vtd_as;
+
+assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+return &vtd_as->iommu;
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
 .get_address_space_pasid = vtd_host_dma_iommu_pasid,
+.get_memory_region_pasid = vtd_get_memory_region_pasid,
 .set_iommu_device = vtd_dev_set_iommu_device,
 .unset_iommu_device = vtd_dev_unset_iommu_device,
 };
-- 
2.45.2

[PATCH ats_vtd v5 09/22] pci: cache the bus mastering status in the device

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c| 24 ++--
 include/hw/pci/pci_device.h |  1 +
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index c8a8aab306..51feede3cf 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -116,6 +116,12 @@ static GSequence *pci_acpi_index_list(void)
 return used_acpi_index_list;
 }
 
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+memory_region_set_enabled(&d->bus_master_enable_region, enable);
+d->is_master = enable; /* cache the status */
+}
+
 static void pci_init_bus_master(PCIDevice *pci_dev)
 {
 AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -123,7 +129,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
 memory_region_init_alias(&pci_dev->bus_master_enable_region,
  OBJECT(pci_dev), "bus master",
  dma_as->root, 0, 
memory_region_size(dma_as->root));
-memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+pci_set_master(pci_dev, false);
 memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
 &pci_dev->bus_master_enable_region);
 }
@@ -657,9 +663,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, 
size_t size,
 pci_bridge_update_mappings(PCI_BRIDGE(s));
 }
 
-memory_region_set_enabled(&s->bus_master_enable_region,
-  pci_get_word(s->config + PCI_COMMAND)
-  & PCI_COMMAND_MASTER);
+pci_set_master(s,
+   pci_get_word(s->config + PCI_COMMAND) & PCI_COMMAND_MASTER);
 
 g_free(config);
 return 0;
@@ -1611,9 +1616,9 @@ void pci_default_write_config(PCIDevice *d, uint32_t 
addr, uint32_t val_in, int
 
 if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
 pci_update_irq_disabled(d, was_irq_disabled);
-memory_region_set_enabled(&d->bus_master_enable_region,
-  (pci_get_word(d->config + PCI_COMMAND)
-   & PCI_COMMAND_MASTER) && d->has_power);
+pci_set_master(d,
+  (pci_get_word(d->config + PCI_COMMAND) &
+PCI_COMMAND_MASTER) && d->has_power);
 }
 
 msi_write_config(d, addr, val_in, l);
@@ -2888,9 +2893,8 @@ void pci_set_power(PCIDevice *d, bool state)
 
 d->has_power = state;
 pci_update_mappings(d);
-memory_region_set_enabled(&d->bus_master_enable_region,
-  (pci_get_word(d->config + PCI_COMMAND)
-   & PCI_COMMAND_MASTER) && d->has_power);
+pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+& PCI_COMMAND_MASTER) && d->has_power);
 if (!d->has_power) {
 pci_device_reset(d);
 }
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index d3dd0f64b2..7fa501569a 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -87,6 +87,7 @@ struct PCIDevice {
 char name[64];
 PCIIORegion io_regions[PCI_NUM_REGIONS];
 AddressSpace bus_master_as;
+bool is_master;
 MemoryRegion bus_master_container_region;
 MemoryRegion bus_master_enable_region;
 
-- 
2.45.2

[PATCH ats_vtd v5 06/22] pcie: add helper to declare PASID capability for a pcie device

2024-07-01 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pcie.c | 24 +++
 include/hw/pci/pcie.h |  6 +-
 include/hw/pci/pcie_regs.h|  3 +++
 include/standard-headers/linux/pci_regs.h |  1 +
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 4b2f0805c6..d6a052b616 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1177,3 +1177,27 @@ void pcie_acs_reset(PCIDevice *dev)
 pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
 }
 }
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+static const uint16_t control_reg_rw_mask = 0x07;
+uint16_t capability_reg = pasid_width;
+
+pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+PCI_EXT_CAP_PASID_SIZEOF);
+
+capability_reg <<= PCI_PASID_CAP_WIDTH_SHIFT;
+capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+capability_reg |= priv_mod  ? PCI_PASID_CAP_PRIV : 0;
+pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+/* Everything is disabled by default */
+pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+dev->exp.pasid_cap = offset;
+}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 5eddb90976..b870958c99 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -72,8 +72,9 @@ struct PCIExpressDevice {
 uint16_t aer_cap;
 PCIEAERLog aer_log;
 
-/* Offset of ATS capability in config space */
+/* Offset of ATS and PASID capabilities in config space */
 uint16_t ats_cap;
+uint16_t pasid_cap;
 
 /* ACS */
 uint16_t acs_cap;
@@ -150,4 +151,7 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, 
DeviceState *dev,
  Error **errp);
 void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
  DeviceState *dev, Error **errp);
+
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod);
 #endif /* QEMU_PCIE_H */
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index 9d3b6868dc..0a86598f80 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -86,6 +86,9 @@ typedef enum PCIExpLinkWidth {
 #define PCI_ARI_VER 1
 #define PCI_ARI_SIZEOF  8
 
+/* PASID */
+#define PCI_PASID_VER   1
+#define PCI_EXT_CAP_PASID_MAX_WIDTH 20
 /* AER */
 #define PCI_ERR_VER 2
 #define PCI_ERR_SIZEOF  0x48
diff --git a/include/standard-headers/linux/pci_regs.h 
b/include/standard-headers/linux/pci_regs.h
index a39193213f..406dce8e82 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -935,6 +935,7 @@
 #define  PCI_PASID_CAP_EXEC0x0002  /* Exec permissions Supported */
 #define  PCI_PASID_CAP_PRIV0x0004  /* Privilege Mode Supported */
 #define  PCI_PASID_CAP_WIDTH   0x1f00
+#define  PCI_PASID_CAP_WIDTH_SHIFT  8
 #define PCI_PASID_CTRL 0x06/* PASID control register */
 #define  PCI_PASID_CTRL_ENABLE 0x0001  /* Enable bit */
 #define  PCI_PASID_CTRL_EXEC   0x0002  /* Exec permissions Enable */
-- 
2.45.2

Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d

2024-07-01 Thread CLEMENT MATHIEU--DRIF



From: Michael S. Tsirkin 
Sent: 01 July 2024 22:02
To: CLEMENT MATHIEU--DRIF 
Cc: qemu-devel@nongnu.org ; jasow...@redhat.com 
; zhenzhong.d...@intel.com ; 
kevin.t...@intel.com ; yi.l@intel.com 
; joao.m.mart...@oracle.com ; 
pet...@redhat.com 
Subject: Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d

Caution: External email. Do not open attachments or click links, unless this 
email comes from a known sender and you know the content is safe.


On Mon, Jun 03, 2024 at 05:59:38AM +0000, CLEMENT MATHIEU--DRIF wrote:
> From: Clément Mathieu--Drif 
>
> This series belongs to a list of series that add SVM support for VT-d.
>
> As a starting point, we use the series called 'intel_iommu: Enable stage-1 
> translation' (rfc2) by Zhenzhong Duan and Yi Liu.
>
> Here we focus on the implementation of ATS support in the IOMMU and on a 
> PCI-level
> API for ATS to be used by virtual devices.
>
> This work is based on the VT-d specification version 4.1 (March 2023).
> Here is a link to a GitHub repository where you can find the following 
> elements :
> - Qemu with all the patches for SVM
> - ATS
> - PRI
> - Device IOTLB invalidations
> - Requests with already translated addresses
> - A demo device
> - A simple driver for the demo device
> - A userspace program (for testing and demonstration purposes)
>
> https://eur06.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBullSequana%2FQemu-in-guest-SVM-demo&data=05%7C02%7Cclement.mathieu--drif%40eviden.com%7Cf5759aefcc5f4e7d4e6c08dc9a08d29a%7C7d1c77852d8a437db8421ed5d8fbe00a%7C0%7C0%7C638554609882544195%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=2Gza1VD7hKr1Sx3fOLoRh6tk3taSPKTn5nfimhPLz70%3D&reserved=0<https://github.com/BullSequana/Qemu-in-guest-SVM-demo>

I will merge, but could you please resend this using git format-patch
for formatting?  The patches have trailing CRs and don't show which sha1
they are for, which makes re-applying them after each change painful.



Hi Michael,
I sent the series again without the trailing new line.
Tell me if it's better.

Is Zhenzhong's FLTS series merged? If not, it might be the cause of the sha1 
problem you are facing.

Thanks
>cmd


> v2
> - handle huge pages better by detecting the page table level at which the 
> translation errors occur
> - Changes after review by ZhenZhong Duan :
>   - Set the access bit after checking permissions
>   - helper for PASID and ATS : make the commit message more accurate 
> ('present' replaced with 'enabled')
>   - pcie_pasid_init: add PCI_PASID_CAP_WIDTH_SHIFT and use it instead of 
> PCI_EXT_CAP_PASID_SIZEOF for shifting the pasid width when preparing the 
> capability register
>   - pci: do not check pci_bus_bypass_iommu after calling 
> pci_device_get_iommu_bus_devfn
>   - do not alter formatting of IOMMUTLBEntry declaration
>   - vtd_iova_fl_check_canonical : directly use s->aw_bits instead of aw 
> for the sake of clarity
>
> v3
> - rebase on new version of Zhenzhong's flts implementation
> - fix the atc lookup operation (check the mask before returning an entry)
> - add a unit test for the ATC
> - store a user pointer in the iommu notifiers to simplify the 
> implementation of svm devices
> Changes after review by Zhenzhong :
>   - store the input pasid instead of rid2pasid when returning an entry 
> after a translation
>   - split the ATC implementation and its unit tests
>
> v4
> Changes after internal review
>   - Fix the nowrite optimization, an ATS translation without the nowrite 
> flag should not fail when the write permission is not set
>
> v5
> Changes after review by Philippe :
>   - change the type of 'level' to unsigned in vtd_lookup_iotlb
>
>
>
> Clément Mathieu--Drif (22):
>   intel_iommu: fix FRCD construction macro.
>   intel_iommu: make types match
>   intel_iommu: return page walk level even when the translation fails
>   intel_iommu: do not consider wait_desc as an invalid descriptor
>   memory: add permissions in IOMMUAccessFlags
>   pcie: add helper to declare PASID capability for a pcie device
>   pcie: helper functions to check if PASID and ATS are enabled
>   intel_iommu: declare supported PASID size
>   pci: cache the bus mastering status in the device
>   pci: add IOMMU operations to get address spaces and memory regions
> with PASID
>   memory: store user data pointer in the IOMMU notifiers
>   pci: add a pci-level initialization function for iommu notifiers
>   intel_iommu: implement the get_address_space_pa

Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d

2024-07-02 Thread CLEMENT MATHIEU--DRIF

On 02/07/2024 14:16, Michael S. Tsirkin wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> On Tue, Jul 02, 2024 at 05:52:29AM +0000, CLEMENT MATHIEU--DRIF wrote:
>> From: Clement Mathieu--Drif 
>>
>> This series belongs to a list of series that add SVM support for VT-d.
> You don't need ats_vtd as a tag, I think, so if it's helpful
> for someone, I don't mind. What you do need is "repost" so
> people know how it's related to your previous v5 of the
> same patchset.
>
Ok fine, I will remove it in future versions, sorry

Re: [PATCH ats_vtd v5 01/22] intel_iommu: fix FRCD construction macro.

2024-07-02 Thread CLEMENT MATHIEU--DRIF

On 02/07/2024 15:01, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>> From: Clément Mathieu--Drif 
>>
>> The constant must be unsigned, otherwise the two's complement
>> overrides the other fields when a PASID is present
>
> does it need a fix tag since it overrides the other fields?
yes, will add the tag
>
> Reviewed-by: Yi Liu 
>
>> Signed-off-by: Clément Mathieu--Drif 
>> ---
>>   hw/i386/intel_iommu_internal.h | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h 
>> b/hw/i386/intel_iommu_internal.h
>> index e8396575eb..b19f14ef63 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -272,7 +272,7 @@
>>   /* For the low 64-bit of 128-bit */
>>   #define VTD_FRCD_FI(val)    ((val) & ~0xfffULL)
>>   #define VTD_FRCD_PV(val)    (((val) & 0xffffULL) << 40)
>> -#define VTD_FRCD_PP(val)    (((val) & 0x1) << 31)
>> +#define VTD_FRCD_PP(val)    (((val) & 0x1ULL) << 31)
>>   #define VTD_FRCD_IR_IDX(val)    (((val) & 0xffffULL) << 48)
>>
>>   /* DMA Remapping Fault Conditions */
>
> -- 
> Regards,
> Yi Liu

Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d

2024-07-02 Thread CLEMENT MATHIEU--DRIF

On 02/07/2024 15:44, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links,
> unless this email comes from a known sender and you know the content
> is safe.
>
>
> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>> From: Clement Mathieu--Drif 
>>
>> This series belongs to a list of series that add SVM support for VT-d.
>>
>> As a starting point, we use the series called 'intel_iommu: Enable
>> stage-1 translation' (rfc2) by Zhenzhong Duan and Yi Liu.
>>
>> Here we focus on the implementation of ATS support in the IOMMU and
>> on a PCI-level
>> API for ATS to be used by virtual devices.
>>
>> This work is based on the VT-d specification version 4.1 (March 2023).
>> Here is a link to a GitHub repository where you can find the
>> following elements :
>>  - Qemu with all the patches for SVM
>>  - ATS
>>  - PRI
>>  - Device IOTLB invalidations
>>  - Requests with already translated addresses
>>  - A demo device
>>  - A simple driver for the demo device
>>  - A userspace program (for testing and demonstration purposes)
>>
>> https://github.com/BullSequana/Qemu-in-guest-SVM-demo
>>
>>
>> v2
>>  - handle huge pages better by detecting the page table level at
>> which the translation errors occur
>>  - Changes after review by ZhenZhong Duan :
>>   - Set the access bit after checking permissions
>>   - helper for PASID and ATS : make the commit message more
>> accurate ('present' replaced with 'enabled')
>>   - pcie_pasid_init: add PCI_PASID_CAP_WIDTH_SHIFT and use it
>> instead of PCI_EXT_CAP_PASID_SIZEOF for shifting the pasid width when
>> preparing the capability register
>>   - pci: do not check pci_bus_bypass_iommu after calling
>> pci_device_get_iommu_bus_devfn
>>   - do not alter formatting of IOMMUTLBEntry declaration
>>   - vtd_iova_fl_check_canonical : directly use s->aw_bits instead
>> of aw for the sake of clarity
>>
>> v3
>>  - rebase on new version of Zhenzhong's flts implementation
>>  - fix the atc lookup operation (check the mask before returning
>> an entry)
>>  - add a unit test for the ATC
>>  - store a user pointer in the iommu notifiers to simplify the
>> implementation of svm devices
>>  Changes after review by Zhenzhong :
>>   - store the input pasid instead of rid2pasid when returning an
>> entry after a translation
>>   - split the ATC implementation and its unit tests
>>
>> v4
>>  Changes after internal review
>>   - Fix the nowrite optimization, an ATS translation without the
>> nowrite flag should not fail when the write permission is not set
>>
>> v5
>>  Changes after review by Philippe :
>>   - change the type of 'level' to unsigned in vtd_lookup_iotlb
>
> Hi CMD,
>
> I saw two v5 in my inbox, are they the same? :)

Hi,

No, it's a resend following a request by Michael, sorry for that

>
>> Clément Mathieu--Drif (22):
>>intel_iommu: fix FRCD construction macro.
>>intel_iommu: make types match
>>intel_iommu: return page walk level even when the translation fails
>>intel_iommu: do not consider wait_desc as an invalid descriptor
>>memory: add permissions in IOMMUAccessFlags
>>pcie: add helper to declare PASID capability for a pcie device
>>pcie: helper functions to check if PASID and ATS are enabled
>>intel_iommu: declare supported PASID size
>>pci: cache the bus mastering status in the device
>>pci: add IOMMU operations to get address spaces and memory regions
>>  with PASID
>>memory: store user data pointer in the IOMMU notifiers
>>pci: add a pci-level initialization function for iommu notifiers
>>intel_iommu: implement the get_address_space_pasid iommu operation
>>intel_iommu: implement the get_memory_region_pasid iommu operation
>>memory: Allow to store the PASID in IOMMUTLBEntry
>>intel_iommu: fill the PASID field when creating an instance of
>>  IOMMUTLBEntry
>>atc: generic ATC that can be used by PCIe devices that support SVM
>>atc: add unit tests
>>memory: add an API for ATS support
>>pci: add a pci-level API for ATS
>>intel_iommu: set the address mask even when a translation fails
>>intel_iommu: add support for ATS
>>
>>   hw/i386/intel_iommu.c | 146 +-
>>   hw/i386/intel_io

Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d

2024-07-02 Thread CLEMENT MATHIEU--DRIF

On 02/07/2024 15:42, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links,
> unless this email comes from a known sender and you know the content
> is safe.
>
>
> On 2024/7/2 20:15, Michael S. Tsirkin wrote:
>> On Tue, Jul 02, 2024 at 05:57:57AM +0000, CLEMENT MATHIEU--DRIF wrote:
>>>
>>>
>>> ━━━
>>>
>>> From: Michael S. Tsirkin 
>>> Sent: 01 July 2024 22:02
>>> To: CLEMENT MATHIEU--DRIF 
>>> Cc: qemu-devel@nongnu.org ; jasow...@redhat.com
>>> ; zhenzhong.d...@intel.com
>>> ;
>>> kevin.t...@intel.com ; yi.l@intel.com
>>> ; joao.m.mart...@oracle.com
>>> ;
>>> pet...@redhat.com 
>>> Subject: Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d
>>>
>>> Caution: External email. Do not open attachments or click links,
>>> unless this
>>> email comes from a known sender and you know the content is safe.
>>>
>>>
>>> On Mon, Jun 03, 2024 at 05:59:38AM +0000, CLEMENT MATHIEU--DRIF wrote:
>>>> From: Clément Mathieu--Drif 
>>>>
>>>> This series belongs to a list of series that add SVM support for VT-d.
>>>>
>>>> As a starting point, we use the series called 'intel_iommu: Enable
>>>> stage-1
>>> translation' (rfc2) by Zhenzhong Duan and Yi Liu.
>>>>
>>>> Here we focus on the implementation of ATS support in the IOMMU and
>>>> on a
>>> PCI-level
>>>> API for ATS to be used by virtual devices.
>>>>
>>>> This work is based on the VT-d specification version 4.1 (March 2023).
>>>> Here is a link to a GitHub repository where you can find the following
>>> elements :
>>>>  - Qemu with all the patches for SVM
>>>>  - ATS
>>>>  - PRI
>>>>  - Device IOTLB invalidations
>>>>  - Requests with already translated addresses
>>>>  - A demo device
>>>>  - A simple driver for the demo device
>>>>  - A userspace program (for testing and demonstration purposes)
>>>>
>>>> https://eur06.safelinks.protection.outlook.com/?url=
>>> https%3A%2F%2Fgithub.com%2FBullSequana%2FQemu-in-guest-SVM-demo&data=
>>> 05%7C02%7Cclement.mathieu--drif%40eviden.com%7Cf5759aefcc5f4e7d4e6c08dc9a08d29a%7C7d1c77852d8a437db8421ed5d8fbe00a%7C0%7C0%7C638554609882544195%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C
>>>
>>> &sdata=2Gza1VD7hKr1Sx3fOLoRh6tk3taSPKTn5nfimhPLz70%3D&reserved=0
>>>
>>> I will merge, but could you please resend this using git format-patch
>>> for formatting?  The patches have trailing CRs and don't show which
>>> sha1
>>> they are for, which makes re-applying them after each change painful.
>>>
>>>
>>>
>>> Hi Michael,
>>> I sent the series again without the trailing new line.
>>> Tell me if it's better.
>>>
>>> Is Zhenzhong's FLTS series merged? If not, it might the cause of the
>>> sha1
>>> problem you are facing
>>
>> I don't think I have FLTS in any queue.
>>
>> If your series has a dependency please specify this in
>> the cover letter.
>>
>> Alternatively just include the dependency in the posting.
>
> seems this is the dependency.
>
> https://lore.kernel.org/qemu-devel/20240522062313.453317-1-zhenzhong.d...@intel.com/#t
>
>
Sorry if I didn't make it clear.

As mentioned in the cover letter, this series is based on Zhenzhong's
and Yi's FLTS implementation which (AFAIK) has only been posted as an RFC
so far (keep me up to date please).

v5 is based on that branch :
https://github.com/yiliu1765/qemu/tree/zhenzhong/iommufd_nesting_rfcv2

>>
>>
>>
>>
>>> Thanks
>>>> cmd
>>>
>>>
>>>> v2
>>>>  - handle huge pages better by detecting the page table level
>>>> at which the
>>> translation errors occur
>>>>  - Changes after review by ZhenZhong Duan :
>>>>- Set the access bit after checking permissions
>>>>- helper for PASID and ATS : make the commit message more
>>>> accurate
>>> ('present' replaced with 'enabled')
>>>>- pcie_pasid_init: add PCI_PASID_CAP_WIDTH_SHIFT and use it
>>>> instead of
>&

Re: [PATCH ats_vtd v5 04/22] intel_iommu: do not consider wait_desc as an invalid descriptor

2024-07-02 Thread CLEMENT MATHIEU--DRIF

On 02/07/2024 15:33, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>> From: Clément Mathieu--Drif 
>>
>> Signed-off-by: Clément Mathieu--Drif 
>> Reviewed-by: Zhenzhong Duan 
>> ---
>>   hw/i386/intel_iommu.c | 5 +
>>   1 file changed, 5 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 98996ededc..71cebe2fd3 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -3500,6 +3500,11 @@ static bool 
>> vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>>   } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
>>   /* Interrupt flag */
>>   vtd_generate_completion_event(s);
>> +    } else if (inv_desc->lo & VTD_INV_DESC_WAIT_FN) {
>> +    /*
>> + * SW = 0, IF = 0, FN = 1
>> + * Nothing to do as we process the events sequentially
>> + */
>
> This code looks a bit weird. SW field does not co-exist with IF. But 
> either
> SW or IF can co-exist with FN flag. Is it? Have you already seen a wait
> descriptor that only has FN flag set but no SW nor IF flag?
Yes, my test suite triggers that condition
>
>>   } else {
>>   error_report_once("%s: invalid wait desc: hi=%"PRIx64", 
>> lo=%"PRIx64
>>     " (unknown type)", __func__, inv_desc->hi,
>
> -- 
> Regards,
> Yi Liu

Re: [PATCH ats_vtd v5 03/22] intel_iommu: return page walk level even when the translation fails

2024-07-03 Thread CLEMENT MATHIEU--DRIF

On 03/07/2024 13:59, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>> From: Clément Mathieu--Drif 
>>
>> We use this information in vtd_do_iommu_translate to populate the
>> IOMMUTLBEntry and indicate the correct page mask. This prevents ATS
>> devices from sending many useless translation requests when a megapage
>> or gigapage iova is not mapped to a physical address.
>
> you may move this patch prior to "[PATCH ats_vtd v5 22/22] 
> intel_iommu: add
> support for ATS" or just merge to it since it's the "user" of this 
> commit.
will do
>
>> Signed-off-by: Clément Mathieu--Drif 
>> ---
>>   hw/i386/intel_iommu.c | 15 +++
>>   1 file changed, 7 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index c6474ae735..98996ededc 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -2096,9 +2096,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState 
>> *s, VTDContextEntry *ce,
>>    uint32_t pasid)
>>   {
>>   dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
>> -    uint32_t level = vtd_get_iova_level(s, ce, pasid);
>>   uint32_t offset;
>>   uint64_t flpte;
>> +    *flpte_level = vtd_get_iova_level(s, ce, pasid);
>>
>>   if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
>>   error_report_once("%s: detected non canonical IOVA 
>> (iova=0x%" PRIx64 ","
>> @@ -2107,11 +2107,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState 
>> *s, VTDContextEntry *ce,
>>   }
>>
>>   while (true) {
>> -    offset = vtd_iova_level_offset(iova, level);
>> +    offset = vtd_iova_level_offset(iova, *flpte_level);
>>   flpte = vtd_get_pte(addr, offset);
>>
>>   if (flpte == (uint64_t)-1) {
>> -    if (level == vtd_get_iova_level(s, ce, pasid)) {
>> +    if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) {
>>   /* Invalid programming of context-entry */
>>   return -VTD_FR_CONTEXT_ENTRY_INV;
>>   } else {
>> @@ -2128,11 +2128,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState 
>> *s, VTDContextEntry *ce,
>>   if (is_write && !(flpte & VTD_FL_RW_MASK)) {
>>   return -VTD_FR_WRITE;
>>   }
>> -    if (vtd_flpte_nonzero_rsvd(flpte, level)) {
>> +    if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) {
>>   error_report_once("%s: detected flpte reserved non-zero "
>>     "iova=0x%" PRIx64 ", level=0x%" PRIx32
>>     "flpte=0x%" PRIx64 ", pasid=0x%" 
>> PRIX32 ")",
>> -  __func__, iova, level, flpte, pasid);
>> +  __func__, iova, *flpte_level, flpte, 
>> pasid);
>>   return -VTD_FR_PAGING_ENTRY_RSVD;
>>   }
>>
>> @@ -2140,19 +2140,18 @@ static int vtd_iova_to_flpte(IntelIOMMUState 
>> *s, VTDContextEntry *ce,
>>   return -VTD_FR_FS_BIT_UPDATE_FAILED;
>>   }
>>
>> -    if (vtd_is_last_pte(flpte, level)) {
>> +    if (vtd_is_last_pte(flpte, *flpte_level)) {
>>   if (is_write &&
>>   (vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_D) !=
>> MEMTX_OK)) {
>>   return -VTD_FR_FS_BIT_UPDATE_FAILED;
>>   }
>>   *flptep = flpte;
>> -    *flpte_level = level;
>>   return 0;
>>   }
>>
>>   addr = vtd_get_pte_addr(flpte, aw_bits);
>> -    level--;
>> +    (*flpte_level)--;
>>   }
>>   }
>>
>
> -- 
> Regards,
> Yi Liu

Re: [PATCH ats_vtd v5 04/22] intel_iommu: do not consider wait_desc as an invalid descriptor

2024-07-03 Thread CLEMENT MATHIEU--DRIF

On 03/07/2024 09:29, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/7/2 23:29, CLEMENT MATHIEU--DRIF wrote:
>>
>> On 02/07/2024 15:33, Yi Liu wrote:
>>> Caution: External email. Do not open attachments or click links,
>>> unless this email comes from a known sender and you know the content
>>> is safe.
>>>
>>>
>>> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>>>> From: Clément Mathieu--Drif 
>>>>
>>>> Signed-off-by: Clément Mathieu--Drif 
>>>> 
>>>> Reviewed-by: Zhenzhong Duan 
>>>> ---
>>>>    hw/i386/intel_iommu.c | 5 +
>>>>    1 file changed, 5 insertions(+)
>>>>
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index 98996ededc..71cebe2fd3 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -3500,6 +3500,11 @@ static bool
>>>> vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>>>>    } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
>>>>    /* Interrupt flag */
>>>>    vtd_generate_completion_event(s);
>>>> +    } else if (inv_desc->lo & VTD_INV_DESC_WAIT_FN) {
>>>> +    /*
>>>> + * SW = 0, IF = 0, FN = 1
>>>> + * Nothing to do as we process the events sequentially
>>>> + */
>>>
>>> This code looks a bit weird. SW field does not co-exist with IF. But
>>> either
>>> SW or IF can co-exist with FN flag. Is it? Have you already seen a wait
>>> descriptor that only has FN flag set but no SW nor IF flag?
>> Yes, my test suite triggers that condition
>
> I see. Spec indeed has such usage. Please add a comment for it.
> Since it does not need a response, QEMU can just bypass it. Also
> please adjust the subject a bit. It's misleading. Perhaps
>
> "intel_iommu: Bypass barrier wait descriptor"
good idea, will do
>
> Spec CH 7.10
> a. Submit Invalidation Wait Descriptor (inv_wait_dsc) with Fence flag
> (FN=1) Set to Invalidation
> Queue. This ensures that all requests submitted to the Invalidation Queue
> ahead of this wait
> descriptor are processed and completed by remapping hardware before
> processing requests
> after the Invalidation Wait Descriptor. It is not required to specify SW
> flag (or IF flag) in this
> descriptor or for software to wait on its completion, as its function 
> is to
> only act as a barrier.
>
>>>
>>>>    } else {
>>>>    error_report_once("%s: invalid wait desc: hi=%"PRIx64",
>>>> lo=%"PRIx64
>>>>  " (unknown type)", __func__, 
>>>> inv_desc->hi,
>>>
>>> -- 
>>> Regards,
>>> Yi Liu
>
> -- 
> Regards,
> Yi Liu

Re: [PATCH ats_vtd v5 06/22] pcie: add helper to declare PASID capability for a pcie device

2024-07-03 Thread CLEMENT MATHIEU--DRIF

On 03/07/2024 14:04, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>> From: Clément Mathieu--Drif 
>>
>> Signed-off-by: Clément Mathieu--Drif 
>> ---
>>   hw/pci/pcie.c | 24 +++
>>   include/hw/pci/pcie.h |  6 +-
>>   include/hw/pci/pcie_regs.h    |  3 +++
>>   include/standard-headers/linux/pci_regs.h |  1 +
>>   4 files changed, 33 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
>> index 4b2f0805c6..d6a052b616 100644
>> --- a/hw/pci/pcie.c
>> +++ b/hw/pci/pcie.c
>> @@ -1177,3 +1177,27 @@ void pcie_acs_reset(PCIDevice *dev)
>>   pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 
>> 0);
>>   }
>>   }
>> +
>> +/* PASID */
>> +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t 
>> pasid_width,
>> + bool exec_perm, bool priv_mod)
>> +{
>> +    assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
>> +    static const uint16_t control_reg_rw_mask = 0x07;
>> +    uint16_t capability_reg = pasid_width;
>> +
>> +    pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, 
>> offset,
>> +    PCI_EXT_CAP_PASID_SIZEOF);
>> +
>> +    capability_reg <<= PCI_PASID_CAP_WIDTH_SHIFT;
>> +    capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
>> +    capability_reg |= priv_mod  ? PCI_PASID_CAP_PRIV : 0;
>> +    pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
>> +
>> +    /* Everything is disabled by default */
>> +    pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
>> +
>> +    pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, 
>> control_reg_rw_mask);
>> +
>> +    dev->exp.pasid_cap = offset;
>> +}
>
> seems no user of this helper in this series. If yes, you may drop this
> patch and include it when there is a caller of it.
You are right, I will move it to the series that implements the SVM demo 
device
>
>> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
>> index 5eddb90976..b870958c99 100644
>> --- a/include/hw/pci/pcie.h
>> +++ b/include/hw/pci/pcie.h
>> @@ -72,8 +72,9 @@ struct PCIExpressDevice {
>>   uint16_t aer_cap;
>>   PCIEAERLog aer_log;
>>
>> -    /* Offset of ATS capability in config space */
>> +    /* Offset of ATS and PASID capabilities in config space */
>>   uint16_t ats_cap;
>> +    uint16_t pasid_cap;
>>
>>   /* ACS */
>>   uint16_t acs_cap;
>> @@ -150,4 +151,7 @@ void pcie_cap_slot_unplug_cb(HotplugHandler 
>> *hotplug_dev, DeviceState *dev,
>>    Error **errp);
>>   void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
>>    DeviceState *dev, Error **errp);
>> +
>> +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t 
>> pasid_width,
>> + bool exec_perm, bool priv_mod);
>>   #endif /* QEMU_PCIE_H */
>> diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
>> index 9d3b6868dc..0a86598f80 100644
>> --- a/include/hw/pci/pcie_regs.h
>> +++ b/include/hw/pci/pcie_regs.h
>> @@ -86,6 +86,9 @@ typedef enum PCIExpLinkWidth {
>>   #define PCI_ARI_VER 1
>>   #define PCI_ARI_SIZEOF  8
>>
>> +/* PASID */
>> +#define PCI_PASID_VER   1
>> +#define PCI_EXT_CAP_PASID_MAX_WIDTH 20
>>   /* AER */
>>   #define PCI_ERR_VER 2
>>   #define PCI_ERR_SIZEOF  0x48
>> diff --git a/include/standard-headers/linux/pci_regs.h 
>> b/include/standard-headers/linux/pci_regs.h
>> index a39193213f..406dce8e82 100644
>> --- a/include/standard-headers/linux/pci_regs.h
>> +++ b/include/standard-headers/linux/pci_regs.h
>> @@ -935,6 +935,7 @@
>>   #define  PCI_PASID_CAP_EXEC 0x0002  /* Exec permissions Supported */
>>   #define  PCI_PASID_CAP_PRIV 0x0004  /* Privilege Mode Supported */
>>   #define  PCI_PASID_CAP_WIDTH    0x1f00
>> +#define  PCI_PASID_CAP_WIDTH_SHIFT  8
>>   #define PCI_PASID_CTRL  0x06    /* PASID control 
>> register */
>>   #define  PCI_PASID_CTRL_ENABLE  0x0001  /* Enable bit */
>>   #define  PCI_PASID_CTRL_EXEC    0x0002  /* Exec permissions 
>> Enable */
>
> -- 
> Regards,
> Yi Liu

Re: [PATCH ats_vtd v5 19/22] memory: add an API for ATS support

2024-07-03 Thread CLEMENT MATHIEU--DRIF

On 03/07/2024 14:14, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>> From: Clément Mathieu--Drif 
>>
>> IOMMUs have to implement iommu_ats_request_translation to support ATS.
>>
>> Devices can use IOMMU_TLB_ENTRY_TRANSLATION_ERROR to check the tlb
>> entries returned by a translation request.
>>
>> Signed-off-by: Clément Mathieu--Drif 
>> ---
>>   include/exec/memory.h | 26 ++
>>   system/memory.c   | 20 
>>   2 files changed, 46 insertions(+)
>>
>> diff --git a/include/exec/memory.h b/include/exec/memory.h
>> index 003ee06610..48555c87c6 100644
>> --- a/include/exec/memory.h
>> +++ b/include/exec/memory.h
>> @@ -148,6 +148,10 @@ struct IOMMUTLBEntry {
>>   uint32_t pasid;
>>   };
>>
>> +/* Check if an IOMMU TLB entry indicates a translation error */
>> +#define IOMMU_TLB_ENTRY_TRANSLATION_ERROR(entry) ((((entry)->perm) & 
>> IOMMU_RW) \
>> +    == IOMMU_NONE)
>> +
>>   /*
>>    * Bitmap for different IOMMUNotifier capabilities. Each notifier can
>>    * register with one or multiple IOMMU Notifier capability bit(s).
>> @@ -571,6 +575,20 @@ struct IOMMUMemoryRegionClass {
>>    int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
>>     GList *iova_ranges,
>>     Error **errp);
>> +
>> +    /**
>> + * @iommu_ats_request_translation:
>> + * This method must be implemented if the IOMMU has ATS enabled
>> + *
>> + * @see pci_ats_request_translation_pasid
>> + */
>> +    ssize_t (*iommu_ats_request_translation)(IOMMUMemoryRegion *iommu,
>> + bool priv_req, bool 
>> exec_req,
>> + hwaddr addr, size_t 
>> length,
>> + bool no_write,
>> + IOMMUTLBEntry *result,
>> + size_t result_length,
>> + uint32_t *err_count);
>>   };
>>
>
> I'm not quite understanding why the existing translate() does not work.
> Could you elaborate?
We need more parameters than what the existing translation function has.
This one is designed to get translations for a range instead of just a 
single address.
The main purpose is to expose an API that has the same parameters as a 
PCIe translation request message
and to give all the information the IOMMU needs to process the request.
>
>>   typedef struct RamDiscardListener RamDiscardListener;
>> @@ -1926,6 +1944,14 @@ void 
>> memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier 
>> *n);
>>   void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
>>    IOMMUNotifier *n);
>>
>> +ssize_t 
>> memory_region_iommu_ats_request_translation(IOMMUMemoryRegion *iommu_mr,
>> +    bool priv_req, bool 
>> exec_req,
>> +    hwaddr addr, size_t 
>> length,
>> +    bool no_write,
>> +    IOMMUTLBEntry *result,
>> +    size_t result_length,
>> +    uint32_t *err_count);
>> +
>>   /**
>>    * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is
>>    * defined on the IOMMU.
>> diff --git a/system/memory.c b/system/memory.c
>> index 74cd73ebc7..8268df7bf5 100644
>> --- a/system/memory.c
>> +++ b/system/memory.c
>> @@ -2005,6 +2005,26 @@ void 
>> memory_region_unregister_iommu_notifier(MemoryRegion *mr,
>>   memory_region_update_iommu_notify_flags(iommu_mr, NULL);
>>   }
>>
>> +ssize_t 
>> memory_region_iommu_ats_request_translation(IOMMUMemoryRegion *iommu_mr,
>> +    bool priv_req,
>> +    bool exec_req,
>> +    hwaddr addr, 
>> size_t length,
>> +    bool

Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d

2024-07-03 Thread CLEMENT MATHIEU--DRIF

On 03/07/2024 14:32, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links,
> unless this email comes from a known sender and you know the content
> is safe.
>
Hi, thanks for your review! very efficient!
>
> Hi CMD,
>
> I've gone through the series. Some general suggestions on the series.
>
> 1) Patch 01, 02, 04 can be sent separately as they are fixes.
Will do
> 2) This series mixed the ATS and PASID capability a bit. Actually,
>they don't have dependency. I'd suggest you split the series into
>   - support ATS for the requests without PASID
>   - support ATS for requests with PASID
>The second part should be an incremental change based on the first
>part. If you can make use of the existing translate() callback, then
>it is possible to remove the dependency on Zhenzhong's stage-1 series.
The final purpose is to support SVM, consequently, we only add support
for ATS with PASID here
> 3) Some commits do not have commit message. It would be good to have
>it.
Ok, I will be more verbose ;)
> 4) Some helpers look to be used by device model, if possible, it's better
>to submit them with a demo device.
The demo device is already in my GitHub repo
(https://github.com/BullSequana/qemu/tree/master)
It will be sent in a future series that adds the last features required
for SVM (splitting the series to make reviews less painful)
> 5) A design description in the cover-letter would be helpful.
Ok, I will elaborate
>
> On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:
>> From: Clement Mathieu--Drif 
>>
>> This series belongs to a list of series that add SVM support for VT-d.
>>
>> As a starting point, we use the series called 'intel_iommu: Enable
>> stage-1 translation' (rfc2) by Zhenzhong Duan and Yi Liu.
>>
>> Here we focus on the implementation of ATS support in the IOMMU and
>> on a PCI-level
>> API for ATS to be used by virtual devices.
>>
>> This work is based on the VT-d specification version 4.1 (March 2023).
>> Here is a link to a GitHub repository where you can find the
>> following elements :
>>  - Qemu with all the patches for SVM
>>  - ATS
>>  - PRI
>>  - Device IOTLB invalidations
>>  - Requests with already translated addresses
>>  - A demo device
>>  - A simple driver for the demo device
>>  - A userspace program (for testing and demonstration purposes)
>>
>> https://github.com/BullSequana/Qemu-in-guest-SVM-demo
>>
>>
>> v2
>>  - handle huge pages better by detecting the page table level at
>> which the translation errors occur
>>  - Changes after review by ZhenZhong Duan :
>>   - Set the access bit after checking permissions
>>   - helper for PASID and ATS : make the commit message more
>> accurate ('present' replaced with 'enabled')
>>   - pcie_pasid_init: add PCI_PASID_CAP_WIDTH_SHIFT and use it
>> instead of PCI_EXT_CAP_PASID_SIZEOF for shifting the pasid width when
>> preparing the capability register
>>   - pci: do not check pci_bus_bypass_iommu after calling
>> pci_device_get_iommu_bus_devfn
>>   - do not alter formatting of IOMMUTLBEntry declaration
>>   - vtd_iova_fl_check_canonical : directly use s->aw_bits instead
>> of aw for the sake of clarity
>>
>> v3
>>  - rebase on new version of Zhenzhong's flts implementation
>>  - fix the atc lookup operation (check the mask before returning
>> an entry)
>>  - add a unit test for the ATC
>>  - store a user pointer in the iommu notifiers to simplify the
>> implementation of svm devices
>>  Changes after review by Zhenzhong :
>>   - store the input pasid instead of rid2pasid when returning an
>> entry after a translation
>>   - split the ATC implementation and its unit tests
>>
>> v4
>>  Changes after internal review
>>   - Fix the nowrite optimization, an ATS translation without the
>> nowrite flag should not fail when the write permission is not set
>
> It's strange to list internal review here.
>
>> v5
>>  Changes after review by Philippe :
>>   - change the type of 'level' to unsigned in vtd_lookup_iotlb
>
> list change log from latest to the earliest would be nice too. Look
> forward
> to your next version. :)
>
> Regards,
> Yi Liu
>
>> Clément Mathieu--Drif (22):
>>intel_iommu: fix FRCD construction macro.
>>intel_iommu: make types match
>>intel_iommu: return page walk level even when the

Re: [PATCH v2 16/17] intel_iommu: Introduce a property to control FS1GP cap bit setting

2024-08-05 Thread CLEMENT MATHIEU--DRIF
Reviewed-by: Clément Mathieu--Drif



On 05/08/2024 08:27, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> When host IOMMU doesn't support FS1GP but vIOMMU does, host IOMMU
> can't translate stage-1 page table from guest correctly.
>
> Add a property x-cap-fs1gp for user to turn FS1GP off so that
> nested page table on host side works.
>
> This property has no effect when vIOMMU isn't in scalable modern
> mode.
>
> Signed-off-by: Zhenzhong Duan 
> ---
>   include/hw/i386/intel_iommu.h | 1 +
>   hw/i386/intel_iommu.c | 5 -
>   2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 650641544c..f6d9b41b80 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -308,6 +308,7 @@ struct IntelIOMMUState {
>   bool dma_drain; /* Whether DMA r/w draining enabled */
>   bool dma_translation;   /* Whether DMA translation supported */
>   bool pasid; /* Whether to support PASID */
> +bool fs1gp; /* First Stage 1-GByte Page Support */
>
>   /*
>* Protects IOMMU states in general.  Currently it protects the
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 9e973bd710..d7e7354db4 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3778,6 +3778,7 @@ static Property vtd_properties[] = {
>   DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
>   DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
>   DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, 
> true),
> +DEFINE_PROP_BOOL("x-cap-fs1gp", IntelIOMMUState, fs1gp, true),
>   DEFINE_PROP_END_OF_LIST(),
>   };
>
> @@ -4506,7 +4507,9 @@ static void vtd_cap_init(IntelIOMMUState *s)
>   /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>   if (s->scalable_modern) {
>   s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
> -s->cap |= VTD_CAP_FS1GP;
> +if (s->fs1gp) {
> +s->cap |= VTD_CAP_FS1GP;
> +}
>   } else if (s->scalable_mode) {
>   s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>   }
> --
> 2.34.1
>


Re: [PATCH v2 15/17] intel_iommu: Modify x-scalable-mode to be string option to expose scalable modern mode

2024-08-05 Thread CLEMENT MATHIEU--DRIF


On 05/08/2024 08:27, Zhenzhong Duan wrote:

Caution: External email. Do not open attachments or click links, unless this 
email comes from a known sender and you know the content is safe.


From: Yi Liu 

Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
related to scalable mode translation, thus there are multiple combinations.
This vIOMMU implementation simplifies that for the user by providing
typical combinations. Users can configure it with the "x-scalable-mode" option.
The usage is as below:

"-device intel-iommu,x-scalable-mode=["legacy"|"modern"|"off"]"

 - "legacy": gives support for stage-2 page table
 - "modern": gives support for stage-1 page table
 - "off": no scalable mode support
 - any other string, will throw error

If x-scalable-mode is not configured, it is equivalent to x-scalable-mode=off.

With scalable modern mode exposed to the user, also make the PASID entry
check in vtd_pe_type_check() more accurate.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 

Signed-off-by: Zhenzhong Duan 

---
 hw/i386/intel_iommu_internal.h |  2 ++
 include/hw/i386/intel_iommu.h  |  1 +
 hw/i386/intel_iommu.c  | 46 ++
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 52bdbf3bc5..af99deb4cd 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,6 +195,7 @@
 #define VTD_ECAP_PASID  (1ULL << 40)
 #define VTD_ECAP_SMTS   (1ULL << 43)
 #define VTD_ECAP_SLTS   (1ULL << 46)
+#define VTD_ECAP_FLTS   (1ULL << 47)

 /* CAP_REG */
 /* (offset >> 4) << 24 */
@@ -211,6 +212,7 @@
 #define VTD_CAP_SLLPS   ((1ULL << 34) | (1ULL << 35))
 #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
 #define VTD_CAP_DRAIN_READ  (1ULL << 55)
+#define VTD_CAP_FS1GP   (1ULL << 56)
 #define VTD_CAP_DRAIN   (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
 #define VTD_CAP_CM  (1ULL << 7)
 #define VTD_PASID_ID_SHIFT  20
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 48134bda11..650641544c 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -263,6 +263,7 @@ struct IntelIOMMUState {

 bool caching_mode;  /* RO - is cap CM enabled? */
 bool scalable_mode; /* RO - is Scalable Mode supported? */
+char *scalable_mode_str;/* RO - admin's Scalable Mode config */
 bool scalable_modern;   /* RO - is modern SM supported? */
 bool snoop_control; /* RO - is SNP filed supported? */

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5469ab4f9b..9e973bd710 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -803,16 +803,18 @@ static inline bool 
vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
 }

 /* Return true if check passed, otherwise false */
-static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
- VTDPASIDEntry *pe)
+static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
 {
 switch (VTD_PE_GET_TYPE(pe)) {
-case VTD_SM_PASID_ENTRY_SLT:
-return true;
-case VTD_SM_PASID_ENTRY_PT:
-return x86_iommu->pt_supported;
 case VTD_SM_PASID_ENTRY_FLT:
+return !!(s->ecap & VTD_ECAP_FLTS);
+case VTD_SM_PASID_ENTRY_SLT:
+return !!(s->ecap & VTD_ECAP_SLTS) || !(s->ecap & VTD_ECAP_SMTS);

Can '!(s->ecap & VTD_ECAP_SMTS)' evaluate to true in this function even 
though we have found a pasid entry?


 case VTD_SM_PASID_ENTRY_NESTED:
+/* Not support NESTED page table type yet */
+return false;
+case VTD_SM_PASID_ENTRY_PT:
+return !!(s->ecap & VTD_ECAP_PT);
 default:
 /* Unknown type */
 return false;
@@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState 
*s,
 uint8_t pgtt;
 uint32_t index;
 dma_addr_t entry_size;
-X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

 index = VTD_PASID_TABLE_INDEX(pasid);
 entry_size = VTD_PASID_ENTRY_SIZE;
@@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState 
*s,
 }

 /* Do translation type check */
-if (!vtd_pe_type_check(x86_iommu, pe)) {
+if (!vtd_pe_type_check(s, pe)) {
 return -VTD_FR_PASID_TABLE_ENTRY_INV;
 }

@@ -3772,7 +3773,7 @@ static Property vtd_properties[] = {
 DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits,
   VTD_HOST_AW_AUTO),
 DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
-DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
+DEFINE_PROP_STRING("x-scalable-mode", IntelIOMMUState, scalable_mode_st

Re: [PATCH v2 14/17] intel_iommu: Set default aw_bits to 48 in scalable modren mode

2024-08-05 Thread CLEMENT MATHIEU--DRIF
Typo in the title : s/modren/modern

Reviewed-by: Clément Mathieu--Drif


On 05/08/2024 08:27, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> According to VTD spec, stage-1 page table could support 4-level and
> 5-level paging.
>
> However, 5-level paging translation emulation is not supported yet.
> That means the only supported value for aw_bits is 48.
>
> So default aw_bits to 48 in scalable modern mode. In other cases,
> it is still default to 39 for compatibility.
>
> Add a check to ensure user specified value is 48 in modern mode
> for now.
>
> Signed-off-by: Zhenzhong Duan 
> Reviewed-by: Clément Mathieu--Drif
> ---
>   include/hw/i386/intel_iommu.h |  2 +-
>   hw/i386/intel_iommu.c | 16 +++-
>   2 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index b843d069cc..48134bda11 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -45,7 +45,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, 
> INTEL_IOMMU_DEVICE)
>   #define DMAR_REG_SIZE   0x230
>   #define VTD_HOST_AW_39BIT   39
>   #define VTD_HOST_AW_48BIT   48
> -#define VTD_HOST_ADDRESS_WIDTH  VTD_HOST_AW_39BIT
> +#define VTD_HOST_AW_AUTO0xff
>   #define VTD_HAW_MASK(aw)((1ULL << (aw)) - 1)
>
>   #define DMAR_REPORT_F_INTR  (1)
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 317e630e08..5469ab4f9b 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3770,7 +3770,7 @@ static Property vtd_properties[] = {
>   ON_OFF_AUTO_AUTO),
>   DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
>   DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits,
> -  VTD_HOST_ADDRESS_WIDTH),
> +  VTD_HOST_AW_AUTO),
>   DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
>   DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, 
> FALSE),
>   DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, 
> false),
> @@ -4685,6 +4685,14 @@ static bool vtd_decide_config(IntelIOMMUState *s, 
> Error **errp)
>   }
>   }
>
> +if (s->aw_bits == VTD_HOST_AW_AUTO) {
> +if (s->scalable_modern) {
> +s->aw_bits = VTD_HOST_AW_48BIT;
> +} else {
> +s->aw_bits = VTD_HOST_AW_39BIT;
> +}
> +}
> +
>   if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
>   (s->aw_bits != VTD_HOST_AW_48BIT) &&
>   !s->scalable_modern) {
> @@ -4693,6 +4701,12 @@ static bool vtd_decide_config(IntelIOMMUState *s, 
> Error **errp)
>   return false;
>   }
>
> +if ((s->aw_bits != VTD_HOST_AW_48BIT) && s->scalable_modern) {
> +error_setg(errp, "Supported values for aw-bits are: %d",
> +   VTD_HOST_AW_48BIT);
> +return false;
> +}
> +
>   if (s->scalable_mode && !s->dma_drain) {
>   error_setg(errp, "Need to set dma_drain for scalable mode");
>   return false;
> --
> 2.34.1
>


Re: [PATCH v2 03/17] intel_iommu: Add a placeholder variable for scalable modern mode

2024-08-05 Thread CLEMENT MATHIEU--DRIF


On 05/08/2024 08:27, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> Add a new element scalable_modern in IntelIOMMUState to mark scalable
> modern mode; this element will eventually be exposed as an intel_iommu
> property.
>
> For now, it's only a placeholder and used for address width
> compatibility check and block host device passthrough until nesting
> is supported.
>
> Signed-off-by: Yi Liu 
> Signed-off-by: Zhenzhong Duan 
> ---
>   include/hw/i386/intel_iommu.h |  1 +
>   hw/i386/intel_iommu.c | 12 +---
>   2 files changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 1eb05c29fc..788ed42477 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -262,6 +262,7 @@ struct IntelIOMMUState {
>
>   bool caching_mode;  /* RO - is cap CM enabled? */
>   bool scalable_mode; /* RO - is Scalable Mode supported? */
> +bool scalable_modern;   /* RO - is modern SM supported? */
>   bool snoop_control; /* RO - is SNP filed supported? */
>
>   dma_addr_t root;/* Current root table pointer */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index e3465fc27d..c1382a5651 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3872,7 +3872,13 @@ static bool vtd_check_hiod(IntelIOMMUState *s, 
> HostIOMMUDevice *hiod,
>   return false;
>   }
>
> -return true;
> +if (!s->scalable_modern) {
> +/* All checks requested by VTD non-modern mode pass */
> +return true;
> +}
> +
> +error_setg(errp, "host device is unsupported in scalable modern mode 
> yet");
> +return false;
>   }
>
>   static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> @@ -4262,9 +4268,9 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
> **errp)
>   }
>   }
>
> -/* Currently only address widths supported are 39 and 48 bits */
>   if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
> -(s->aw_bits != VTD_HOST_AW_48BIT)) {
> +(s->aw_bits != VTD_HOST_AW_48BIT) &&
> +!s->scalable_modern) {
Why does scalable_modern allow to use a value other than 39 or 48?
Is it safe?
>   error_setg(errp, "Supported values for aw-bits are: %d, %d",
>  VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
>   return false;
> --
> 2.34.1
>


Re: [PATCH v2 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation

2024-08-05 Thread CLEMENT MATHIEU--DRIF


On 05/08/2024 08:27, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> Per spec 6.5.2.4, PASID-selective PASID-based iotlb invalidation will
> flush stage-2 iotlb entries with matching domain id and pasid.
>
> With scalable modern mode introduced, guest could send PASID-selective
> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
>
> Take this opportunity to remove the old IOTLB-related definitions.
>
> Signed-off-by: Zhenzhong Duan 
> ---
>   hw/i386/intel_iommu_internal.h | 14 +++---
>   hw/i386/intel_iommu.c  | 81 ++
>   2 files changed, 90 insertions(+), 5 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 8fa27c7f3b..19e4ed52ca 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -402,11 +402,6 @@ typedef union VTDInvDesc VTDInvDesc;
>   #define VTD_INV_DESC_IOTLB_AM(val)  ((val) & 0x3fULL)
>   #define VTD_INV_DESC_IOTLB_RSVD_LO  0xff00ULL
>   #define VTD_INV_DESC_IOTLB_RSVD_HI  0xf80ULL
> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) & VTD_PASID_ID_MASK)
> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO  0xfff001c0ULL
> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI  0xf80ULL
>
>   /* Mask for Device IOTLB Invalidate Descriptor */
>   #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xf000ULL)
> @@ -438,6 +433,15 @@ typedef union VTDInvDesc VTDInvDesc;
>   (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) 
> : \
>   (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>
> +/* Masks for PIOTLB Invalidate Descriptor */
> +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
> +#define VTD_INV_DESC_PIOTLB_DID(val)  (((val) >> 16) & 
> VTD_DOMAIN_ID_MASK)
> +#define VTD_INV_DESC_PIOTLB_PASID(val)(((val) >> 32) & 0xfULL)
> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff0f1c0ULL
Why did this value change since last post? The 'type' field should 
always be zero in this desc
> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
> +
>   /* Information about page-selective IOTLB invalidate */
>   struct VTDIOTLBPageInvInfo {
>   uint16_t domain_id;
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index c1382a5651..df591419b7 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2656,6 +2656,83 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, 
> VTDInvDesc *inv_desc)
>   return true;
>   }
>
> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
> + gpointer user_data)
> +{
> +VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> +VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> +
> +return ((entry->domain_id == info->domain_id) &&
> +(entry->pasid == info->pasid));
> +}
> +
> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> +uint16_t domain_id, uint32_t pasid)
> +{
> +VTDIOTLBPageInvInfo info;
> +VTDAddressSpace *vtd_as;
> +VTDContextEntry ce;
> +
> +info.domain_id = domain_id;
> +info.pasid = pasid;
> +
> +vtd_iommu_lock(s);
> +g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
> +&info);
> +vtd_iommu_unlock(s);
> +
> +QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
> +if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> +  vtd_as->devfn, &ce) &&
> +domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> +uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
> +
> +if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
> +vtd_as->pasid != pasid) {
> +continue;
> +}
> +
> +if (!s->scalable_modern) {
> +vtd_address_space_sync(vtd_as);
> +}
> +}
> +}
> +}
> +
> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
> +VTDInvDesc *inv_desc)
> +{
> +uint16_t domain_id;
> +uint32_t pasid;
> +
> +if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
> +(inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1)) {
> +error_report_once("%s: invalid piotlb inv desc hi=0x%"PRIx64
> +  " lo=0x%"PRIx64" (reserved bits unzero)",
> +  __func__, inv_desc->val[1], inv_desc->val[0]);
> +return false;
> +}
> +
> +domain_id = VTD_INV_DESC_PIOTLB_DID

Re: [PATCH v2 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation

2024-08-08 Thread CLEMENT MATHIEU--DRIF


On 08/08/2024 14:40, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 8/6/2024 2:35 PM, CLEMENT MATHIEU--DRIF wrote:
>>
>> On 05/08/2024 08:27, Zhenzhong Duan wrote:
>>> Caution: External email. Do not open attachments or click links, 
>>> unless this email comes from a known sender and you know the content 
>>> is safe.
>>>
>>>
>>> Per spec 6.5.2.4, PASID-selective PASID-based iotlb invalidation will
>>> flush stage-2 iotlb entries with matching domain id and pasid.
>>>
>>> With scalable modern mode introduced, guest could send PASID-selective
>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 
>>> entries.
>>>
>>> By this chance, remove old IOTLB related definition.
>>>
>>> Signed-off-by: Zhenzhong Duan 
>>> ---
>>>    hw/i386/intel_iommu_internal.h | 14 +++---
>>>    hw/i386/intel_iommu.c  | 81 
>>> ++
>>>    2 files changed, 90 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h 
>>> b/hw/i386/intel_iommu_internal.h
>>> index 8fa27c7f3b..19e4ed52ca 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -402,11 +402,6 @@ typedef union VTDInvDesc VTDInvDesc;
>>>    #define VTD_INV_DESC_IOTLB_AM(val)  ((val) & 0x3fULL)
>>>    #define VTD_INV_DESC_IOTLB_RSVD_LO 0xff00ULL
>>>    #define VTD_INV_DESC_IOTLB_RSVD_HI  0xf80ULL
>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) & 
>>> VTD_PASID_ID_MASK)
>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff001c0ULL
>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI  0xf80ULL
>>>
>>>    /* Mask for Device IOTLB Invalidate Descriptor */
>>>    #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 
>>> 0xf000ULL)
>>> @@ -438,6 +433,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>    (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | 
>>> VTD_SL_TM)) : \
>>>    (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>>>
>>> +/* Masks for PIOTLB Invalidate Descriptor */
>>> +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>>> +#define VTD_INV_DESC_PIOTLB_DID(val)  (((val) >> 16) & 
>>> VTD_DOMAIN_ID_MASK)
>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfULL)
>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff0f1c0ULL
>> Why did this value change since last post? The 'type' field should
>> always be zero in this desc
>
> Yes, type[6:4] are all zero for all existing invalidation type. But they
> are not real reserved bits.
>
> So I removed them from VTD_INV_DESC_PIOTLB_RSVD_VAL0.
Other masks treat these zero bits as reserved.
I think we should do the same.
For instance, the context-cache invalidation mask is:
#define VTD_INV_DESC_CC_RSVD 0xfffcffc0ULL
>
> Thanks
>
> Zhenzhong
>


Re: [PATCH v2 03/17] intel_iommu: Add a placeholder variable for scalable modern mode

2024-08-08 Thread CLEMENT MATHIEU--DRIF


On 08/08/2024 14:31, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 8/6/2024 2:35 PM, CLEMENT MATHIEU--DRIF wrote:
>>
>> On 05/08/2024 08:27, Zhenzhong Duan wrote:
>>> Caution: External email. Do not open attachments or click links, 
>>> unless this email comes from a known sender and you know the content 
>>> is safe.
>>>
>>>
>>> Add a new element scalable_modern in IntelIOMMUState to mark scalable
>>> modern mode; this element will eventually be exposed as an intel_iommu
>>> property.
>>>
>>> For now, it's only a placeholder, used for the address width
>>> compatibility check and to block host device passthrough until nesting
>>> is supported.
>>>
>>> Signed-off-by: Yi Liu 
>>> Signed-off-by: Zhenzhong Duan 
>>> ---
>>>    include/hw/i386/intel_iommu.h |  1 +
>>>    hw/i386/intel_iommu.c | 12 +---
>>>    2 files changed, 10 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/include/hw/i386/intel_iommu.h 
>>> b/include/hw/i386/intel_iommu.h
>>> index 1eb05c29fc..788ed42477 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -262,6 +262,7 @@ struct IntelIOMMUState {
>>>
>>>    bool caching_mode;  /* RO - is cap CM enabled? */
>>>    bool scalable_mode; /* RO - is Scalable Mode 
>>> supported? */
>>> +    bool scalable_modern;   /* RO - is modern SM supported? */
>>>    bool snoop_control; /* RO - is SNP filed 
>>> supported? */
>>>
>>>    dma_addr_t root;    /* Current root table pointer */
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index e3465fc27d..c1382a5651 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -3872,7 +3872,13 @@ static bool vtd_check_hiod(IntelIOMMUState 
>>> *s, HostIOMMUDevice *hiod,
>>>    return false;
>>>    }
>>>
>>> -    return true;
>>> +    if (!s->scalable_modern) {
>>> +    /* All checks requested by VTD non-modern mode pass */
>>> +    return true;
>>> +    }
>>> +
>>> +    error_setg(errp, "host device is unsupported in scalable modern 
>>> mode yet");
>>> +    return false;
>>>    }
>>>
>>>    static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, 
>>> int devfn,
>>> @@ -4262,9 +4268,9 @@ static bool vtd_decide_config(IntelIOMMUState 
>>> *s, Error **errp)
>>>    }
>>>    }
>>>
>>> -    /* Currently only address widths supported are 39 and 48 bits */
>>>    if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
>>> -    (s->aw_bits != VTD_HOST_AW_48BIT)) {
>>> +    (s->aw_bits != VTD_HOST_AW_48BIT) &&
>>> +    !s->scalable_modern) {
>> Why does scalable_modern allow to use a value other than 39 or 48?
>> Is it safe?
>
> The check for scalable_modern is in patch14:
>
> if ((s->aw_bits != VTD_HOST_AW_48BIT) && s->scalable_modern) {
>
> error_setg(errp, "Supported values for aw-bits are: %d", 
> VTD_HOST_AW_48BIT);
>
> return false;
>
> }
>
> Let me know if you prefer to move it in this patch.
Yes, you are right; it would be better to move the check here.

However, I think the first check should also fail when scalable_modern
is enabled, because values other than 39 and 48 are not supported in any
mode.
After that, we should check whether the value is valid for scalable_modern mode.

Thanks
 >cmd
>
> Thanks
>
> Zhenzhong
>


Re: [PATCH v2 03/17] intel_iommu: Add a placeholder variable for scalable modern mode

2024-08-12 Thread CLEMENT MATHIEU--DRIF


On 13/08/2024 04:20, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
>> -Original Message-
>> From: CLEMENT MATHIEU--DRIF 
>> Subject: Re: [PATCH v2 03/17] intel_iommu: Add a placeholder variable for
>> scalable modern mode
>>
>>
>>
>> On 08/08/2024 14:31, Duan, Zhenzhong wrote:
>>> Caution: External email. Do not open attachments or click links,
>>> unless this email comes from a known sender and you know the content
>>> is safe.
>>>
>>>
>>> On 8/6/2024 2:35 PM, CLEMENT MATHIEU--DRIF wrote:
>>>> On 05/08/2024 08:27, Zhenzhong Duan wrote:
>>>>> Caution: External email. Do not open attachments or click links,
>>>>> unless this email comes from a known sender and you know the content
>>>>> is safe.
>>>>>
>>>>>
>>>>> Add an new element scalable_mode in IntelIOMMUState to mark
>> scalable
>>>>> modern mode, this element will be exposed as an intel_iommu property
>>>>> finally.
>>>>>
>>>>> For now, it's only a placehholder and used for address width
>>>>> compatibility check and block host device passthrough until nesting
>>>>> is supported.
>>>>>
>>>>> Signed-off-by: Yi Liu 
>>>>> Signed-off-by: Zhenzhong Duan 
>>>>> ---
>>>>> include/hw/i386/intel_iommu.h |  1 +
>>>>> hw/i386/intel_iommu.c | 12 +---
>>>>> 2 files changed, 10 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/i386/intel_iommu.h
>>>>> b/include/hw/i386/intel_iommu.h
>>>>> index 1eb05c29fc..788ed42477 100644
>>>>> --- a/include/hw/i386/intel_iommu.h
>>>>> +++ b/include/hw/i386/intel_iommu.h
>>>>> @@ -262,6 +262,7 @@ struct IntelIOMMUState {
>>>>>
>>>>> bool caching_mode;  /* RO - is cap CM enabled? */
>>>>> bool scalable_mode; /* RO - is Scalable Mode
>>>>> supported? */
>>>>> +bool scalable_modern;   /* RO - is modern SM supported? */
>>>>> bool snoop_control; /* RO - is SNP filed
>>>>> supported? */
>>>>>
>>>>> dma_addr_t root;/* Current root table pointer */
>>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>>> index e3465fc27d..c1382a5651 100644
>>>>> --- a/hw/i386/intel_iommu.c
>>>>> +++ b/hw/i386/intel_iommu.c
>>>>> @@ -3872,7 +3872,13 @@ static bool
>> vtd_check_hiod(IntelIOMMUState
>>>>> *s, HostIOMMUDevice *hiod,
>>>>> return false;
>>>>> }
>>>>>
>>>>> -return true;
>>>>> +if (!s->scalable_modern) {
>>>>> +/* All checks requested by VTD non-modern mode pass */
>>>>> +return true;
>>>>> +}
>>>>> +
>>>>> +error_setg(errp, "host device is unsupported in scalable modern
>>>>> mode yet");
>>>>> +return false;
>>>>> }
>>>>>
>>>>> static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque,
>>>>> int devfn,
>>>>> @@ -4262,9 +4268,9 @@ static bool
>> vtd_decide_config(IntelIOMMUState
>>>>> *s, Error **errp)
>>>>> }
>>>>> }
>>>>>
>>>>> -/* Currently only address widths supported are 39 and 48 bits */
>>>>> if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
>>>>> -(s->aw_bits != VTD_HOST_AW_48BIT)) {
>>>>> +(s->aw_bits != VTD_HOST_AW_48BIT) &&
>>>>> +!s->scalable_modern) {
>>>> Why does scalable_modern allow to use a value other than 39 or 48?
>>>> Is it safe?
>>> The check for scalable_modern is in patch14:
>>>
>>> if ((s->aw_bits != VTD_HOST_AW_48BIT) && s->scalable_modern) {
>>>
>>> error_setg(errp, "Supported values for aw-bits are: %d",
>>> VTD_HOST_AW_48BIT);
>>>
>>> return false;
>>>
>>> }
>>>
>>> Let me know if you prefer to move it in this patch.
>> Yes, you are right, it would be better to move the check here.
>>
>> But I think the first check should also fail even when scalable_modern
>> is enabled because values other than 39 and 48 are not supported at all,
>> whatever the mode.
>> Then, we should check if the value is valid for scalable_modern mode.
> Right, I wrote that way with a possible plan to support VTD_HOST_AW_52BIT.
52 or 57?
> What about this:
>
This condition also catches (non-scalable) legacy mode. I think we
should change the error message to make that clear.
Something like this: "Legacy and non-modern scalable modes: supported
values for aw-bits are ..."
Or we could make the error message conditional.
>  if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
>  (s->aw_bits != VTD_HOST_AW_48BIT) &&
>  !s->scalable_modern) {
>  error_setg(errp, "Scalable legacy mode: supported values for aw-bits 
> are: %d, %d",
> VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
>  return false;
>  }
>
>  if ((s->aw_bits != VTD_HOST_AW_48BIT) && s->scalable_modern) {
>  error_setg(errp, "Scalable modern mode: supported values for aw-bits 
> is: %d",
> VTD_HOST_AW_48BIT);
>  return false;
>  }


>
> Thanks
> Zhenzhong


Re: [PATCH v2 03/17] intel_iommu: Add a placeholder variable for scalable modern mode

2024-08-12 Thread CLEMENT MATHIEU--DRIF


On 13/08/2024 08:26, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
>> -Original Message-
>> From: CLEMENT MATHIEU--DRIF 
>> Subject: Re: [PATCH v2 03/17] intel_iommu: Add a placeholder variable for
>> scalable modern mode
>>
>>
>>
>> On 13/08/2024 04:20, Duan, Zhenzhong wrote:
>>> Caution: External email. Do not open attachments or click links, unless this
>> email comes from a known sender and you know the content is safe.
>>>
>>>> -Original Message-
>>>> From: CLEMENT MATHIEU--DRIF 
>>>> Subject: Re: [PATCH v2 03/17] intel_iommu: Add a placeholder variable
>> for
>>>> scalable modern mode
>>>>
>>>>
>>>>
>>>> On 08/08/2024 14:31, Duan, Zhenzhong wrote:
>>>>> Caution: External email. Do not open attachments or click links,
>>>>> unless this email comes from a known sender and you know the content
>>>>> is safe.
>>>>>
>>>>>
>>>>> On 8/6/2024 2:35 PM, CLEMENT MATHIEU--DRIF wrote:
>>>>>> On 05/08/2024 08:27, Zhenzhong Duan wrote:
>>>>>>> Caution: External email. Do not open attachments or click links,
>>>>>>> unless this email comes from a known sender and you know the
>> content
>>>>>>> is safe.
>>>>>>>
>>>>>>>
>>>>>>> Add an new element scalable_mode in IntelIOMMUState to mark
>>>> scalable
>>>>>>> modern mode, this element will be exposed as an intel_iommu
>> property
>>>>>>> finally.
>>>>>>>
>>>>>>> For now, it's only a placehholder and used for address width
>>>>>>> compatibility check and block host device passthrough until nesting
>>>>>>> is supported.
>>>>>>>
>>>>>>> Signed-off-by: Yi Liu 
>>>>>>> Signed-off-by: Zhenzhong Duan 
>>>>>>> ---
>>>>>>>  include/hw/i386/intel_iommu.h |  1 +
>>>>>>>  hw/i386/intel_iommu.c | 12 +---
>>>>>>>  2 files changed, 10 insertions(+), 3 deletions(-)
>>>>>>>
>>>>>>> diff --git a/include/hw/i386/intel_iommu.h
>>>>>>> b/include/hw/i386/intel_iommu.h
>>>>>>> index 1eb05c29fc..788ed42477 100644
>>>>>>> --- a/include/hw/i386/intel_iommu.h
>>>>>>> +++ b/include/hw/i386/intel_iommu.h
>>>>>>> @@ -262,6 +262,7 @@ struct IntelIOMMUState {
>>>>>>>
>>>>>>>  bool caching_mode;  /* RO - is cap CM enabled? */
>>>>>>>  bool scalable_mode; /* RO - is Scalable Mode
>>>>>>> supported? */
>>>>>>> +bool scalable_modern;   /* RO - is modern SM supported? */
>>>>>>>  bool snoop_control; /* RO - is SNP filed
>>>>>>> supported? */
>>>>>>>
>>>>>>>  dma_addr_t root;/* Current root table pointer 
>>>>>>> */
>>>>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>>>>> index e3465fc27d..c1382a5651 100644
>>>>>>> --- a/hw/i386/intel_iommu.c
>>>>>>> +++ b/hw/i386/intel_iommu.c
>>>>>>> @@ -3872,7 +3872,13 @@ static bool
>>>> vtd_check_hiod(IntelIOMMUState
>>>>>>> *s, HostIOMMUDevice *hiod,
>>>>>>>  return false;
>>>>>>>  }
>>>>>>>
>>>>>>> -return true;
>>>>>>> +if (!s->scalable_modern) {
>>>>>>> +/* All checks requested by VTD non-modern mode pass */
>>>>>>> +return true;
>>>>>>> +}
>>>>>>> +
>>>>>>> +error_setg(errp, "host device is unsupported in scalable modern
>>>>>>> mode yet");
>>>>>>> +return false;
>>>>>>>  }
>>>>>>>
>>>>>>>  static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque,
>>>>>>> int devfn,
>&g

Re: [PATCH v2 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation

2024-08-13 Thread CLEMENT MATHIEU--DRIF


On 13/08/2024 04:12, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
>> -Original Message-
>> From: CLEMENT MATHIEU--DRIF 
>> Subject: Re: [PATCH v2 04/17] intel_iommu: Flush stage-2 cache in PASID-
>> selective PASID-based iotlb invalidation
>>
>>
>>
>> On 08/08/2024 14:40, Duan, Zhenzhong wrote:
>>> Caution: External email. Do not open attachments or click links,
>>> unless this email comes from a known sender and you know the content
>>> is safe.
>>>
>>>
>>> On 8/6/2024 2:35 PM, CLEMENT MATHIEU--DRIF wrote:
>>>> On 05/08/2024 08:27, Zhenzhong Duan wrote:
>>>>> Caution: External email. Do not open attachments or click links,
>>>>> unless this email comes from a known sender and you know the content
>>>>> is safe.
>>>>>
>>>>>
>>>>> Per spec 6.5.2.4, PASID-selective PASID-based iotlb invalidation will
>>>>> flush stage-2 iotlb entries with matching domain id and pasid.
>>>>>
>>>>> With scalable modern mode introduced, guest could send PASID-
>> selective
>>>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2
>>>>> entries.
>>>>>
>>>>> By this chance, remove old IOTLB related definition.
>>>>>
>>>>> Signed-off-by: Zhenzhong Duan 
>>>>> ---
>>>>> hw/i386/intel_iommu_internal.h | 14 +++---
>>>>> hw/i386/intel_iommu.c  | 81
>>>>> ++
>>>>> 2 files changed, 90 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>>>> b/hw/i386/intel_iommu_internal.h
>>>>> index 8fa27c7f3b..19e4ed52ca 100644
>>>>> --- a/hw/i386/intel_iommu_internal.h
>>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>>> @@ -402,11 +402,6 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>> #define VTD_INV_DESC_IOTLB_AM(val)  ((val) & 0x3fULL)
>>>>> #define VTD_INV_DESC_IOTLB_RSVD_LO 0xff00ULL
>>>>> #define VTD_INV_DESC_IOTLB_RSVD_HI  0xf80ULL
>>>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>>>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>>>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
>>>>> VTD_PASID_ID_MASK)
>>>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO
>> 0xfff001c0ULL
>>>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI  0xf80ULL
>>>>>
>>>>> /* Mask for Device IOTLB Invalidate Descriptor */
>>>>> #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
>>>>> 0xf000ULL)
>>>>> @@ -438,6 +433,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>> (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM |
>>>>> VTD_SL_TM)) : \
>>>>> (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>>>>>
>>>>> +/* Masks for PIOTLB Invalidate Descriptor */
>>>>> +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
>>>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>>>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>>>>> +#define VTD_INV_DESC_PIOTLB_DID(val)  (((val) >> 16) &
>>>>> VTD_DOMAIN_ID_MASK)
>>>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)(((val) >> 32) & 0xfULL)
>>>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff0f1c0ULL
>>>> Why did this value change since last post? The 'type' field should
>>>> always be zero in this desc
>>> Yes, type[6:4] are all zero for all existing invalidation type. But they
>>> are not real reserved bits.
>>>
>>> So I removed them from VTD_INV_DESC_PIOTLB_RSVD_VAL0.
>> Other masks consider these zeroes as reserved.
>> I think we should do the same.
>> For instance, context cache invalidation is : #define
>> VTD_INV_DESC_CC_RSVD 0xfffcffc0ULL
> Yes, I'll make a separate patch to fix it.
Oops, I just saw your patch; sorry for the misunderstanding!
I think we should continue treating these bits as reserved because the
descriptor type detection code only checks the 4 least-significant bits.
>
> Thanks
> Zhenzhong


Re: [PATCH v2 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation

2024-08-13 Thread CLEMENT MATHIEU--DRIF


On 13/08/2024 09:13, CLEMENT MATHIEU--DRIF wrote:
>
> On 13/08/2024 04:12, Duan, Zhenzhong wrote:
>> Caution: External email. Do not open attachments or click links, unless this 
>> email comes from a known sender and you know the content is safe.
>>
>>
>>> -Original Message-
>>> From: CLEMENT MATHIEU--DRIF 
>>> Subject: Re: [PATCH v2 04/17] intel_iommu: Flush stage-2 cache in PASID-
>>> selective PASID-based iotlb invalidation
>>>
>>>
>>>
>>> On 08/08/2024 14:40, Duan, Zhenzhong wrote:
>>>> Caution: External email. Do not open attachments or click links,
>>>> unless this email comes from a known sender and you know the content
>>>> is safe.
>>>>
>>>>
>>>> On 8/6/2024 2:35 PM, CLEMENT MATHIEU--DRIF wrote:
>>>>> On 05/08/2024 08:27, Zhenzhong Duan wrote:
>>>>>> Caution: External email. Do not open attachments or click links,
>>>>>> unless this email comes from a known sender and you know the content
>>>>>> is safe.
>>>>>>
>>>>>>
>>>>>> Per spec 6.5.2.4, PASID-selective PASID-based iotlb invalidation will
>>>>>> flush stage-2 iotlb entries with matching domain id and pasid.
>>>>>>
>>>>>> With scalable modern mode introduced, guest could send PASID-
>>> selective
>>>>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2
>>>>>> entries.
>>>>>>
>>>>>> By this chance, remove old IOTLB related definition.
>>>>>>
>>>>>> Signed-off-by: Zhenzhong Duan 
>>>>>> ---
>>>>>>  hw/i386/intel_iommu_internal.h | 14 +++---
>>>>>>  hw/i386/intel_iommu.c  | 81
>>>>>> ++
>>>>>>  2 files changed, 90 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>>>>> b/hw/i386/intel_iommu_internal.h
>>>>>> index 8fa27c7f3b..19e4ed52ca 100644
>>>>>> --- a/hw/i386/intel_iommu_internal.h
>>>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>>>> @@ -402,11 +402,6 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>>>  #define VTD_INV_DESC_IOTLB_AM(val)  ((val) & 0x3fULL)
>>>>>>  #define VTD_INV_DESC_IOTLB_RSVD_LO 0xff00ULL
>>>>>>  #define VTD_INV_DESC_IOTLB_RSVD_HI  0xf80ULL
>>>>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>>>>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>>>>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
>>>>>> VTD_PASID_ID_MASK)
>>>>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO
>>> 0xfff001c0ULL
>>>>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI  0xf80ULL
>>>>>>
>>>>>>  /* Mask for Device IOTLB Invalidate Descriptor */
>>>>>>  #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
>>>>>> 0xf000ULL)
>>>>>> @@ -438,6 +433,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>>>  (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM |
>>>>>> VTD_SL_TM)) : \
>>>>>>  (0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>>>>>>
>>>>>> +/* Masks for PIOTLB Invalidate Descriptor */
>>>>>> +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
>>>>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>>>>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>>>>>> +#define VTD_INV_DESC_PIOTLB_DID(val)  (((val) >> 16) &
>>>>>> VTD_DOMAIN_ID_MASK)
>>>>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)(((val) >> 32) & 0xfULL)
>>>>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff0f1c0ULL
>>>>> Why did this value change since last post? The 'type' field should
>>>>> always be zero in this desc
>>>> Yes, type[6:4] are all zero for all existing invalidation type. But they
>>>> are not real reserved bits.
>>>>
>>>> So I removed them from VTD_INV_DESC_PIOTLB_RSVD_VAL0.
>>> Other masks consider these zeroes as reserved.
>>> I think we should do the same.
>>> For instance, context cache invalidation is : #define
>>> VTD_INV_DESC_CC_RSVD 0xfffcffc0ULL
>> Yes, I'll make a separate patch to fix it.
> Oops, I just saw your patch, sorry for the misunderstanding!!
> I think we should continue treating these bits as reserved because the
> descriptor type detection code only checks the 4 LSB.
Oh, you fixed that as well,
so OK, I agree with the changes.
Sorry about that.
>> Thanks
>> Zhenzhong


Re: [PATCH] intel_iommu: Fix invalidation descriptor type field

2024-08-13 Thread CLEMENT MATHIEU--DRIF


On 13/08/2024 09:06, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/8/13 13:53, Zhenzhong Duan wrote:
>> According to spec, invalidation descriptor type is 7bits which is
>> concatenation of bits[11:9] and bits[3:0] of invalidation descriptor.
>>
>> Currently we only pick bits[3:0] as the invalidation type and treat
>> bits[11:9] as reserved zero. This is not a problem for now as bits[11:9]
>> is zero for all current invalidation types. But it will break if newer
>> type occupies bits[11:9].
>>
>> Fix it by take bits[11:9] into type and make reserved bits check 
>> accurate.
>
> s/take/taking/
>
> Reviewed-by: Yi Liu 

Reviewed-by: Clément Mathieu--Drif

>
> There is another fix you may add. In vtd_process_inv_desc(), it should
> treat the type VTD_INV_DESC_PC and VTD_INV_DESC_PIOTLB as invalid type
> if vIOMMU is running in legacy mode.
The same applies to PASID-based device IOTLB invalidation.
>
>> Suggested-by: Clément Mathieu--Drif
>> Signed-off-by: Zhenzhong Duan 
>> ---
>> Tested intel-iommu.flat in kvm-unit-test: PASS
>> Tested vfio device hotplug: PASS
>> ---
>>   hw/i386/intel_iommu_internal.h | 11 ++-
>>   hw/i386/intel_iommu.c  |  2 +-
>>   2 files changed, 7 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h 
>> b/hw/i386/intel_iommu_internal.h
>> index 5f32c36943..13d5d129ae 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -356,7 +356,8 @@ union VTDInvDesc {
>>   typedef union VTDInvDesc VTDInvDesc;
>>
>>   /* Masks for struct VTDInvDesc */
>> -#define VTD_INV_DESC_TYPE   0xf
>> +#define VTD_INV_DESC_TYPE(val)  ((((val) >> 5) & 0x70ULL) | \
>> + ((val) & 0xfULL))
>>   #define VTD_INV_DESC_CC 0x1 /* Context-cache 
>> Invalidate Desc */
>>   #define VTD_INV_DESC_IOTLB  0x2
>>   #define VTD_INV_DESC_DEVICE 0x3
>> @@ -372,7 +373,7 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_WAIT_IF    (1ULL << 4)
>>   #define VTD_INV_DESC_WAIT_FN    (1ULL << 6)
>>   #define VTD_INV_DESC_WAIT_DATA_SHIFT    32
>> -#define VTD_INV_DESC_WAIT_RSVD_LO   0Xff80ULL
>> +#define VTD_INV_DESC_WAIT_RSVD_LO   0Xf180ULL
>>   #define VTD_INV_DESC_WAIT_RSVD_HI   3ULL
>>
>>   /* Masks for Context-cache Invalidation Descriptor */
>> @@ -383,7 +384,7 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_CC_DID(val)    (((val) >> 16) & 
>> VTD_DOMAIN_ID_MASK)
>>   #define VTD_INV_DESC_CC_SID(val)    (((val) >> 32) & 0xUL)
>>   #define VTD_INV_DESC_CC_FM(val) (((val) >> 48) & 3UL)
>> -#define VTD_INV_DESC_CC_RSVD    0xfffcffc0ULL
>> +#define VTD_INV_DESC_CC_RSVD    0xfffcf1c0ULL
>>
>>   /* Masks for IOTLB Invalidate Descriptor */
>>   #define VTD_INV_DESC_IOTLB_G    (3ULL << 4)
>> @@ -393,7 +394,7 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & 
>> VTD_DOMAIN_ID_MASK)
>>   #define VTD_INV_DESC_IOTLB_ADDR(val)    ((val) & ~0xfffULL)
>>   #define VTD_INV_DESC_IOTLB_AM(val)  ((val) & 0x3fULL)
>> -#define VTD_INV_DESC_IOTLB_RSVD_LO  0xff00ULL
>> +#define VTD_INV_DESC_IOTLB_RSVD_LO  0xf100ULL
>>   #define VTD_INV_DESC_IOTLB_RSVD_HI  0xf80ULL
>>   #define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>>   #define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>> @@ -406,7 +407,7 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
>>   #define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xULL)
>>   #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
>> -#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffe0fff8
>> +#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffe0f1f0
>>
>>   /* Rsvd field masks for spte */
>>   #define VTD_SPTE_SNP 0x800ULL
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 16d2885fcc..68cb72a481 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -2744,7 +2744,7 @@ static bool 
>> vtd_process_inv_desc(IntelIOMMUState *s)
>>   return false;
>>   }
>>
>> -    desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
>> +    desc_type = VTD_INV_DESC_TYPE(inv_desc.lo);
>>   /* FIXME: should update at first or at last? */
>>   s->iq_last_desc_type = desc_type;
>>
>
> -- 
> Regards,
> Yi Liu


Re: [PATCH v2 2/2] intel_iommu: Make PASID-cache and PIOTLB type invalid in legacy mode

2024-08-13 Thread CLEMENT MATHIEU--DRIF
Reviewed-by: Clément Mathieu--Drif

Super responsive!

Maybe we can continue along this path after the handlers are implemented.
It would be great to make sure we don't process PASID-related descriptors when
not in scalable mode.
What are your thoughts?

Thanks
>cmd



On 13/08/2024 09:44, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> In vtd_process_inv_desc(), VTD_INV_DESC_PC and VTD_INV_DESC_PIOTLB are
> bypassed without scalable mode check. These two types are not valid
> in legacy mode and we should report error.
>
> Suggested-by: Yi Liu 
> Signed-off-by: Zhenzhong Duan 
> ---
>   hw/i386/intel_iommu.c | 22 +++---
>   1 file changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 68cb72a481..90cd4e5044 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2763,17 +2763,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
>   }
>   break;
>
> -/*
> - * TODO: the entity of below two cases will be implemented in future 
> series.
> - * To make guest (which integrates scalable mode support patch set in
> - * iommu driver) work, just return true is enough so far.
> - */
> -case VTD_INV_DESC_PC:
> -break;
> -
> -case VTD_INV_DESC_PIOTLB:
> -break;
> -
>   case VTD_INV_DESC_WAIT:
>   trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
>   if (!vtd_process_wait_desc(s, &inv_desc)) {
> @@ -2795,6 +2784,17 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
>   }
>   break;
>
> +/*
> + * TODO: the entity of below two cases will be implemented in future 
> series.
> + * To make guest (which integrates scalable mode support patch set in
> + * iommu driver) work, just return true is enough so far.
> + */
> +case VTD_INV_DESC_PC:
> +case VTD_INV_DESC_PIOTLB:
> +if (s->scalable_mode) {
> +break;
> +}
> +/* fallthrough */
>   default:
>   error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
> " (unknown type)", __func__, inv_desc.hi,
> --
> 2.34.1
>
>


Re: [PATCH v2 08/17] intel_iommu: Set accessed and dirty bits during first stage translation

2024-08-15 Thread CLEMENT MATHIEU--DRIF


On 14/08/2024 13:45, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, 
> unless this email comes from a known sender and you know the content 
> is safe.
>
>
> On 2024/8/5 14:27, Zhenzhong Duan wrote:
>> From: Clément Mathieu--Drif 
>>
>> Signed-off-by: Clément Mathieu--Drif 
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>   hw/i386/intel_iommu_internal.h |  3 +++
>>   hw/i386/intel_iommu.c  | 24 
>>   2 files changed, 27 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h 
>> b/hw/i386/intel_iommu_internal.h
>> index 668583aeca..7786ef7624 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -324,6 +324,7 @@ typedef enum VTDFaultReason {
>>
>>   /* Output address in the interrupt address range for scalable 
>> mode */
>>   VTD_FR_SM_INTERRUPT_ADDR = 0x87,
>> +    VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */
>>   VTD_FR_MAX, /* Guard */
>>   } VTDFaultReason;
>>
>> @@ -549,6 +550,8 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   /* Masks for First Level Paging Entry */
>>   #define VTD_FL_P    1ULL
>>   #define VTD_FL_RW_MASK  (1ULL << 1)
>> +#define VTD_FL_A    0x20
>> +#define VTD_FL_D    0x40
>>
>>   /* Second Level Page Translation Pointer*/
>>   #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 6121cca4cd..3c2ceed284 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -1822,6 +1822,7 @@ static const bool vtd_qualified_faults[] = {
>>   [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>>   [VTD_FR_SM_INTERRUPT_ADDR] = true,
>>   [VTD_FR_FS_NON_CANONICAL] = true,
>> +    [VTD_FR_FS_BIT_UPDATE_FAILED] = true,
>>   [VTD_FR_MAX] = false,
>>   };
>>
>> @@ -1939,6 +1940,20 @@ static bool 
>> vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova,
>>   );
>>   }
>>
>> +static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, 
>> uint32_t index,
>> +   uint64_t pte, uint64_t flag)
>> +{
>> +    if (pte & flag) {
>> +    return MEMTX_OK;
>> +    }
>> +    pte |= flag;
>> +    pte = cpu_to_le64(pte);
>> +    return dma_memory_write(&address_space_memory,
>> +    base_addr + index * sizeof(pte),
>> +    &pte, sizeof(pte),
>> +    MEMTXATTRS_UNSPECIFIED);
>
> Can we ensure this write is atomic? A/D bit setting should be atomic from
> guest p.o.v.
As we only set one bit at a time, I don't think we can face atomicity issues
>
>> +}
>> +
>>   /*
>>    * Given the @iova, get relevant @flptep. @flpte_level will be the 
>> last level
>>    * of the translation, can be used for deciding the size of large 
>> page.
>> @@ -1990,7 +2005,16 @@ static int vtd_iova_to_flpte(IntelIOMMUState 
>> *s, VTDContextEntry *ce,
>>   return -VTD_FR_PAGING_ENTRY_RSVD;
>>   }
>>
>> +    if (vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_A) != 
>> MEMTX_OK) {
>> +    return -VTD_FR_FS_BIT_UPDATE_FAILED;
>> +    }
>> +
>>   if (vtd_is_last_pte(flpte, level)) {
>> +    if (is_write &&
>> +    (vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_D) !=
>> + MEMTX_OK)) {
>> +    return -VTD_FR_FS_BIT_UPDATE_FAILED;
>> +    }
>>   *flptep = flpte;
>>   *flpte_level = level;
>>   return 0;
>
> -- 
> Regards,
> Yi Liu


Re: [PATCH v2 08/17] intel_iommu: Set accessed and dirty bits during first stage translation

2024-08-15 Thread CLEMENT MATHIEU--DRIF


On 16/08/2024 04:37, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
>> -Original Message-
>> From: Liu, Yi L 
>> Subject: Re: [PATCH v2 08/17] intel_iommu: Set accessed and dirty bits
>> during first stage translation
>>
>> On 2024/8/5 14:27, Zhenzhong Duan wrote:
>>> From: Clément Mathieu--Drif 
>>>
>>> Signed-off-by: Clément Mathieu--Drif 
>>> Signed-off-by: Zhenzhong Duan 
>>> ---
>>>hw/i386/intel_iommu_internal.h |  3 +++
>>>hw/i386/intel_iommu.c  | 24 
>>>2 files changed, 27 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h
>> b/hw/i386/intel_iommu_internal.h
>>> index 668583aeca..7786ef7624 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -324,6 +324,7 @@ typedef enum VTDFaultReason {
>>>
>>>/* Output address in the interrupt address range for scalable mode */
>>>VTD_FR_SM_INTERRUPT_ADDR = 0x87,
>>> +VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */
>>>VTD_FR_MAX, /* Guard */
>>>} VTDFaultReason;
>>>
>>> @@ -549,6 +550,8 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>/* Masks for First Level Paging Entry */
>>>#define VTD_FL_P1ULL
>>>#define VTD_FL_RW_MASK  (1ULL << 1)
>>> +#define VTD_FL_A0x20
>>> +#define VTD_FL_D0x40
>>>
>>>/* Second Level Page Translation Pointer*/
>>>#define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 6121cca4cd..3c2ceed284 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -1822,6 +1822,7 @@ static const bool vtd_qualified_faults[] = {
>>>[VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>>>[VTD_FR_SM_INTERRUPT_ADDR] = true,
>>>[VTD_FR_FS_NON_CANONICAL] = true,
>>> +[VTD_FR_FS_BIT_UPDATE_FAILED] = true,
>>>[VTD_FR_MAX] = false,
>>>};
>>>
>>> @@ -1939,6 +1940,20 @@ static bool
>> vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova,
>>>);
>>>}
>>>
>>> +static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr,
>> uint32_t index,
>>> +   uint64_t pte, uint64_t flag)
>>> +{
>>> +if (pte & flag) {
>>> +return MEMTX_OK;
>>> +}
>>> +pte |= flag;
>>> +pte = cpu_to_le64(pte);
>>> +return dma_memory_write(&address_space_memory,
>>> +base_addr + index * sizeof(pte),
>>> +&pte, sizeof(pte),
>>> +MEMTXATTRS_UNSPECIFIED);
>> Can we ensure this write is atomic? A/D bit setting should be atomic from
>> guest p.o.v.
> Yes, what about below:
>
> @@ -2096,7 +2096,7 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
> VTDContextEntry *ce,
>   dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
>   uint32_t level = vtd_get_iova_level(s, ce, pasid);
>   uint32_t offset;
> -uint64_t flpte;
> +uint64_t flpte, flag_ad = VTD_FL_A;
>
>   if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
>   error_report_once("%s: detected non canonical IOVA (iova=0x%" 
> PRIx64 ","
> @@ -2134,16 +2134,15 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, 
> VTDContextEntry *ce,
>   return -VTD_FR_PAGING_ENTRY_RSVD;
>   }
>
> -if (vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_A) != MEMTX_OK) {
> +if (vtd_is_last_pte(flpte, level) && is_write) {
> +flag_ad |= VTD_FL_D;
> +}
> +
> +if (vtd_set_flag_in_pte(addr, offset, flpte, flag_ad) != MEMTX_OK) {
>   return -VTD_FR_FS_BIT_UPDATE_FAILED;
>   }
>
>   if (vtd_is_last_pte(flpte, level)) {
> -if (is_write &&
> -(vtd_set_flag_in_pte(addr, offset, flpte, VTD_FL_D) !=
> -
> MEMTX_OK)) {
> -return -VTD_FR_FS_BIT_UPDATE_FAILED;
> -}
>   *flptep = flpte;
>   *flpte_level = level;
>   return 0;
lgtm

Thanks
 >cmd
>
> Thanks
> Zhenzhong
>


[PATCH] MAINTAINERS: Add myself as a reviewer of VT-d

2024-08-20 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3584d6a6c6..b12973f595 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3672,6 +3672,7 @@ VT-d Emulation
 M: Michael S. Tsirkin 
 R: Jason Wang 
 R: Yi Liu 
+R: Clément Mathieu--Drif 
 S: Supported
 F: hw/i386/intel_iommu.c
 F: hw/i386/intel_iommu_internal.h
-- 
2.45.2


[PATCH v1 3/8] pcie: add a way to get the outstanding page request allocation (pri) from the config space.

2024-05-30 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pcie.c | 8 
 include/hw/pci/pcie.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 3fb6588c31..d11b11fc34 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1227,6 +1227,14 @@ void pcie_pri_init(PCIDevice *dev, uint16_t offset, 
uint32_t outstanding_pr_cap,
 dev->exp.pri_cap = offset;
 }
 
+uint32_t pcie_pri_get_req_alloc(const PCIDevice *dev)
+{
+if (!pcie_pri_enabled(dev)) {
+return 0;
+}
+return pci_get_long(dev->config + dev->exp.pri_cap + PCI_PRI_ALLOC_REQ);
+}
+
 bool pcie_pri_enabled(const PCIDevice *dev)
 {
 if (!pci_is_express(dev) || !dev->exp.pri_cap) {
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index b976fd739a..7eb448148b 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -158,6 +158,7 @@ void pcie_pasid_init(PCIDevice *dev, uint16_t offset, 
uint8_t pasid_width,
 void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t 
outstanding_pr_cap,
bool prg_response_pasid_req);
 
+uint32_t pcie_pri_get_req_alloc(const PCIDevice *dev);
 bool pcie_pri_enabled(const PCIDevice *dev);
 bool pcie_pasid_enabled(const PCIDevice *dev);
 bool pcie_ats_enabled(const PCIDevice *dev);
-- 
2.45.1


[PATCH v1 2/8] pcie: helper functions to check if PRI is enabled

2024-05-30 Thread CLEMENT MATHIEU--DRIF
pri_enabled can be used to check whether the capability is present and
enabled on a PCIe device

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pcie.c | 9 +
 include/hw/pci/pcie.h | 1 +
 2 files changed, 10 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 053bca6949..3fb6588c31 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1227,6 +1227,15 @@ void pcie_pri_init(PCIDevice *dev, uint16_t offset, 
uint32_t outstanding_pr_cap,
 dev->exp.pri_cap = offset;
 }
 
+bool pcie_pri_enabled(const PCIDevice *dev)
+{
+if (!pci_is_express(dev) || !dev->exp.pri_cap) {
+return false;
+}
+return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) &
+PCI_PRI_CTRL_ENABLE) != 0;
+}
+
 bool pcie_pasid_enabled(const PCIDevice *dev)
 {
 if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 198d6da817..b976fd739a 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -158,6 +158,7 @@ void pcie_pasid_init(PCIDevice *dev, uint16_t offset, 
uint8_t pasid_width,
 void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t 
outstanding_pr_cap,
bool prg_response_pasid_req);
 
+bool pcie_pri_enabled(const PCIDevice *dev);
 bool pcie_pasid_enabled(const PCIDevice *dev);
 bool pcie_ats_enabled(const PCIDevice *dev);
 #endif /* QEMU_PCIE_H */
-- 
2.45.1


[PATCH v1 1/8] pcie: add a helper to declare the PRI capability for a pcie device

2024-05-30 Thread CLEMENT MATHIEU--DRIF
The PRI configuration offset is also stored in the PCIExpressDevice
to make it easier to get the PRI status register.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pcie.c  | 25 +
 include/hw/pci/pcie.h  |  5 -
 include/hw/pci/pcie_regs.h |  4 
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 4efd84fed5..053bca6949 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1202,6 +1202,31 @@ void pcie_pasid_init(PCIDevice *dev, uint16_t offset, 
uint8_t pasid_width,
 dev->exp.pasid_cap = offset;
 }
 
+/* PRI */
+void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t 
outstanding_pr_cap,
+   bool prg_response_pasid_req)
+{
+static const uint16_t control_reg_rw_mask = 0x3;
+static const uint16_t status_reg_rw1_mask = 0x3;
+static const uint32_t pr_alloc_reg_rw_mask = 0x;
+
+uint16_t status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0;
+status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */
+
+pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset,
+PCI_EXT_CAP_PRI_SIZEOF);
+/* Disabled by default */
+
+pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg);
+pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap);
+
+pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask);
+pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask);
+pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, 
pr_alloc_reg_rw_mask);
+
+dev->exp.pri_cap = offset;
+}
+
 bool pcie_pasid_enabled(const PCIDevice *dev)
 {
 if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 0c127b29dc..198d6da817 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -72,9 +72,10 @@ struct PCIExpressDevice {
 uint16_t aer_cap;
 PCIEAERLog aer_log;
 
-/* Offset of ATS and PASID capabilities in config space */
+/* Offset of ATS, PRI and PASID capabilities in config space */
 uint16_t ats_cap;
 uint16_t pasid_cap;
+uint16_t pri_cap;
 
 /* ACS */
 uint16_t acs_cap;
@@ -154,6 +155,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 
 void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
  bool exec_perm, bool priv_mod);
+void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t 
outstanding_pr_cap,
+   bool prg_response_pasid_req);
 
 bool pcie_pasid_enabled(const PCIDevice *dev);
 bool pcie_ats_enabled(const PCIDevice *dev);
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index 0a86598f80..bb8791d1b3 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -89,6 +89,10 @@ typedef enum PCIExpLinkWidth {
 /* PASID */
 #define PCI_PASID_VER   1
 #define PCI_EXT_CAP_PASID_MAX_WIDTH 20
+
+/* PRI */
+#define PCI_PRI_VER 1
+
 /* AER */
 #define PCI_ERR_VER 2
 #define PCI_ERR_SIZEOF  0x48
-- 
2.45.1


[PATCH v1 5/8] pci: add a PCI-level API for PRI

2024-05-30 Thread CLEMENT MATHIEU--DRIF
A device can send a PRI request to the IOMMU using pci_pri_request_page_pasid.
The PRI response is sent back using the notifier managed with
pci_pri_register_notifier and pci_pri_unregister_notifier.

Signed-off-by: Clément Mathieu--Drif 
---
 hw/pci/pci.c  | 37 
 include/exec/memory.h | 35 +++
 include/hw/pci/pci.h  | 45 +++
 system/memory.c   | 49 +++
 4 files changed, 166 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 10b0708130..dd854fc18f 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2833,6 +2833,43 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
 }
 }
 
+int pci_pri_request_page_pasid(PCIDevice *dev, uint32_t pasid, bool priv_req,
+   bool exec_req, hwaddr addr, bool lpig,
+   uint16_t prgi, bool is_read, bool is_write)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr || !pcie_pri_enabled(dev)) {
+return -EPERM;
+}
+return memory_region_iommu_pri_request_page(iommu_mr, priv_req, exec_req,
+addr, lpig, prgi, is_read,
+is_write);
+}
+
+int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid,
+  IOMMUPRINotifier *notifier)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr || !pcie_pri_enabled(dev)) {
+return -EPERM;
+}
+return memory_region_register_iommu_pri_notifier(MEMORY_REGION(iommu_mr),
+ notifier);
+}
+
+int pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid)
+{
+IOMMUMemoryRegion *iommu_mr = pci_device_iommu_memory_region_pasid(dev,
+pasid);
+if (!iommu_mr || !pcie_pri_enabled(dev)) {
+return -EPERM;
+}
+memory_region_unregister_iommu_pri_notifier(MEMORY_REGION(iommu_mr));
+return 0;
+}
+
 ssize_t pci_ats_request_translation_pasid(PCIDevice *dev, uint32_t pasid,
   bool priv_req, bool exec_req, hwaddr 
addr,
   size_t length, bool no_write,
diff --git a/include/exec/memory.h b/include/exec/memory.h
index f4780d3920..71bdd7e64d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1870,6 +1870,16 @@ void memory_region_notify_iommu(IOMMUMemoryRegion 
*iommu_mr,
 int iommu_idx,
 IOMMUTLBEvent event);
 
+/**
+ * Notify the device attached to a memory region by calling the PRI
+ * callback (if exists)
+ *
+ * @iommu_mr: the region in which the PRI request has been performed
+ * @response: the response to be forwarded to the device
+ */
+void memory_region_notify_pri_iommu(IOMMUMemoryRegion *iommu_mr,
+IOMMUPRIResponse *response);
+
 /**
  * memory_region_notify_iommu_one: notify a change in an IOMMU translation
  *   entry to a single notifier
@@ -1944,6 +1954,31 @@ ssize_t 
memory_region_iommu_ats_request_translation(IOMMUMemoryRegion *iommu_mr,
 size_t result_length,
 uint32_t *err_count);
 
+/**
+ * Register a PRI callback in an IOMMU memory region
+ *
+ * Return 0 if the notifier has been installed,
+ * error code otherwise.
+ * An error occurs when the region already has a
+ * callback configured.
+ *
+ * @mr: the target iommu memory region
+ * @n: the notifier to be installed
+ */
+int memory_region_register_iommu_pri_notifier(MemoryRegion *mr,
+  IOMMUPRINotifier *n);
+
+/**
+ * Unregister a PRI callback from an IOMMU memory region
+ *
+ * @mr: the target iommu memory region
+ */
+void memory_region_unregister_iommu_pri_notifier(MemoryRegion *mr);
+
+int memory_region_iommu_pri_request_page(IOMMUMemoryRegion *iommu_mr,
+ bool priv_req, bool exec_req,
+ hwaddr addr, bool lpig, uint16_t prgi,
+ bool is_read, bool is_write);
 /**
  * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is
  * defined on the IOMMU.
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 8adba6af97..76a6031d8d 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -473,6 +473,51 @@ bool pci_iommu_init_iotlb_notifier(PCIDevice *dev, 
uint32_t pasid,
IOMMUNotifier *n, IOMMUNotify fn,

[PATCH v1 8/8] intel_iommu: add PRI operations support

2024-05-30 Thread CLEMENT MATHIEU--DRIF
Implement the iommu_pri_request_page IOMMU operation
and the behavior when receiving a page group response descriptor

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c  | 235 +
 hw/i386/intel_iommu_internal.h |   2 +
 2 files changed, 237 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 899655928d..dcc92aae58 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -51,6 +51,8 @@
 ((ce)->val[1] & VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK)
 #define VTD_CE_GET_PASID_DIR_TABLE(ce) \
 ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+#define VTD_CE_GET_PRE(ce) \
+((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
 
 /* pe operations */
 #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
@@ -1922,6 +1924,7 @@ static const bool vtd_qualified_faults[] = {
 [VTD_FR_PASID_ENTRY_P] = true,
 [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
 [VTD_FR_SM_INTERRUPT_ADDR] = true,
+[VTD_FR_SM_PRE_ABS] = true,
 [VTD_FR_FS_NON_CANONICAL] = true,
 [VTD_FR_FS_BIT_UPDATE_FAILED] = true,
 [VTD_FR_MAX] = false,
@@ -4379,6 +4382,45 @@ static bool 
vtd_process_device_piotlb_desc(IntelIOMMUState *s,
 return true;
 }
 
+static bool vtd_process_page_group_response_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+VTDAddressSpace *vtd_dev_as;
+
+if ((inv_desc->lo & VTD_INV_DESC_PGRESP_RSVD_LO) ||
+(inv_desc->hi & VTD_INV_DESC_PGRESP_RSVD_HI)) {
+error_report_once("%s: invalid page group response desc: hi=%"PRIx64
+", lo=%"PRIx64" (reserved nonzero)", __func__,
+inv_desc->hi, inv_desc->lo);
+return false;
+}
+
+bool pasid_present = VTD_INV_DESC_PGRESP_PP(inv_desc->lo);
+uint8_t response_code = VTD_INV_DESC_PGRESP_RC(inv_desc->lo);
+uint16_t rid = VTD_INV_DESC_PGRESP_RID(inv_desc->lo);
+uint32_t pasid = VTD_INV_DESC_PGRESP_PASID(inv_desc->lo);
+uint16_t prgi = VTD_INV_DESC_PGRESP_PRGI(inv_desc->hi);
+
+if (!pasid_present) {
+error_report_once("Page group response without PASID is"
+  "not supported yet");
+return false;
+}
+
+vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, rid, pasid);
+if (!vtd_dev_as) {
+return true;
+}
+
+IOMMUPRIResponse response = {
+.prgi = prgi,
+.response_code = response_code
+};
+memory_region_notify_pri_iommu(&vtd_dev_as->iommu, &response);
+
+return true;
+}
+
 static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
   VTDInvDesc *inv_desc)
 {
@@ -4486,6 +4528,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
 }
 break;
 
+case VTD_INV_DESC_PGRESP:
+trace_vtd_inv_desc("page group response", inv_desc.hi, inv_desc.lo);
+if (!vtd_process_page_group_response_desc(s, &inv_desc)) {
+return false;
+}
+break;
+
 default:
 error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
   " (unknown type)", __func__, inv_desc.hi,
@@ -6138,6 +6187,191 @@ static uint64_t vtd_get_min_page_size(IOMMUMemoryRegion 
*iommu)
 return VTD_PAGE_SIZE;
 }
 
+/* 11.4.11.3 : The number of entries in the page request queue is 2^(PQS + 7) 
*/
+static inline uint64_t vtd_prq_size(IntelIOMMUState *s)
+{
+return 1ULL << ((vtd_get_quad(s, DMAR_PQA_REG) & VTD_PQA_SIZE) + 7);
+}
+
+/**
+ * Return true if the bit is accessible and correctly set, false otherwise
+ */
+static bool vtd_check_pre_bit(VTDAddressSpace *vtd_as, hwaddr addr,
+  uint16_t sid, bool is_write)
+{
+int ret;
+IntelIOMMUState *s = vtd_as->iommu_state;
+uint8_t bus_n = pci_bus_num(vtd_as->bus);
+VTDContextEntry ce;
+bool is_fpd_set = false;
+
+ret = vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce);
+
+if (ret) {
+goto error_report;
+}
+
+if (!VTD_CE_GET_PRE(&ce)) {
+ret = -VTD_FR_SM_PRE_ABS;
+goto error_get_fpd_and_report;
+}
+
+return true;
+
+error_get_fpd_and_report:
+/* Try to get fpd (may not work but we are already on an error path) */
+is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+error_report:
+vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
+ vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
+return false;
+}
+
+/* Logic described in section 7.5 */
+static void vtd_generate_page_request_event(IntelIOMMUState *s,
+uint32_t old_pr_status)
+{
+uint32_t current_pectl = vtd_get_long(s, DMAR_PECTL_REG);
+/*
+ * Hardware evaluates PPR and PRO fields in the Page Request Status 
Register
+ * and if any of them is set, Page Request Event is not generated
+ */
+

[PATCH v1 7/8] intel_iommu: declare registers for PRI

2024-05-30 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu.c | 67 +++
 1 file changed, 67 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 2e78ebe6d2..899655928d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4609,6 +4609,27 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
 }
 }
 
+static void vtd_handle_prs_write(IntelIOMMUState *s)
+{
+uint32_t prs = vtd_get_long_raw(s, DMAR_PRS_REG);
+if (!(prs & VTD_PR_STATUS_PPR) && !(prs & VTD_PR_STATUS_PRO)) {
+vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+}
+}
+
+static void vtd_handle_pectl_write(IntelIOMMUState *s)
+{
+uint32_t pectl = vtd_get_long_raw(s, DMAR_PECTL_REG);
+if ((pectl & VTD_PR_PECTL_IP) && !(pectl & VTD_PR_PECTL_IM)) {
+/*
+ * If IP field was 1 when software clears the IM field,
+ * the  interrupt is generated along with clearing the IP field.
+ */
+vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+}
+}
+
 static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
 {
 IntelIOMMUState *s = opaque;
@@ -4649,6 +4670,17 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = s->iq >> 32;
 break;
 
+/* Page Request Event Address Register */
+case DMAR_PEADDR_REG:
+assert(size == 4);
+val = vtd_get_long_raw(s, DMAR_PEADDR_REG);
+break;
+
+case DMAR_PEUADDR_REG:
+assert(size == 4);
+val = vtd_get_long_raw(s, DMAR_PEUADDR_REG);
+break;
+
 default:
 if (size == 4) {
 val = vtd_get_long(s, addr);
@@ -4712,6 +4744,17 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 vtd_handle_iotlb_write(s);
 break;
 
+/* Page Request Event Address Register */
+case DMAR_PEADDR_REG:
+assert(size == 4);
+vtd_set_long(s, addr, val);
+break;
+
+case DMAR_PEUADDR_REG:
+assert(size == 4);
+vtd_set_long(s, addr, val);
+break;
+
 /* Invalidate Address Register, 64-bit */
 case DMAR_IVA_REG:
 if (size == 4) {
@@ -4892,6 +4935,18 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 vtd_set_long(s, addr, val);
 break;
 
+case DMAR_PRS_REG:
+assert(size == 4);
+vtd_set_long(s, addr, val);
+vtd_handle_prs_write(s);
+break;
+
+case DMAR_PECTL_REG:
+assert(size == 4);
+vtd_set_long(s, addr, val);
+vtd_handle_pectl_write(s);
+break;
+
 default:
 if (size == 4) {
 vtd_set_long(s, addr, val);
@@ -5957,6 +6012,18 @@ static void vtd_init(IntelIOMMUState *s)
  * Interrupt remapping registers.
  */
 vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xf80fULL, 0);
+
+/* Page request registers */
+if (s->ecap & VTD_ECAP_PRS) {
+vtd_define_quad(s, DMAR_PQH_REG, 0, 0x7ffe0ULL, 0);
+vtd_define_quad(s, DMAR_PQT_REG, 0, 0x7ffe0ULL, 0);
+vtd_define_quad(s, DMAR_PQA_REG, 0, 0xf007ULL, 0);
+vtd_define_long(s, DMAR_PRS_REG, 0, 0, 0x3UL);
+vtd_define_long(s, DMAR_PECTL_REG, 0, 0x8000UL, 0);
+vtd_define_long(s, DMAR_PEDATA_REG, 0, 0xUL, 0);
+vtd_define_long(s, DMAR_PEADDR_REG, 0, 0xfffcUL, 0);
+vtd_define_long(s, DMAR_PEUADDR_REG, 0, 0xUL, 0);
+}
 }
 
 /* Should not reset address_spaces when reset because devices will still use
-- 
2.45.1


[PATCH v1 6/8] intel_iommu: declare PRI constants and structures

2024-05-30 Thread CLEMENT MATHIEU--DRIF
Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu_internal.h | 52 +-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 461158f588..9e01251335 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -191,6 +191,7 @@
 #define VTD_ECAP_EIM(1ULL << 4)
 #define VTD_ECAP_PT (1ULL << 6)
 #define VTD_ECAP_SC (1ULL << 7)
+#define VTD_ECAP_PRS(1ULL << 29)
 #define VTD_ECAP_MHMV   (15ULL << 20)
 #define VTD_ECAP_NEST   (1ULL << 26)
 #define VTD_ECAP_SRS(1ULL << 31)
@@ -373,6 +374,18 @@ union VTDInvDesc {
 };
 typedef union VTDInvDesc VTDInvDesc;
 
+/* Page Request Descriptor */
+union VTDPRDesc {
+struct {
+uint64_t lo;
+uint64_t hi;
+};
+struct {
+uint64_t val[4];
+};
+};
+typedef union VTDPRDesc VTDPRDesc;
+
 /* Masks for struct VTDInvDesc */
 #define VTD_INV_DESC_TYPE   0xf
 #define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */
@@ -384,6 +397,7 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */
 #define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */
 #define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc*/
+#define VTD_INV_DESC_PGRESP 0x9 /* Page Group Response Desc */
 #define VTD_INV_DESC_NONE   0   /* Not an Invalidate Descriptor */
 
 /* Masks for Invalidation Wait Descriptor*/
@@ -425,7 +439,16 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
 #define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xULL)
 #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
-#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffe0fff8
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffe0fff8ULL
+
+/* Mask for Page Group Response Descriptor */
+#define VTD_INV_DESC_PGRESP_RSVD_HI 0xf003ULL
+#define VTD_INV_DESC_PGRESP_RSVD_LO 0xfff00fe0ULL
+#define VTD_INV_DESC_PGRESP_PP(val) ((val >> 4) & 0x1ULL)
+#define VTD_INV_DESC_PGRESP_RC(val) ((val >> 12) & 0xfULL)
+#define VTD_INV_DESC_PGRESP_RID(val)((val >> 16) & 0xULL)
+#define VTD_INV_DESC_PGRESP_PASID(val)  ((val >> 32) & 0xfULL)
+#define VTD_INV_DESC_PGRESP_PRGI(val)   ((val >> 3) & 0x1ffULL)
 
 /* Mask for PASID Device IOTLB Invalidate Descriptor */
 #define VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(val) ((val) & \
@@ -545,6 +568,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xf
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1  0xffe0ULL
+#define VTD_SM_CONTEXT_ENTRY_PRE0x10ULL
 
 typedef struct VTDPASIDCacheEntry {
 struct VTDPASIDEntry pasid_entry;
@@ -700,4 +724,30 @@ typedef struct VTDHostIOMMUDevice {
 uint32_t errata;
 QLIST_ENTRY(VTDHostIOMMUDevice) next;
 } VTDHostIOMMUDevice;
+
+/* Page Request Descriptor */
+/* For the low 64-bit of 128-bit */
+#define VTD_PRD_TYPE(1ULL)
+#define VTD_PRD_PP(val) ((val & 1ULL) << 8)
+#define VTD_PRD_RID(val)((val & 0xULL) << 16)
+#define VTD_PRD_PASID(val)  ((val & 0xfULL) << 32)
+#define VTD_PRD_EXR(val)((val & 1ULL) << 52)
+#define VTD_PRD_PMR(val)((val & 1ULL) << 53)
+/* For the high 64-bit of 128-bit */
+#define VTD_PRD_RDR(val)(val & 1ULL)
+#define VTD_PRD_WRR(val)((val & 1ULL) << 1)
+#define VTD_PRD_LPIG(val)   ((val & 1ULL) << 2)
+#define VTD_PRD_PRGI(val)   ((val & 0x1ffULL) << 3)
+#define VTD_PRD_ADDR(val)   (val & 0xf000ULL)
+
+/* Page Request Queue constants */
+#define VTD_PQA_ENTRY_SIZE  32 /* Size of an entry in bytes */
+/* Page Request Queue masks */
+#define VTD_PQA_ADDR0xf000ULL /* PR queue address */
+#define VTD_PQA_SIZE0x7ULL /* PR queue size */
+#define VTD_PR_STATUS_PPR   1UL /* Pending page request */
+#define VTD_PR_STATUS_PRO   2UL /* Page request overflow */
+#define VTD_PR_PECTL_IP 0X4000UL /* PR control interrup pending */
+#define VTD_PR_PECTL_IM 0X8000UL /* PR control interrup mask */
+
 #endif
-- 
2.45.1


[PATCH v1 0/8] PRI support for VT-d

2024-05-30 Thread CLEMENT MATHIEU--DRIF
This series belongs to a list of series that add SVM support for VT-d.

Here we focus on the implementation of PRI support in the IOMMU and on a 
PCI-level
API for PRI to be used by virtual devices.

This work is based on the VT-d specification version 4.1 (March 2023).
Here is a link to a GitHub repository where you can find the following elements 
:
- Qemu with all the patches for SVM
- ATS
- PRI
- Device IOTLB invalidations
- Requests with already translated addresses
- A demo device
- A simple driver for the demo device
- A userspace program (for testing and demonstration purposes)

https://github.com/BullSequana/Qemu-in-guest-SVM-demo

Clément Mathieu--Drif (8):
  pcie: add a helper to declare the PRI capability for a pcie device
  pcie: helper functions to check if PRI is enabled
  pcie: add a way to get the outstanding page request allocation (pri)
from the config space.
  pci: declare structures and IOMMU operation for PRI
  pci: add a PCI-level API for PRI
  intel_iommu: declare PRI constants and structures
  intel_iommu: declare registers for PRI
  intel_iommu: add PRI operations support

 hw/i386/intel_iommu.c  | 302 +
 hw/i386/intel_iommu_internal.h |  54 +-
 hw/pci/pci.c   |  37 
 hw/pci/pcie.c  |  42 +
 include/exec/memory.h  |  65 +++
 include/hw/pci/pci.h   |  45 +
 include/hw/pci/pci_bus.h   |   1 +
 include/hw/pci/pcie.h  |   7 +-
 include/hw/pci/pcie_regs.h |   4 +
 system/memory.c|  49 ++
 10 files changed, 604 insertions(+), 2 deletions(-)

-- 
2.45.1


[PATCH v1 4/8] pci: declare structures and IOMMU operation for PRI

2024-05-30 Thread CLEMENT MATHIEU--DRIF
The API is deliberately designed to be similar to the ATS one.
We define a struct that stores a function pointer to the device's callback.
Registering and unregistering a notifier is done using a pair of functions
that will be added in a future commit of this series.

An IOMMU can support PRI by implementing the iommu_pri_request_page
operation declared in IOMMUMemoryRegionClass.

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h| 30 ++
 include/hw/pci/pci_bus.h |  1 +
 2 files changed, 31 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 0ced7c33b1..f4780d3920 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -218,6 +218,25 @@ typedef struct IOMMUTLBEvent {
 IOMMUTLBEntry entry;
 } IOMMUTLBEvent;
 
+/* Page Request Interface */
+#define IOMMU_PRI_RESP_CODE_SUCCESS(val)(!(val))
+#define IOMMU_PRI_RESP_CODE_INVALID_REQUEST(val)((val) == 0x1u)
+#define IOMMU_PRI_RESP_CODE_FAILURE(val)((val) & 0xeu)
+
+typedef struct IOMMUPRIResponse {
+uint8_t response_code;
+uint16_t prgi;
+} IOMMUPRIResponse;
+
+struct IOMMUPRINotifier;
+
+typedef void (*IOMMUPRINotify)(struct IOMMUPRINotifier *notifier,
+   IOMMUPRIResponse *response);
+
+typedef struct IOMMUPRINotifier {
+IOMMUPRINotify notify;
+} IOMMUPRINotifier;
+
 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
 #define RAM_PREALLOC   (1 << 0)
 
@@ -589,6 +608,16 @@ struct IOMMUMemoryRegionClass {
  IOMMUTLBEntry *result,
  size_t result_length,
  uint32_t *err_count);
+
+/**
+ * @iommu_pri_request_page:
+ * This method must be implemented if the IOMMU has PRI enabled
+ *
+ * @see pci_pri_request_page_pasid
+ */
+int (*iommu_pri_request_page)(IOMMUMemoryRegion *iommu, hwaddr addr,
+  bool lpig, uint16_t prgi, bool is_read,
+  bool is_write, bool exec_req, bool priv_req);
 };
 
 typedef struct RamDiscardListener RamDiscardListener;
@@ -878,6 +907,7 @@ struct IOMMUMemoryRegion {
 
 QLIST_HEAD(, IOMMUNotifier) iommu_notify;
 IOMMUNotifierFlag iommu_notify_flags;
+IOMMUPRINotifier *pri_notifier;
 };
 
 #define IOMMU_NOTIFIER_FOREACH(n, mr) \
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 2261312546..eaa777fde4 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -29,6 +29,7 @@ enum PCIBusFlags {
 };
 
 #define PCI_NO_PASID UINT32_MAX
+#define PCI_PRI_PRGI_MASK 0x1ffU
 
 struct PCIBus {
 BusState qbus;
-- 
2.45.1


Re: [PATCH intel_iommu 0/7] FLTS for VT-d

2024-06-02 Thread CLEMENT MATHIEU--DRIF

On 02/06/2024 16:10, Michael S. Tsirkin wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
>
> On Mon, Apr 22, 2024 at 03:52:52PM +0000, CLEMENT MATHIEU--DRIF wrote:
>> This series is the first of a list that add support for SVM in the Intel 
>> IOMMU.
>>
>> Here, we implement support for first-stage translation in VT-d.
>> The PASID-based IOTLB invalidation is also added in this series as it is a
>> requirement of FLTS.
>>
>> The last patch introduces the 'flts' option to enable the feature from
>> the command line.
>> Once enabled, several drivers of the Linux kernel use this feature.
>>
>> This work is based on the VT-d specification version 4.1 (March 2023)
>>
>> Here is a link to a GitHub repository where you can find the following 
>> elements :
>>  - Qemu with all the patches for SVM
>>  - ATS
>>  - PRI
>>  - PASID based IOTLB invalidation
>>  - Device IOTLB invalidations
>>  - First-stage translations
>>  - Requests with already translated addresses
>>  - A demo device
>>  - A simple driver for the demo device
>>  - A userspace program (for testing and demonstration purposes)
>>
>> https://github.com/BullSequana/Qemu-in-guest-SVM-demo
> Pls post v2 addressing minor comments so far.

Hi Michael,

The comments have been addressed, and I rebased my work for SVM support
on the FLTS implementation by Zhenzhong (who cherry-picked patches from
this series).

You can see all the changes in my series called 'ATS support for VT-d'
(posted on May 21st). I also posted the PRI series on May 30th.

I'm going to post a new version of ATS today to address Philippe's
comment on the patch called 'make types match'.

Tell me if you want me to change something

 >cmd

>
>> Clément Mathieu--Drif (7):
>>intel_iommu: fix FRCD construction macro.
>>intel_iommu: rename slpte to pte before adding FLTS
>>intel_iommu: make types match
>>intel_iommu: add support for first-stage translation
>>intel_iommu: extract device IOTLB invalidation logic
>>intel_iommu: add PASID-based IOTLB invalidation
>>intel_iommu: add a CLI option to enable FLTS
>>
>>   hw/i386/intel_iommu.c  | 655 ++---
>>   hw/i386/intel_iommu_internal.h | 114 --
>>   include/hw/i386/intel_iommu.h  |   3 +-
>>   3 files changed, 609 insertions(+), 163 deletions(-)
>>
>> --
>> 2.44.0


[PATCH ats_vtd v5 01/22] intel_iommu: fix FRCD construction macro.

2024-06-02 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

The constant must be unsigned, otherwise the two's complement
overrides the other fields when a PASID is present

Signed-off-by: Clément Mathieu--Drif 
---
 hw/i386/intel_iommu_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 7d0420e15d..1e37b98c65 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -272,7 +272,7 @@
 /* For the low 64-bit of 128-bit */
 #define VTD_FRCD_FI(val)((val) & ~0xfffULL)
 #define VTD_FRCD_PV(val)(((val) & 0xffffULL) << 40)
-#define VTD_FRCD_PP(val)(((val) & 0x1) << 31)
+#define VTD_FRCD_PP(val)(((val) & 0x1ULL) << 31)
 #define VTD_FRCD_IR_IDX(val)(((val) & 0xffffULL) << 48)
 
 /* DMA Remapping Fault Conditions */
-- 
2.45.1


[PATCH ats_vtd v5 19/22] memory: add an API for ATS support

2024-06-02 Thread CLEMENT MATHIEU--DRIF
From: Clément Mathieu--Drif 

An IOMMU has to implement iommu_ats_request_translation to support ATS.

Devices can use IOMMU_TLB_ENTRY_TRANSLATION_ERROR to check the tlb
entries returned by a translation request.

Signed-off-by: Clément Mathieu--Drif 
---
 include/exec/memory.h | 26 ++
 system/memory.c   | 20 
 2 files changed, 46 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 56ef48780f..0ced7c33b1 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -148,6 +148,10 @@ struct IOMMUTLBEntry {
 uint32_t pasid;
 };
 
+/* Check if an IOMMU TLB entry indicates a translation error */
+#define IOMMU_TLB_ENTRY_TRANSLATION_ERROR(entry) ((((entry)->perm) & IOMMU_RW) \
+== IOMMU_NONE)
+
 /*
  * Bitmap for different IOMMUNotifier capabilities. Each notifier can
  * register with one or multiple IOMMU Notifier capability bit(s).
@@ -571,6 +575,20 @@ struct IOMMUMemoryRegionClass {
  int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
   GList *iova_ranges,
   Error **errp);
+
+/**
+ * @iommu_ats_request_translation:
+ * This method must be implemented if the IOMMU has ATS enabled
+ *
+ * @see pci_ats_request_translation_pasid
+ */
+ssize_t (*iommu_ats_request_translation)(IOMMUMemoryRegion *iommu,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write,
+ IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count);
 };
 
 typedef struct RamDiscardListener RamDiscardListener;
@@ -1888,6 +1906,14 @@ void memory_region_iommu_replay(IOMMUMemoryRegion 
*iommu_mr, IOMMUNotifier *n);
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
  IOMMUNotifier *n);
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req, bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count);
+
 /**
  * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is
  * defined on the IOMMU.
diff --git a/system/memory.c b/system/memory.c
index 49f1cb2c38..d9d66ae2e1 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2005,6 +2005,26 @@ void 
memory_region_unregister_iommu_notifier(MemoryRegion *mr,
 memory_region_update_iommu_notify_flags(iommu_mr, NULL);
 }
 
+ssize_t memory_region_iommu_ats_request_translation(IOMMUMemoryRegion 
*iommu_mr,
+bool priv_req,
+bool exec_req,
+hwaddr addr, size_t length,
+bool no_write,
+IOMMUTLBEntry *result,
+size_t result_length,
+uint32_t *err_count)
+{
+IOMMUMemoryRegionClass *imrc = 
memory_region_get_iommu_class_nocheck(iommu_mr);
+
+if (!imrc->iommu_ats_request_translation) {
+return -ENODEV;
+}
+
+return imrc->iommu_ats_request_translation(iommu_mr, priv_req, exec_req,
+   addr, length, no_write, result,
+   result_length, err_count);
+}
+
 void memory_region_notify_iommu_one(IOMMUNotifier *notifier,
 IOMMUTLBEvent *event)
 {
-- 
2.45.1


  1   2   3   4   5   >