Add page-selective IOTLB flush using HVCALL_FLUSH_DEVICE_DOMAIN_LIST. This hypercall accepts a list of (page_number, page_mask_shift) entries, enabling finer-grained IOTLB invalidation compared to the domain-wide HVCALL_FLUSH_DEVICE_DOMAIN used by hv_iommu_flush_iotlb_all().
hv_iommu_fill_iova_list() decomposes a contiguous IOVA range into a
minimal set of aligned power-of-two regions that fit in a single
hypercall input page. When the range exceeds the page capacity, the
code falls back to a full domain flush automatically.

Signed-off-by: Yu Zhang <[email protected]>
Signed-off-by: Easwar Hariharan <[email protected]>
---
 drivers/iommu/hyperv/iommu.c | 91 +++++++++++++++++++++++++++++++++++-
 include/hyperv/hvgdk_mini.h  |  1 +
 include/hyperv/hvhdk_mini.h  | 16 +++++++
 3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/hyperv/iommu.c b/drivers/iommu/hyperv/iommu.c
index e5fc625314b5..3bca362b7815 100644
--- a/drivers/iommu/hyperv/iommu.c
+++ b/drivers/iommu/hyperv/iommu.c
@@ -486,10 +486,98 @@ static void hv_iommu_flush_iotlb_all(struct iommu_domain *domain)
 	hv_flush_device_domain(to_hv_iommu_domain(domain));
 }
 
+/* Max number of iova_list entries in a single hypercall input page. */
+#define HV_IOMMU_MAX_FLUSH_VA_COUNT \
+	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_flush_device_domain_list)) / \
+	 sizeof(union hv_iommu_flush_va))
+
+/* Returned by hv_iommu_fill_iova_list() when the range exceeds the capacity */
+#define HV_IOMMU_FLUSH_VA_OVERFLOW U16_MAX
+
+static inline u16 hv_iommu_fill_iova_list(union hv_iommu_flush_va *iova_list,
+					  unsigned long start,
+					  unsigned long end)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = PAGE_ALIGN(end) >> PAGE_SHIFT;
+	unsigned long nr_pages = end_pfn - start_pfn;
+	u16 count = 0;
+
+	while (nr_pages > 0) {
+		unsigned long flush_pages;
+		int order;
+		unsigned long pfn_align;
+		unsigned long size_align;
+
+		if (count >= HV_IOMMU_MAX_FLUSH_VA_COUNT) {
+			count = HV_IOMMU_FLUSH_VA_OVERFLOW;
+			break;
+		}
+
+		if (start_pfn)
+			pfn_align = __ffs(start_pfn);
+		else
+			pfn_align = BITS_PER_LONG - 1;
+
+		size_align = __fls(nr_pages);
+		order = min(pfn_align, size_align);
+		iova_list[count].page_mask_shift = order;
+		iova_list[count].page_number = start_pfn;
+
+		flush_pages = 1UL << order;
+		start_pfn += flush_pages;
+		nr_pages -= flush_pages;
+		count++;
+	}
+
+	return count;
+}
+
+static void hv_flush_device_domain_list(struct hv_iommu_domain *hv_domain,
+					struct iommu_iotlb_gather *iotlb_gather)
+{
+	u64 status;
+	u16 count;
+	unsigned long flags;
+	struct hv_input_flush_device_domain_list *input;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+
+	input->device_domain = hv_domain->device_domain;
+	input->flags |= HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT;
+	count = hv_iommu_fill_iova_list(input->iova_list,
+					iotlb_gather->start,
+					iotlb_gather->end);
+	if (count == HV_IOMMU_FLUSH_VA_OVERFLOW) {
+		/*
+		 * Range exceeds hypercall page capacity. Fall back to a full
+		 * domain flush.
+		 */
+		struct hv_input_flush_device_domain *flush_all = (void *)input;
+
+		memset(flush_all, 0, sizeof(*flush_all));
+		flush_all->device_domain = hv_domain->device_domain;
+		status = hv_do_hypercall(HVCALL_FLUSH_DEVICE_DOMAIN,
+					 flush_all, NULL);
+	} else {
+		status = hv_do_rep_hypercall(
+				HVCALL_FLUSH_DEVICE_DOMAIN_LIST,
+				count, 0, input, NULL);
+	}
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("Hyper-V device domain flush failed, status %#llx\n", status);
+}
+
 static void hv_iommu_iotlb_sync(struct iommu_domain *domain,
 				struct iommu_iotlb_gather *iotlb_gather)
 {
-	hv_flush_device_domain(to_hv_iommu_domain(domain));
+	hv_flush_device_domain_list(to_hv_iommu_domain(domain), iotlb_gather);
 
 	iommu_put_pages_list(&iotlb_gather->freelist);
 }
@@ -543,6 +631,7 @@ static struct iommu_domain *hv_iommu_domain_alloc_paging(struct device *dev)
 	cfg.common.hw_max_vasz_lg2 = hv_iommu_device->max_iova_width;
 	cfg.common.hw_max_oasz_lg2 = 52;
+	cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
 	cfg.top_level = (hv_iommu_device->max_iova_width > 48) ?
 			4 : 3;
 	ret = pt_iommu_x86_64_init(&hv_domain->pt_iommu_x86_64, &cfg,
 				   GFP_KERNEL);
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 5bdbb44da112..eaaf87171478 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -496,6 +496,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
 #define HVCALL_GET_GPA_PAGES_ACCESS_STATES		0x00c9
 #define HVCALL_CONFIGURE_DEVICE_DOMAIN		0x00ce
 #define HVCALL_FLUSH_DEVICE_DOMAIN		0x00d0
+#define HVCALL_FLUSH_DEVICE_DOMAIN_LIST		0x00d1
 #define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d7
 #define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d8
 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY	0x00db
diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
index 493608e791b4..f51d5d9467f1 100644
--- a/include/hyperv/hvhdk_mini.h
+++ b/include/hyperv/hvhdk_mini.h
@@ -671,4 +671,20 @@ struct hv_input_flush_device_domain {
 	u32 reserved;
 } __packed;
 
+union hv_iommu_flush_va {
+	u64 iova;
+	struct {
+		u64 page_mask_shift : 12;
+		u64 page_number : 52;
+	};
+} __packed;
+
+struct hv_input_flush_device_domain_list {
+	struct hv_input_device_domain device_domain;
+#define HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT (1 << 0)
+	u32 flags;
+	u32 reserved;
+	union hv_iommu_flush_va iova_list[];
+} __packed;
+
 #endif /* _HV_HVHDK_MINI_H */
-- 
2.52.0

