IOMMU HW now supports updating a dirty bit in an entry when a DMA writes to the entry's VA range. iommufd has a uAPI to read and clear the dirty bits from the tables.
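For background, a consumer such as iommufd reaches this operation through
the domain's dirty ops. A rough sketch of such a caller, loosely modeled on
iommufd's io_pagetable.c and using the existing iommu_dirty_bitmap_init()
and iommu_iotlb_sync() helpers (example_harvest_dirty() is an illustration
name, not something this patch adds):

	/* Read dirty bits for [iova, iova + size) into @bitmap */
	static int example_harvest_dirty(struct iommu_domain *domain,
					 struct iova_bitmap *bitmap,
					 unsigned long iova, size_t size)
	{
		struct iommu_iotlb_gather gather;
		struct iommu_dirty_bitmap dirty;
		int ret;

		iommu_iotlb_gather_init(&gather);
		iommu_dirty_bitmap_init(&dirty, bitmap, &gather);
		/* flags == 0: clear the dirty bits as they are read */
		ret = domain->dirty_ops->read_and_clear_dirty(domain, iova,
							      size, 0, &dirty);
		iommu_iotlb_sync(domain, &gather);
		return ret;
	}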
This is a trivial recursive descent algorithm to read and optionally clear
the dirty bits. The format needs a function to tell if a contiguous entry
is dirty, and a function to clear a contiguous entry back to clean.

Signed-off-by: Jason Gunthorpe <j...@nvidia.com>
---
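For reference, the per-format contract is just the two hooks named above. A
hypothetical format whose 64-bit PTEs keep the HW-set dirty flag in, say,
bit 6 might implement them along these lines. MYFMT_PTE_DIRTY and
myfmt_pte_of() are made-up illustration names, and a contiguous entry would
really need to check all of its member PTEs rather than just the first:

	#define MYFMT_PTE_DIRTY BIT_ULL(6)

	/* True if HW dirtied this entry since it was last cleaned */
	static inline bool pt_entry_write_is_dirty(const struct pt_state *pts)
	{
		return READ_ONCE(*myfmt_pte_of(pts)) & MYFMT_PTE_DIRTY;
	}

	/*
	 * Return the entry to clean. A real format may need an atomic
	 * RMW here so a concurrent HW update of other bits is not lost.
	 */
	static inline void pt_entry_set_write_clean(struct pt_state *pts)
	{
		u64 *ptep = myfmt_pte_of(pts);

		WRITE_ONCE(*ptep, READ_ONCE(*ptep) & ~MYFMT_PTE_DIRTY);
	}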
 drivers/iommu/generic_pt/iommu_pt.h | 103 ++++++++++++++++++++++++++++
 include/linux/generic_pt/iommu.h    |   6 ++
 2 files changed, 109 insertions(+)

diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 7a680017f35bff..c3ea0f7de0d190 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -161,6 +161,107 @@ phys_addr_t DOMAIN_NS(iova_to_phys)(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(iova_to_phys), "GENERIC_PT_IOMMU");
 
+struct pt_iommu_dirty_args {
+	struct iommu_dirty_bitmap *dirty;
+	unsigned int flags;
+};
+
+static void record_dirty(struct pt_state *pts,
+			 struct pt_iommu_dirty_args *dirty,
+			 unsigned int num_contig_lg2)
+{
+	pt_vaddr_t dirty_len;
+
+	if (num_contig_lg2 != ilog2(1)) {
+		unsigned int index = pts->index;
+		unsigned int end_index = log2_set_mod_max_t(
+			unsigned int, pts->index, num_contig_lg2);
+
+		/* Adjust for being contained inside a contiguous page */
+		end_index = min(end_index, pts->end_index);
+		dirty_len = (end_index - index) *
+			    log2_to_int(pt_table_item_lg2sz(pts));
+	} else {
+		dirty_len = log2_to_int(pt_table_item_lg2sz(pts));
+	}
+
+	if (dirty->dirty->bitmap)
+		iova_bitmap_set(dirty->dirty->bitmap, pts->range->va,
+				dirty_len);
+
+	if (!(dirty->flags & IOMMU_DIRTY_NO_CLEAR)) {
+		pt_entry_set_write_clean(pts);
+		iommu_iotlb_gather_add_range(dirty->dirty->gather,
+					     pts->range->va, dirty_len);
+	}
+}
+
+static inline int __read_and_clear_dirty(struct pt_range *range, void *arg,
+					 unsigned int level,
+					 struct pt_table_p *table)
+{
+	struct pt_state pts = pt_init(range, level, table);
+	struct pt_iommu_dirty_args *dirty = arg;
+	int ret;
+
+	for_each_pt_level_entry(&pts) {
+		if (pts.type == PT_ENTRY_TABLE) {
+			ret = pt_descend(&pts, arg, __read_and_clear_dirty);
+			if (ret)
+				return ret;
+			continue;
+		}
+		if (pts.type == PT_ENTRY_OA && pt_entry_write_is_dirty(&pts))
+			record_dirty(&pts, dirty,
+				     pt_entry_num_contig_lg2(&pts));
+	}
+	return 0;
+}
+
+/**
+ * read_and_clear_dirty() - Manipulate the HW-set write dirty state
+ * @domain: Domain to manipulate
+ * @iova: IO virtual address to start
+ * @size: Length of the IOVA
+ * @flags: A bitmap of IOMMU_DIRTY_NO_CLEAR
+ *
+ * Iterate over all the entries in the mapped range and record their write dirty
+ * status in iommu_dirty_bitmap. If IOMMU_DIRTY_NO_CLEAR is specified then
+ * the entries will be left dirty, otherwise they are returned to being not
+ * write dirty.
+ *
+ * Context: The caller must hold a read range lock that includes @iova.
+ *
+ * Returns: -ERRNO on failure, 0 on success.
+ */
+int DOMAIN_NS(read_and_clear_dirty)(struct iommu_domain *domain,
+				    unsigned long iova, size_t size,
+				    unsigned long flags,
+				    struct iommu_dirty_bitmap *dirty)
+{
+	struct pt_iommu *iommu_table =
+		container_of(domain, struct pt_iommu, domain);
+	struct pt_iommu_dirty_args dirty_args = {
+		.dirty = dirty,
+		.flags = flags,
+	};
+	struct pt_range range;
+	int ret;
+
+#if !IS_ENABLED(CONFIG_IOMMUFD_DRIVER) || !defined(pt_entry_write_is_dirty)
+	return -EOPNOTSUPP;
+#endif
+
+	ret = make_range(common_from_iommu(iommu_table), &range, iova, size);
+	if (ret)
+		return ret;
+
+	ret = pt_walk_range(&range, __read_and_clear_dirty, &dirty_args);
+	PT_WARN_ON(ret);
+	return ret;
+}
+EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(read_and_clear_dirty), "GENERIC_PT_IOMMU");
+
 struct pt_iommu_collect_args {
 	struct iommu_pages_list free_list;
 	u8 ignore_mapped : 1;
@@ -1000,5 +1101,7 @@ EXPORT_SYMBOL_NS_GPL(pt_iommu_hw_info, "GENERIC_PT_IOMMU");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IOMMU Pagetable implementation for " __stringify(PTPFX_RAW));
 MODULE_IMPORT_NS("GENERIC_PT");
+/* For iova_bitmap_set() */
+MODULE_IMPORT_NS("IOMMUFD");
 #endif
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index fd0c598526ce57..aac565611cbffa 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -12,6 +12,7 @@ struct iommu_iotlb_gather;
 
 struct pt_iommu_ops;
 struct pt_iommu_flush_ops;
+struct iommu_dirty_bitmap;
 
 /**
  * DOC: IOMMU Radix Page Table
@@ -175,6 +176,9 @@ struct pt_iommu_cfg {
 		struct iommu_domain *domain, unsigned long iova,               \
 		size_t pgsize, size_t pgcount,                                 \
 		struct iommu_iotlb_gather *iotlb_gather);                      \
+	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
+		struct iommu_domain *domain, unsigned long iova, size_t size,  \
+		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
 	int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table,                \
 				  const struct pt_iommu_##fmt##_cfg *cfg,      \
 				  gfp_t gfp);                                  \
@@ -195,6 +199,8 @@ struct pt_iommu_cfg {
 	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys,                        \
 	.map_pages = &pt_iommu_##fmt##_map_pages,                              \
 	.unmap_pages = &pt_iommu_##fmt##_unmap_pages
+#define IOMMU_PT_DIRTY_OPS(fmt)                                                \
+	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
 
 /*
  * The driver should setup its domain struct like
-- 
2.43.0
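[For illustration only: a format driver built on this would typically wire
the generated function into its struct iommu_dirty_ops via the new
IOMMU_PT_DIRTY_OPS() helper, roughly:

	static const struct iommu_dirty_ops myfmt_dirty_ops = {
		IOMMU_PT_DIRTY_OPS(myfmt),
		/* Hypothetical driver callback to turn tracking on/off */
		.set_dirty_tracking = myfmt_set_dirty_tracking,
	};

"myfmt" stands in for a real format name and myfmt_set_dirty_tracking() is
a placeholder; struct iommu_dirty_ops and its set_dirty_tracking member
already exist in include/linux/iommu.h.]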