On 08/12/2025 08:43, Vlastimil Babka wrote:
> On 12/5/25 17:58, Kalyazin, Nikita wrote:
>> +static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
>> +{
>> +	int r = 0;
>> +	unsigned long addr = (unsigned long) folio_address(folio);
>> +	u64 gmem_flags = GMEM_I(folio_inode(folio))->flags;
>> +
>> +	if (kvm_gmem_folio_no_direct_map(folio) || !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
>> +		goto out;
>> +
>> +	r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
>> +					 false);
>> +
>> +	if (r)
>> +		goto out;
>> +
>> +	folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;
>
> With Dave's suggestion on patch 1/13 to have folio_zap_direct_map(),
> setting this folio->private flag wouldn't be possible between the zap
> and tlb flush, but it's not an issue to set it before the zap, right?
I can't see an issue with that; I've done it that way in v9.
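For reference, a minimal sketch of that ordering (not the actual v9
code; it assumes Dave's proposed folio_zap_direct_map() does the zap
and the TLB flush in one call, and that undoing the flag on failure is
just a matter of resetting folio->private):

static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
{
	u64 gmem_flags = GMEM_I(folio_inode(folio))->flags;
	int r;

	if (kvm_gmem_folio_no_direct_map(folio) ||
	    !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
		return 0;

	/*
	 * Set the flag before the zap: the combined helper leaves no
	 * window between the zap and the TLB flush to set it in.
	 */
	folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;

	/* Hypothetical signature for the suggested helper. */
	r = folio_zap_direct_map(folio);
	if (r)
		folio->private = NULL;

	return r;
}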
>> +	flush_tlb_kernel_range(addr, addr + folio_size(folio));
>> +
>> +out:
>> +	return r;
>> +}
>> +
>> +static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
>> +{
>> +	/*
>> +	 * Direct map restoration cannot fail, as the only error condition
>> +	 * for direct map manipulation is failure to allocate page tables
>> +	 * when splitting huge pages, but this split would have already
>> +	 * happened during the zap in kvm_gmem_folio_zap_direct_map().
>> +	 * Thus set_direct_map_valid_noflush() here only updates prot bits.
>> +	 */
>> +	if (kvm_gmem_folio_no_direct_map(folio))
>> +		set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
>> +					     true);
>
> I think you're missing here clearing KVM_GMEM_FOLIO_NO_DIRECT_MAP from
> folio->private, which means that if there's another
> kvm_gmem_folio_zap_direct_map() call on it in the future, it will do
> nothing?
You're very right, thanks. Fixed in v9.
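That is, something along these lines (a sketch, assuming clearing the
flag amounts to resetting folio->private to NULL):

static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
{
	if (!kvm_gmem_folio_no_direct_map(folio))
		return;

	/*
	 * Restoration cannot fail: the only error condition for direct
	 * map manipulation is failure to allocate page tables when
	 * splitting huge pages, and that split has already happened
	 * during the zap, so this call only updates prot bits.
	 */
	set_direct_map_valid_noflush(folio_page(folio, 0),
				     folio_nr_pages(folio), true);

	/* Clear the flag so a future zap of this folio is not a no-op. */
	folio->private = NULL;
}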
>> +}
>> +
>>  static inline void kvm_gmem_mark_prepared(struct folio *folio)
>>  {
>>  	folio_mark_uptodate(folio);
>> @@ -398,6 +444,7 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>>  	struct inode *inode = file_inode(vmf->vma->vm_file);
>>  	struct folio *folio;
>>  	vm_fault_t ret = VM_FAULT_LOCKED;
>> +	int err;
>>  
>>  	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
>>  		return VM_FAULT_SIGBUS;
>> @@ -423,6 +470,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>>  		kvm_gmem_mark_prepared(folio);
>>  	}
>>  
>> +	err = kvm_gmem_folio_zap_direct_map(folio);
>> +	if (err) {
>> +		ret = vmf_error(err);
>> +		goto out_folio;
>> +	}
>> +
>>  	vmf->page = folio_file_page(folio, vmf->pgoff);
>>  
>>  out_folio:
>> @@ -533,6 +586,8 @@ static void kvm_gmem_free_folio(struct folio *folio)
>>  		kvm_pfn_t pfn = page_to_pfn(page);
>>  		int order = folio_order(folio);
>>  
>> +		kvm_gmem_folio_restore_direct_map(folio);
>> +
>>  		kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
>>  	}
>> @@ -596,6 +651,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>>  	/* Unmovable mappings are supposed to be marked unevictable as well. */
>>  	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>>  
>> +	if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
>> +		mapping_set_no_direct_map(inode->i_mapping);
>> +
>>  	GMEM_I(inode)->flags = flags;
>>  
>>  	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops);
>> @@ -807,6 +865,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
>>  	if (!is_prepared)
>>  		r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
>>  
>> +	kvm_gmem_folio_zap_direct_map(folio);
>> +
>>  	folio_unlock(folio);
>>  
>>  	if (!r)
