Add support for releasing memory pages before they are unplugged in TDX
guests. When memory is about to be unplugged by virtio-mem or another
memory hotplug driver, the TDX guest should release the pages back to the
hypervisor using the TDG.MEM.PAGE.RELEASE TDCALL. This makes the guest more
robust against buggy VMM behavior, e.g. a VMM that does nothing in response
to an unplug request.

The implementation detects TDG.MEM.PAGE.RELEASE support and optimizes
release operations by trying the larger page sizes (1G, then 2M) before
falling back to 4K pages. If a release fails, the function re-accepts the
pages it has already released to keep the memory state consistent.
Without a proper release, re-plugging memory in a TDX guest fails when the
guest accepts that memory again: the hypervisor may treat the unplug
request as a no-op, leaving the memory in the "accepted" state.

Signed-off-by: Zhenzhong Duan <[email protected]>
---
 arch/x86/include/asm/shared/tdx.h |   2 +
 arch/x86/coco/tdx/tdx.c           | 163 ++++++++++++++++++++++++++++++
 2 files changed, 165 insertions(+)

diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 049638e3da74..910ec1e57528 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -19,6 +19,7 @@
 #define TDG_MEM_PAGE_ACCEPT		6
 #define TDG_VM_RD			7
 #define TDG_VM_WR			8
+#define TDG_MEM_PAGE_RELEASE		30
 
 /* TDX TD attributes */
 #define TDX_TD_ATTR_DEBUG_BIT		0
@@ -54,6 +55,7 @@
 
 /* TDCS_CONFIG_FLAGS bits */
 #define TDCS_CONFIG_FLEXIBLE_PENDING_VE	BIT_ULL(1)
+#define TDCS_CONFIG_PAGE_RELEASE	BIT_ULL(6)
 
 /* TDCS_TD_CTLS bits */
 #define TD_CTLS_PENDING_VE_DISABLE_BIT	0
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index d93ba092d311..0abfb3505093 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -345,6 +345,167 @@ static int tdx_memory_post_plug(u64 addr, u64 size)
 	return -EINVAL;
 }
 
+/* Set in tdx_setup(); tdx_release_memory() does nothing while it is false. */
+static bool tdx_page_release_supported;
+
+/*
+ * Record whether TDCS_CONFIG_FLAGS advertises TDG.MEM.PAGE.RELEASE.
+ *
+ * The result of tdg_vm_rd() is not checked: @config starts out as 0, so if
+ * the read leaves it untouched the feature is treated as unsupported.
+ */
+static void detect_mem_page_release(void)
+{
+	u64 config = 0;
+
+	tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
+
+	tdx_page_release_supported = !!(config & TDCS_CONFIG_PAGE_RELEASE);
+}
+
+/*
+ * Try to release one page of size @pg_level starting at @start.
+ *
+ * Returns the number of bytes released, or 0 if @start is not aligned to
+ * that page size, @len is smaller than that page size, @pg_level is not
+ * 4K/2M/1G, or the TDCALL returns non-zero.
+ */
+static unsigned long try_release_one(phys_addr_t start, unsigned long len,
+				     enum pg_level pg_level)
+{
+	unsigned long release_size = page_level_size(pg_level);
+	struct tdx_module_args args = {};
+	u8 page_size;
+	u64 ret;
+
+	if (!IS_ALIGNED(start, release_size))
+		return 0;
+
+	if (len < release_size)
+		return 0;
+
+	/*
+	 * Pass the page physical address to TDX module to release the
+	 * private page and to put it in PENDING state.
+	 *
+	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+	 */
+	switch (pg_level) {
+	case PG_LEVEL_4K:
+		page_size = TDX_PS_4K;
+		break;
+	case PG_LEVEL_2M:
+		page_size = TDX_PS_2M;
+		break;
+	case PG_LEVEL_1G:
+		page_size = TDX_PS_1G;
+		break;
+	default:
+		return 0;
+	}
+
+	args.rcx = start | page_size;
+	ret = __tdcall(TDG_MEM_PAGE_RELEASE, &args);
+	if (ret)
+		return 0;
+
+	return release_size;
+}
+
+/*
+ * Release [@start, @end) one page at a time, advancing *@cur past every page
+ * that was released.
+ *
+ * Returns true once *@cur reaches @end. Returns false as soon as a page
+ * cannot be released at any size; *@cur is then the address of that page,
+ * so [@start, *@cur) is the part that was released.
+ */
+static bool _tdx_release_memory(phys_addr_t start, phys_addr_t end, phys_addr_t *cur)
+{
+	*cur = start;
+
+	while (*cur < end) {
+		unsigned long len = end - *cur;
+		unsigned long release_size;
+
+		/*
+		 * Try larger release first. It speeds up the process by
+		 * cutting the number of TDCALLs (if successful).
+		 */
+
+		release_size = try_release_one(*cur, len, PG_LEVEL_1G);
+		if (!release_size)
+			release_size = try_release_one(*cur, len, PG_LEVEL_2M);
+		if (!release_size)
+			release_size = try_release_one(*cur, len, PG_LEVEL_4K);
+		if (!release_size)
+			return false;
+		*cur += release_size;
+	}
+
+	return true;
+}
+
+/*
+ * Release memory pages back to the hypervisor in TDX guests.
+ *
+ * @start: Physical start address of memory range to release
+ * @end: Physical end address of memory range to release
+ *
+ * Uses TDG.MEM.PAGE.RELEASE TDCALL to transition private pages back to
+ * pending state. If PAGE_RELEASE is not supported by the TDX
+ * configuration, returns true (success) as no action is needed.
+ *
+ * On partial failure, automatically re-accepts any successfully released
+ * pages to restore consistent memory state. Re-acceptance failure is
+ * treated as a fatal error since it indicates severe TDX module issues.
+ *
+ * Returns: true on success, false on failure
+ */
+static bool tdx_release_memory(phys_addr_t start, phys_addr_t end)
+{
+	phys_addr_t released = start;
+	bool ret;
+
+	if (!tdx_page_release_supported)
+		return true;
+
+	ret = _tdx_release_memory(start, end, &released);
+	if (!ret) {
+		pr_err("Failed to release memory [%pa, %pa) at %pa\n",
+		       &start, &end, &released);
+
+		/*
+		 * Re-accept any pages that were successfully released before
+		 * the failure occurred. This should never fail since we're
+		 * just restoring the previous accepted state.
+		 */
+		if (!tdx_accept_memory(start, released))
+			panic("%s Failed to re-accept memory\n", __func__);
+	}
+
+	return ret;
+}
+
+/*
+ * Pre-unplug callback: release [@addr, @addr + @size) back to the hypervisor.
+ *
+ * Returns 0 on success, or -EINVAL if @addr or @size is not page aligned,
+ * @addr + @size overflows, or the release fails.
+ */
+static int tdx_memory_pre_unplug(u64 addr, u64 size)
+{
+	u64 end;
+
+	if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(size))
+		return -EINVAL;
+
+	if (check_add_overflow(addr, size, &end))
+		return -EINVAL;
+
+	return tdx_release_memory(addr, end) ? 0 : -EINVAL;
+}
+
 static void tdx_setup(u64 *cc_mask)
 {
 	struct tdx_module_args args = {};
@@ -380,6 +541,8 @@ static void tdx_setup(u64 *cc_mask)
 
 	reduce_unnecessary_ve();
 	set_memory_post_plug_callback(tdx_memory_post_plug);
+	detect_mem_page_release();
+	set_memory_pre_unplug_callback(tdx_memory_pre_unplug);
 }
 
 /*
-- 
2.52.0

