On Tue, Aug 26, 2025, Binbin Wu wrote:
> 
> 
> On 8/21/2025 12:28 PM, Sagi Shahar wrote:
> > TDX guests' registers cannot be initialized directly using
> > vcpu_regs_set(), hence the stack pointer needs to be initialized by
> > the guest itself, running boot code beginning at the reset vector.
> > 
> > Expose the function to allocate the guest stack so that TDX
> > initialization code can allocate it itself and skip the allocation in
> > vm_arch_vcpu_add() in that case.
> > 
> > Signed-off-by: Sagi Shahar <sa...@google.com>
> > ---
> >   .../selftests/kvm/include/x86/processor.h       |  2 ++
> >   tools/testing/selftests/kvm/lib/x86/processor.c | 17 ++++++++++++-----
> >   2 files changed, 14 insertions(+), 5 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/kvm/include/x86/processor.h 
> > b/tools/testing/selftests/kvm/include/x86/processor.h
> > index 5c16507f9b2d..8fcc5118683e 100644
> > --- a/tools/testing/selftests/kvm/include/x86/processor.h
> > +++ b/tools/testing/selftests/kvm/include/x86/processor.h
> > @@ -1111,6 +1111,8 @@ static inline void vcpu_clear_cpuid_feature(struct 
> > kvm_vcpu *vcpu,
> >     vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
> >   }
> > +vm_vaddr_t kvm_allocate_vcpu_stack(struct kvm_vm *vm);
> > +
> >   uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
> >   int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t 
> > msr_value);
> > diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c 
> > b/tools/testing/selftests/kvm/lib/x86/processor.c
> > index b2a4b11ac8c0..1eae92957456 100644
> > --- a/tools/testing/selftests/kvm/lib/x86/processor.c
> > +++ b/tools/testing/selftests/kvm/lib/x86/processor.c
> > @@ -687,12 +687,9 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, 
> > void *guest_code)
> >     vcpu_regs_set(vcpu, &regs);
> >   }
> > -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
> > +vm_vaddr_t kvm_allocate_vcpu_stack(struct kvm_vm *vm)
> >   {
> > -   struct kvm_mp_state mp_state;
> > -   struct kvm_regs regs;
> >     vm_vaddr_t stack_vaddr;
> > -   struct kvm_vcpu *vcpu;
> >     stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
> >                                    DEFAULT_GUEST_STACK_VADDR_MIN,
> > @@ -713,6 +710,15 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, 
> > uint32_t vcpu_id)
> >                 "__vm_vaddr_alloc() did not provide a page-aligned 
> > address");
> >     stack_vaddr -= 8;
> > +   return stack_vaddr;
> > +}
> > +
> > +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
> > +{
> > +   struct kvm_mp_state mp_state;
> > +   struct kvm_regs regs;
> > +   struct kvm_vcpu *vcpu;
> > +
> >     vcpu = __vm_vcpu_add(vm, vcpu_id);
> >     vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
> >     vcpu_init_sregs(vm, vcpu);
> > @@ -721,7 +727,8 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, 
> > uint32_t vcpu_id)
> >     /* Setup guest general purpose registers */
> >     vcpu_regs_get(vcpu, &regs);
> >     regs.rflags = regs.rflags | 0x2;
> > -   regs.rsp = stack_vaddr;
> > +   if (vm->type != KVM_X86_TDX_VM)
> > +           regs.rsp = kvm_allocate_vcpu_stack(vm);
> 
> I am wondering if this could be more generic.
> I.e., make vcpu_regs_get() return the error code.

It would need to be a double-underscore variant, i.e. __vcpu_regs_get().  But
even then, I don't think it's worth getting that clever, because then to ensure
selftests aren't hitting KVM bugs, we'd want to assert that failure only occurs
for a TDX VM, i.e. we'd end up with:

        if (__vcpu_regs_get(vcpu, &regs)) {
                TEST_ASSERT(is_tdx_vm(vm), "blah blah blah");
        } else {

        }

which doesn't really "save" anything relative to Sagi's proposed version of:

        if (is_tdx_vm(vm)) {
                vm_tdx_vcpu_add(vm, vcpu);
        } else {
                vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());

                vcpu_init_sregs(vm, vcpu);
                vcpu_init_xcrs(vm, vcpu);

                /* Setup guest general purpose registers */
                vcpu_regs_get(vcpu, &regs);
                regs.rflags = regs.rflags | 0x2;
                regs.rsp = kvm_allocate_vcpu_stack(vm);
                vcpu_regs_set(vcpu, &regs);
        }

> If vcpu_regs_get() failed (for TDX, since its guest state is protected, the
> ioctl will return -EINVAL), the vcpu_regs_set(), including the allocation for
> the vcpu stack, could be skipped.
> 
> >     vcpu_regs_set(vcpu, &regs);
> >     /* Setup the MP state */
> 

Reply via email to