On Mon, Sep 20 2021 at 19:23, Fenghua Yu wrote:
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index c8def1b7f8fb..8a89b2cecd77 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -1289,3 +1289,62 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
>       return 0;
>  }
>  #endif /* CONFIG_PROC_PID_ARCH_STATUS */
> +
> +#ifdef CONFIG_INTEL_IOMMU_SVM
> +/**
> + * fpu__pasid_write - Write the current task's PASID state/MSR.
> + * @pasid:   the PASID
> + *
> + * The PASID is written to the IA32_PASID MSR directly if the MSR is active.
> + * Otherwise it's written to the PASID. The IA32_PASID MSR should contain

"written to the PASID"? What is 'the PASID'?

> + * the PASID after returning to the user.
> + *
> + * This is called only when ENQCMD is enabled.

Well, yes, but it does not explain why it is called and from which context.

> + */
> +void fpu__pasid_write(u32 pasid)
> +{
> +     struct xregs_state *xsave = &current->thread.fpu.state.xsave;
> +     u64 msr_val = pasid | MSR_IA32_PASID_VALID;
> +     struct fpu *fpu = &current->thread.fpu;
> +
> +     /*
> +      * ENQCMD always uses the compacted XSAVE format. Ensure the buffer
> +      * has space for the PASID.
> +      */
> +     BUG_ON(!(xsave->header.xcomp_bv & XFEATURE_MASK_PASID));
> +
> +     fpregs_lock();
> +
> +     /*
> +      * If the task's FPU doesn't need to be loaded or is valid, directly
> +      * write the IA32_PASID MSR. Otherwise, write the PASID state and
> +      * the MSR will be loaded from the PASID state before returning to
> +      * the user.
> +      */
> +     if (!test_thread_flag(TIF_NEED_FPU_LOAD) ||
> +         fpregs_state_valid(fpu, smp_processor_id())) {
> +             wrmsrl(MSR_IA32_PASID, msr_val);
> +     } else {
> +             struct ia32_pasid_state *ppasid_state;
> +             /*
> +              * Mark XFEATURE_PASID as non-init in the XSAVE buffer.
> +              * This ensures that a subsequent XRSTOR will see the new
> +              * value instead of writing the init value to the MSR.
> +              */

This and the above wrmsrl() assume that @pasid is valid, which might be
correct, but I don't see any information about the PASID lifetime. I assume
that there is no way to drop a PASID, right?

> +             xsave->header.xfeatures |= XFEATURE_MASK_PASID;
> +             ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
> +             /*
> +              * ppasid_state shouldn't be NULL because XFEATURE_PASID
> +              * was set just now.
> +              *
> +              * Please note that the following operation is a "write only"
> +              * operation on the PASID state and it writes the *ENTIRE*
> +              * state component. Please don't blindly copy this code to
> +              * modify other XSAVE states.
> +              */
> +             ppasid_state->pasid = msr_val;
> +     }
> +
> +     fpregs_unlock();
> +}
> +#endif /* CONFIG_INTEL_IOMMU_SVM */

> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index a58800973aed..a25d738ae839 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
>  
> +static bool fixup_pasid_exception(void)
> +{
> +     if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
> +             return false;
> +
> +     return __fixup_pasid_exception();
> +}

Ok, so here is the hook into #GP which then calls out into:

> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -1179,3 +1179,35 @@ int intel_svm_page_response(struct device *dev,
>       mutex_unlock(&pasid_mutex);
>       return ret;
>  }
> +
> +/*
> + * Try to figure out if there is a PASID MSR value to propagate to the
> + * thread taking the #GP.
> + */
> +bool __fixup_pasid_exception(void)
> +{
> +     u32 pasid;
> +
> +     /*
> +      * This function is called only when this #GP was triggered from user
> +      * space. So the mm cannot be NULL.
> +      */
> +     pasid = current->mm->pasid;
> +
> +     /* If no PASID is allocated, there is nothing to propagate. */
> +     if (pasid == PASID_DISABLED)
> +             return false;
> +
> +     /*
> +      * If the current task already has a valid PASID MSR, then the #GP
> +      * fault must be for some non-ENQCMD related reason.
> +      */
> +     if (current->has_valid_pasid)
> +             return false;
> +
> +     /* Fix up the MSR by the PASID in the mm. */
> +     fpu__pasid_write(pasid);
> +     current->has_valid_pasid = 1;
> +
> +     return true;
> +}

What is Intel SVM specific about this? Nothing at all.

If there is a valid PASID in current->mm and the MSR has not been
updated yet, write it. Otherwise bail.

Thanks,

        tglx


_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to