On Thu, Aug 18, 2022 at 03:04:51PM +0200, Jan Beulich wrote:
> From: Peter Zijlstra <pet...@infradead.org>
> 
> Having IBRS enabled while the SMT sibling is idle unnecessarily slows
> down the running sibling. OTOH, disabling IBRS around idle takes two
> MSR writes, which will increase the idle latency.
> 
> Therefore, only disable IBRS around deeper idle states. Shallow idle
> states are bounded by the tick in duration, since NOHZ is not allowed
> for them by virtue of their short target residency.
> 
> Only do this for mwait-driven idle, since that keeps interrupts disabled
> across idle, which makes disabling IBRS vs IRQ-entry a non-issue.
> 
> Note: C6 is a random threshold, most importantly C1 probably shouldn't
> disable IBRS, benchmarking needed.
> 
> Suggested-by: Tim Chen <tim.c.c...@linux.intel.com>
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> Signed-off-by: Borislav Petkov <b...@suse.de>
> Reviewed-by: Josh Poimboeuf <jpoim...@kernel.org>
> Signed-off-by: Borislav Petkov <b...@suse.de>
> Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
> bf5835bcdb96
> Signed-off-by: Jan Beulich <jbeul...@suse.com>

Acked-by: Roger Pau Monné <roger....@citrix.com>

One unrelated comment below.

> ---
> v3: New.
> 
> --- a/xen/arch/x86/cpu/mwait-idle.c
> +++ b/xen/arch/x86/cpu/mwait-idle.c
> @@ -141,6 +141,12 @@ static const struct cpuidle_state {
>  #define CPUIDLE_FLAG_TLB_FLUSHED     0x10000
>  
>  /*
> + * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
> + * above.
> + */
> +#define CPUIDLE_FLAG_IBRS            0x20000
> +
> +/*
>   * MWAIT takes an 8-bit "hint" in EAX "suggesting"
>   * the C-state (top nibble) and sub-state (bottom nibble)
>   * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
> @@ -530,31 +536,31 @@ static struct cpuidle_state __read_mostl
>       },
>       {
>               .name = "C6",
> -             .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
> +             .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 
> CPUIDLE_FLAG_IBRS,
>               .exit_latency = 85,
>               .target_residency = 200,
>       },
>       {
>               .name = "C7s",
> -             .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
> +             .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | 
> CPUIDLE_FLAG_IBRS,
>               .exit_latency = 124,
>               .target_residency = 800,
>       },
>       {
>               .name = "C8",
> -             .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
> +             .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | 
> CPUIDLE_FLAG_IBRS,
>               .exit_latency = 200,
>               .target_residency = 800,
>       },
>       {
>               .name = "C9",
> -             .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
> +             .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | 
> CPUIDLE_FLAG_IBRS,
>               .exit_latency = 480,
>               .target_residency = 5000,
>       },
>       {
>               .name = "C10",
> -             .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
> +             .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | 
> CPUIDLE_FLAG_IBRS,
>               .exit_latency = 890,
>               .target_residency = 5000,
>       },
> @@ -576,7 +582,7 @@ static struct cpuidle_state __read_mostl
>       },
>       {
>               .name = "C6",
> -             .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
> +             .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 
> CPUIDLE_FLAG_IBRS,
>               .exit_latency = 133,
>               .target_residency = 600,
>       },
> @@ -906,6 +912,7 @@ static const struct cpuidle_state snr_cs
>  static void cf_check mwait_idle(void)
>  {
>       unsigned int cpu = smp_processor_id();
> +     struct cpu_info *info = get_cpu_info();
>       struct acpi_processor_power *power = processor_powers[cpu];
>       struct acpi_processor_cx *cx = NULL;
>       unsigned int next_state;
> @@ -932,8 +939,6 @@ static void cf_check mwait_idle(void)
>                       pm_idle_save();
>               else
>               {
> -                     struct cpu_info *info = get_cpu_info();
> -
>                       spec_ctrl_enter_idle(info);
>                       safe_halt();
>                       spec_ctrl_exit_idle(info);

Do we need to disable speculation just for the hlt if there's no
C state change?

It would seem to me like the MSR writes could add a lot of latency
when there's no C state change.

Maybe I'm confused.

Thanks, Roger.

Reply via email to