On 20/05/16 16:10, Julien Grall wrote:
> Hi Juergen,
> 
> On 20/05/16 14:22, Juergen Gross wrote:
>> A guest mapping vcpu_runstate_info into its memory can't read this
>> information from any cpu other than the one the data is referring to.
>> The reason is that there is no reliable way for the guest to detect a
>> concurrent data update by the hypervisor.
>>
>> This patch series adds an update flag to the mapped data which can be
>> used by the guest to detect an update is occurring. As this flag is
>> modifying the current interface it has to be activated by using a
>> vm_assist hypercall, which in turn has to be made available for ARM.
>>
>> Runtime tested on x86 with a modified Linux kernel using the new
>> feature.
>> Compile tested only for ARM.
> 
> I would like to give it a go on ARM. Would it be possible to provide the
> patch for Linux and explain how to test it?

Sure. You'll need the four attached patches (to be applied on top of
kernel 4.6). With CONFIG_PARAVIRT_TIME_ACCOUNTING set in the kernel
config, full functionality will be used (without it being set, the
runstate info of other cpus won't be read).

You can verify the vm_assist hypercall has worked via "xl debug-keys q"
and "xl dmesg | grep vm_assist" (value should be 00000020 on ARM).


Juergen

>From 689b4ba8c13be73ed51e485a7f7baea593d0ce6e Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Tue, 17 May 2016 14:03:02 +0200
Subject: [PATCH v4] xen: add steal_clock support on x86

The pv_time_ops structure contains a function pointer for the
"steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
uses its own mechanism to account for the "stolen" time a thread wasn't
able to run due to hypervisor scheduling.

Add support in Xen arch independent time handling for this feature by
moving it out of the arm arch into drivers/xen and remove the x86 Xen
hack.

Signed-off-by: Juergen Gross <jgr...@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrov...@oracle.com>
---
V4: minor adjustments as requested by Stefano Stabellini (remove
    no longer needed #include, remove __init from header)
V3: add #include <asm/paravirt.h> to avoid build error on arm
V2: remove the x86 do_stolen_accounting() hack
---
 arch/arm/xen/enlighten.c    | 18 ++----------------
 arch/x86/xen/time.c         | 44 ++------------------------------------------
 drivers/xen/time.c          | 20 ++++++++++++++++++++
 include/linux/kernel_stat.h |  1 -
 include/xen/xen-ops.h       |  1 +
 kernel/sched/cputime.c      | 10 ----------
 6 files changed, 25 insertions(+), 69 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 75cd734..71db30c 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -12,7 +12,6 @@
 #include <xen/page.h>
 #include <xen/interface/sched.h>
 #include <xen/xen-ops.h>
-#include <asm/paravirt.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 #include <asm/system_misc.h>
@@ -84,19 +83,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
 
-static unsigned long long xen_stolen_accounting(int cpu)
-{
-       struct vcpu_runstate_info state;
-
-       BUG_ON(cpu != smp_processor_id());
-
-       xen_get_runstate_snapshot(&state);
-
-       WARN_ON(state.state != RUNSTATE_running);
-
-       return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
-}
-
 static void xen_read_wallclock(struct timespec64 *ts)
 {
        u32 version;
@@ -355,8 +341,8 @@ static int __init xen_guest_init(void)
 
        register_cpu_notifier(&xen_cpu_notifier);
 
-       pv_time_ops.steal_clock = xen_stolen_accounting;
-       static_key_slow_inc(&paravirt_steal_enabled);
+       xen_time_setup_guest();
+
        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a0a4e55..6be31df 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -11,8 +11,6 @@
 #include <linux/interrupt.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-#include <linux/math64.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/pvclock_gtod.h>
@@ -31,44 +29,6 @@
 
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP     100000
-#define NS_PER_TICK    (1000000000LL / HZ)
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-
-/* unused ns of stolen time */
-static DEFINE_PER_CPU(u64, xen_residual_stolen);
-
-static void do_stolen_accounting(void)
-{
-       struct vcpu_runstate_info state;
-       struct vcpu_runstate_info *snap;
-       s64 runnable, offline, stolen;
-       cputime_t ticks;
-
-       xen_get_runstate_snapshot(&state);
-
-       WARN_ON(state.state != RUNSTATE_running);
-
-       snap = this_cpu_ptr(&xen_runstate_snapshot);
-
-       /* work out how much time the VCPU has not been runn*ing*  */
-       runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
-       offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
-       *snap = state;
-
-       /* Add the appropriate number of ticks of stolen time,
-          including any left-overs from last time. */
-       stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
-
-       if (stolen < 0)
-               stolen = 0;
-
-       ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
-       __this_cpu_write(xen_residual_stolen, stolen);
-       account_steal_ticks(ticks);
-}
 
 /* Get the TSC speed from Xen */
 static unsigned long xen_tsc_khz(void)
@@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
                ret = IRQ_HANDLED;
        }
 
-       do_stolen_accounting();
-
        return ret;
 }
 
@@ -431,6 +389,8 @@ static void __init xen_time_init(void)
        xen_setup_timer(cpu);
        xen_setup_cpu_clockevents();
 
+       xen_time_setup_guest();
+
        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 7107842..2257b66 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -6,6 +6,7 @@
 #include <linux/math64.h>
 #include <linux/gfp.h>
 
+#include <asm/paravirt.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu)
        return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
 }
 
+static u64 xen_steal_clock(int cpu)
+{
+       struct vcpu_runstate_info state;
+
+       BUG_ON(cpu != smp_processor_id());
+       xen_get_runstate_snapshot(&state);
+       return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
+}
+
 void xen_setup_runstate_info(int cpu)
 {
        struct vcpu_register_runstate_memory_area area;
@@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu)
                BUG();
 }
 
+void __init xen_time_setup_guest(void)
+{
+       pv_time_ops.steal_clock = xen_steal_clock;
+
+       static_key_slow_inc(&paravirt_steal_enabled);
+       /*
+        * We can't set paravirt_steal_rq_enabled as this would require the
+        * capability to read another cpu's runstate info.
+        */
+}
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 25a822f..44fda64 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user)
 extern void account_process_tick(struct task_struct *, int user);
 #endif
 
-extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 86abe07..77bf9d1 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb);
 
 bool xen_vcpu_stolen(int vcpu);
 void xen_setup_runstate_info(int cpu);
+void xen_time_setup_guest(void);
 void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
 
 int xen_setup_shutdown_event(void);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 75f98c5..8c4c6dc 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int user_tick)
 }
 
 /*
- * Account multiple ticks of steal time.
- * @p: the process from which the cpu time has been stolen
- * @ticks: number of stolen ticks
- */
-void account_steal_ticks(unsigned long ticks)
-{
-       account_steal_time(jiffies_to_cputime(ticks));
-}
-
-/*
  * Account multiple ticks of idle time.
  * @ticks: number of stolen ticks
  */
-- 
2.6.6

>From 4073bb301aed18981ec69c3cf5f0df4fae567d7c Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Fri, 20 May 2016 09:32:30 +0200
Subject: [PATCH 1/3] xen: update xen headers

Update some Xen headers to be able to use new functionality.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 include/xen/interface/vcpu.h | 24 +++++++++++++++---------
 include/xen/interface/xen.h  | 17 ++++++++++++++++-
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index b05288c..98188c8 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -75,15 +75,21 @@
  */
 #define VCPUOP_get_runstate_info        4
 struct vcpu_runstate_info {
-               /* VCPU's current state (RUNSTATE_*). */
-               int              state;
-               /* When was current state entered (system time, ns)? */
-               uint64_t state_entry_time;
-               /*
-                * Time spent in each RUNSTATE_* (ns). The sum of these times is
-                * guaranteed not to drift from system time.
-                */
-               uint64_t time[4];
+       /* VCPU's current state (RUNSTATE_*). */
+       int              state;
+       /* When was current state entered (system time, ns)? */
+       uint64_t state_entry_time;
+       /*
+        * Update indicator set in state_entry_time:
+        * When activated via VMASST_TYPE_runstate_update_flag, set during
+        * updates in guest memory mapped copy of vcpu_runstate_info.
+        */
+#define XEN_RUNSTATE_UPDATE    (1ULL << 63)
+       /*
+        * Time spent in each RUNSTATE_* (ns). The sum of these times is
+        * guaranteed not to drift from system time.
+        */
+       uint64_t time[4];
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
 
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index d133112..1b0d189 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -413,7 +413,22 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
 /* x86/PAE guests: support PDPTs above 4GB. */
 #define VMASST_TYPE_pae_extended_cr3     3
 
-#define MAX_VMASST_TYPE 3
+/*
+ * x86 guests: Sane behaviour for virtual iopl
+ *  - virtual iopl updated from do_iret() hypercalls.
+ *  - virtual iopl reported in bounce frames.
+ *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl   4
+
+/*
+ * All guests: activate update indicator in vcpu_runstate_info
+ * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped
+ * vcpu_runstate_info during updates of the runstate information.
+ */
+#define VMASST_TYPE_runstate_update_flag 5
+
+#define MAX_VMASST_TYPE 5
 
 #ifndef __ASSEMBLY__
 
-- 
2.6.6

>From ab457b88c03a66c6051ac022b51bc5c218f48842 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Fri, 20 May 2016 12:08:21 +0200
Subject: [PATCH 2/3] arm/xen: add support for vm_assist hypercall

Add support for the Xen HYPERVISOR_vm_assist hypercall.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 arch/arm/include/asm/xen/hypercall.h | 1 +
 arch/arm/xen/enlighten.c             | 1 +
 arch/arm/xen/hypercall.S             | 1 +
 arch/arm64/xen/hypercall.S           | 1 +
 4 files changed, 4 insertions(+)

diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h
index b6b962d..9d874db 100644
--- a/arch/arm/include/asm/xen/hypercall.h
+++ b/arch/arm/include/asm/xen/hypercall.h
@@ -52,6 +52,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
 int HYPERVISOR_physdev_op(int cmd, void *arg);
 int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
 int HYPERVISOR_tmem_op(void *arg);
+int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type);
 int HYPERVISOR_platform_op_raw(void *arg);
 static inline int HYPERVISOR_platform_op(struct xen_platform_op *op)
 {
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 71db30c..0f3aa12 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -389,4 +389,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
+EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
 EXPORT_SYMBOL_GPL(privcmd_call);
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
index 9a36f4f..a648dfc 100644
--- a/arch/arm/xen/hypercall.S
+++ b/arch/arm/xen/hypercall.S
@@ -91,6 +91,7 @@ HYPERCALL3(vcpu_op);
 HYPERCALL1(tmem_op);
 HYPERCALL1(platform_op_raw);
 HYPERCALL2(multicall);
+HYPERCALL2(vm_assist);
 
 ENTRY(privcmd_call)
        stmdb sp!, {r4}
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 70df80e..329c802 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op);
 HYPERCALL1(tmem_op);
 HYPERCALL1(platform_op_raw);
 HYPERCALL2(multicall);
+HYPERCALL2(vm_assist);
 
 ENTRY(privcmd_call)
        mov x16, x0
-- 
2.6.6

>From f27da1aba6c9c92add4f88b4dcec517e5e321caa Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Fri, 20 May 2016 12:25:58 +0200
Subject: [PATCH 3/3] xen: support runqueue steal time on xen

Up to now reading the stolen time of a remote cpu was not possible in a
performant way under Xen. This made support of runqueue steal time via
paravirt_steal_rq_enabled impossible.

With the addition of an appropriate hypervisor interface this is now
possible, so add the support.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 drivers/xen/time.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 2257b66..04b6cb7 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -19,6 +19,9 @@
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
 
+/* runstate info of remote cpu accessible */
+static bool xen_runstate_remote;
+
 /* return an consistent snapshot of 64-bit time/counter value */
 static u64 get64(const u64 *p)
 {
@@ -47,27 +50,31 @@ static u64 get64(const u64 *p)
        return ret;
 }
 
-/*
- * Runstate accounting
- */
-void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
+static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
+                                          unsigned cpu)
 {
        u64 state_time;
        struct vcpu_runstate_info *state;
 
        BUG_ON(preemptible());
 
-       state = this_cpu_ptr(&xen_runstate);
+       state = per_cpu_ptr(&xen_runstate, cpu);
 
-       /*
-        * The runstate info is always updated by the hypervisor on
-        * the current CPU, so there's no need to use anything
-        * stronger than a compiler barrier when fetching it.
-        */
        do {
                state_time = get64(&state->state_entry_time);
+               rmb();
                *res = READ_ONCE(*state);
-       } while (get64(&state->state_entry_time) != state_time);
+               rmb();
+       } while (get64(&state->state_entry_time) != state_time ||
+                (state_time & XEN_RUNSTATE_UPDATE));
+}
+
+/*
+ * Runstate accounting
+ */
+void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+       xen_get_runstate_snapshot_cpu(res, smp_processor_id());
 }
 
 /* return true when a vcpu could run but has no real cpu to run on */
@@ -80,8 +87,8 @@ static u64 xen_steal_clock(int cpu)
 {
        struct vcpu_runstate_info state;
 
-       BUG_ON(cpu != smp_processor_id());
-       xen_get_runstate_snapshot(&state);
+       BUG_ON(!xen_runstate_remote && cpu != smp_processor_id());
+       xen_get_runstate_snapshot_cpu(&state, cpu);
        return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
 }
 
@@ -98,11 +105,12 @@ void xen_setup_runstate_info(int cpu)
 
 void __init xen_time_setup_guest(void)
 {
+       xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
+                                       VMASST_TYPE_runstate_update_flag);
+
        pv_time_ops.steal_clock = xen_steal_clock;
 
        static_key_slow_inc(&paravirt_steal_enabled);
-       /*
-        * We can't set paravirt_steal_rq_enabled as this would require the
-        * capability to read another cpu's runstate info.
-        */
+       if (xen_runstate_remote)
+               static_key_slow_inc(&paravirt_steal_rq_enabled);
 }
-- 
2.6.6

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Reply via email to