Il 14/06/2013 15:58, Chegu Vinod ha scritto:
If a user chooses to turn on the auto-converge migration capability
these changes detect the lack of convergence and throttle down the
guest. i.e. force the VCPUs out of the guest for some duration
and let the migration thread catchup and help converge.
Hi Vinod,
pretty much the same comments I sent you yesterday on the obsolete
version of the patch still apply.
Verified the convergence using the following :
- Java Warehouse workload running on a 20VCPU/256G guest(~80% busy)
- OLTP like workload running on a 80VCPU/512G guest (~80% busy)
Sample results with Java warehouse workload : (migrate speed set to 20Gb and
migrate downtime set to 4seconds).
(qemu) info migrate
capabilities: xbzrle: off auto-converge: off <----
Migration status: active
total time: 1487503 milliseconds
expected downtime: 519 milliseconds
transferred ram: 383749347 kbytes
remaining ram: 2753372 kbytes
total ram: 268444224 kbytes
duplicate: 65461532 pages
skipped: 64901568 pages
normal: 95750218 pages
normal bytes: 383000872 kbytes
dirty pages rate: 67551 pages
---
(qemu) info migrate
capabilities: xbzrle: off auto-converge: on <----
Migration status: completed
total time: 241161 milliseconds
downtime: 6373 milliseconds
transferred ram: 28235307 kbytes
remaining ram: 0 kbytes
total ram: 268444224 kbytes
duplicate: 64946416 pages
skipped: 64903523 pages
normal: 7044971 pages
normal bytes: 28179884 kbytes
Signed-off-by: Chegu Vinod <chegu_vi...@hp.com>
---
arch_init.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 85 insertions(+), 0 deletions(-)
diff --git a/arch_init.c b/arch_init.c
index 5d32ecf..69c6c8c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -104,6 +104,8 @@ int graphic_depth = 15;
#endif
const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static void throttle_down_guest_to_converge(void);
/***********************************************************/
/* ram save/restore */
@@ -378,8 +380,15 @@ static void migration_bitmap_sync(void)
uint64_t num_dirty_pages_init = migration_dirty_pages;
MigrationState *s = migrate_get_current();
static int64_t start_time;
+ static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
int64_t end_time;
+ int64_t bytes_xfer_now;
+ static int dirty_rate_high_cnt;
+
+ if (!bytes_xfer_prev) {
+ bytes_xfer_prev = ram_bytes_transferred();
+ }
if (!start_time) {
start_time = qemu_get_clock_ms(rt_clock);
@@ -404,6 +413,23 @@ static void migration_bitmap_sync(void)
/* more than 1 second = 1000 millisecons */
if (end_time > start_time + 1000) {
+ if (migrate_auto_converge()) {
+ /* The following detection logic can be refined later. For now:
+ Check to see if the dirtied bytes is 50% more than the approx.
+ amount of bytes that just got transferred since the last time we
+ were in this routine. If that happens >N times (for now N==4)
+ we turn on the throttle down logic */
+ bytes_xfer_now = ram_bytes_transferred();
+ if (s->dirty_pages_rate &&
+ ((num_dirty_pages_period*TARGET_PAGE_SIZE) >
+ ((bytes_xfer_now - bytes_xfer_prev)/2))) {
+ if (dirty_rate_high_cnt++ > 4) {
Too many parentheses, and please remove the nested if.
+ DPRINTF("Unable to converge. Throtting down guest\n");
Please use tracepoint instead.
+ mig_throttle_on = true;
Need to reset dirty_rate_high_cnt here, and both
dirty_rate_high_cnt/mig_throttle_on if you see !migrate_auto_converge().
This ensures that throttling does not kick in automatically if you
disable and re-enable the feature. It also lets you remove a bunch of
migrate_auto_converge() checks.
You also need to reset dirty_rate_high_cnt/mig_throttle_on in the setup
phase of migration.
+ }
+ }
+ bytes_xfer_prev = bytes_xfer_now;
+ }
s->dirty_pages_rate = num_dirty_pages_period * 1000
/ (end_time - start_time);
s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
@@ -628,6 +654,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
total_sent += bytes_sent;
acct_info.iterations++;
+ throttle_down_guest_to_converge();
You can use a shorter name, like check_cpu_throttling().
/* we want to check in the 1st loop, just in case it was the 1st time
and we had to sync the dirty bitmap.
qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -1098,3 +1125,61 @@ TargetInfo *qmp_query_target(Error **errp)
return info;
}
+
+static bool throttling_needed(void)
+{
+ if (!migrate_auto_converge()) {
+ return false;
+ }
+ return mig_throttle_on;
+}
+
+/* Stub function that's gets run on the vcpu when its brought out of the
+ VM to run inside qemu via async_run_on_cpu()*/
+static void mig_sleep_cpu(void *opq)
+{
+ qemu_mutex_unlock_iothread();
+ g_usleep(30*1000);
+ qemu_mutex_lock_iothread();
Letting the user specify the entity of the pause would be nice, so that
management can ramp it up. It can be done as a follow-up by adding a
'*value': 'int' field to MigrationCapabilityStatus (between 0 and 100,
default 30 as above).
Paolo
+}
+
+/* To reduce the dirty rate explicitly disallow the VCPUs from spending
+ much time in the VM. The migration thread will try to catchup.
+ Workload will experience a performance drop.
+*/
+static void mig_throttle_cpu_down(CPUState *cpu, void *data)
+{
+ async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+}
+
+static void mig_throttle_guest_down(void)
+{
+ if (throttling_needed()) {
No need for this "if", it is done already in the caller.
+ qemu_mutex_lock_iothread();
+ qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
+ qemu_mutex_unlock_iothread();
+ }
+}
+
+static void throttle_down_guest_to_converge(void)
+{
+ static int64_t t0;
+ int64_t t1;
+
+ if (!throttling_needed()) {
With the above suggested changes, this can simply check mig_throttle_on.
+ return;
+ }
+
+ if (!t0) {
+ t0 = qemu_get_clock_ns(rt_clock);
+ return;
+ }
+
+ t1 = qemu_get_clock_ns(rt_clock);
+
+ /* If it has been more than 40 ms since the last time the guest
+ * was throtled then do it again.
+ */
throttled
+ if (((t1-t0)/1000000) > 40) {
I prefer moving the multiplication to the right so you don't need
parentheses, but this is _really_ a nit...
Paolo
+ mig_throttle_guest_down();
+ t0 = t1;
+ }
+}
.