date:20170327

[Qemu-devel] [PATCH 1/5] migration: set current_active_state once

2017-03-27 Thread Peter Xu

We set it right above this one. No need to set it twice.

CC: Juan Quintela 
CC: "Dr. David Alan Gilbert" 
Signed-off-by: Peter Xu 
---
 migration/migration.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index 54060f7..f9f4d98 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1938,7 +1938,6 @@ static void *migration_thread(void *opaque)
 qemu_savevm_state_begin(s->to_dst_file, &s->params);
 
 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
-current_active_state = MIGRATION_STATUS_ACTIVE;
 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
   MIGRATION_STATUS_ACTIVE);
 
-- 
2.7.4

[Qemu-devel] [PATCH 0/5] several migrations related patches

2017-03-27 Thread Peter Xu

Mostly small touch-ups, the last one looks like a cpu throttle bug on
time slice calculation (or I missed anything?). Anyway, please review,
thanks.

Peter Xu (5):
  migration: set current_active_state once
  migration: rename max_size to threshold_size
  hmp: info migrate_capability format tunes
  hmp: info migrate_parameters format tunes
  cpu: throttle: fix throttle time slice

 cpus.c  |  6 ++
 hmp.c   | 26 +++---
 include/migration/vmstate.h |  3 ++-
 migration/migration.c   | 18 +-
 migration/savevm.c  |  4 ++--
 5 files changed, 26 insertions(+), 31 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH 3/5] hmp: info migrate_capability format tunes

2017-03-27 Thread Peter Xu

Dump the info in a single line is hard to read. Do it one per line.
Also, the first "capabilities:" didn't help much. Let's remove it.

CC: "Dr. David Alan Gilbert" 
Signed-off-by: Peter Xu 
---
 hmp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hmp.c b/hmp.c
index edb8970..95eef8c 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,13 +265,11 @@ void hmp_info_migrate_capabilities(Monitor *mon, const 
QDict *qdict)
 caps = qmp_query_migrate_capabilities(NULL);
 
 if (caps) {
-monitor_printf(mon, "capabilities: ");
 for (cap = caps; cap; cap = cap->next) {
-monitor_printf(mon, "%s: %s ",
+monitor_printf(mon, "%s: %s\n",
MigrationCapability_lookup[cap->value->capability],
cap->value->state ? "on" : "off");
 }
-monitor_printf(mon, "\n");
 }
 
 qapi_free_MigrationCapabilityStatusList(caps);
-- 
2.7.4

[Qemu-devel] [PATCH 4/5] hmp: info migrate_parameters format tunes

2017-03-27 Thread Peter Xu

Do the same (one per line) to the parameter list.

CC: "Dr. David Alan Gilbert" 
Signed-off-by: Peter Xu 
---
 hmp.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/hmp.c b/hmp.c
index 95eef8c..b33e39e 100644
--- a/hmp.c
+++ b/hmp.c
@@ -282,46 +282,44 @@ void hmp_info_migrate_parameters(Monitor *mon, const 
QDict *qdict)
 params = qmp_query_migrate_parameters(NULL);
 
 if (params) {
-monitor_printf(mon, "parameters:");
 assert(params->has_compress_level);
-monitor_printf(mon, " %s: %" PRId64,
+monitor_printf(mon, "%s: %" PRId64 "\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_LEVEL],
 params->compress_level);
 assert(params->has_compress_threads);
-monitor_printf(mon, " %s: %" PRId64,
+monitor_printf(mon, "%s: %" PRId64 "\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_THREADS],
 params->compress_threads);
 assert(params->has_decompress_threads);
-monitor_printf(mon, " %s: %" PRId64,
+monitor_printf(mon, "%s: %" PRId64 "\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS],
 params->decompress_threads);
 assert(params->has_cpu_throttle_initial);
-monitor_printf(mon, " %s: %" PRId64,
+monitor_printf(mon, "%s: %" PRId64 "\n",
 
MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL],
 params->cpu_throttle_initial);
 assert(params->has_cpu_throttle_increment);
-monitor_printf(mon, " %s: %" PRId64,
+monitor_printf(mon, "%s: %" PRId64 "\n",
 
MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT],
 params->cpu_throttle_increment);
-monitor_printf(mon, " %s: '%s'",
+monitor_printf(mon, "%s: '%s'\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS],
 params->has_tls_creds ? params->tls_creds : "");
-monitor_printf(mon, " %s: '%s'",
+monitor_printf(mon, "%s: '%s'\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME],
 params->has_tls_hostname ? params->tls_hostname : "");
 assert(params->has_max_bandwidth);
-monitor_printf(mon, " %s: %" PRId64 " bytes/second",
+monitor_printf(mon, "%s: %" PRId64 " bytes/second\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH],
 params->max_bandwidth);
 assert(params->has_downtime_limit);
-monitor_printf(mon, " %s: %" PRId64 " milliseconds",
+monitor_printf(mon, "%s: %" PRId64 " milliseconds\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT],
 params->downtime_limit);
 assert(params->has_x_checkpoint_delay);
-monitor_printf(mon, " %s: %" PRId64,
+monitor_printf(mon, "%s: %" PRId64 "\n",
 MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY],
 params->x_checkpoint_delay);
-monitor_printf(mon, "\n");
 }
 
 qapi_free_MigrationParameters(params);
-- 
2.7.4

[Qemu-devel] [PATCH 2/5] migration: rename max_size to threshold_size

2017-03-27 Thread Peter Xu

In migration codes (especially in migration_thread()), max_size is used
in many place for the threshold value that we will start to do the final
flush and jump to the next stage to dump the whole rest things to
destination. However its name is confusing to first readers. Let's
rename it to "threshold_size" when proper and add a comment for it. No
functional change is made.

CC: Juan Quintela 
CC: "Dr. David Alan Gilbert" 
Signed-off-by: Peter Xu 
---
 include/migration/vmstate.h |  3 ++-
 migration/migration.c   | 17 +
 migration/savevm.c  |  4 ++--
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index f2dbf84..dad3984 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -56,7 +56,8 @@ typedef struct SaveVMHandlers {
 
 /* This runs outside the iothread lock!  */
 int (*save_live_setup)(QEMUFile *f, void *opaque);
-void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size,
+void (*save_live_pending)(QEMUFile *f, void *opaque,
+  uint64_t threshold_size,
   uint64_t *non_postcopiable_pending,
   uint64_t *postcopiable_pending);
 LoadStateHandler *load_state;
diff --git a/migration/migration.c b/migration/migration.c
index f9f4d98..b065fe4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1907,7 +1907,8 @@ static void *migration_thread(void *opaque)
 int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 int64_t initial_bytes = 0;
-int64_t max_size = 0;
+/* We'll do the final flush when reachs threshold_size */
+int64_t threshold_size = 0;
 int64_t start_time = initial_time;
 int64_t end_time;
 bool old_vm_running = false;
@@ -1951,17 +1952,17 @@ static void *migration_thread(void *opaque)
 if (!qemu_file_rate_limit(s->to_dst_file)) {
 uint64_t pend_post, pend_nonpost;
 
-qemu_savevm_state_pending(s->to_dst_file, max_size, &pend_nonpost,
-  &pend_post);
+qemu_savevm_state_pending(s->to_dst_file, threshold_size,
+  &pend_nonpost, &pend_post);
 pending_size = pend_nonpost + pend_post;
-trace_migrate_pending(pending_size, max_size,
+trace_migrate_pending(pending_size, threshold_size,
   pend_post, pend_nonpost);
-if (pending_size && pending_size >= max_size) {
+if (pending_size && pending_size >= threshold_size) {
 /* Still a significant amount to transfer */
 
 if (migrate_postcopy_ram() &&
 s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
-pend_nonpost <= max_size &&
+pend_nonpost <= threshold_size &&
 atomic_read(&s->start_postcopy)) {
 
 if (!postcopy_start(s, &old_vm_running)) {
@@ -1993,13 +1994,13 @@ static void *migration_thread(void *opaque)
  initial_bytes;
 uint64_t time_spent = current_time - initial_time;
 double bandwidth = (double)transferred_bytes / time_spent;
-max_size = bandwidth * s->parameters.downtime_limit;
+threshold_size = bandwidth * s->parameters.downtime_limit;
 
 s->mbps = (((double) transferred_bytes * 8.0) /
 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
 
 trace_migrate_transferred(transferred_bytes, time_spent,
-  bandwidth, max_size);
+  bandwidth, threshold_size);
 /* if we haven't sent anything, we don't want to recalculate
1 is a small enough number for our purposes */
 if (s->dirty_bytes_rate && transferred_bytes > 1) {
diff --git a/migration/savevm.c b/migration/savevm.c
index 3b19a4a..59c04eb 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1197,7 +1197,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool 
iterable_only)
  * the result is split into the amount for units that can and
  * for units that can't do postcopy.
  */
-void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
+void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
uint64_t *res_non_postcopiable,
uint64_t *res_postcopiable)
 {
@@ -1216,7 +1216,7 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t 
max_size,
 continue;
 }
 }
-se->ops->save_live_pending(f, se->opaque, max_size,
+se->ops->save_live_pending(f, se->opaque, threshold_size,
res_non_postcopiable, res_po

[Qemu-devel] [PATCH v2] KVM: pci-assign: do not map smm memory slot pages

2017-03-27 Thread Herongguang (Stephen)


From f6f0ee6831488bef7af841cb86f3d85a04848fe5 Mon Sep 17 00:00:00 2001
From: herongguang 
Date: Mon, 27 Mar 2017 15:08:59 +0800
Subject: [PATCH] KVM: pci-assign: do not map smm memory slot pages
 in vt-d page table

or VM memory are not put thus leaked in kvm_iommu_unmap_memslots() when
destroy VM.

This is consistent with current vfio implementation.
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482612b..9018d06 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1052,7 +1052,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 * changes) is disallowed above, so any other attribute changes getting
 * here can be skipped.
 */
-   if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+   if ((as_id == 0) && ((change == KVM_MR_CREATE) || (change == 
KVM_MR_MOVE))) {
r = kvm_iommu_map_pages(kvm, &new);
return r;
}
--
1.7.12.4

On 2017/3/25 19:14, herongguang wrote:

or pages are not unmaped and freed

Signed-off-by: herongguang 
---
  arch/x86/kvm/iommu.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

Well, do we should change pci-assign to not map SMM slots instead? Like vfio.

diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index b181426..5b931bb 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -320,15 +320,17 @@ void kvm_iommu_unmap_pages(struct kvm *kvm, struct 
kvm_memory_slot *slot)
  static int kvm_iommu_unmap_memslots(struct kvm *kvm)
  {
int idx;
-   struct kvm_memslots *slots;
+   struct kvm_memslots *slots, *smm_slots;
struct kvm_memory_slot *memslot;

idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
-
kvm_for_each_memslot(memslot, slots)
kvm_iommu_unmap_pages(kvm, memslot);

+   smm_slots = __kvm_memslots(kvm, 1);
+   kvm_for_each_memslot(memslot, smm_slots)
+   kvm_iommu_unmap_pages(kvm, memslot);
srcu_read_unlock(&kvm->srcu, idx);

if (kvm->arch.iommu_noncoherent)

[Qemu-devel] [PATCH 5/5] cpu: throttle: fix throttle time slice

2017-03-27 Thread Peter Xu

IIUC the throttle idea is that: we split a CPU_THROTTLE_TIMESLICE_NS
time slice into two parts - one for vcpu, one for throttle thread (which
will suspend the thread by a sleep). However current algorithm on
calculating the working piece and sleeping piece is strange.

Assume a 99% throttle, what we want is to merely stop vcpu from running,
but the old logic will just first let the vcpu run for a very long
time (which is "CPU_THROTTLE_TIMESLICE_NS / (1-pct)" = 1 second) before
doing anything else.

Fixing it up to the simplest but imho accurate logic.

CC: Paolo Bonzini 
CC: Richard Henderson 
CC: Jason J. Herne 
Signed-off-by: Peter Xu 
---
 cpus.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cpus.c b/cpus.c
index 167d961..7976ce4 100644
--- a/cpus.c
+++ b/cpus.c
@@ -633,7 +633,6 @@ static const VMStateDescription vmstate_timers = {
 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 {
 double pct;
-double throttle_ratio;
 long sleeptime_ns;
 
 if (!cpu_throttle_get_percentage()) {
@@ -641,8 +640,7 @@ static void cpu_throttle_thread(CPUState *cpu, 
run_on_cpu_data opaque)
 }
 
 pct = (double)cpu_throttle_get_percentage()/100;
-throttle_ratio = pct / (1 - pct);
-sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
+sleeptime_ns = (long)((1 - pct) * CPU_THROTTLE_TIMESLICE_NS);
 
 qemu_mutex_unlock_iothread();
 atomic_set(&cpu->throttle_thread_scheduled, 0);
@@ -668,7 +666,7 @@ static void cpu_throttle_timer_tick(void *opaque)
 
 pct = (double)cpu_throttle_get_percentage()/100;
 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
-   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
+   CPU_THROTTLE_TIMESLICE_NS * pct);
 }
 
 void cpu_throttle_set(int new_throttle_pct)
-- 
2.7.4

Re: [Qemu-devel] [PATCH 04/51] ram: Add dirty_rate_high_cnt to RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:44:57PM +0100, Juan Quintela wrote:
> We need to add a parameter to several functions to make this work.
> 
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [PATCH 05/51] ram: Move bitmap_sync_count into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:44:58PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

(I see that we have MigrationStats.dirty_pages_rate which looks
 similar to this one. Maybe one day we can merge these two?)

> ---
>  migration/ram.c | 23 ---
>  1 file changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 1d5bf22..f811e81 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -45,8 +45,6 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  
> -static uint64_t bitmap_sync_count;
> -
>  /***/
>  /* ram save/restore */
>  
> @@ -154,6 +152,8 @@ struct RAMState {
>  bool ram_bulk_stage;
>  /* How many times we have dirty too many pages */
>  int dirty_rate_high_cnt;
> +/* How many times we have synchronized the bitmap */
> +uint64_t bitmap_sync_count;
>  };
>  typedef struct RAMState RAMState;
>  
> @@ -471,7 +471,7 @@ static void xbzrle_cache_zero_page(RAMState *rs, 
> ram_addr_t current_addr)
>  /* We don't care if this fails to allocate a new cache page
>   * as long as it updated an old one */
>  cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
> - bitmap_sync_count);
> + rs->bitmap_sync_count);
>  }
>  
>  #define ENCODING_FLAG_XBZRLE 0x1
> @@ -483,6 +483,7 @@ static void xbzrle_cache_zero_page(RAMState *rs, 
> ram_addr_t current_addr)
>   *  0 means that page is identical to the one already sent
>   *  -1 means that xbzrle would be longer than normal
>   *
> + * @rs: current RAM state
>   * @f: QEMUFile where to send the data
>   * @current_data: contents of the page
>   * @current_addr: addr of the page
> @@ -491,7 +492,7 @@ static void xbzrle_cache_zero_page(RAMState *rs, 
> ram_addr_t current_addr)
>   * @last_stage: if we are at the completion stage
>   * @bytes_transferred: increase it with the number of transferred bytes
>   */
> -static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
> +static int save_xbzrle_page(RAMState *rs, QEMUFile *f, uint8_t 
> **current_data,
>  ram_addr_t current_addr, RAMBlock *block,
>  ram_addr_t offset, bool last_stage,
>  uint64_t *bytes_transferred)
> @@ -499,11 +500,11 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t 
> **current_data,
>  int encoded_len = 0, bytes_xbzrle;
>  uint8_t *prev_cached_page;
>  
> -if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
> +if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) 
> {
>  acct_info.xbzrle_cache_miss++;
>  if (!last_stage) {
>  if (cache_insert(XBZRLE.cache, current_addr, *current_data,
> - bitmap_sync_count) == -1) {
> + rs->bitmap_sync_count) == -1) {
>  return -1;
>  } else {
>  /* update *current_data when the page has been
> @@ -658,7 +659,7 @@ static void migration_bitmap_sync(RAMState *rs)
>  int64_t end_time;
>  int64_t bytes_xfer_now;
>  
> -bitmap_sync_count++;
> +rs->bitmap_sync_count++;
>  
>  if (!bytes_xfer_prev) {
>  bytes_xfer_prev = ram_bytes_transferred();
> @@ -720,9 +721,9 @@ static void migration_bitmap_sync(RAMState *rs)
>  start_time = end_time;
>  num_dirty_pages_period = 0;
>  }
> -s->dirty_sync_count = bitmap_sync_count;
> +s->dirty_sync_count = rs->bitmap_sync_count;
>  if (migrate_use_events()) {
> -qapi_event_send_migration_pass(bitmap_sync_count, NULL);
> +qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
>  }
>  }
>  
> @@ -829,7 +830,7 @@ static int ram_save_page(RAMState *rs, MigrationState 
> *ms, QEMUFile *f,
>  ram_release_pages(ms, block->idstr, pss->offset, pages);
>  } else if (!rs->ram_bulk_stage &&
> !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
> -pages = save_xbzrle_page(f, &p, current_addr, block,
> +pages = save_xbzrle_page(rs, f, &p, current_addr, block,
>   offset, last_stage, bytes_transferred);
>  if (!last_stage) {
>  /* Can't send this cached data async, since the cache page
> @@ -1998,7 +1999,7 @@ static int ram_save_init_globals(RAMState *rs)
>  int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
>  
>  rs->dirty_rate_high_cnt = 0;
> -bitmap_sync_count = 0;
> +rs->bitmap_sync_count = 0;
>  migration_bitmap_sync_init();
>  qemu_mutex_init(&migration_bitmap_mutex);
>  
> -- 
> 2.9.3
> 
> 

-- peterx

Re: [Qemu-devel] [PATCH 5/5] cpu: throttle: fix throttle time slice

2017-03-27 Thread Peter Xu

On Mon, Mar 27, 2017 at 03:21:28PM +0800, Peter Xu wrote:
> IIUC the throttle idea is that: we split a CPU_THROTTLE_TIMESLICE_NS
> time slice into two parts - one for vcpu, one for throttle thread (which
> will suspend the thread by a sleep). However current algorithm on
> calculating the working piece and sleeping piece is strange.
> 
> Assume a 99% throttle, what we want is to merely stop vcpu from running,
> but the old logic will just first let the vcpu run for a very long
> time (which is "CPU_THROTTLE_TIMESLICE_NS / (1-pct)" = 1 second) before
> doing anything else.
> 
> Fixing it up to the simplest but imho accurate logic.

Oh, looks like I need to switch the two pct below... :)

> 
> CC: Paolo Bonzini 
> CC: Richard Henderson 
> CC: Jason J. Herne 
> Signed-off-by: Peter Xu 
> ---
>  cpus.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/cpus.c b/cpus.c
> index 167d961..7976ce4 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -633,7 +633,6 @@ static const VMStateDescription vmstate_timers = {
>  static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>  {
>  double pct;
> -double throttle_ratio;
>  long sleeptime_ns;
>  
>  if (!cpu_throttle_get_percentage()) {
> @@ -641,8 +640,7 @@ static void cpu_throttle_thread(CPUState *cpu, 
> run_on_cpu_data opaque)
>  }
>  
>  pct = (double)cpu_throttle_get_percentage()/100;
> -throttle_ratio = pct / (1 - pct);
> -sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
> +sleeptime_ns = (long)((1 - pct) * CPU_THROTTLE_TIMESLICE_NS);
 ^
 here it should be "pct", while...

>  
>  qemu_mutex_unlock_iothread();
>  atomic_set(&cpu->throttle_thread_scheduled, 0);
> @@ -668,7 +666,7 @@ static void cpu_throttle_timer_tick(void *opaque)
>  
>  pct = (double)cpu_throttle_get_percentage()/100;
>  timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
> -   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
> +   CPU_THROTTLE_TIMESLICE_NS * pct);
  ^^^
here it should be (1 - pct)

I'll wait for review comment on the raw idea, to see whether I will
need a repost. Sorry for the misunderstanding.

-- peterx

[Qemu-devel] [PATCH v2] configure: use pkg-config for obtaining xen version

2017-03-27 Thread Juergen Gross

Instead of trying to guess the Xen version to use by compiling various
test programs first just ask the system via pkg-config. Only if it
can't return the version fall back to the test program scheme.

If configure is being called with dedicated flags for the Xen libraries
use those instead of the pkg-config output. This will avoid breaking
an in-tree Xen build of an old Xen version while a new Xen version is
installed on the build machine: pkg-config would pick up the installed
Xen config files as the Xen tree wouldn't contain any of them.

Signed-off-by: Juergen Gross 
---
V2: - use pkg-config only if no Xen library paths have been specified via
  --extra-ldflags
- keep test program for detecting Xen 4.9
---
 configure | 155 ++
 1 file changed, 86 insertions(+), 69 deletions(-)

diff --git a/configure b/configure
index aabf098..c34b025 100755
--- a/configure
+++ b/configure
@@ -1962,30 +1962,46 @@ fi
 # xen probe
 
 if test "$xen" != "no" ; then
-  xen_libs="-lxenstore -lxenctrl -lxenguest"
-  xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
+  # Check whether Xen library path is specified via --extra-ldflags to avoid
+  # overriding this setting with pkg-config output. If not, try pkg-config
+  # to obtain all needed flags.
 
-  # First we test whether Xen headers and libraries are available.
-  # If no, we are done and there is no Xen support.
-  # If yes, more tests are run to detect the Xen version.
+  if ! echo $EXTRA_LDFLAGS | grep tools/libxc > /dev/null && \
+ $pkg_config --exists xencontrol ; then
+xen_ctrl_version="$(printf '%d%02d%02d' \
+  $($pkg_config --modversion xencontrol | sed 's/\./ /g') )"
+xen=yes
+xen_pc="xencontrol xenstore xenguest xenforeignmemory xengnttab"
+xen_pc="$xen_pc xenevtchn xendevicemodel"
+QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags $xen_pc)"
+libs_softmmu="$($pkg_config --libs $xen_pc) $libs_softmmu"
+LDFLAGS="$($pkg_config --libs $xen_pc) $LDFLAGS"
+  else
 
-  # Xen (any)
-  cat > $TMPC < $TMPC <
 int main(void) {
   return 0;
 }
 EOF
-  if ! compile_prog "" "$xen_libs" ; then
-# Xen not found
-if test "$xen" = "yes" ; then
-  feature_not_found "xen" "Install xen devel"
-fi
-xen=no
+if ! compile_prog "" "$xen_libs" ; then
+  # Xen not found
+  if test "$xen" = "yes" ; then
+feature_not_found "xen" "Install xen devel"
+  fi
+  xen=no
 
-  # Xen unstable
-  elif
-  cat > $TMPC < $TMPC <
@@ -1998,13 +2014,13 @@ int main(void) {
   return 0;
 }
 EOF
-  compile_prog "" "$xen_libs $xen_stable_libs -lxendevicemodel"
-then
-xen_stable_libs="$xen_stable_libs -lxendevicemodel"
-xen_ctrl_version=40900
-xen=yes
-  elif
-  cat > $TMPC < $TMPC < $TMPC < $TMPC < $TMPC < $TMPC <
 #include 
 int main(void) {
@@ -2120,14 +2136,14 @@ int main(void) {
   return 0;
 }
 EOF
-  compile_prog "" "$xen_libs"
-then
-xen_ctrl_version=40700
-xen=yes
+compile_prog "" "$xen_libs"
+  then
+  xen_ctrl_version=40700
+  xen=yes
 
-  # Xen 4.6
-  elif
-  cat > $TMPC < $TMPC <
 #include 
 #include 
@@ -2148,14 +2164,14 @@ int main(void) {
   return 0;
 }
 EOF
-  compile_prog "" "$xen_libs"
-then
-xen_ctrl_version=40600
-xen=yes
+compile_prog "" "$xen_libs"
+  then
+  xen_ctrl_version=40600
+  xen=yes
 
-  # Xen 4.5
-  elif
-  cat > $TMPC < $TMPC <
 #include 
 #include 
@@ -2175,13 +2191,13 @@ int main(void) {
   return 0;
 }
 EOF
-  compile_prog "" "$xen_libs"
-then
-xen_ctrl_version=40500
-xen=yes
+compile_prog "" "$xen_libs"
+  then
+  xen_ctrl_version=40500
+  xen=yes
 
-  elif
-  cat > $TMPC < $TMPC <
 #include 
 #include 
@@ -2200,24 +2216,25 @@ int main(void) {
   return 0;
 }
 EOF
-  compile_prog "" "$xen_libs"
-then
-xen_ctrl_version=40200
-xen=yes
+compile_prog "" "$xen_libs"
+  then
+  xen_ctrl_version=40200
+  xen=yes
 
-  else
-if test "$xen" = "yes" ; then
-  feature_not_found "xen (unsupported version)" \
-"Install a supported xen (xen 4.2 or newer)"
+else
+  if test "$xen" = "yes" ; then
+feature_not_found "xen (unsupported version)" \
+  "Install a supported xen (xen 4.2 or newer)"
+  fi
+  xen=no
 fi
-xen=no
-  fi
 
-  if test "$xen" = yes; then
-if test $xen_ctrl_version -ge 40701  ; then
-  libs_softmmu="$xen_stable_libs $libs_softmmu"
+if test "$xen" = yes; then
+  if test $xen_ctrl_version -ge 40701  ; then
+libs_softmmu="$xen_stable_libs $libs_softmmu"
+  fi
+  libs_softmmu="$xen_libs $libs_softmmu"
 fi
-libs_softmmu="$xen_libs $libs_softmmu"
   fi
 fi
 
-- 
2.10.2

Re: [Qemu-devel] [PATCH 06/51] ram: Move start time into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:44:59PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 
> ---
>  migration/ram.c | 20 +++-
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index f811e81..5881805 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -154,6 +154,9 @@ struct RAMState {
>  int dirty_rate_high_cnt;
>  /* How many times we have synchronized the bitmap */
>  uint64_t bitmap_sync_count;
> +/* this variables are used for bitmap sync */

s/this/These/?

> +/* last time we did a full bitmap_sync */
> +int64_t start_time;

Not sure whether it'll be a good chance we rename this variable in
this patch to make it a less-generic name, like: bm_sync_start? But
again, this is nicpicking and totally optional.

With the typo fixed, please add:

Reviewed-by: Peter Xu 

Thanks,

-- peterx

Re: [Qemu-devel] q35 and sysbus devices

2017-03-27 Thread Thomas Huth

On 24.03.2017 18:59, Markus Armbruster wrote:
> Peter Maydell  writes:
> 
>> On 24 March 2017 at 16:58, Markus Armbruster  wrote:
>>> "Sysbus" isn't a bus.  In qdev's original design, every device had to
>>> plug into a bus, period.  The ones that really didn't were made to plug
>>> into "sysbus".
>>>
>>> Pretty much the only thing "sysbus" devices had in common was that they
>>> couldn't be used with device_add and device_del.
>>
>> This isn't really true. Sysbus devices support having MMIO regions
>> and IRQ lines and GPIO lines. If you need those you're a
>> sysbus device; otherwise you can probably just be a plain old Device.
> 
> Well, "device has MMIO regions, IRQ lines and GPIO lines" is about as
> "device contains virtual silicon".  What would a device without any of
> these *do*?

On ppc64, we've got for a example pseudo-devices that takes care of
certain hypercalls, e.g. the spapr-rng or the spapr-rtc devices (well,
the latter is still a TYPE_SYS_BUS_DEVICE ... we should convert it to
TYPE_DEVICE instead one day...). These devices do not have any MMIO, IRQ
or GPIO lines at all, so I think the differentiation between a very
generic TYPE_DEVICE class and a "SYS_BUS_DEVICE" (with MMIO/IRQ/GPIO)
class still makes sense.

 Thomas

Re: [Qemu-devel] [PATCH RESEND] xen: limit pkg-config to PKG_CONFIG_PATH for xen libraries

2017-03-27 Thread Paul Durrant

> -Original Message-
[snip]
> 
> To sum it up we have to care about the following scenarios:
> 
> a) Xen in-tree build, Xen >= 4.9
> b) Xen in-tree build, Xen < 4.9
> c) build out-of-Xen-tree
> 
> combined with any of:
> 
> 1) no Xen installed on build machine
> 2) Xen >= 4.9 installed
> 3) Xen < 4.9 installed
> 
> 1) + c) is not supported, all other combinations should work.
> 

Good summary.

> I suggest the following modifications of my patch:
> 
> - keep the test program for detection of Xen 4.9
> - scan the extra ldflags for special mentioning of tools/libxc
>   to detect Xen in-tree build < 4.9 (will detect 4.9, too, but
>   this is no problem), set xen_in_tree_old if found
> - if xen_in_tree_old found, don't use pkg-config for Xen version
>   detection and flags
> - otherwise try pkg-config first
> 
> This should cover all cases we need.
> 

That sounds plausible.

Thanks,

  Paul

> 
> Juergen

Re: [Qemu-devel] [PATCH 07/51] ram: Move bytes_xfer_prev into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:00PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] What's the next QEMU version after 2.9 ? (or: when is a good point in time to get rid of old interfaces)

2017-03-27 Thread Thomas Huth

On 24.03.2017 23:10, John Snow wrote:
> 
> 
> On 03/08/2017 03:26 AM, Thomas Huth wrote:
>>
>>  Hi everybody,
>>
>> what will be the next version of QEMU after 2.9? Will we go for a 2.10
>> (as I've seen it mentioned a couple of times on the mailing list
>> already), or do we dare to switch to 3.0 instead?
>>
>> I personally dislike two-digit minor version numbers like 2.10 since the
>> non-experienced users sometimes mix it up with 2.1 ... and there have
>> been a couple of new cool features in the past releases that would
>> justify a 3.0 now, too, I think.
>>
>> But anyway, the more important thing that keeps me concerned is: Someone
>>  once told me that we should get rid of old parameters and interfaces
>> (like HMP commands) primarily only when we're changing to a new major
>> version number. As you all know, QEMU has a lot of legacy options, which
>> are likely rather confusing than helpful for the new users nowadays,
>> e.g. things like the "-net channel" option (which is fortunately even
>> hardly documented), but maybe also even the whole vlan/hub concept in
>> the net code, or legacy parameters like "-usbdevice". If we switch to
>> version 3.0, could we agree to remove at least some of them?
>>
>>  Thomas
>>
> 
> As others have stated, we need a few releases to deprecate things first.
> 
> Maybe we should develop a serious plan to develop some of our legacy
> interfaces first.
> 
> Maybe 2.10 can introduce a list of things we want to deprecate,
> 2.11 can be the transition release,
> and then 3.0 can cut the cord and free of us our terrible burden?
> 
> I have a list of things I want to axe...

I've started a Wiki page with such a list here:

http://wiki.qemu-project.org/Features/LegacyRemoval

Feel free to amend!

 Thomas

Re: [Qemu-devel] [PATCH 08/51] ram: Move num_dirty_pages_period into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:01PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

-- peterx

[Qemu-devel] [PATCH] virtio-blk: add DISCARD support to virtio-blk driver

2017-03-27 Thread Changpeng Liu

Currently virtio-blk driver does not provide discard feature flag, so the
filesystems which built on top of the block device will not send discard
command. This is okay for HDD backend, but it will impact the performance
for SSD backend.

Add a feature flag VIRTIO_BLK_F_DISCARD and command VIRTIO_BLK_T_DISCARD
to extend exist virtio-blk protocol. virtio-blk protocol uses a single
8 bytes descriptor containing type,reserved and sector, currently Linux
uses the reserved field as IO priority, here we also re-use the reserved
field as number of discard sectors.

Signed-off-by: Changpeng Liu 
---
 drivers/block/virtio_blk.c  | 38 +-
 include/uapi/linux/virtio_blk.h | 12 ++--
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1d4c9f8..550cfe7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -241,6 +241,9 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
+   case REQ_OP_DISCARD:
+   type = VIRTIO_BLK_T_DISCARD;
+   break;
case REQ_OP_SCSI_IN:
case REQ_OP_SCSI_OUT:
type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,16 +259,24 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
vbr->out_hdr.sector = type ?
0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
-   vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));
+   vbr->out_hdr.u.ioprio = cpu_to_virtio32(vblk->vdev, 
req_get_ioprio(req));
 
blk_mq_start_request(req);
 
-   num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
-   if (num) {
-   if (rq_data_dir(req) == WRITE)
-   vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
VIRTIO_BLK_T_OUT);
-   else
-   vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
VIRTIO_BLK_T_IN);
+   if (type == VIRTIO_BLK_T_DISCARD) {
+   vbr->out_hdr.u.discard_nr_sectors = cpu_to_virtio32(vblk->vdev,
+   
blk_rq_sectors(req));
+   num = 0;
+   } else {
+   num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
+   if (num) {
+   if (rq_data_dir(req) == WRITE)
+   vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
+
VIRTIO_BLK_T_OUT);
+   else
+   vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
+
VIRTIO_BLK_T_IN);
+   }
}
 
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
@@ -775,6 +786,15 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!err && opt_io_size)
blk_queue_io_opt(q, blk_size * opt_io_size);
 
+   if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+   q->limits.discard_zeroes_data = 0;
+   q->limits.discard_alignment = blk_size;
+   q->limits.discard_granularity = blk_size;
+   blk_queue_max_discard_sectors(q, UINT_MAX);
+   blk_queue_max_discard_segments(q, 1);
+   queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+   }
+
virtio_device_ready(vdev);
 
device_add_disk(&vdev->dev, vblk->disk);
@@ -882,14 +902,14 @@ static int virtblk_restore(struct virtio_device *vdev)
VIRTIO_BLK_F_SCSI,
 #endif
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-   VIRTIO_BLK_F_MQ,
+   VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
 }
 ;
 static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-   VIRTIO_BLK_F_MQ,
+   VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d9..d608649 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,7 @@
 #define VIRTIO_BLK_F_BLK_SIZE  6   /* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY  10  /* Topology information is available */
 #define VIRTIO_BLK_F_MQ12  /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD   13  /* DISCARD command is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -114,6 +115,9 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD   16
+
 #ifndef VIR

Re: [Qemu-devel] [PATCH v7 14/17] memory: add MemoryRegionIOMMUOps.replay() callback

2017-03-27 Thread Liu, Yi L

> -Original Message-
> From: Qemu-devel [mailto:qemu-devel-bounces+yi.l.liu=intel@nongnu.org] On
> Behalf Of Peter Xu
> Sent: Tuesday, February 7, 2017 4:28 PM
> To: qemu-devel@nongnu.org
> Cc: Lan, Tianyu ; Tian, Kevin ;
> m...@redhat.com; jan.kis...@siemens.com; jasow...@redhat.com;
> pet...@redhat.com; alex.william...@redhat.com; bd.a...@gmail.com; David
> Gibson 
> Subject: [Qemu-devel] [PATCH v7 14/17] memory: add
> MemoryRegionIOMMUOps.replay() callback
> 
> Originally we have one memory_region_iommu_replay() function, which is the
> default behavior to replay the translations of the whole IOMMU region. 
> However,
> on some platform like x86, we may want our own replay logic for IOMMU regions.
> This patch add one more hook for IOMMUOps for the callback, and it'll 
> override the
> default if set.
> 
> Signed-off-by: Peter Xu 
> ---
>  include/exec/memory.h | 2 ++
>  memory.c  | 6 ++
>  2 files changed, 8 insertions(+)
> 
> diff --git a/include/exec/memory.h b/include/exec/memory.h index
> 0767888..30b2a74 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -191,6 +191,8 @@ struct MemoryRegionIOMMUOps {
>  void (*notify_flag_changed)(MemoryRegion *iommu,
>  IOMMUNotifierFlag old_flags,
>  IOMMUNotifierFlag new_flags);
> +/* Set this up to provide customized IOMMU replay function */
> +void (*replay)(MemoryRegion *iommu, IOMMUNotifier *notifier);
>  };
> 
>  typedef struct CoalescedMemoryRange CoalescedMemoryRange; diff --git
> a/memory.c b/memory.c index 7a4f2f9..9c253cc 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1630,6 +1630,12 @@ void memory_region_iommu_replay(MemoryRegion
> *mr, IOMMUNotifier *n,
>  hwaddr addr, granularity;
>  IOMMUTLBEntry iotlb;
> +/* If the IOMMU has its own replay callback, override */
> +if (mr->iommu_ops->replay) {
> +mr->iommu_ops->replay(mr, n);
> +return;
> +}

Hi Alex, Peter,

Will all the other vendors(e.g. PPC, s390, ARM) add their own replay callback
as well? I guess it depends on whether the original replay algorithm work well
for them? Do you have such knowledge?

Regards,
Yi L

> +
>  granularity = memory_region_iommu_get_min_page_size(mr);
> 
>  for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
> --
> 2.7.4
>

Re: [Qemu-devel] q35 and sysbus devices

2017-03-27 Thread Cornelia Huck

On Fri, 24 Mar 2017 16:23:18 -0300
Eduardo Habkost  wrote:

> On Fri, Mar 24, 2017 at 05:08:56PM +, Peter Maydell wrote:
> > On 24 March 2017 at 16:58, Markus Armbruster  wrote:
> > > "Sysbus" isn't a bus.  In qdev's original design, every device had to
> > > plug into a bus, period.  The ones that really didn't were made to plug
> > > into "sysbus".
> > >
> > > Pretty much the only thing "sysbus" devices had in common was that they
> > > couldn't be used with device_add and device_del.
> > 
> > This isn't really true. Sysbus devices support having MMIO regions
> > and IRQ lines and GPIO lines. If you need those you're a
> > sysbus device; otherwise you can probably just be a plain old Device.
> > 
> > > We fixed the design to permit bus-less devices, but we didn't get rid of
> > > "sysbus".
> > 
> > Call it what you want, but we should have some common code support
> > for "I want to have MMIOs and IRQs and GPIO lines". You could
> > argue for moving all that into Device I suppose.
> 
> Even if we don't move all that into Device, this sounds like an
> argument for the existence of struct SysBusDevice. But I still
> don't understand the reason TYPE_SYSTEM_BUS still exists.

ISTR some surprises that reset (or some other) callbacks were not
called as expected if there wasn't a sysbus device among the ancestors.
Don't know if that's still true.

[Qemu-devel] [PATCH] slirp: fix compilation errors with DEBUG set

2017-03-27 Thread Laurent Vivier

slirp/slirp.c: In function 'get_dns_addr_resolv_conf':
slirp/slirp.c:202:29: error: initialization discards 'const' qualifier from 
pointer target type [-Werror=discarded-qualifiers]
 char *res = inet_ntop(af, tmp_addr, s, sizeof(s));
 ^
slirp/slirp.c:204:25: error: assignment discards 'const' qualifier from pointer 
target type [-Werror=discarded-qualifiers]
 res = "(string conversion error)";

Signed-off-by: Laurent Vivier 
---
 slirp/slirp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/slirp/slirp.c b/slirp/slirp.c
index 60539de..5a94b06 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -198,7 +198,7 @@ static int get_dns_addr_resolv_conf(int af, void 
*pdns_addr, void *cached_addr,
 #ifdef DEBUG
 else {
 char s[INET6_ADDRSTRLEN];
-char *res = inet_ntop(af, tmp_addr, s, sizeof(s));
+const char *res = inet_ntop(af, tmp_addr, s, sizeof(s));
 if (!res) {
 res = "(string conversion error)";
 }
-- 
2.9.3

Re: [Qemu-devel] q35 and sysbus devices

2017-03-27 Thread Peter Maydell

On 27 March 2017 at 09:44, Cornelia Huck  wrote:
> ISTR some surprises that reset (or some other) callbacks were not
> called as expected if there wasn't a sysbus device among the ancestors.
> Don't know if that's still true.

Yes -- if you're a sysbus device then your reset method is
called. If you're only a subclass of Device then your reset
method is not called.

I think this is because buses like PCI have specific
reset behaviour (order important between host and
devices, maybe?), so we defer reset to the subclass.

(Reset is a mess anyway.)

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2] configure: use pkg-config for obtaining xen version

2017-03-27 Thread Paul Durrant

> -Original Message-
> From: Juergen Gross [mailto:jgr...@suse.com]
> Sent: 27 March 2017 08:43
> To: qemu-devel@nongnu.org; xen-de...@lists.xenproject.org
> Cc: Anthony Perard ; kra...@redhat.com;
> sstabell...@kernel.org; Paul Durrant ; Juergen
> Gross 
> Subject: [PATCH v2] configure: use pkg-config for obtaining xen version
> 
> Instead of trying to guess the Xen version to use by compiling various
> test programs first just ask the system via pkg-config. Only if it
> can't return the version fall back to the test program scheme.
> 
> If configure is being called with dedicated flags for the Xen libraries
> use those instead of the pkg-config output. This will avoid breaking
> an in-tree Xen build of an old Xen version while a new Xen version is
> installed on the build machine: pkg-config would pick up the installed
> Xen config files as the Xen tree wouldn't contain any of them.
> 
> Signed-off-by: Juergen Gross 
> ---
> V2: - use pkg-config only if no Xen library paths have been specified via
>   --extra-ldflags
> - keep test program for detecting Xen 4.9

This all looks plausible but it doesn't seem to be working for me when trying 
to build 4.8. I'm still getting a xen ctrl version of 40900... still trying to 
figure out why.

Also, the whitespace changes later on in the patch should probably be split out.

  Paul

> ---
>  configure | 155 ++
> 
>  1 file changed, 86 insertions(+), 69 deletions(-)
> 
> diff --git a/configure b/configure
> index aabf098..c34b025 100755
> --- a/configure
> +++ b/configure
> @@ -1962,30 +1962,46 @@ fi
>  # xen probe
> 
>  if test "$xen" != "no" ; then
> -  xen_libs="-lxenstore -lxenctrl -lxenguest"
> -  xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
> +  # Check whether Xen library path is specified via --extra-ldflags to avoid
> +  # overriding this setting with pkg-config output. If not, try pkg-config
> +  # to obtain all needed flags.
> 
> -  # First we test whether Xen headers and libraries are available.
> -  # If no, we are done and there is no Xen support.
> -  # If yes, more tests are run to detect the Xen version.
> +  if ! echo $EXTRA_LDFLAGS | grep tools/libxc > /dev/null && \
> + $pkg_config --exists xencontrol ; then
> +xen_ctrl_version="$(printf '%d%02d%02d' \
> +  $($pkg_config --modversion xencontrol | sed 's/\./ /g') )"
> +xen=yes
> +xen_pc="xencontrol xenstore xenguest xenforeignmemory xengnttab"
> +xen_pc="$xen_pc xenevtchn xendevicemodel"
> +QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags $xen_pc)"
> +libs_softmmu="$($pkg_config --libs $xen_pc) $libs_softmmu"
> +LDFLAGS="$($pkg_config --libs $xen_pc) $LDFLAGS"
> +  else
> 
> -  # Xen (any)
> -  cat > $TMPC < +xen_libs="-lxenstore -lxenctrl -lxenguest"
> +xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
> +
> +# First we test whether Xen headers and libraries are available.
> +# If no, we are done and there is no Xen support.
> +# If yes, more tests are run to detect the Xen version.
> +
> +# Xen (any)
> +cat > $TMPC <  #include 
>  int main(void) {
>return 0;
>  }
>  EOF
> -  if ! compile_prog "" "$xen_libs" ; then
> -# Xen not found
> -if test "$xen" = "yes" ; then
> -  feature_not_found "xen" "Install xen devel"
> -fi
> -xen=no
> +if ! compile_prog "" "$xen_libs" ; then
> +  # Xen not found
> +  if test "$xen" = "yes" ; then
> +feature_not_found "xen" "Install xen devel"
> +  fi
> +  xen=no
> 
> -  # Xen unstable
> -  elif
> -  cat > $TMPC < +# Xen unstable
> +elif
> +cat > $TMPC <  #undef XC_WANT_COMPAT_DEVICEMODEL_API
>  #define __XEN_TOOLS__
>  #include 
> @@ -1998,13 +2014,13 @@ int main(void) {
>return 0;
>  }
>  EOF
> -  compile_prog "" "$xen_libs $xen_stable_libs -lxendevicemodel"
> -then
> -xen_stable_libs="$xen_stable_libs -lxendevicemodel"
> -xen_ctrl_version=40900
> -xen=yes
> -  elif
> -  cat > $TMPC < +compile_prog "" "$xen_libs $xen_stable_libs -lxendevicemodel"
> +  then
> +  xen_stable_libs="$xen_stable_libs -lxendevicemodel"
> +  xen_ctrl_version=40900
> +  xen=yes
> +elif
> +cat > $TMPC <  /*
>   * If we have stable libs the we don't want the libxc compat
>   * layers, regardless of what CFLAGS we may have been given.
> @@ -2054,12 +2070,12 @@ int main(void) {
>return 0;
>  }
>  EOF
> -  compile_prog "" "$xen_libs $xen_stable_libs"
> -then
> -xen_ctrl_version=40800
> -xen=yes
> -  elif
> -  cat > $TMPC < +compile_prog "" "$xen_libs $xen_stable_libs"
> +  then
> +  xen_ctrl_version=40800
> +  xen=yes
> +elif
> +cat > $TMPC <  /*
>   * If we have stable libs the we don't want the libxc compat
>   * layers, regardless of what CFLAGS we may have been given.
> @@ -2105,12 +2121,12 @@ int main(void) {
>return 0;
>  }
>  EOF
>

Re: [Qemu-devel] [PATCH v7 14/17] memory: add MemoryRegionIOMMUOps.replay() callback

2017-03-27 Thread Peter Xu

On Mon, Mar 27, 2017 at 08:35:05AM +, Liu, Yi L wrote:
> > -Original Message-
> > From: Qemu-devel [mailto:qemu-devel-bounces+yi.l.liu=intel@nongnu.org] 
> > On
> > Behalf Of Peter Xu
> > Sent: Tuesday, February 7, 2017 4:28 PM
> > To: qemu-devel@nongnu.org
> > Cc: Lan, Tianyu ; Tian, Kevin ;
> > m...@redhat.com; jan.kis...@siemens.com; jasow...@redhat.com;
> > pet...@redhat.com; alex.william...@redhat.com; bd.a...@gmail.com; David
> > Gibson 
> > Subject: [Qemu-devel] [PATCH v7 14/17] memory: add
> > MemoryRegionIOMMUOps.replay() callback
> > 
> > Originally we have one memory_region_iommu_replay() function, which is the
> > default behavior to replay the translations of the whole IOMMU region. 
> > However,
> > on some platform like x86, we may want our own replay logic for IOMMU 
> > regions.
> > This patch add one more hook for IOMMUOps for the callback, and it'll 
> > override the
> > default if set.
> > 
> > Signed-off-by: Peter Xu 
> > ---
> >  include/exec/memory.h | 2 ++
> >  memory.c  | 6 ++
> >  2 files changed, 8 insertions(+)
> > 
> > diff --git a/include/exec/memory.h b/include/exec/memory.h index
> > 0767888..30b2a74 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -191,6 +191,8 @@ struct MemoryRegionIOMMUOps {
> >  void (*notify_flag_changed)(MemoryRegion *iommu,
> >  IOMMUNotifierFlag old_flags,
> >  IOMMUNotifierFlag new_flags);
> > +/* Set this up to provide customized IOMMU replay function */
> > +void (*replay)(MemoryRegion *iommu, IOMMUNotifier *notifier);
> >  };
> > 
> >  typedef struct CoalescedMemoryRange CoalescedMemoryRange; diff --git
> > a/memory.c b/memory.c index 7a4f2f9..9c253cc 100644
> > --- a/memory.c
> > +++ b/memory.c
> > @@ -1630,6 +1630,12 @@ void memory_region_iommu_replay(MemoryRegion
> > *mr, IOMMUNotifier *n,
> >  hwaddr addr, granularity;
> >  IOMMUTLBEntry iotlb;
> > +/* If the IOMMU has its own replay callback, override */
> > +if (mr->iommu_ops->replay) {
> > +mr->iommu_ops->replay(mr, n);
> > +return;
> > +}
> 
> Hi Alex, Peter,
> 
> Will all the other vendors(e.g. PPC, s390, ARM) add their own replay callback
> as well? I guess it depends on whether the original replay algorithm work well
> for them? Do you have such knowledge?

I guess so. At least for VT-d we had this callback since the default
replay mechanism did not work well on x86 due to its extremely large
memory region size. Thanks,

-- peterx

Re: [Qemu-devel] [PATCH v2] configure: use pkg-config for obtaining xen version

2017-03-27 Thread Juergen Gross

On 27/03/17 11:07, Paul Durrant wrote:
>> -Original Message-
>> From: Juergen Gross [mailto:jgr...@suse.com]
>> Sent: 27 March 2017 08:43
>> To: qemu-devel@nongnu.org; xen-de...@lists.xenproject.org
>> Cc: Anthony Perard ; kra...@redhat.com;
>> sstabell...@kernel.org; Paul Durrant ; Juergen
>> Gross 
>> Subject: [PATCH v2] configure: use pkg-config for obtaining xen version
>>
>> Instead of trying to guess the Xen version to use by compiling various
>> test programs first just ask the system via pkg-config. Only if it
>> can't return the version fall back to the test program scheme.
>>
>> If configure is being called with dedicated flags for the Xen libraries
>> use those instead of the pkg-config output. This will avoid breaking
>> an in-tree Xen build of an old Xen version while a new Xen version is
>> installed on the build machine: pkg-config would pick up the installed
>> Xen config files as the Xen tree wouldn't contain any of them.
>>
>> Signed-off-by: Juergen Gross 
>> ---
>> V2: - use pkg-config only if no Xen library paths have been specified via
>>   --extra-ldflags
>> - keep test program for detecting Xen 4.9
> 
> This all looks plausible but it doesn't seem to be working for me when trying 
> to build 4.8. I'm still getting a xen ctrl version of 40900... still trying 
> to figure out why.

I suppose you reverted my previous patch?

Do you happen to have some stale 4.9 libraries in your Xen tree?

What happens without this patch?

> Also, the whitespace changes later on in the patch should probably be split 
> out.

Hmm, those are just due to the nesting level of the if's changed.
I don't think they can easily be split out without having a
malformed indentation.


Juergen

Re: [Qemu-devel] [PATCH v7 14/17] memory: add MemoryRegionIOMMUOps.replay() callback

2017-03-27 Thread Liu, Yi L

> -Original Message-
> From: Peter Xu [mailto:pet...@redhat.com]
> Sent: Monday, March 27, 2017 5:12 PM
> To: Liu, Yi L 
> Cc: alex.william...@redhat.com; Lan, Tianyu ; Tian, 
> Kevin
> ; m...@redhat.com; jan.kis...@siemens.com;
> jasow...@redhat.com; bd.a...@gmail.com; David Gibson
> ; qemu-devel@nongnu.org
> Subject: Re: [Qemu-devel] [PATCH v7 14/17] memory: add
> MemoryRegionIOMMUOps.replay() callback
> 
> On Mon, Mar 27, 2017 at 08:35:05AM +, Liu, Yi L wrote:
> > > -Original Message-
> > > From: Qemu-devel
> > > [mailto:qemu-devel-bounces+yi.l.liu=intel@nongnu.org] On Behalf
> > > Of Peter Xu
> > > Sent: Tuesday, February 7, 2017 4:28 PM
> > > To: qemu-devel@nongnu.org
> > > Cc: Lan, Tianyu ; Tian, Kevin
> > > ; m...@redhat.com; jan.kis...@siemens.com;
> > > jasow...@redhat.com; pet...@redhat.com; alex.william...@redhat.com;
> > > bd.a...@gmail.com; David Gibson 
> > > Subject: [Qemu-devel] [PATCH v7 14/17] memory: add
> > > MemoryRegionIOMMUOps.replay() callback
> > >
> > > Originally we have one memory_region_iommu_replay() function, which
> > > is the default behavior to replay the translations of the whole
> > > IOMMU region. However, on some platform like x86, we may want our own
> replay logic for IOMMU regions.
> > > This patch add one more hook for IOMMUOps for the callback, and
> > > it'll override the default if set.
> > >
> > > Signed-off-by: Peter Xu 
> > > ---
> > >  include/exec/memory.h | 2 ++
> > >  memory.c  | 6 ++
> > >  2 files changed, 8 insertions(+)
> > >
> > > diff --git a/include/exec/memory.h b/include/exec/memory.h index
> > > 0767888..30b2a74 100644
> > > --- a/include/exec/memory.h
> > > +++ b/include/exec/memory.h
> > > @@ -191,6 +191,8 @@ struct MemoryRegionIOMMUOps {
> > >  void (*notify_flag_changed)(MemoryRegion *iommu,
> > >  IOMMUNotifierFlag old_flags,
> > >  IOMMUNotifierFlag new_flags);
> > > +/* Set this up to provide customized IOMMU replay function */
> > > +void (*replay)(MemoryRegion *iommu, IOMMUNotifier *notifier);
> > >  };
> > >
> > >  typedef struct CoalescedMemoryRange CoalescedMemoryRange; diff
> > > --git a/memory.c b/memory.c index 7a4f2f9..9c253cc 100644
> > > --- a/memory.c
> > > +++ b/memory.c
> > > @@ -1630,6 +1630,12 @@ void memory_region_iommu_replay(MemoryRegion
> > > *mr, IOMMUNotifier *n,
> > >  hwaddr addr, granularity;
> > >  IOMMUTLBEntry iotlb;
> > > +/* If the IOMMU has its own replay callback, override */
> > > +if (mr->iommu_ops->replay) {
> > > +mr->iommu_ops->replay(mr, n);
> > > +return;
> > > +}
> >
> > Hi Alex, Peter,
> >
> > Will all the other vendors(e.g. PPC, s390, ARM) add their own replay
> > callback as well? I guess it depends on whether the original replay
> > algorithm work well for them? Do you have such knowledge?
> 
> I guess so. At least for VT-d we had this callback since the default replay 
> mechanism
> did not work well on x86 due to its extremely large memory region size. 
> Thanks,

thx. that would make sense.

Re: [Qemu-devel] [PATCH 11/51] ram: Move dup_pages into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:04PM +0100, Juan Quintela wrote:
> Once there rename it to its actual meaning, zero_pages.
> 
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

Will post a question below though (not directly related to this patch
but context-wide)...

> ---
>  migration/ram.c | 29 ++---
>  1 file changed, 18 insertions(+), 11 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index d8428c1..0da133f 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -165,6 +165,9 @@ struct RAMState {
>  uint64_t xbzrle_cache_miss_prev;
>  /* number of iterations at the beginning of period */
>  uint64_t iterations_prev;
> +/* Accounting fields */
> +/* number of zero pages.  It used to be pages filled by the same char. */
> +uint64_t zero_pages;
>  };
>  typedef struct RAMState RAMState;
>  
> @@ -172,7 +175,6 @@ static RAMState ram_state;
>  
>  /* accounting for migration statistics */
>  typedef struct AccountingInfo {
> -uint64_t dup_pages;
>  uint64_t skipped_pages;
>  uint64_t norm_pages;
>  uint64_t iterations;
> @@ -192,12 +194,12 @@ static void acct_clear(void)
>  
>  uint64_t dup_mig_bytes_transferred(void)
>  {
> -return acct_info.dup_pages * TARGET_PAGE_SIZE;
> +return ram_state.zero_pages * TARGET_PAGE_SIZE;
>  }
>  
>  uint64_t dup_mig_pages_transferred(void)
>  {
> -return acct_info.dup_pages;
> +return ram_state.zero_pages;
>  }
>  
>  uint64_t skipped_mig_bytes_transferred(void)
> @@ -737,19 +739,21 @@ static void migration_bitmap_sync(RAMState *rs)
>   *
>   * Returns the number of pages written.
>   *
> + * @rs: current RAM state
>   * @f: QEMUFile where to send the data
>   * @block: block that contains the page we want to send
>   * @offset: offset inside the block for the page
>   * @p: pointer to the page
>   * @bytes_transferred: increase it with the number of transferred bytes
>   */
> -static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
> +static int save_zero_page(RAMState *rs, QEMUFile *f, RAMBlock *block,
> +  ram_addr_t offset,
>uint8_t *p, uint64_t *bytes_transferred)
>  {
>  int pages = -1;
>  
>  if (is_zero_range(p, TARGET_PAGE_SIZE)) {
> -acct_info.dup_pages++;
> +rs->zero_pages++;
>  *bytes_transferred += save_page_header(f, block,
> offset | 
> RAM_SAVE_FLAG_COMPRESS);
>  qemu_put_byte(f, 0);
> @@ -822,11 +826,11 @@ static int ram_save_page(RAMState *rs, MigrationState 
> *ms, QEMUFile *f,
>  if (bytes_xmit > 0) {
>  acct_info.norm_pages++;
>  } else if (bytes_xmit == 0) {
> -acct_info.dup_pages++;
> +rs->zero_pages++;

This code path looks suspicous... since iiuc currently it should only
be triggered by RDMA case, and I believe here qemu_rdma_save_page()
should have met something wrong (so that it didn't return with
RAM_SAVE_CONTROL_DELAYED). Then is it correct we do increase zero page
counting unconditionally here? (hmm, the default bytes_xmit is zero as
well...)

Another thing is that I see when RDMA is enabled we are updating
accounting info with acct_update_position(), while we updated it here
as well. Is this an issue of duplicated accounting?

Similar question in ram_save_compressed_page().

Thanks,

>  }
>  }
>  } else {
> -pages = save_zero_page(f, block, offset, p, bytes_transferred);
> +pages = save_zero_page(rs, f, block, offset, p, bytes_transferred);
>  if (pages > 0) {
>  /* Must let xbzrle know, otherwise a previous (now 0'd) cached
>   * page would be stale
> @@ -998,7 +1002,7 @@ static int ram_save_compressed_page(RAMState *rs, 
> MigrationState *ms,
>  if (bytes_xmit > 0) {
>  acct_info.norm_pages++;
>  } else if (bytes_xmit == 0) {
> -acct_info.dup_pages++;
> +rs->zero_pages++;
>  }
>  }
>  } else {
> @@ -1010,7 +1014,7 @@ static int ram_save_compressed_page(RAMState *rs, 
> MigrationState *ms,
>   */
>  if (block != rs->last_sent_block) {
>  flush_compressed_data(f);
> -pages = save_zero_page(f, block, offset, p, bytes_transferred);
> +pages = save_zero_page(rs, f, block, offset, p, 
> bytes_transferred);
>  if (pages == -1) {
>  /* Make sure the first page is sent out before other pages */
>  bytes_xmit = save_page_header(f, block, offset |
> @@ -1031,7 +1035,7 @@ static int ram_save_compressed_page(RAMState *rs, 
> MigrationState *ms,
>  }
>  } else {
>  offset |= RAM_SAVE_FLAG_CONTINUE;
> -pages = save_zero_page(f, block, offset, p, bytes_transferred);
> +pa

Re: [Qemu-devel] [PATCH v2] hmp: gpa2hva and gpa2hpa hostaddr command

2017-03-27 Thread Dr. David Alan Gilbert

* Paolo Bonzini (bonz...@gnu.org) wrote:
> 
> 
> On 20/03/2017 18:16, Paolo Bonzini wrote:
> > 
> > 
> > On 20/03/2017 18:01, Markus Armbruster wrote:
> >> Peter Maydell  writes:
> >>
> >>> On 20 March 2017 at 16:29, Markus Armbruster  wrote:
>  Peter Maydell  writes:
> > I have some comments which feel kind of nit-picky, but since this
> > is a public-facing HMP API I think they need attention since we only
> > get one chance to get it right.
> 
>  HMP is not a stable interface.  If we get it wrong, we change it.  If
>  our change breaks your usage, you get to keep the pieces.
> >>>
> >>> Oh yes, I had my HMP and QMP the wrong way round.
> >>>
> >>> ...which reminds me that I thought we preferred all HMP commands
> >>> to be implemented in terms of their QMP equivalent ?
> >>
> >> Yes.  We make exceptions for commands we believe have no QMP use, such
> >> as "print".  I figure the rationale for these is "just for testing".
> >> Paolo, can you confirm?
> > 
> > Yes.  If somebody comes up with a non-testing use, I suppose we can
> > always add a QMP variant.  I'll send v3 which uses the current monitor
> > CPU's address space according to Peter's review.
> 
> Since there is no CPU method to transform a CPU state into a MemTxAttrs
> value, and the MemTxAttrs are needed to get the right address space, v2
> is the best I can do for 2.9.
> 
> Changing it to take the CPU state into account (e.g. include secure
> memory when in secure mode) can be done in later releases if necessary.
> 
> David, are you queuing the patch?

Yes, will do.

Dave

> Paolo
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH 12/51] ram: Remove unused dup_mig_bytes_transferred()

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:05PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [PATCH 13/51] ram: Remove unused pages_skipped variable

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:06PM +0100, Juan Quintela wrote:
> For compatibility, we need to still send a value, but just specify it
> and comment the fact.
> 
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [PATCH v2] configure: use pkg-config for obtaining xen version

2017-03-27 Thread Paul Durrant

> -Original Message-
> From: Juergen Gross [mailto:jgr...@suse.com]
> Sent: 27 March 2017 10:15
> To: Paul Durrant ; qemu-devel@nongnu.org; xen-
> de...@lists.xenproject.org
> Cc: Anthony Perard ; kra...@redhat.com;
> sstabell...@kernel.org
> Subject: Re: [PATCH v2] configure: use pkg-config for obtaining xen version
> 
> On 27/03/17 11:07, Paul Durrant wrote:
> >> -Original Message-
> >> From: Juergen Gross [mailto:jgr...@suse.com]
> >> Sent: 27 March 2017 08:43
> >> To: qemu-devel@nongnu.org; xen-de...@lists.xenproject.org
> >> Cc: Anthony Perard ; kra...@redhat.com;
> >> sstabell...@kernel.org; Paul Durrant ; Juergen
> >> Gross 
> >> Subject: [PATCH v2] configure: use pkg-config for obtaining xen version
> >>
> >> Instead of trying to guess the Xen version to use by compiling various
> >> test programs first just ask the system via pkg-config. Only if it
> >> can't return the version fall back to the test program scheme.
> >>
> >> If configure is being called with dedicated flags for the Xen libraries
> >> use those instead of the pkg-config output. This will avoid breaking
> >> an in-tree Xen build of an old Xen version while a new Xen version is
> >> installed on the build machine: pkg-config would pick up the installed
> >> Xen config files as the Xen tree wouldn't contain any of them.
> >>
> >> Signed-off-by: Juergen Gross 
> >> ---
> >> V2: - use pkg-config only if no Xen library paths have been specified via
> >>   --extra-ldflags
> >> - keep test program for detecting Xen 4.9
> >
> > This all looks plausible but it doesn't seem to be working for me when
> trying to build 4.8. I'm still getting a xen ctrl version of 40900... still 
> trying to
> figure out why.
> 
> I suppose you reverted my previous patch?
> 

Yes, I reverted that then applied this one.

> Do you happen to have some stale 4.9 libraries in your Xen tree?
>

Yes, it was picking up a libxendevicemodel when it should not have been. It 
would be handy if the probe compilations used --nostdlib, but that's not 
regression introduced by your patch. Now that pkg-config is used for 
out-of-tree builds I guess it may be possible to add that though.
 
> What happens without this patch?
> 
> > Also, the whitespace changes later on in the patch should probably be split
> out.
> 
> Hmm, those are just due to the nesting level of the if's changed.
> I don't think they can easily be split out without having a
> malformed indentation.
> 

Ok, I see that now I look at the code rather than just the patch. So, having 
fixed my linkage issue...

Tested-by: Paul Durrant 

> 
> Juergen

[Qemu-devel] [PATCH v1] qga: Add 'guest-get-fqdn' command

2017-03-27 Thread Vinzenz 'evilissimo' Feenstra

From: Vinzenz Feenstra 

Retrieving the guest OS fully qualified domain name (FQDN) is a very
useful feature for virtual management systems. This information can help
to have more user friendly VM access details, instead of an IP there
would be the FQDN. Also the FQDN reported can be used to have automated
checks for valid SSL certificates.

virsh # qemu-agent-command F25 '{ "execute": "guest-get-fqdn" }'
{"return":{"fqdn":"F25.lab.evilissimo.net"}}

Signed-off-by: Vinzenz Feenstra 
---
 qga/commands.c   | 11 +++
 qga/qapi-schema.json | 25 +
 2 files changed, 36 insertions(+)

diff --git a/qga/commands.c b/qga/commands.c
index 4d92946..61577af 100644
--- a/qga/commands.c
+++ b/qga/commands.c
@@ -499,3 +499,14 @@ int ga_parse_whence(GuestFileWhence *whence, Error **errp)
 error_setg(errp, "invalid whence code %"PRId64, whence->u.value);
 return -1;
 }
+
+GuestFQDN *qmp_guest_get_fqdn(Error **err)
+{
+GuestFQDN *result = NULL;
+gchar const *hostname = g_get_host_name();
+if (hostname != NULL) {
+result = g_new0(GuestFQDN, 1);
+result->fqdn = g_strdup(hostname);
+}
+return result;
+}
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index a02dbf2..0a2c0a4 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -1042,3 +1042,28 @@
   'data':{ 'path': 'str', '*arg': ['str'], '*env': ['str'],
'*input-data': 'str', '*capture-output': 'bool' },
   'returns': 'GuestExec' }
+
+
+
+##
+# @GuestFQDN:
+# @fqdn: Fully qualified domain name of the guest OS
+#
+# Since: 2.10
+##
+{ 'struct': 'GuestFQDN',
+  'data':   { 'fqdn': 'str' } }
+
+
+##
+# @guest-get-fqdn:
+#
+# Request the FQDN (Fully Qualified Domain Name) of the guest operating system
+#
+# Returns: FQDN on success
+#
+# Since: 2.10
+##
+{ 'command': 'guest-get-fqdn',
+  'returns': 'GuestFQDN' }
+
-- 
2.9.3

Re: [Qemu-devel] [PATCH] xen: additionally restrict xenforeignmemory operations

2017-03-27 Thread Paul Durrant

> -Original Message-
[snip]
> 
> This is OK but the file is growing too entangled. What do you think of
> the following, which moves the if CONFIG_XEN_CTRL_INTERFACE_VERSION
> <
> 40701 at the top? This way we don't have to add yet another ifdef.
> 

Yes, this looks better and appears to DTRT. I assume you will just apply this 
version and don't want be to resubmit?

Cheers,

  Paul

> diff --git a/include/hw/xen/xen_common.h
> b/include/hw/xen/xen_common.h
> index 4f3bd35..624fb86 100644
> --- a/include/hw/xen/xen_common.h
> +++ b/include/hw/xen/xen_common.h
> @@ -27,6 +27,58 @@ extern xc_interface *xen_xc;
>   * We don't support Xen prior to 4.2.0.
>   */
> 
> +/* Xen 4.2 through 4.6 */
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
> +
> +typedef xc_interface xenforeignmemory_handle;
> +typedef xc_evtchn xenevtchn_handle;
> +typedef xc_gnttab xengnttab_handle;
> +
> +#define xenevtchn_open(l, f) xc_evtchn_open(l, f);
> +#define xenevtchn_close(h) xc_evtchn_close(h)
> +#define xenevtchn_fd(h) xc_evtchn_fd(h)
> +#define xenevtchn_pending(h) xc_evtchn_pending(h)
> +#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p)
> +#define xenevtchn_bind_interdomain(h, d, p)
> xc_evtchn_bind_interdomain(h, d, p)
> +#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
> +#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
> +
> +#define xengnttab_open(l, f) xc_gnttab_open(l, f)
> +#define xengnttab_close(h) xc_gnttab_close(h)
> +#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n)
> +#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h,
> d, r, p)
> +#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n)
> +#define xengnttab_map_grant_refs(h, c, d, r, p) \
> +xc_gnttab_map_grant_refs(h, c, d, r, p)
> +#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
> +xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
> +
> +#define xenforeignmemory_open(l, f) xen_xc
> +#define xenforeignmemory_close(h)
> +
> +static inline void *xenforeignmemory_map(xc_interface *h, uint32_t dom,
> + int prot, size_t pages,
> + const xen_pfn_t arr[/*pages*/],
> + int err[/*pages*/])
> +{
> +if (err)
> +return xc_map_foreign_bulk(h, dom, prot, arr, err, pages);
> +else
> +return xc_map_foreign_pages(h, dom, prot, arr, pages);
> +}
> +
> +#define xenforeignmemory_unmap(h, p, s) munmap(p, s * XC_PAGE_SIZE)
> +
> +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
> +
> +#include 
> +#include 
> +#include 
> +
> +#endif
> +
> +extern xenforeignmemory_handle *xen_fmem;
> +
>  #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40900
> 
>  typedef xc_interface xendevicemodel_handle;
> @@ -159,6 +211,13 @@ static inline int xendevicemodel_restrict(
>  return -1;
>  }
> 
> +static inline int xenforeignmemory_restrict(
> +xenforeignmemory_handle *fmem, domid_t domid)
> +{
> +errno = ENOTTY;
> +return -1;
> +}
> +
>  #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40900 */
> 
>  #include 
> @@ -215,69 +274,32 @@ static inline int xen_modified_memory(domid_t
> domid, uint64_t first_pfn,
> 
>  static inline int xen_restrict(domid_t domid)
>  {
> -int rc = xendevicemodel_restrict(xen_dmod, domid);
> +int rc;
> 
> -trace_xen_domid_restrict(errno);
> +/* Attempt to restrict devicemodel operations */
> +rc = xendevicemodel_restrict(xen_dmod, domid);
> +trace_xen_domid_restrict(rc ? errno : 0);
> 
> -if (errno == ENOTTY) {
> -return 0;
> +if (rc < 0) {
> +/*
> + * If errno is ENOTTY then restriction is not implemented so
> + * there's no point in trying to restrict other types of
> + * operation, but it should not be treated as a failure.
> + */
> +if (errno == ENOTTY) {
> +return 0;
> +}
> +
> +return rc;
>  }
> 
> -return rc;
> -}
> -
> -/* Xen 4.2 through 4.6 */
> -#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
> -
> -typedef xc_interface xenforeignmemory_handle;
> -typedef xc_evtchn xenevtchn_handle;
> -typedef xc_gnttab xengnttab_handle;
> -
> -#define xenevtchn_open(l, f) xc_evtchn_open(l, f);
> -#define xenevtchn_close(h) xc_evtchn_close(h)
> -#define xenevtchn_fd(h) xc_evtchn_fd(h)
> -#define xenevtchn_pending(h) xc_evtchn_pending(h)
> -#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p)
> -#define xenevtchn_bind_interdomain(h, d, p)
> xc_evtchn_bind_interdomain(h, d, p)
> -#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
> -#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
> -
> -#define xengnttab_open(l, f) xc_gnttab_open(l, f)
> -#define xengnttab_close(h) xc_gnttab_close(h)
> -#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n)
> -#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h,
> d, r, p)
> -#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n)
> -#define xengnttab_map

Re: [Qemu-devel] [PATCH 14/51] ram: Move norm_pages to RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:07PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [RFC v2 3/3] hw/intc/arm_gicv3_its: Allow save/restore

2017-03-27 Thread Auger Eric

Hi Peter,
On 13/03/2017 19:03, Peter Maydell wrote:
> On 6 March 2017 at 12:48, Eric Auger  wrote:
>> We change the restoration priority of both the GICv3 and ITS. The
>> GICv3 must be restored before the ITS and the ITS needs to be restored
>> before PCIe devices since it translates their MSI transactions.
> 
>> We typically observe the virtio-pci-net device sending MSI transactions
>> very early (even before the first vcpu run) which looks weird. It
>> appears that not servicing those transactions cause the virtio-pci-net
>> to stall.
> 
> This does seem rather weird and worth closer investigation.
> A stopped VM ought IMHO to be completely stopped, not still
> doing things...
After further investigations, here is what I observe:

Between the "running" vm_change_state_handler and the 1st
KVM_RUN I observe virtio_notify_config that calls
msix_notify(). This eventually attempts to inject an MSI.
This looks valid as the VM is in its running state.

Unfortunately the VGIC gets its final init stage, map_resources(),
executed on the first KVM_RUN. Until that point
no MMIO access can be performed. This looks the bad part to me.
So at the moment I trigger a map_resources() at the end of the ITS table
flush and that works.

If I don't do that, the virtio-net based network is not functional
on restore path.

Hope this clarifies.

Thanks

Eric

> 
>> Signed-off-by: Eric Auger 
>>
>> ---
>>
>> v1 -> v2:
>> - handle case where migrate_add_blocker fails
>> - add comments along with ITS and GICv3 migration priorities
>> ---
>>  hw/intc/arm_gicv3_common.c |  1 +
>>  hw/intc/arm_gicv3_its_common.c |  3 ++-
>>  hw/intc/arm_gicv3_its_kvm.c| 23 +++
>>  include/migration/vmstate.h|  2 ++
>>  4 files changed, 16 insertions(+), 13 deletions(-)
>>
>> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
>> index c6493d6..4228b7c 100644
>> --- a/hw/intc/arm_gicv3_common.c
>> +++ b/hw/intc/arm_gicv3_common.c
>> @@ -145,6 +145,7 @@ static const VMStateDescription vmstate_gicv3 = {
>>  .minimum_version_id = 1,
>>  .pre_save = gicv3_pre_save,
>>  .post_load = gicv3_post_load,
>> +.priority = MIG_PRI_GICV3,
>>  .fields = (VMStateField[]) {
>>  VMSTATE_UINT32(gicd_ctlr, GICv3State),
>>  VMSTATE_UINT32_ARRAY(gicd_statusr, GICv3State, 2),
>> diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c
>> index 75b9f04..854709f 100644
>> --- a/hw/intc/arm_gicv3_its_common.c
>> +++ b/hw/intc/arm_gicv3_its_common.c
>> @@ -48,7 +48,8 @@ static const VMStateDescription vmstate_its = {
>>  .name = "arm_gicv3_its",
>>  .pre_save = gicv3_its_pre_save,
>>  .post_load = gicv3_its_post_load,
>> -.unmigratable = true,
>> +.unmigratable = false,
> 
> unmigratable = false is the default, so we can just delete the line.
> 
>> +.priority = MIG_PRI_GICV3_ITS,
>>  .fields = (VMStateField[]) {
>>  VMSTATE_UINT32(ctlr, GICv3ITSState),
>>  VMSTATE_UINT64(cbaser, GICv3ITSState),
>> diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c
>> index 45e57d6..3bd2873 100644
>> --- a/hw/intc/arm_gicv3_its_kvm.c
>> +++ b/hw/intc/arm_gicv3_its_kvm.c
>> @@ -76,18 +76,6 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
>> **errp)
>>  GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev);
>>  Error *local_err = NULL;
>>
>> -/*
>> - * Block migration of a KVM GICv3 ITS device: the API for saving and
>> - * restoring the state in the kernel is not yet available
>> - */
>> -error_setg(&s->migration_blocker, "vITS migration is not implemented");
>> -migrate_add_blocker(s->migration_blocker, &local_err);
>> -if (local_err) {
>> -error_propagate(errp, local_err);
>> -error_free(s->migration_blocker);
>> -return;
>> -}
>> -
>>  s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_ITS, 
>> false);
>>  if (s->dev_fd < 0) {
>>  error_setg_errno(errp, -s->dev_fd, "error creating in-kernel ITS");
>> @@ -104,6 +92,17 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
>> **errp)
>>
>>  gicv3_its_init_mmio(s, NULL);
>>
>> +if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
>> +GITS_CTLR)) {
>> +error_setg(&s->migration_blocker, "vITS migration is not 
>> implemented");
> 
> I think we should specifically say that it's the host kernel
> that doesn't support ITS migration (ie not QEMU that's missing
> support).
> 
> The GICv3 uses the message
>  "This operating system kernel does not support vGICv3 migration"
> which is slightly odd phrasing but I guess we should follow that.
> 
>> +migrate_add_blocker(s->migration_blocker, &local_err);
>> +if (local_err) {
>> +error_propagate(errp, local_err);
>> +error_free(s->migration_blocker);
>> +return;
>> +}
>> +}
>> +
>>  kvm_msi_use_devid = true;
>>  kvm_gsi_direct_mapping = fa

[Qemu-devel] [RFC v3 0/3] vITS save/restore

2017-03-27 Thread Eric Auger

This series allows ITS save/restore and migration use cases.
It relies on not upstreamed kernel series [1].

ITS tables are flushed into guest RAM on VM stop while registers
are save on pre_save() callback. Tables and registers are restored
on ITS post_load().

That work was tested on Cavium ThunderX using virsh save/restore and
virt-manager live migration.

Best Regards

Eric

Host Kernel dependencies:
- [1] [PATCH v4 00/22] vITS save/restore

History:

v2 -> v3:
- GITS_IIDR is now saved and restored to check ABI revision.
- get/put functions renamed into pre_save/post_load
- unmigratable = false removed
- changed the migration blocker message
- remove the extract64 round s->ctlr
- reword some comments

v1 -> v2:
- rebase on 2.9 soft release code
- handle case where migrate_add_blocker fails
- add comments along with ITS and GICv3 migration priorities

Eric Auger (3):
  linux-headers: Partial header update for vITS save/restore
  hw/intc/arm_gicv3_its: Implement state save/restore
  hw/intc/arm_gicv3_its: Allow save/restore

 hw/intc/arm_gicv3_common.c |   1 +
 hw/intc/arm_gicv3_its_common.c |  11 ++-
 hw/intc/arm_gicv3_its_kvm.c| 120 +
 include/hw/intc/arm_gicv3_its_common.h |   8 +++
 include/migration/vmstate.h|   2 +
 linux-headers/asm-arm/kvm.h|   2 +
 linux-headers/asm-arm64/kvm.h  |   2 +
 7 files changed, 133 insertions(+), 13 deletions(-)

-- 
2.5.5

[Qemu-devel] [RFC v3 1/3] linux-headers: Partial header update for vITS save/restore

2017-03-27 Thread Eric Auger

This is a linux header update against 4.11-rc3 plus the non
upstreamed ITS migration series.

https://github.com/eauger/linux/tree/v4.11-rc3-its-mig-v4

It aims at enhancing the KVM user API with vITS save/restore
capability. This consists in two new groups for the
ARM_VGIC_ITS KVM device, named: KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
KVM_DEV_ARM_VGIC_GRP_ITS_TABLES.

Signed-off-by: Eric Auger 
---
 linux-headers/asm-arm/kvm.h   | 2 ++
 linux-headers/asm-arm64/kvm.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 1101d55..c8b9a68 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -192,6 +192,8 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS  8
+#define KVM_DEV_ARM_VGIC_GRP_ITS_TABLES9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 651ec30..a8d7d67 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -212,6 +212,8 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_ITS_TABLES 9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
-- 
2.5.5

[Qemu-devel] [RFC v3 3/3] hw/intc/arm_gicv3_its: Allow save/restore

2017-03-27 Thread Eric Auger

We change the restoration priority of both the GICv3 and ITS. The
GICv3 must be restored before the ITS and the ITS needs to be restored
before PCIe devices since it translates their MSI transactions.

Signed-off-by: Eric Auger 

---
v2 -> v3:
- reword migration blocker message
- remove unmigratable setting to false

v1 -> v2:
- handle case where migrate_add_blocker fails
- add comments along with ITS and GICv3 migration priorities
---
 hw/intc/arm_gicv3_common.c |  1 +
 hw/intc/arm_gicv3_its_common.c |  2 +-
 hw/intc/arm_gicv3_its_kvm.c| 24 
 include/migration/vmstate.h|  2 ++
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index c6493d6..4228b7c 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -145,6 +145,7 @@ static const VMStateDescription vmstate_gicv3 = {
 .minimum_version_id = 1,
 .pre_save = gicv3_pre_save,
 .post_load = gicv3_post_load,
+.priority = MIG_PRI_GICV3,
 .fields = (VMStateField[]) {
 VMSTATE_UINT32(gicd_ctlr, GICv3State),
 VMSTATE_UINT32_ARRAY(gicd_statusr, GICv3State, 2),
diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c
index efab8c7..22ce4c4 100644
--- a/hw/intc/arm_gicv3_its_common.c
+++ b/hw/intc/arm_gicv3_its_common.c
@@ -48,7 +48,7 @@ static const VMStateDescription vmstate_its = {
 .name = "arm_gicv3_its",
 .pre_save = gicv3_its_pre_save,
 .post_load = gicv3_its_post_load,
-.unmigratable = true,
+.priority = MIG_PRI_GICV3_ITS,
 .fields = (VMStateField[]) {
 VMSTATE_UINT32(ctlr, GICv3ITSState),
 VMSTATE_UINT32(iidr, GICv3ITSState),
diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c
index 1732831..15159f4 100644
--- a/hw/intc/arm_gicv3_its_kvm.c
+++ b/hw/intc/arm_gicv3_its_kvm.c
@@ -76,18 +76,6 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
**errp)
 GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev);
 Error *local_err = NULL;
 
-/*
- * Block migration of a KVM GICv3 ITS device: the API for saving and
- * restoring the state in the kernel is not yet available
- */
-error_setg(&s->migration_blocker, "vITS migration is not implemented");
-migrate_add_blocker(s->migration_blocker, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-error_free(s->migration_blocker);
-return;
-}
-
 s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_ITS, false);
 if (s->dev_fd < 0) {
 error_setg_errno(errp, -s->dev_fd, "error creating in-kernel ITS");
@@ -104,6 +92,18 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
**errp)
 
 gicv3_its_init_mmio(s, NULL);
 
+if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+GITS_CTLR)) {
+error_setg(&s->migration_blocker, "This operating system kernel "
+   "does not support vGICv3 migration");
+migrate_add_blocker(s->migration_blocker, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+error_free(s->migration_blocker);
+return;
+}
+}
+
 kvm_msi_use_devid = true;
 kvm_gsi_direct_mapping = false;
 kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index f2dbf84..8dab9c7 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -198,6 +198,8 @@ enum VMStateFlags {
 typedef enum {
 MIG_PRI_DEFAULT = 0,
 MIG_PRI_IOMMU,  /* Must happen before PCI devices */
+MIG_PRI_GICV3_ITS,  /* Must happen before PCI devices */
+MIG_PRI_GICV3,  /* Must happen before the ITS */
 MIG_PRI_MAX,
 } MigrationPriority;
 
-- 
2.5.5

[Qemu-devel] [RFC v3 2/3] hw/intc/arm_gicv3_its: Implement state save/restore

2017-03-27 Thread Eric Auger

We need to handle both registers and ITS tables. While
register handling is standard, ITS table handling is more
challenging since the kernel API is devised so that the
tables are flushed into guest RAM and not in vmstate buffers.

Flushing the ITS tables on device pre_save() is too late
since the guest RAM is already saved at this point.

Table flushing needs to happen when we are sure the vcpus
are stopped and before the last dirty page saving. The
right point is RUN_STATE_FINISH_MIGRATE but sometimes the
VM gets stopped before migration launch so let's simply
flush the tables each time the VM gets stopped.

For regular ITS registers we just can use vmstate pre_save()
and post_load() callbacks.

Signed-off-by: Eric Auger 

---

v2 -> v3:
- take into account Peter's comments:
  - add iidr save/restore
  - reword comments
  - remove s->ctlr = extract64(reg, 0, 32);
  - rename get()/put() function into pre_save()/post_load()
- do not execute put() if iidr == 0
---
 hw/intc/arm_gicv3_its_common.c |  9 
 hw/intc/arm_gicv3_its_kvm.c| 96 ++
 include/hw/intc/arm_gicv3_its_common.h |  8 +++
 3 files changed, 113 insertions(+)

diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c
index 9d67c5c..efab8c7 100644
--- a/hw/intc/arm_gicv3_its_common.c
+++ b/hw/intc/arm_gicv3_its_common.c
@@ -49,6 +49,15 @@ static const VMStateDescription vmstate_its = {
 .pre_save = gicv3_its_pre_save,
 .post_load = gicv3_its_post_load,
 .unmigratable = true,
+.fields = (VMStateField[]) {
+VMSTATE_UINT32(ctlr, GICv3ITSState),
+VMSTATE_UINT32(iidr, GICv3ITSState),
+VMSTATE_UINT64(cbaser, GICv3ITSState),
+VMSTATE_UINT64(cwriter, GICv3ITSState),
+VMSTATE_UINT64(creadr, GICv3ITSState),
+VMSTATE_UINT64_ARRAY(baser, GICv3ITSState, 8),
+VMSTATE_END_OF_LIST()
+},
 };
 
 static MemTxResult gicv3_its_trans_read(void *opaque, hwaddr offset,
diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c
index bd4f3aa..1732831 100644
--- a/hw/intc/arm_gicv3_its_kvm.c
+++ b/hw/intc/arm_gicv3_its_kvm.c
@@ -53,6 +53,24 @@ static int kvm_its_send_msi(GICv3ITSState *s, uint32_t 
value, uint16_t devid)
 return kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi);
 }
 
+/**
+ * vm_change_state_handler - VM change state callback aiming at flushing
+ * ITS tables into guest RAM
+ *
+ * The tables get flushed to guest RAM whenever the VM gets stopped.
+ */
+static void vm_change_state_handler(void *opaque, int running,
+RunState state)
+{
+GICv3ITSState *s = (GICv3ITSState *)opaque;
+
+if (running) {
+return;
+}
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_TABLES,
+  0, NULL, false);
+}
+
 static void kvm_arm_its_realize(DeviceState *dev, Error **errp)
 {
 GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev);
@@ -89,6 +107,8 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
**errp)
 kvm_msi_use_devid = true;
 kvm_gsi_direct_mapping = false;
 kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
+
+qemu_add_vm_change_state_handler(vm_change_state_handler, s);
 }
 
 static void kvm_arm_its_init(Object *obj)
@@ -102,6 +122,80 @@ static void kvm_arm_its_init(Object *obj)
  &error_abort);
 }
 
+/**
+ * kvm_arm_its_pre_save - handles the saving of ITS registers.
+ * ITS tables are flushed into guest RAM separately and earlier,
+ * through the VM change state handler, since at the moment pre_save()
+ * is called, the guest RAM has already been saved.
+ */
+static void kvm_arm_its_pre_save(GICv3ITSState *s)
+{
+uint64_t reg;
+int i;
+
+for (i = 0; i < 8; i++) {
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+  GITS_BASER + i * 8, &s->baser[i], false);
+}
+
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+  GITS_CTLR, &s->ctlr, false);
+
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+  GITS_CBASER, &s->cbaser, false);
+
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+  GITS_CREADR, &s->creadr, false);
+
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+  GITS_CWRITER, &s->cwriter, false);
+
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+  GITS_IIDR, &s->iidr, false);
+}
+
+/**
+ * kvm_arm_its_post_load - Restore both the ITS registers and tables
+ */
+static void kvm_arm_its_post_load(GICv3ITSState *s)
+{
+uint64_t reg;
+int i;
+
+if (!s->iidr) {
+return;
+}
+
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
+  GITS_IIDR, &s->iidr, true);
+
+/*
+ * must be written before GITS_CREADR since GITS_CBASER write
+ * access resets GITS_CREADR.
+ */
+kvm_devic

Re: [Qemu-devel] [PATCH v2] Fix input-linux reading from device

2017-03-27 Thread Gerd Hoffmann

On So, 2017-03-26 at 11:53 +0200, Javier Celaya wrote:
> The evdev devices in input-linux.c are read in blocks of one whole
> event. If there are not enough bytes available, they are discarded,
> instead of being kept for the next read operation. This results in
> lost events, of even non-working devices.

Have you seen this happening in practice?

> +struct input_event event;
> +int to_be_read;

I'd suggest to store offset (i.e. bytes already read) instead, should
make the whole logic a bit simpler and easier to read.

> +} else if (rc > 0){

checkpatch.pl complains here:
ERROR: space required before the open brace '{'

cheers,
  Gerd

Re: [Qemu-devel] [PATCH v1] qga: Add 'guest-get-fqdn' command

2017-03-27 Thread Sameeh Jubran

On Thu, Mar 23, 2017 at 9:51 PM, Vinzenz 'evilissimo' Feenstra <
vfeen...@redhat.com> wrote:

> From: Vinzenz Feenstra 
>
> Retrieving the guest OS fully qualified domain name (FQDN) is a very
> useful feature for virtual management systems. This information can help
> to have more user friendly VM access details, instead of an IP there
> would be the FQDN. Also the FQDN reported can be used to have automated
> checks for valid SSL certificates.
>
> virsh # qemu-agent-command F25 '{ "execute": "guest-get-fqdn" }'
> {"return":{"fqdn":"F25.lab.evilissimo.net"}}
>
> Signed-off-by: Vinzenz Feenstra 
> ---
>  qga/commands.c   | 11 +++
>  qga/qapi-schema.json | 25 +
>  2 files changed, 36 insertions(+)
>
> diff --git a/qga/commands.c b/qga/commands.c
> index 4d92946..61577af 100644
> --- a/qga/commands.c
> +++ b/qga/commands.c
> @@ -499,3 +499,14 @@ int ga_parse_whence(GuestFileWhence *whence, Error
> **errp)
>  error_setg(errp, "invalid whence code %"PRId64, whence->u.value);
>  return -1;
>  }
> +
> +GuestFQDN *qmp_guest_get_fqdn(Error **err)
> +{
> +GuestFQDN *result = NULL;
> +gchar const *hostname = g_get_host_name();
>
According to glib documentation on "g_get_host_name" function:
"The returned name is not necessarily a fully-qualified domain name, or
even present in DNS or some other name service at all. It need not even be
unique on your local network or site, but usually it is."
I think the command name should be changed to something like
"guest_get_machine_name" and not fqdn as this is not always true!

Moreover the documentation states that:
"If no name can be determined, a default fixed string "localhost" is
returned."
I think we should handle this case and return and informative error.

> +if (hostname != NULL) {
> +result = g_new0(GuestFQDN, 1);
> +result->fqdn = g_strdup(hostname);
> +}
> +return result;
> +}
> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
> index a02dbf2..0a2c0a4 100644
> --- a/qga/qapi-schema.json
> +++ b/qga/qapi-schema.json
> @@ -1042,3 +1042,28 @@
>'data':{ 'path': 'str', '*arg': ['str'], '*env': ['str'],
> '*input-data': 'str', '*capture-output': 'bool' },
>'returns': 'GuestExec' }
> +
> +
> +
> +##
> +# @GuestFQDN:
> +# @fqdn: Fully qualified domain name of the guest OS
> +#
> +# Since: 2.10
> +##
> +{ 'struct': 'GuestFQDN',
> +  'data':   { 'fqdn': 'str' } }
> +
> +
> +##
> +# @guest-get-fqdn:
> +#
> +# Request the FQDN (Fully Qualified Domain Name) of the guest operating
> system
> +#
> +# Returns: FQDN on success
> +#
> +# Since: 2.10
> +##
> +{ 'command': 'guest-get-fqdn',
> +  'returns': 'GuestFQDN' }
> +
> --
> 2.9.3
>
>
>


-- 
Respectfully,
*Sameeh Jubran*
*Linkedin *
*Software Engineer @ Daynix .*

Re: [Qemu-devel] [PATCH 16/51] ram: Move iterations into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:09PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

Another comment not directly related to this patch...

> ---
>  migration/ram.c | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 9fa3bd7..690ca8f 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -170,6 +170,8 @@ struct RAMState {
>  uint64_t zero_pages;
>  /* number of normal transferred pages */
>  uint64_t norm_pages;
> +/* Iterations since start */
> +uint64_t iterations;
>  };
>  typedef struct RAMState RAMState;
>  
> @@ -177,7 +179,6 @@ static RAMState ram_state;
>  
>  /* accounting for migration statistics */
>  typedef struct AccountingInfo {
> -uint64_t iterations;
>  uint64_t xbzrle_bytes;
>  uint64_t xbzrle_pages;
>  uint64_t xbzrle_cache_miss;
> @@ -693,13 +694,13 @@ static void migration_bitmap_sync(RAMState *rs)
>  }
>  
>  if (migrate_use_xbzrle()) {
> -if (rs->iterations_prev != acct_info.iterations) {
> +if (rs->iterations_prev != rs->iterations) {
>  acct_info.xbzrle_cache_miss_rate =
> (double)(acct_info.xbzrle_cache_miss -
>  rs->xbzrle_cache_miss_prev) /
> -   (acct_info.iterations - rs->iterations_prev);
> +   (rs->iterations - rs->iterations_prev);

Here we are calculating cache miss rate by xbzrle_cache_miss and
iterations. However looks like xbzrle_cache_miss is counted per guest
page (in save_xbzrle_page()) while the iteration count is per host
page (in ram_save_iterate()). Then, what if host page size not equals
to guest page size? E.g., when host uses 2M huge pages, host page size
is 2M, while guest page size can be 4K?

Thanks,

-- peterx

Re: [Qemu-devel] [PATCH 17/51] ram: Move xbzrle_bytes into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:10PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] GSoC 2017 Proposal: TCG performance enhancements

2017-03-27 Thread Richard Henderson


On 03/26/2017 02:52 AM, Pranith Kumar wrote:

Hello,

With MTTCG code now merged in mainline, I tried to see if we are able to run
x86 SMP guests on ARM64 hosts. For this I tried running a windows XP guest on
a dragonboard 410c which has 1GB RAM. Since x86 has a strong memory model
whereas ARM64 is a weak memory model, I added a patch to generate fence
instructions for every guest memory access. After some minor fixes, I was
successfully able to boot a 4 core guest all the way to the desktop (albeit
with a 1GB backing swap). However the performance is severely
limited and the guest is barely usable. Based on my observations, I think
there are some easily implementable additions we can make to improve the
performance of TCG in general and on ARM64 in particular. I propose to do the
following as part of Google Summer of Code 2017.


* Implement jump-to-register instruction on ARM64 to overcome the 128MB
  translation cache size limit.

  The translation cache size for an ARM64 host is currently limited to 128
  MB. This limitation is imposed by utilizing a branch instruction which
  encodes the jump offset and is limited by the number of bits it can use for
  the range of the offset. The performance impact by this limitation is severe
  and can be observed when you try to run large programs like a browser in the
  guest. The cache is flushed several times before the browser starts and the
  performance is not satisfactory. This limitation can be overcome by
  generating a branch-to-register instruction and utilizing that when the
  destination address is outside the range of what can be encoded in current
  branch instruction.


128MB is really quite large.  I doubt doubling the cache size will really help 
that much.  That said, it's really quite trivial to make this change, if you'd 
like to experiment.


FWIW, I rarely see TB flushes for alpha -- not one during an entire gcc 
bootstrap.  Now, this is usually with 4GB ram, which by default implies 512MB 
translation cache.  But it does mean that, given an ideal guest, TB flushes 
should not dominate anything at all.


If you're seeing multiple flushes during the startup of a browser, your guest 
must be flushing for other reasons than the code_gen_buffer being full.




* Implement an LRU translation block code cache.

  In the current TCG design, when the translation cache fills up, we flush all
  the translated blocks (TBs) to free up space. We can improve this situation
  by not flushing the TBs that were recently used i.e., by implementing an LRU
  policy for freeing the blocks. This should avoid the re-translation overhead
  for frequently used blocks and improve performance.


The major problem you'll encounter is how to manage allocation in this case.

The current mechanism means that it is trivial to not know how much code is 
going to be generated for a given set of TCG opcodes.  When we reach the 
high-water mark, we've run out of room.  We then flush everything and start 
over at the beginning of the buffer.


If you manage the cache with an allocator, you'll need to know in advance how 
much code is going to be generated.  This is going to require that you either 
(1) severely over-estimate the space required (qemu_ld generates lots more code 
than just add), (2) severely increase the time required, by generating code 
twice, or (3) somewhat increase the time required, by generating 
position-independent code into an external buffer and copying it into place 
after determining the size.




* Avoid consistency overhead for strong memory model guests by generating
  load-acquire and store-release instructions.


This is probably required for good performance of the user-only code path, but 
considering the number of other insns required for the system tlb lookup, I'm 
surprised that the memory barrier matters.



Please let me know if you have any comments or suggestions. Also please let me
know if there are other enhancements that are easily implementable to increase
TCG performance as part of this project or otherwise.


I think it would be interesting to place TranslationBlock structures into the 
same memory block as code_gen_buffer, immediately before the code that 
implements the TB.


Consider what happens within every TB:

(1) We have one or more references to the TB address, via exit_tb.

For aarch64, this will normally require 2-4 insns.

# alpha-softmmu
0x7f75152114:  d0ffb320  adrp x0, #-0x99a000 (addr 0x7f747b8000)
0x7f75152118:  91004c00  add x0, x0, #0x13 (19)
0x7f7515211c:  17c3  b #-0xf4 (addr 0x7f75152028)

# alpha-linux-user
0x00569500:  d2800260  mov x0, #0x13
0x00569504:  f2b59820  movk x0, #0xacc1, lsl #16
0x00569508:  f2c00fe0  movk x0, #0x7f, lsl #32
0x0056950c:  17df  b #-0x84 (addr 0x569488)

We would reduce this to one insn, always, if the TB were close by, since the 
ADR instruction has a range of 1MB.



(2) We have zero to two references to a linked TB, via goto_tb.

Your stated

Re: [Qemu-devel] [[RFC][Bugfix:isapc lapic state]] Bugfix: isapc:apic_state ?Start QEMU with "qemu-system-x86_64 -nographic -M isapc -serial none -monitor stdio" ?and enter "info lapic" at the monito

2017-03-27 Thread Stefan Hajnoczi

On Mon, Mar 27, 2017 at 03:49:13PM +0530, Tejaswini wrote:

Thanks for the patch!

Please CC the maintainers of target/i386/helper.c:

  $ scripts/get_maintainer.pl -f target/i386/helper.c
  Paolo Bonzini  (maintainer:X86)
  Richard Henderson  (maintainer:X86)
  Eduardo Habkost  (maintainer:X86)

Please shorten the subject line:

  [[RFC][Bugfix:isapc lapic state]] Bugfix: isapc:apic_state ?Start QEMU with 
"qemu-system-x86_64 -nographic -M isapc -serial none -monitor stdio" ?and enter 
"info lapic" at the monitor prompt ⇒ Segmentation fault

I suggest:

  [PATCH] target-i386: fix "info lapic" segfault on isapc

The commit message (subject line) should be a short summary of the
patch.  Typically this is below 80 or even 72 characters.  Details go in
the commit description (email body before '---'), which can be
arbitrarily long.  Please move the command-line for reproducing the
segmentation fault into the commit description.

Please don't add extra tags to the Subject line unless you're sure they
are commonly used in QEMU (e.g. "[Bugfix:isapc lapic state]] Bugfix:
isapc:apic_state").

> From: Tejaswini Poluri 
> 
>   The error occurs for only isapc machine type as it doesn't have apic 
> state
>   The cpu->apic_state of isapc is NULL. Hence added null pointer check in
>   x86_cpu_dump_local_apic_state()

Please remove the indentation.

> 
> Signed-off-by: Tejaswini Poluri 
> ---
>  target/i386/helper.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/target/i386/helper.c b/target/i386/helper.c
> index e2af340..1a8e3dd 100644
> --- a/target/i386/helper.c
> +++ b/target/i386/helper.c
> @@ -327,7 +327,10 @@ void x86_cpu_dump_local_apic_state(CPUState *cs, FILE *f,
>  X86CPU *cpu = X86_CPU(cs);
>  APICCommonState *s = APIC_COMMON(cpu->apic_state);
>  uint32_t *lvt = s->lvt;
> -
> +if (!s) {
> +cpu_fprintf(f, "apic state not available\n");
> +return;
> +}

Did you test this code?

The dereference one line above will still cause a segfault:

  uint32_t *lvt = s->lvt;  <--- s is NULL!

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 18/51] ram: Move xbzrle_pages into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:11PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [PATCH 19/51] ram: Move xbzrle_cache_miss into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:12PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [PATCH 20/51] ram: Move xbzrle_cache_miss_rate into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:13PM +0100, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [PATCH 21/51] ram: Move xbzrle_overflows into RAMState

2017-03-27 Thread Peter Xu

On Thu, Mar 23, 2017 at 09:45:14PM +0100, Juan Quintela wrote:
> Once there, remove the now unused AccountingInfo struct and var.
> 
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- peterx

Re: [Qemu-devel] [PATCH for-2.9] i386: Don't override -cpu options on -cpu host/max

2017-03-27 Thread Igor Mammedov

On Fri, 24 Mar 2017 17:36:45 -0300
Eduardo Habkost  wrote:

> The existing code for "host" and "max" CPU models overrides every
> single feature in the CPU object at realize time, even the ones
> that were explicitly enabled or disabled by the user using
> "feat=on" or "feat=off", while features set using +feat/-feat are
> kept.
> 
> This means "-cpu host,+invtsc" works as expected, while
> "-cpu host,invtsc=on" doesn't.
> 
> This was a known bug, already documented in a comment inside
> x86_cpu_expand_features(). What makes this bug worse now is that
> libvirt 3.0.0 and newer now use "feat=on|off" instead of
> +feat/-feat when it detects a QEMU version that supports it (see
> libvirt commit d47db7b16dd5422c7e487c8c8ee5b181a2f9cd66).
> 
> Change the feature property getter/setter to set a
> env->user_features field, to keep track of features that were
> explicitly changed using QOM properties. Then make the
> max_features code not override user features when handling "-cpu
> host" and "-cpu max".
> 
> This will also allow us to remove the plus_features/minus_features
> hack in the future, but I plan to do that after 2.9.0 is
> released.
> 
> Reported-by: Jiri Denemark 
> Signed-off-by: Eduardo Habkost 
> ---
>  target/i386/cpu.h |  2 ++
>  target/i386/cpu.c | 33 +
>  2 files changed, 23 insertions(+), 12 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 07401ad9fe..c4602ca80d 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1147,6 +1147,8 @@ typedef struct CPUX86State {
>  uint32_t cpuid_vendor3;
>  uint32_t cpuid_version;
>  FeatureWordArray features;
> +/* Features that were explicitly enabled/disabled */
> +FeatureWordArray user_features;
>  uint32_t cpuid_model[12];
>  
>  /* MTRRs */
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 7aa762245a..5f2addbf75 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -3373,15 +3373,20 @@ static void x86_cpu_expand_features(X86CPU *cpu, 
> Error **errp)
>  GList *l;
>  Error *local_err = NULL;
>  
> -/*TODO: cpu->max_features incorrectly overwrites features
> - * set using "feat=on|off". Once we fix this, we can convert
> +/*TODO: Now cpu->max_features doesn't overwrite features
> + * set using QOM properties, and we can convert
>   * plus_features & minus_features to global properties
>   * inside x86_cpu_parse_featurestr() too.
>   */
>  if (cpu->max_features) {
>  for (w = 0; w < FEATURE_WORDS; w++) {
> -env->features[w] =
> -x86_cpu_get_supported_feature_word(w, cpu->migratable);
> +/* Override only features that weren't not set explicitly
> + * by the user.
s/not// or if it was intended rephrase to avoid double negation.

> + */
> +env->features[w] &= env->user_features[w];
it probably should be assert to catch features not set via property,
which shouldn't be there in the first place, I don't like silent
filtering that happens here.

> +env->features[w] |=
> +x86_cpu_get_supported_feature_word(w, cpu->migratable) &
> +~env->user_features[w];
>  }
>  }
>  
> @@ -3692,15 +3697,17 @@ static void x86_cpu_unrealizefn(DeviceState *dev, 
> Error **errp)
>  }
>  
>  typedef struct BitProperty {
> -uint32_t *ptr;
> +FeatureWord w;
it would be better if this refactoring and related changes
were in a separate patch, something along lines:
 "x86/cpu: use FeatureWord instead of keeping a pointer to cpuid leaf"

>  uint32_t mask;
>  } BitProperty;
>  
>  static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name,
>   void *opaque, Error **errp)
>  {
> +X86CPU *cpu = X86_CPU(obj);
>  BitProperty *fp = opaque;
> -bool value = (*fp->ptr & fp->mask) == fp->mask;
> +uint32_t f = cpu->env.features[fp->w];
> +bool value = (f & fp->mask) == fp->mask;
>  visit_type_bool(v, name, &value, errp);
>  }
>  
> @@ -3708,6 +3715,7 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor 
> *v, const char *name,
>   void *opaque, Error **errp)
>  {
>  DeviceState *dev = DEVICE(obj);
> +X86CPU *cpu = X86_CPU(obj);
>  BitProperty *fp = opaque;
>  Error *local_err = NULL;
>  bool value;
> @@ -3724,10 +3732,11 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor 
> *v, const char *name,
>  }
>  
>  if (value) {
> -*fp->ptr |= fp->mask;
> +cpu->env.features[fp->w] |= fp->mask;
>  } else {
> -*fp->ptr &= ~fp->mask;
> +cpu->env.features[fp->w] &= ~fp->mask;
>  }
> +cpu->env.user_features[fp->w] |= fp->mask;
>  }
>  
>  static void x86_cpu_release_bit_prop(Object *obj, const char *name,
> @@ -3745,7 +3754,7 @@ static void x86_cpu_release_bit_prop(Object *obj, const 
> char *name,
>   */
>  static void x86_cp

Re: [Qemu-devel] [[RFC]PATCH:hw/sd:sd_init()] hw/sd : modified the sd_init() function

2017-03-27 Thread Stefan Hajnoczi

On Mon, Mar 27, 2017 at 04:01:02PM +0530, Tejaswini wrote:
> From: Tejaswini Poluri 

Please shorten the subject line: "[PATCH] hw/sd: simplify sd_init() prototype"

> @@ -573,16 +573,19 @@ SDState *sd_init(BlockBackend *blk, bool is_spi)
>  qdev_prop_set_drive(dev, "drive", blk, &err);
>  if (err) {
>  error_report("sd_init failed: %s", error_get_pretty(err));
> -return NULL;
> +return -1;
>  }
>  qdev_prop_set_bit(dev, "spi", is_spi);
>  object_property_set_bool(obj, true, "realized", &err);
>  if (err) {
>  error_report("sd_init failed: %s", error_get_pretty(err));
> -return NULL;
> +return -1;
>  }
> -
> -return SD_CARD(dev);
> +sd_state = SD_CARD(dev);

The caller will not see the new value of sd_state.  In C arguments are
passed by value.  That means they are local variables inside the
function and do not affect the caller.

I have CCed Paolo Bonzini, who posted this task.  Maybe he can explain
what he meant by "Include SDState by value instead of allocating it in
sd_init (hw/sd/)".

> +if (!sd_state) {
> + return -1;
> + }

QEMU use 4 space indentation.  Please do not use tabs.

signature.asc
Description: PGP signature

Re: [Qemu-devel] slirp + ipxe + ipv6 dns issue

2017-03-27 Thread SAL

Hello Samuel,

  I tested your patch and solved my problem. Thank you.
My version: qemu-kvm-2.7.1-4.fc25.x86_64 + your patch.
Original qemu-kvm-2.7.1-4.fc25.x86_64 didn't work.

Cole, thanks for reporting. Consider applying this to Fedora package.

I am not a member of qemu-devel, so this mail will be probably rejected
from this list. Please forward it if requested.

SAL

On Sun, Mar 26, 2017 at 08:39:08PM +0200, Samuel Thibault wrote:
> Hello,
> 
> Cole Robinson, on ven. 24 mars 2017 21:17:43 -0400, wrote:
> > I bisected to this commit:
> > 
> > slirp: Add RDNSS advertisement
> 
> Mmm, I see.  Could you try the attached patch to confirm that it fixes
> the issue for you too?
> 
> Samuel

> diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c
> index 298a48dd25..00183e5945 100644
> --- a/slirp/ip6_icmp.c
> +++ b/slirp/ip6_icmp.c
> @@ -143,17 +143,10 @@ void ndp_send_ra(Slirp *slirp)
>  /* Build IPv6 packet */
>  struct mbuf *t = m_get(slirp);
>  struct ip6 *rip = mtod(t, struct ip6 *);
> +size_t pl_size = 0;
>  rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR;
>  rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST;
>  rip->ip_nh = IPPROTO_ICMPV6;
> -rip->ip_pl = htons(ICMP6_NDP_RA_MINLEN
> -+ NDPOPT_LINKLAYER_LEN
> -+ NDPOPT_PREFIXINFO_LEN
> -#ifndef _WIN32
> -+ NDPOPT_RDNSS_LEN
> -#endif
> -);
> -t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl);
>  
>  /* Build ICMPv6 packet */
>  t->m_data += sizeof(struct ip6);
> @@ -171,6 +164,7 @@ void ndp_send_ra(Slirp *slirp)
>  ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime);
>  ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime);
>  t->m_data += ICMP6_NDP_RA_MINLEN;
> +pl_size += ICMP6_NDP_RA_MINLEN;
>  
>  /* Source link-layer address (NDP option) */
>  struct ndpopt *opt = mtod(t, struct ndpopt *);
> @@ -178,6 +172,7 @@ void ndp_send_ra(Slirp *slirp)
>  opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8;
>  in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer);
>  t->m_data += NDPOPT_LINKLAYER_LEN;
> +pl_size += NDPOPT_LINKLAYER_LEN;
>  
>  /* Prefix information (NDP option) */
>  struct ndpopt *opt2 = mtod(t, struct ndpopt *);
> @@ -192,27 +187,30 @@ void ndp_send_ra(Slirp *slirp)
>  opt2->ndpopt_prefixinfo.reserved2 = 0;
>  opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6;
>  t->m_data += NDPOPT_PREFIXINFO_LEN;
> +pl_size += NDPOPT_PREFIXINFO_LEN;
>  
> -#ifndef _WIN32
>  /* Prefix information (NDP option) */
> -/* disabled for windows for now, until get_dns6_addr is implemented */
> -struct ndpopt *opt3 = mtod(t, struct ndpopt *);
> -opt3->ndpopt_type = NDPOPT_RDNSS;
> -opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8;
> -opt3->ndpopt_rdnss.reserved = 0;
> -opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval);
> -opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6;
> -t->m_data += NDPOPT_RDNSS_LEN;
> -#endif
> +{
> +struct in6_addr addr;
> +uint32_t scope_id;
> +if (get_dns6_addr(&addr, &scope_id) >= 0) {
> +/* Host system does have an IPv6 DNS server, announce our proxy. 
>  */
> +struct ndpopt *opt3 = mtod(t, struct ndpopt *);
> +opt3->ndpopt_type = NDPOPT_RDNSS;
> +opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8;
> +opt3->ndpopt_rdnss.reserved = 0;
> +opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval);
> +opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6;
> +t->m_data += NDPOPT_RDNSS_LEN;
> +pl_size += NDPOPT_RDNSS_LEN;
> +}
> +}
> +
> +rip->ip_pl = htons(pl_size);
> +t->m_data -= sizeof(struct ip6) + pl_size;
> +t->m_len = sizeof(struct ip6) + pl_size;
>  
>  /* ICMPv6 Checksum */
> -#ifndef _WIN32
> -t->m_data -= NDPOPT_RDNSS_LEN;
> -#endif
> -t->m_data -= NDPOPT_PREFIXINFO_LEN;
> -t->m_data -= NDPOPT_LINKLAYER_LEN;
> -t->m_data -= ICMP6_NDP_RA_MINLEN;
> -t->m_data -= sizeof(struct ip6);
>  ricmp->icmp6_cksum = ip6_cksum(t);
>  
>  ip6_output(NULL, t, 0);

Re: [Qemu-devel] [PATCH v1] qga: Add 'guest-get-fqdn' command

2017-03-27 Thread Vinzenz Feenstra


> On Mar 27, 2017, at 12:44 PM, Sameeh Jubran  wrote:
> 
> 
> 
> On Thu, Mar 23, 2017 at 9:51 PM, Vinzenz 'evilissimo' Feenstra 
> mailto:vfeen...@redhat.com>> wrote:
> From: Vinzenz Feenstra mailto:vfeen...@redhat.com>>
> 
> Retrieving the guest OS fully qualified domain name (FQDN) is a very
> useful feature for virtual management systems. This information can help
> to have more user friendly VM access details, instead of an IP there
> would be the FQDN. Also the FQDN reported can be used to have automated
> checks for valid SSL certificates.
> 
> virsh # qemu-agent-command F25 '{ "execute": "guest-get-fqdn" }'
> {"return":{"fqdn":"F25.lab.evilissimo.net "}}
> 
> Signed-off-by: Vinzenz Feenstra  >
> ---
>  qga/commands.c   | 11 +++
>  qga/qapi-schema.json | 25 +
>  2 files changed, 36 insertions(+)
> 
> diff --git a/qga/commands.c b/qga/commands.c
> index 4d92946..61577af 100644
> --- a/qga/commands.c
> +++ b/qga/commands.c
> @@ -499,3 +499,14 @@ int ga_parse_whence(GuestFileWhence *whence, Error 
> **errp)
>  error_setg(errp, "invalid whence code %"PRId64, whence->u.value);
>  return -1;
>  }
> +
> +GuestFQDN *qmp_guest_get_fqdn(Error **err)
> +{
> +GuestFQDN *result = NULL;
> +gchar const *hostname = g_get_host_name();
> According to glib documentation on "g_get_host_name" function:
> "The returned name is not necessarily a fully-qualified domain name, or even 
> present in DNS or some other name service at all. It need not even be unique 
> on your local network or site, but usually it is."
> I think the command name should be changed to something like 
> "guest_get_machine_name" and not fqdn as this is not always true!

I prefer the `guest_get_fqdn` name over the machine name, because on best 
effort this is, what
this method is trying to achieve. The machine name could be without domain 
part, and yes, this
could be what it returns, again it’s on best effort to get a FQDN.

> 
> Moreover the documentation states that:
> "If no name can be determined, a default fixed string "localhost" is 
> returned."
> I think we should handle this case and return and informative error.

Having a fallback which returns ‘localhost' is pretty much a good thing in my 
opinion.
We should document it, yes, but I wouldn’t report an error here. Especially 
what do you want
to report? Returning ‘localhost’ is a value and a report of an error at the 
same time.

I will leave this up for discussion and will follow up with an improved 
documentation of the
functionality.

> +if (hostname != NULL) {
> +result = g_new0(GuestFQDN, 1);
> +result->fqdn = g_strdup(hostname);
> +}
> +return result;
> +}
> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
> index a02dbf2..0a2c0a4 100644
> --- a/qga/qapi-schema.json
> +++ b/qga/qapi-schema.json
> @@ -1042,3 +1042,28 @@
>'data':{ 'path': 'str', '*arg': ['str'], '*env': ['str'],
> '*input-data': 'str', '*capture-output': 'bool' },
>'returns': 'GuestExec' }
> +
> +
> +
> +##
> +# @GuestFQDN:
> +# @fqdn: Fully qualified domain name of the guest OS
> +#
> +# Since: 2.10
> +##
> +{ 'struct': 'GuestFQDN',
> +  'data':   { 'fqdn': 'str' } }
> +
> +
> +##
> +# @guest-get-fqdn:
> +#
> +# Request the FQDN (Fully Qualified Domain Name) of the guest operating 
> system
> +#
> +# Returns: FQDN on success
> +#
> +# Since: 2.10
> +##
> +{ 'command': 'guest-get-fqdn',
> +  'returns': 'GuestFQDN' }
> +
> --
> 2.9.3
> 
> 
> 
> 
> 
> -- 
> Respectfully,
> Sameeh Jubran
> Linkedin 
> Software Engineer @ Daynix .

Re: [Qemu-devel] [PATCH v2] Fix input-linux reading from device

2017-03-27 Thread Javier Celaya

Hi

Javi

2017-03-27 12:11 GMT+02:00 Gerd Hoffmann :

> On So, 2017-03-26 at 11:53 +0200, Javier Celaya wrote:
> > The evdev devices in input-linux.c are read in blocks of one whole
> > event. If there are not enough bytes available, they are discarded,
> > instead of being kept for the next read operation. This results in
> > lost events, of even non-working devices.
>
> Have you seen this happening in practice?
>

Yes, quite frequently, like once per hour. Totally destroys a good gaming
session :)
The curious thing is, the mouse stops working, but in the keyboard I see
some missing keyup events (the keys get stuck), but then it recovers.


>
> > +struct input_event event;
> > +int to_be_read;
>
> I'd suggest to store offset (i.e. bytes already read) instead, should
> make the whole logic a bit simpler and easier to read.
>

OK


>
> > +} else if (rc > 0){
>
> checkpatch.pl complains here:
> ERROR: space required before the open brace '{'
>

Oops, missed that


>
> cheers,
>   Gerd
>
>

Re: [Qemu-devel] GSoC 2017 Proposal: TCG performance enhancements

2017-03-27 Thread Paolo Bonzini

On 25/03/2017 17:52, Pranith Kumar wrote:
> * Implement an LRU translation block code cache.
> 
>   In the current TCG design, when the translation cache fills up, we flush all
>   the translated blocks (TBs) to free up space. We can improve this situation
>   by not flushing the TBs that were recently used i.e., by implementing an LRU
>   policy for freeing the blocks. This should avoid the re-translation overhead
>   for frequently used blocks and improve performance.

IIRC, Emilio measured one flush every roughly 10 seconds with 128 MB
cache in system emulation mode---and "never" is a pretty accurate
estimate for user-mode emulation.  This means that a really hot block
would be retranslated very quickly.

Paolo

Re: [Qemu-devel] [virtio-dev] [PATCH] virtio-blk: add DISCARD support to virtio-blk driver

2017-03-27 Thread Paolo Bonzini



On 28/03/2017 10:39, Changpeng Liu wrote:
> + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> + q->limits.discard_zeroes_data = 0;

Maybe you could use another feature bit to populate discard_zeroes_data.

Paolo

> + q->limits.discard_alignment = blk_size;
> + q->limits.discard_granularity = blk_size;
> + blk_queue_max_discard_sectors(q, UINT_MAX);
> + blk_queue_max_discard_segments(q, 1);
> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
> + }
> +

Re: [Qemu-devel] [PATCH v2 0/9] SMMUv3 Emulation support

2017-03-27 Thread Edgar E. Iglesias

On Wed, Mar 08, 2017 at 06:46:13PM +0100, Auger Eric wrote:
> Hi,
> On 22/08/2016 18:17, Prem Mallappa wrote:
> > v1 -> v2:
> > - Adopted review comments from Eric Auger
> > - Make SMMU_DPRINTF to internally call qemu_log
> > (since translation requests are too many, we need control
> >  on the type of log we want)
> > - SMMUTransCfg modified to suite simplicity
> > - Change RegInfo to uint64 register array
> > - Code cleanup
> > - Test cleanups
> > - Reshuffled patches
> > 
> > RFC -> v1:
> > - As per SMMUv3 spec 16.0 (only is_ste_consistant() is noticeable)
> > - Reworked register access/update logic
> > - Factored out translation code for
> > - single point bug fix
> > - sharing/removal in future
> > - (optional) Unit tests added, with PCI test device
> > - S1 with 4k/64k, S1+S2 with 4k/64k
> > - (S1 or S2) only can be verified by Linux 4.7 driver
> > - (optional) Priliminary ACPI support
> > 
> > RFC:
> > - Implements SMMUv3 spec 11.0
> > - Supported for PCIe devices, 
> > - Command Queue and Event Queue supported
> > - LPAE only, S1 is supported and Tested, S2 not tested
> > - BE mode Translation not supported
> > - IRQ support (legacy, no MSI)
> > - Tested with DPDK and e1000 
> > 
> > Patch 1: Add new log type for IOMMU transactions
> > 
> > Patch 2: Adds support in virt.c to create both SMMUv3 device and dts entries
> > 
> > Patch 2: Adds SMMUv3 model to QEMU
> > Multiple files, big ones, translate functionality is split across to
> > accomodate SMMUv2 model, and to remove when common translation feature
> > (if) becomes available.
> > 
> > Patch 3: Adds SMMU build support
> > 
> > Patch 4: Some devicetree function to add support for SMMU's multiple 
> > interrupt
> >  assignment with names
> > 
> > << optional patches >>
> > Optional patches are posted for completeness or for those who wants to test.
> > 
> > Patch 5: A simple PCI device which does DMA from 'src' to 'dst' given
> >  src_addr, dst_addr and size, and is used by unit test, uses
> >  pci_dma_read and pci_dma_write in a crude way but serves the purpose.
> > 
> > Patch 6: Current libqos PCI helpers are x86 only, this addes a generic 
> > interface
> > 
> > Patch 7: Unit tests for SMMU, 
> > - initializes SMMU device 
> > - initializes Test device
> > - allocates page tables 1:1 mapping va == pa
> > - allocates STE/CD accordingly for S1, S2, S1+S2
> > - initiates DMA via PCI test device
> > - verifies transfered data
> > 
> > Patch 8: Added ACPI IORT tables, was needed for internal project purpose, 
> > but 
> >  posting here for anyone looking for testing ACPI on ARM platforms.
> >  (P.S: Linux side IORT patches are WIP)
> > 
> > Repo:
> > https://github.com/pmallappa/qemu/tree/upstream/smmuv3/v2
> > 
> > To Test:
> > $ make tests/smmuv3-test
> > $ QTEST_QEMU_BINARY=aarch64-softmmu/qemu-system-aarch64 tests/smmuv3-test
> > << expect lot of prints >>
> > 
> > Any comments welcome..
> As Prem was forced to stop his activity on this series, I volunteer to
> pursue his work. Prior to starting the work, I just would like to check
> nobody works on this already or objects.
> 
> If not, I intend to rebase and will do my utmost to align, when sensible
> with what was done on Xilinx vsmmuv2/intel iommu.


That would be awesome! Sorry for the late reply.

I had some comments on the PCI integration that Prem did but I'm not sure
it matters now if you're going to go through the work. We can do a review
of your future patches instead.

Cheers,
Edgar

Re: [Qemu-devel] [PATCH v2] KVM: pci-assign: do not map smm memory slot pages

2017-03-27 Thread Paolo Bonzini



On 27/03/2017 09:21, Herongguang (Stephen) wrote:
> From f6f0ee6831488bef7af841cb86f3d85a04848fe5 Mon Sep 17 00:00:00 2001
> From: herongguang 
> Date: Mon, 27 Mar 2017 15:08:59 +0800
> Subject: [PATCH] KVM: pci-assign: do not map smm memory slot pages
>  in vt-d page table
> 
> or VM memory are not put thus leaked in kvm_iommu_unmap_memslots() when
> destroy VM.
> 
> This is consistent with current vfio implementation.
> ---
>  virt/kvm/kvm_main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 482612b..9018d06 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1052,7 +1052,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   * changes) is disallowed above, so any other attribute changes
> getting
>   * here can be skipped.
>   */
> -if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
> +if ((as_id == 0) && ((change == KVM_MR_CREATE) || (change ==
> KVM_MR_MOVE))) {
>  r = kvm_iommu_map_pages(kvm, &new);
>  return r;
>  }

This makes more sense. :)

Applied to kvm/master, thanks.

Paolo

Re: [Qemu-devel] [PATCH v3 13/34] tcg: Add atomic helpers

2017-03-27 Thread Nikunj A Dadhania

Alex Bennée  writes:

> Nikunj A Dadhania  writes:
>
>> Richard Henderson  writes:
>>
>>> On 09/12/2016 06:47 AM, Alex Bennée wrote:
> > +/* Notice an IO access, or a notdirty page.  */
> > +if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> > +/* There's really nothing that can be done to
> > +   support this apart from stop-the-world.  */
> > +goto stop_the_world;
 We are also triggering on TLB_NOTDIRTY here in the case where a
 conditional write is the first write to a page. I don't know if a
 stop_the_world is required at this point but we will need to ensure we
 clear bits as notdirty_mem_write() does.

>>>
>>> You're quite right that we could probably special-case TLB_NOTDIRTY here 
>>> such
>>> that (1) we needn't leave the cpu loop, and (2) needn't utilize the actual
>>> "write" part of notdirty_mem_write; just set the bits then fall through to 
>>> the
>>> actual atomic instruction below.
>>
>> I do hit this case with ppc64, where I see that its the first write to
>> the page and it exits from this every time, causing the kernel to print
>> soft-lockups.
>>
>> Can we add the special case here for NOTDIRTY and set the page as dirty
>> and return successfully?
>
> Does the atomic step fall-back not work for you?

Looked further and I do see that EXCP_ATOMIC does get executed in
qemu_tcg_cpu_thread_fn(), but I am not sure what is going wrong there.

Following snippet fixes the issue for me:

diff --git a/cputlb.c b/cputlb.c
index f5d056c..743776a 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -930,7 +930,13 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 tlb_addr = tlbe->addr_write;
 }
 
-/* Notice an IO access, or a notdirty page.  */
+/* Check notdirty */
+if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
+tlb_set_dirty(ENV_GET_CPU(env), addr);
+tlb_addr = tlb_addr & ~TLB_NOTDIRTY;
+}
+
+/* Notice an IO access  */
 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
 /* There's really nothing that can be done to
support this apart from stop-the-world.  */

Regards
Nikunj

Re: [Qemu-devel] What's the next QEMU version after 2.9 ? (or: when is a good point in time to get rid of old interfaces)

2017-03-27 Thread Stefan Hajnoczi

On Mon, Mar 27, 2017 at 10:06:09AM +0200, Thomas Huth wrote:
> On 24.03.2017 23:10, John Snow wrote:
> > 
> > 
> > On 03/08/2017 03:26 AM, Thomas Huth wrote:
> >>
> >>  Hi everybody,
> >>
> >> what will be the next version of QEMU after 2.9? Will we go for a 2.10
> >> (as I've seen it mentioned a couple of times on the mailing list
> >> already), or do we dare to switch to 3.0 instead?
> >>
> >> I personally dislike two-digit minor version numbers like 2.10 since the
> >> non-experienced users sometimes mix it up with 2.1 ... and there have
> >> been a couple of new cool features in the past releases that would
> >> justify a 3.0 now, too, I think.
> >>
> >> But anyway, the more important thing that keeps me concerned is: Someone
> >>  once told me that we should get rid of old parameters and interfaces
> >> (like HMP commands) primarily only when we're changing to a new major
> >> version number. As you all know, QEMU has a lot of legacy options, which
> >> are likely rather confusing than helpful for the new users nowadays,
> >> e.g. things like the "-net channel" option (which is fortunately even
> >> hardly documented), but maybe also even the whole vlan/hub concept in
> >> the net code, or legacy parameters like "-usbdevice". If we switch to
> >> version 3.0, could we agree to remove at least some of them?
> >>
> >>  Thomas
> >>
> > 
> > As others have stated, we need a few releases to deprecate things first.
> > 
> > Maybe we should develop a serious plan to develop some of our legacy
> > interfaces first.
> > 
> > Maybe 2.10 can introduce a list of things we want to deprecate,
> > 2.11 can be the transition release,
> > and then 3.0 can cut the cord and free of us our terrible burden?
> > 
> > I have a list of things I want to axe...
> 
> I've started a Wiki page with such a list here:
> 
> http://wiki.qemu-project.org/Features/LegacyRemoval

It would be nice to get rid of the legacy -net option in 3.0.0.  I have
added it and included pointers to loose ends.  I think this is doable
but will require some time to achieve.

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [RFC PATCH v4 06/20] core: add new security-policy object

2017-03-27 Thread Stefan Hajnoczi

On Fri, Mar 24, 2017 at 02:42:47PM -0500, Brijesh Singh wrote:
> 
> On 03/24/2017 10:40 AM, Stefan Hajnoczi wrote:
> 
> > 
> > Having one security policy doesn't make sense to me.  As mentioned,
> > there are many different areas of QEMU that have security relevant
> > configuration.  They are all unrelated so combining them into one object
> > with vague parameter names like "debug" makes for a confusing
> > command-line interface.
> > 
> > If the object is called sev-security-policy then I'm happy.
> > 
> 
> Works for with me but one of the feedback was to use security-policy [1].
> IIRC, the main reason for using 'security-policy' instead of 
> 'sev-security-policy'
> was to add a layer of abstraction so that in future if other platforms 
> supports
> memory encryption in slightly different way then all we need to do is to 
> create
> new object without needing to add a new parameter in -machine.
> 
> [1] http://marc.info/?l=qemu-devel&m=147388592213137&w=2
> 
> How about using 'memory-encryption-id' instead of security-policy ? If user 
> wants
> to launch SEV guest then memory-encryption-id should be set SEV specific 
> object.
> Something like this:
> 
> -machine ..,memory-encryption-id=sev0 \
> -object sev-guest,id=sev,debug=off,launch=launch0 \
> -object sev-launch-info,id=launch0 \

Something like that sounds good.  I think "-id" typically isn't included
in the option name.

So just the following is fine:

 -machine memory-encryption=sev0 \
 ...

Other examples: -device virtio-blk-pci,drive=drive0 and -device
e1000,netdev=netdev0.

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v2 0/9] SMMUv3 Emulation support

2017-03-27 Thread Auger Eric

Hi Edgar,

removing Prem's address which is not valid anymore

On 27/03/2017 13:44, Edgar E. Iglesias wrote:
> On Wed, Mar 08, 2017 at 06:46:13PM +0100, Auger Eric wrote:
>> Hi,
>> On 22/08/2016 18:17, Prem Mallappa wrote:
>>> v1 -> v2:
>>> - Adopted review comments from Eric Auger
>>> - Make SMMU_DPRINTF to internally call qemu_log
>>> (since translation requests are too many, we need control
>>>  on the type of log we want)
>>> - SMMUTransCfg modified to suite simplicity
>>> - Change RegInfo to uint64 register array
>>> - Code cleanup
>>> - Test cleanups
>>> - Reshuffled patches
>>>
>>> RFC -> v1:
>>> - As per SMMUv3 spec 16.0 (only is_ste_consistant() is noticeable)
>>> - Reworked register access/update logic
>>> - Factored out translation code for
>>> - single point bug fix
>>> - sharing/removal in future
>>> - (optional) Unit tests added, with PCI test device
>>> - S1 with 4k/64k, S1+S2 with 4k/64k
>>> - (S1 or S2) only can be verified by Linux 4.7 driver
>>> - (optional) Priliminary ACPI support
>>>
>>> RFC:
>>> - Implements SMMUv3 spec 11.0
>>> - Supported for PCIe devices, 
>>> - Command Queue and Event Queue supported
>>> - LPAE only, S1 is supported and Tested, S2 not tested
>>> - BE mode Translation not supported
>>> - IRQ support (legacy, no MSI)
>>> - Tested with DPDK and e1000 
>>>
>>> Patch 1: Add new log type for IOMMU transactions
>>>
>>> Patch 2: Adds support in virt.c to create both SMMUv3 device and dts entries
>>>
>>> Patch 2: Adds SMMUv3 model to QEMU
>>> Multiple files, big ones, translate functionality is split across to
>>> accomodate SMMUv2 model, and to remove when common translation feature
>>> (if) becomes available.
>>>
>>> Patch 3: Adds SMMU build support
>>>
>>> Patch 4: Some devicetree function to add support for SMMU's multiple 
>>> interrupt
>>>  assignment with names
>>>
>>> << optional patches >>
>>> Optional patches are posted for completeness or for those who wants to test.
>>>
>>> Patch 5: A simple PCI device which does DMA from 'src' to 'dst' given
>>>  src_addr, dst_addr and size, and is used by unit test, uses
>>>  pci_dma_read and pci_dma_write in a crude way but serves the purpose.
>>>
>>> Patch 6: Current libqos PCI helpers are x86 only, this addes a generic 
>>> interface
>>>
>>> Patch 7: Unit tests for SMMU, 
>>> - initializes SMMU device 
>>> - initializes Test device
>>> - allocates page tables 1:1 mapping va == pa
>>> - allocates STE/CD accordingly for S1, S2, S1+S2
>>> - initiates DMA via PCI test device
>>> - verifies transfered data
>>>
>>> Patch 8: Added ACPI IORT tables, was needed for internal project purpose, 
>>> but 
>>>  posting here for anyone looking for testing ACPI on ARM platforms.
>>>  (P.S: Linux side IORT patches are WIP)
>>>
>>> Repo:
>>> https://github.com/pmallappa/qemu/tree/upstream/smmuv3/v2
>>>
>>> To Test:
>>> $ make tests/smmuv3-test
>>> $ QTEST_QEMU_BINARY=aarch64-softmmu/qemu-system-aarch64 tests/smmuv3-test
>>> << expect lot of prints >>
>>>
>>> Any comments welcome..
>> As Prem was forced to stop his activity on this series, I volunteer to
>> pursue his work. Prior to starting the work, I just would like to check
>> nobody works on this already or objects.
>>
>> If not, I intend to rebase and will do my utmost to align, when sensible
>> with what was done on Xilinx vsmmuv2/intel iommu.
> 
> 
> That would be awesome! Sorry for the late reply.
no worries.
> 
> I had some comments on the PCI integration that Prem did but I'm not sure
> it matters now if you're going to go through the work. We can do a review
> of your future patches instead.

I plan to send a respin this week. As I am still getting familiar with
the code, this may be mostly a rebase on 2.9 plus some cleanups. Then I
plan to dig into more details, add missing features, maybe reuse your
table translation code if you allow me to do so, share a base class. So
I will be delighted to share about enhancements and possible reuse of
your code too.

note: at the moment I chose not to use the register API since you said
that maybe for this IP it didn't bring much added-value.

Thanks

Eric
> 
> Cheers,
> Edgar
>

Re: [Qemu-devel] [PATCH v2] KVM: pci-assign: do not map smm memory slot pages

2017-03-27 Thread Paolo Bonzini



On 27/03/2017 09:21, Herongguang (Stephen) wrote:
> From f6f0ee6831488bef7af841cb86f3d85a04848fe5 Mon Sep 17 00:00:00 2001
> From: herongguang 
> Date: Mon, 27 Mar 2017 15:08:59 +0800
> Subject: [PATCH] KVM: pci-assign: do not map smm memory slot pages
>  in vt-d page table
> 
> or VM memory are not put thus leaked in kvm_iommu_unmap_memslots() when
> destroy VM.
> 
> This is consistent with current vfio implementation.

Oops, you forgot a Signed-off-by.

Paolo

> ---
>  virt/kvm/kvm_main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 482612b..9018d06 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1052,7 +1052,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   * changes) is disallowed above, so any other attribute changes
> getting
>   * here can be skipped.
>   */
> -if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
> +if ((as_id == 0) && ((change == KVM_MR_CREATE) || (change ==
> KVM_MR_MOVE))) {
>  r = kvm_iommu_map_pages(kvm, &new);
>  return r;
>  }

Re: [Qemu-devel] [PATCH v2 0/9] SMMUv3 Emulation support

2017-03-27 Thread Edgar E. Iglesias

On Mon, Mar 27, 2017 at 02:18:56PM +0200, Auger Eric wrote:
> Hi Edgar,
> 
> removing Prem's address which is not valid anymore
> 
> On 27/03/2017 13:44, Edgar E. Iglesias wrote:
> > On Wed, Mar 08, 2017 at 06:46:13PM +0100, Auger Eric wrote:
> >> Hi,
> >> On 22/08/2016 18:17, Prem Mallappa wrote:
> >>> v1 -> v2:
> >>>   - Adopted review comments from Eric Auger
> >>>   - Make SMMU_DPRINTF to internally call qemu_log
> >>>   (since translation requests are too many, we need control
> >>>on the type of log we want)
> >>>   - SMMUTransCfg modified to suite simplicity
> >>>   - Change RegInfo to uint64 register array
> >>>   - Code cleanup
> >>>   - Test cleanups
> >>>   - Reshuffled patches
> >>>
> >>> RFC -> v1:
> >>>   - As per SMMUv3 spec 16.0 (only is_ste_consistant() is noticeable)
> >>>   - Reworked register access/update logic
> >>>   - Factored out translation code for
> >>>   - single point bug fix
> >>>   - sharing/removal in future
> >>>   - (optional) Unit tests added, with PCI test device
> >>>   - S1 with 4k/64k, S1+S2 with 4k/64k
> >>>   - (S1 or S2) only can be verified by Linux 4.7 driver
> >>>   - (optional) Priliminary ACPI support
> >>>
> >>> RFC:
> >>>   - Implements SMMUv3 spec 11.0
> >>>   - Supported for PCIe devices, 
> >>>   - Command Queue and Event Queue supported
> >>>   - LPAE only, S1 is supported and Tested, S2 not tested
> >>>   - BE mode Translation not supported
> >>>   - IRQ support (legacy, no MSI)
> >>>   - Tested with DPDK and e1000 
> >>>
> >>> Patch 1: Add new log type for IOMMU transactions
> >>>
> >>> Patch 2: Adds support in virt.c to create both SMMUv3 device and dts 
> >>> entries
> >>>
> >>> Patch 2: Adds SMMUv3 model to QEMU
> >>>   Multiple files, big ones, translate functionality is split across to
> >>>   accomodate SMMUv2 model, and to remove when common translation feature
> >>>   (if) becomes available.
> >>>
> >>> Patch 3: Adds SMMU build support
> >>>
> >>> Patch 4: Some devicetree function to add support for SMMU's multiple 
> >>> interrupt
> >>>assignment with names
> >>>
> >>> << optional patches >>
> >>> Optional patches are posted for completeness or for those who wants to 
> >>> test.
> >>>
> >>> Patch 5: A simple PCI device which does DMA from 'src' to 'dst' given
> >>>src_addr, dst_addr and size, and is used by unit test, uses
> >>>pci_dma_read and pci_dma_write in a crude way but serves the purpose.
> >>>
> >>> Patch 6: Current libqos PCI helpers are x86 only, this addes a generic 
> >>> interface
> >>>
> >>> Patch 7: Unit tests for SMMU, 
> >>>   - initializes SMMU device 
> >>>   - initializes Test device
> >>>   - allocates page tables 1:1 mapping va == pa
> >>>   - allocates STE/CD accordingly for S1, S2, S1+S2
> >>>   - initiates DMA via PCI test device
> >>>   - verifies transfered data
> >>>
> >>> Patch 8: Added ACPI IORT tables, was needed for internal project purpose, 
> >>> but 
> >>>posting here for anyone looking for testing ACPI on ARM platforms.
> >>>(P.S: Linux side IORT patches are WIP)
> >>>
> >>> Repo:
> >>> https://github.com/pmallappa/qemu/tree/upstream/smmuv3/v2
> >>>
> >>> To Test:
> >>> $ make tests/smmuv3-test
> >>> $ QTEST_QEMU_BINARY=aarch64-softmmu/qemu-system-aarch64 tests/smmuv3-test
> >>> << expect lot of prints >>
> >>>
> >>> Any comments welcome..
> >> As Prem was forced to stop his activity on this series, I volunteer to
> >> pursue his work. Prior to starting the work, I just would like to check
> >> nobody works on this already or objects.
> >>
> >> If not, I intend to rebase and will do my utmost to align, when sensible
> >> with what was done on Xilinx vsmmuv2/intel iommu.
> > 
> > 
> > That would be awesome! Sorry for the late reply.
> no worries.
> > 
> > I had some comments on the PCI integration that Prem did but I'm not sure
> > it matters now if you're going to go through the work. We can do a review
> > of your future patches instead.
> 
> I plan to send a respin this week. As I am still getting familiar with
> the code, this may be mostly a rebase on 2.9 plus some cleanups. Then I
> plan to dig into more details, add missing features, maybe reuse your
> table translation code if you allow me to do so, share a base class. So
> I will be delighted to share about enhancements and possible reuse of
> your code too.

Sounds great. Feel free to re-use anything from Xilinx if needed.

> 
> note: at the moment I chose not to use the register API since you said
> that maybe for this IP it didn't bring much added-value.


Due to the large amounts of registers that share implementation it
may not be the best example to showcase the Register API. But if you
feel it comes handy for some reason, go for it. I don't really mind
either way.

Thanks,
Edgar


> 
> Thanks
> 
> Eric
> > 
> > Cheers,
> > Edgar
> >

[Qemu-devel] [PATCH] nbd: drop unused NBDClientSession.is_unix field

2017-03-27 Thread Stefan Hajnoczi

Signed-off-by: Stefan Hajnoczi 
---
 block/nbd-client.h | 2 --
 block/nbd.c| 2 --
 2 files changed, 4 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 8cdfc92..891ba44 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -30,8 +30,6 @@ typedef struct NBDClientSession {
 
 Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
 NBDReply reply;
-
-bool is_unix;
 } NBDClientSession;
 
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/block/nbd.c b/block/nbd.c
index f478f80..1b832c2 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -285,8 +285,6 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict 
*options, Error **errp)
 goto done;
 }
 
-s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
-
 done:
 QDECREF(addr);
 qobject_decref(crumpled_addr);
-- 
2.9.3

Re: [Qemu-devel] Question nbd_refresh_filename()

2017-03-27 Thread Stefan Hajnoczi

On Fri, Mar 24, 2017 at 06:48:44PM +0100, Kevin Wolf wrote:
> Am 24.03.2017 um 16:47 hat Stefan Hajnoczi geschrieben:
> > On Thu, Mar 23, 2017 at 05:48:48PM +0100, Markus Armbruster wrote:
> > > BlockdevOptionsNbd has a member SocketAddress, and nbd_config() doesn't
> > > restrict variants.  Thus, all four SOCKET_ADDRESS_KIND_ can occur.
> > > 
> > > Now have a look at nbd_refresh_filename().  s->saddr->type is If
> > > SOCKET_ADDRESS_KIND_VSOCK or SOCKET_ADDRESS_KIND_FD, then @host, @port
> > > and @path all remain null, and bs->exact_filename[] is not touched.
> > > 
> > > Does this work as intended?
> > 
> > NDB over AF_VSOCK has not been tested.  I would expect it to fail
> > earlier than nbd_refresh_filename().
> 
> Why would that be? Do the socket creation helper functions not support
> vsock yet?

You are right.  The following creates an AF_VSOCK connection:

  -drive 
if=virtio,file.driver=nbd,file.server.type=vsock,file.server.data.cid=3,file.server.data.port=1234

There is no pretty filename or URL representation that block/nbd.c
parses today.  The user must specify the full file.server
VsockSocketAddress options.

I think nbd_refresh_filename() is fine from a vsock point of view.  If
we'd like to encourage NBD-over-vsock in the future we could add code to
for nbd+vsock:// but in the meantime we don't need to fill in
bs->exact_filename[].

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] nbd: drop unused NBDClientSession.is_unix field

2017-03-27 Thread Paolo Bonzini



On 27/03/2017 14:32, Stefan Hajnoczi wrote:
> Signed-off-by: Stefan Hajnoczi 
> ---
>  block/nbd-client.h | 2 --
>  block/nbd.c| 2 --
>  2 files changed, 4 deletions(-)
> 
> diff --git a/block/nbd-client.h b/block/nbd-client.h
> index 8cdfc92..891ba44 100644
> --- a/block/nbd-client.h
> +++ b/block/nbd-client.h
> @@ -30,8 +30,6 @@ typedef struct NBDClientSession {
>  
>  Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
>  NBDReply reply;
> -
> -bool is_unix;
>  } NBDClientSession;
>  
>  NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
> diff --git a/block/nbd.c b/block/nbd.c
> index f478f80..1b832c2 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -285,8 +285,6 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict 
> *options, Error **errp)
>  goto done;
>  }
>  
> -s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
> -
>  done:
>  QDECREF(addr);
>  qobject_decref(crumpled_addr);
> 

Queued for 2.9, thanks.

Paolo

Re: [Qemu-devel] [PATCH v4 1/8] xen: import ring.h from xen

2017-03-27 Thread Juergen Gross

On 24/03/17 18:37, Stefano Stabellini wrote:
> On Fri, 24 Mar 2017, Juergen Gross wrote:
>> On 23/03/17 19:22, Stefano Stabellini wrote:
>>> On Thu, 23 Mar 2017, Paolo Bonzini wrote:
 On 23/03/2017 14:55, Juergen Gross wrote:
> On 23/03/17 14:00, Greg Kurz wrote:
>> On Mon, 20 Mar 2017 11:19:05 -0700
>> Stefano Stabellini  wrote:
>>
>>> Do not use the ring.h header installed on the system. Instead, import
>>> the header into the QEMU codebase. This avoids problems when QEMU is
>>> built against a Xen version too old to provide all the ring macros.
>>>
>>> Signed-off-by: Stefano Stabellini 
>>> Reviewed-by: Greg Kurz 
>>> CC: anthony.per...@citrix.com
>>> CC: jgr...@suse.com
>>> ---
>>> NB: The new macros have not been committed to Xen yet. Do not apply this
>>> patch until they do.
>>> ---
>>
>> Looking at your other series for the kernel part of this feature:
>>
>> https://lkml.org/lkml/2017/3/22/761
>>
>> I realize that the ring.h header from Xen also exists in the kernel 
>> tree... 
>>
>> Shouldn't all the code that can be used in both kernel and userspace go 
>> to a
>> header file under include/uapi in the kernel tree ? And then we would 
>> import
>> it under include/standard-headers/linux in the QEMU tree and we could 
>> keep it
>> in sync using scripts/update-linux-headers.sh.
>>
>> Cc'ing Paolo for insights.
>
> As Xen isn't part of the kernel we don't want that. You can use and/or
> build qemu with xen-9pfs backend support on an old Linux kernel without
> the related frontend.

 As long as the header changes rarely, I guess it's fine not to go
 through update-linux-headers.sh.
>>>
>>> Very rarely, last time ring.h was changed was 2015, and to introduce a
>>> new macro (which we don't necessarily need in QEMU).
>>>
>>>
> OTOH I don't see the advantage of not using the headers from Xen. This
> is working for qdisk and pvusb backends and for all the Xen libraries.
> Do you expect the 9pfs backend to be used for a qemu version built
> against a Xen version not supporting that backend?
>>>
>>> Yes, I think that is entirely possible: Xen and QEMU versions can mix
>>> and match.
>>>
>>> Keeping in mind that the 9pfs backend has actually no build dependencies
>>> on Xen, except for these new ring.h macros, we have the following
>>> options:
>>>
>>> 1) we build the 9pfs backend only for Xen >= 4.9, because of the new
>>>macros in ring.h that we need
>>
>> Right. You have sent 9pfs support patches for Xen tools. So obviously
>> you need a proper Xen version to use 9pfs. Why not build qemu against
>> it? Do you really expect a new Xen being used with an old qemu while
>> wanting to use new features? That makes no sense for me.
>  
> Tools support is needed to setup the frontend/backend connection as
> usual, but that's not a requirement for building the 9pfs backend. In
> fact, the backend doesn't need any tools support for it to work. The
> macro themselves are just a convenience - the backend would work just
> fine without them. Why restrict the QEMU build gratuitously?

You are duplicating a header without any real benefit I can see. This
is adding future work for keeping both versions of the header in sync.

In which scenario would you want qemu to support xen-9pfs without being
built against a Xen version supporting xen-9pfs?

I am not completely against copying the header, I just don't see an
advantage for any distro or user in doing it.


Juergen

Re: [Qemu-devel] What's the next QEMU version after 2.9 ? (or: when is a good point in time to get rid of old interfaces)

2017-03-27 Thread Peter Maydell

On 27 March 2017 at 13:01, Stefan Hajnoczi  wrote:
> It would be nice to get rid of the legacy -net option in 3.0.0.  I have
> added it and included pointers to loose ends.  I think this is doable
> but will require some time to achieve.

What's the syntax for using -netdev with embedded network
devices that you can't create with -device ?
(We should document this at
http://wiki.qemu-project.org/Documentation/Networking)

thanks
-- PMM

Re: [Qemu-devel] Deprecating the -net option (was: What's the next QEMU version after 2.9 ? (or: when is a good point in time to get rid of old interfaces))

2017-03-27 Thread Thomas Huth

On 27.03.2017 14:01, Stefan Hajnoczi wrote:
> On Mon, Mar 27, 2017 at 10:06:09AM +0200, Thomas Huth wrote:
>> On 24.03.2017 23:10, John Snow wrote:
>>>
>>>
>>> On 03/08/2017 03:26 AM, Thomas Huth wrote:

  Hi everybody,

 what will be the next version of QEMU after 2.9? Will we go for a 2.10
 (as I've seen it mentioned a couple of times on the mailing list
 already), or do we dare to switch to 3.0 instead?

 I personally dislike two-digit minor version numbers like 2.10 since the
 non-experienced users sometimes mix it up with 2.1 ... and there have
 been a couple of new cool features in the past releases that would
 justify a 3.0 now, too, I think.

 But anyway, the more important thing that keeps me concerned is: Someone
  once told me that we should get rid of old parameters and interfaces
 (like HMP commands) primarily only when we're changing to a new major
 version number. As you all know, QEMU has a lot of legacy options, which
 are likely rather confusing than helpful for the new users nowadays,
 e.g. things like the "-net channel" option (which is fortunately even
 hardly documented), but maybe also even the whole vlan/hub concept in
 the net code, or legacy parameters like "-usbdevice". If we switch to
 version 3.0, could we agree to remove at least some of them?

  Thomas

>>>
>>> As others have stated, we need a few releases to deprecate things first.
>>>
>>> Maybe we should develop a serious plan to develop some of our legacy
>>> interfaces first.
>>>
>>> Maybe 2.10 can introduce a list of things we want to deprecate,
>>> 2.11 can be the transition release,
>>> and then 3.0 can cut the cord and free of us our terrible burden?
>>>
>>> I have a list of things I want to axe...
>>
>> I've started a Wiki page with such a list here:
>>
>> http://wiki.qemu-project.org/Features/LegacyRemoval
> 
> It would be nice to get rid of the legacy -net option in 3.0.0.  I have
> added it and included pointers to loose ends.  I think this is doable
> but will require some time to achieve.

Not sure whether we really can get rid of the -net option completely,
since AFAIK it is the only way to configure on-board NICs at the moment,
and Paolo complains if he needs to type longer command lines
(https://lists.gnu.org/archive/html/qemu-devel/2015-12/msg02448.html).

But maybe we could get rid of the VLANs here at least, e.g. by matching
"-net nic" and a following "-net user|bridge|tap|..." with an internal
netdev ID instead of creating a "VLAN" hub?

Or we could even turn the -net option into a full "convenience" option
instead (similar to "-hda" and friends), so that you even do not have to
specify "-net nic" anymore but create both, network source and sink with
one "-net" statement, e.g.:

 qemu-system-xxx -net user,model=e1000,hostfwd=...

Just my 0.02 €

 Thomas

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2] KVM: pci-assign: do not map smm memory slot pages

2017-03-27 Thread hrg

On Mon, Mar 27, 2017 at 8:22 PM, Paolo Bonzini  wrote:
>
>
> On 27/03/2017 09:21, Herongguang (Stephen) wrote:
>> From f6f0ee6831488bef7af841cb86f3d85a04848fe5 Mon Sep 17 00:00:00 2001
>> From: herongguang 
>> Date: Mon, 27 Mar 2017 15:08:59 +0800
>> Subject: [PATCH] KVM: pci-assign: do not map smm memory slot pages
>>  in vt-d page table
>>
>> or VM memory are not put thus leaked in kvm_iommu_unmap_memslots() when
>> destroy VM.
>>
>> This is consistent with current vfio implementation.
>
> Oops, you forgot a Signed-off-by.

Sorry, I forget this, I'll resend it tomorrow since I am home now.

PS, what's your opinion about this
(http://www.spinics.net/lists/kvm/msg146914.html)?

>
> Paolo
>
>> ---
>>  virt/kvm/kvm_main.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 482612b..9018d06 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -1052,7 +1052,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
>>   * changes) is disallowed above, so any other attribute changes
>> getting
>>   * here can be skipped.
>>   */
>> -if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
>> +if ((as_id == 0) && ((change == KVM_MR_CREATE) || (change ==
>> KVM_MR_MOVE))) {
>>  r = kvm_iommu_map_pages(kvm, &new);
>>  return r;
>>  }
>

Re: [Qemu-devel] Deprecating the -net option

2017-03-27 Thread Thomas Huth

On 27.03.2017 14:56, Thomas Huth wrote:
> On 27.03.2017 14:01, Stefan Hajnoczi wrote:
>> On Mon, Mar 27, 2017 at 10:06:09AM +0200, Thomas Huth wrote:
>>> On 24.03.2017 23:10, John Snow wrote:


 On 03/08/2017 03:26 AM, Thomas Huth wrote:
>
>  Hi everybody,
>
> what will be the next version of QEMU after 2.9? Will we go for a 2.10
> (as I've seen it mentioned a couple of times on the mailing list
> already), or do we dare to switch to 3.0 instead?
>
> I personally dislike two-digit minor version numbers like 2.10 since the
> non-experienced users sometimes mix it up with 2.1 ... and there have
> been a couple of new cool features in the past releases that would
> justify a 3.0 now, too, I think.
>
> But anyway, the more important thing that keeps me concerned is: Someone
>  once told me that we should get rid of old parameters and interfaces
> (like HMP commands) primarily only when we're changing to a new major
> version number. As you all know, QEMU has a lot of legacy options, which
> are likely rather confusing than helpful for the new users nowadays,
> e.g. things like the "-net channel" option (which is fortunately even
> hardly documented), but maybe also even the whole vlan/hub concept in
> the net code, or legacy parameters like "-usbdevice". If we switch to
> version 3.0, could we agree to remove at least some of them?
>
>  Thomas
>

 As others have stated, we need a few releases to deprecate things first.

 Maybe we should develop a serious plan to develop some of our legacy
 interfaces first.

 Maybe 2.10 can introduce a list of things we want to deprecate,
 2.11 can be the transition release,
 and then 3.0 can cut the cord and free of us our terrible burden?

 I have a list of things I want to axe...
>>>
>>> I've started a Wiki page with such a list here:
>>>
>>> http://wiki.qemu-project.org/Features/LegacyRemoval
>>
>> It would be nice to get rid of the legacy -net option in 3.0.0.  I have
>> added it and included pointers to loose ends.  I think this is doable
>> but will require some time to achieve.
> 
> Not sure whether we really can get rid of the -net option completely,
> since AFAIK it is the only way to configure on-board NICs at the moment,
> and Paolo complains if he needs to type longer command lines
> (https://lists.gnu.org/archive/html/qemu-devel/2015-12/msg02448.html).
> 
> But maybe we could get rid of the VLANs here at least, e.g. by matching
> "-net nic" and a following "-net user|bridge|tap|..." with an internal
> netdev ID instead of creating a "VLAN" hub?
> 
> Or we could even turn the -net option into a full "convenience" option
> instead (similar to "-hda" and friends), so that you even do not have to
> specify "-net nic" anymore but create both, network source and sink with
> one "-net" statement, e.g.:
> 
>  qemu-system-xxx -net user,model=e1000,hostfwd=...
> 
> Just my 0.02 €

... and I forgot to mention: We should at least try to get rid of the
options first that only work with -net (or rather the VLAN concept),
like "-net dump", "-net channel", "-tftp", "-smb", "-bootp" and
"-redir", since these will hinder us from doing further reworks /
clean-ups in this area. I think I've mentioned them all on the Wiki page
now, if somebody is aware of another legacy option here that does not
work with "-netdev" yet (apart from -net nic itself), please let me know.

 Thomas




signature.asc
Description: OpenPGP digital signature

[Qemu-devel] SPARC tcg backend bugs (was: Proposal for deprecating unsupported host OSes & architecutures)

2017-03-27 Thread Peter Maydell

On 24 March 2017 at 17:24, Peter Maydell  wrote:
> So far I have found that we seem to be mishandling 32-bit guest
> load/stores, which I tentatively suggest
>
> diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
> index d1f4c0dead..c72b57dc58 100644
> --- a/tcg/sparc/tcg-target.inc.c
> +++ b/tcg/sparc/tcg-target.inc.c
> @@ -1119,7 +1119,7 @@ static void tcg_out_qemu_ld(TCGContext *s,
> TCGReg data, TCGReg addr,
>  /* Skip the high-part; we'll perform the extract in the trampoline.  
> */
>  param++;
>  }
> -tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
> +tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
>
>  /* We use the helpers to extend SB and SW data, leaving the case
> of SL needing explicit extending below.  */
> @@ -1199,7 +1199,7 @@ static void tcg_out_qemu_st(TCGContext *s,
> TCGReg data, TCGReg addr,
>  /* Skip the high-part; we'll perform the extract in the trampoline.  
> */
>  param++;
>  }
> -tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
> +tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
>  if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
>  /* Skip the high-part; we'll perform the extract in the trampoline.  
> */
>  param++;
>
> (otherwise we pass a high-bits-set value to the slowpath functions,
> which happens to work if QEMU was built with debug enabled but not
> if it doesn't.)
>
> That then at least makes i386 and x86_64 guests behave the same,
> ie they don't work. I haven't figured out what's going wrong there yet.

I've tracked this down to a similar problem: our implementation
of byte and halfword stores doesn't correctly zero/sign extend
the data value before passing it to the C helper function, which
means that if QEMU was compiled with optimization enabled we can
end up writing incorrect values to memory. Not sure what the
best fix is, though -- I guess some sign/zero extend code in
the store trampolines, but what's the best sparc code sequence
for doing 8/16 bit extension?

thanks
-- PMM

[Qemu-devel] [PATCH] trace: fix tcg tracing build breakage

2017-03-27 Thread Stefan Hajnoczi

Commit 0ab8ed18a6fe98bfc82705b0f041fbf2a8ca5b60 ("trace: switch to
modular code generation for sub-directories") forgot to convert "tcg"
trace events to the modular code generation approach where each
sub-directory has its own trace-events file.

This patch fixes compilation for "tcg" trace events.  Currently they are
only used in the root ./trace-events file.

"tcg" trace events can only be used in the root ./trace-events file for
the time being.

Reported-by: Peter Maydell 
Suggested-by: Emilio G. Cota 
Signed-off-by: Stefan Hajnoczi 
---
 docs/tracing.txt |  3 +++
 trace/Makefile.objs  | 16 
 scripts/tracetool/format/tcg_h.py|  1 +
 scripts/tracetool/format/tcg_helper_c.py |  1 +
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/docs/tracing.txt b/docs/tracing.txt
index e14bb6d..8c0029b 100644
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -405,6 +405,9 @@ information. If used together with the "tcg" property, it 
adds a second
 "TCGv_env" argument that must point to the per-target global TCG register that
 points to the vCPU when guest code is executed (usually the "cpu_env" 
variable).
 
+The "tcg" and "vcpu" properties are currently only honored in the root
+./trace-events file.
+
 The following example events:
 
 foo(uint32_t a) "a=%x"
diff --git a/trace/Makefile.objs b/trace/Makefile.objs
index 1b8eb4a..afd571c 100644
--- a/trace/Makefile.objs
+++ b/trace/Makefile.objs
@@ -9,27 +9,27 @@ $(BUILD_DIR)/trace-events-all: $(trace-events-files)
 
 $(obj)/generated-helpers-wrappers.h: 
$(obj)/generated-helpers-wrappers.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-helpers-wrappers.h-timestamp: $(trace-events-files) 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-helpers-wrappers.h-timestamp: $(SRC_PATH)/trace-events 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
-   --group=all \
+   --group=root \
--format=tcg-helper-wrapper-h \
--backend=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
 
 $(obj)/generated-helpers.h: $(obj)/generated-helpers.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-helpers.h-timestamp: $(trace-events-files) 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-helpers.h-timestamp: $(SRC_PATH)/trace-events 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
-   --group=all \
+   --group=root \
--format=tcg-helper-h \
--backend=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
 
 $(obj)/generated-helpers.c: $(obj)/generated-helpers.c-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-helpers.c-timestamp: $(trace-events-files) 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-helpers.c-timestamp: $(SRC_PATH)/trace-events 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
-   --group=all \
+   --group=root \
--format=tcg-helper-c \
--backend=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
@@ -41,9 +41,9 @@ target-obj-y += generated-helpers.o
 
 $(obj)/generated-tcg-tracers.h: $(obj)/generated-tcg-tracers.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-tcg-tracers.h-timestamp: $(trace-events-files) 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-tcg-tracers.h-timestamp: $(SRC_PATH)/trace-events 
$(BUILD_DIR)/config-host.mak $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
-   --group=all \
+   --group=root \
--format=tcg-h \
--backend=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
diff --git a/scripts/tracetool/format/tcg_h.py 
b/scripts/tracetool/format/tcg_h.py
index 7ddc4a5..db55f52 100644
--- a/scripts/tracetool/format/tcg_h.py
+++ b/scripts/tracetool/format/tcg_h.py
@@ -40,6 +40,7 @@ def generate(events, backend, group):
 '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
 '',
 '#include "exec/helper-proto.h"',
+'#include "%s"' % header,
 '',
 )
 
diff --git a/scripts/tracetool/format/tcg_helper_c.py 
b/scripts/tracetool/format/tcg_helper_c.py
index 7dccd8c..ec7acbe 100644
--- a/scripts/tracetool/format/tcg_helper_c.py
+++ b/scripts/tracetool/format/tcg_helper_c.py
@@ -55,6 +55,7 @@ def generate(events, backend, group):
 '#include "qemu-common.h"',
 '#include "cpu.h"',
 '#include "exec/helper-proto.h"',
+'#include "%s"' % header,
 '',
 )
 
-- 
2.9.3

Re: [Qemu-devel] compile failure if I enable guest_mem_before trace event

2017-03-27 Thread Stefan Hajnoczi

On Thu, Mar 23, 2017 at 10:39:43PM -0400, Emilio G. Cota wrote:
> On Thu, Mar 23, 2017 at 19:08:11 +, Peter Maydell wrote:
> > Hi; I thought I'd have a look at the guest_mem_before trace event,
> > but if I enable it (by deleting "disable" from the line in trace-events)
> > QEMU doesn't compile:
> > 
> >   CC  arm-softmmu/tcg/tcg-op.o
> > In file included from
> > /home/petmay01/linaro/qemu-from-laptop/qemu/include/trace-tcg.h:4:0,
> >  from
> > /home/petmay01/linaro/qemu-from-laptop/qemu/tcg/tcg-op.c:31:
> > ../trace/generated-tcg-tracers.h: In function ‘trace_guest_mem_before_tcg’:
> > ../trace/generated-tcg-tracers.h:11:5: error: implicit declaration of
> > function ‘trace_guest_mem_before_trans’
> > [-Werror=implicit-function-declaration]
> >  trace_guest_mem_before_trans(__cpu, info);
> >  ^
> > ../trace/generated-tcg-tracers.h:11:5: error: nested extern
> > declaration of ‘trace_guest_mem_before_trans’ [-Werror=nested-externs]
> > 
> > Am I doing something wrong, or is this a bug?
> 
> It doesn't work for me either. I bisected it to:
> 
> 0ab8ed18 "trace: switch to modular code generation for sub-directories"
> 
> It seems that after that commit no appropriate include is added
> to the generated tcg tracing .h files. The 'header' variable isn't used
> in the generation scripts for TCG, which is suspicious, e.g.:
> 
> --- a/scripts/tracetool/format/tcg_h.py
> +++ b/scripts/tracetool/format/tcg_h.py
> @@ -28,13 +28,17 @@ def vcpu_transform_args(args):
> 
> 
>  def generate(events, backend, group):
> +if group == "root":
> +header = "trace-root.h"
> +else:
> +header = "trace.h"
> +
>  out('/* This file is autogenerated by tracetool, do not edit. */',
>  '/* You must include this file after the inclusion of helper.h */',
>  '',
>  '#ifndef TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
>  '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
>  '',
> -'#include "trace.h"',
>  '#include "exec/helper-proto.h"',
>  '',
>  )
> 
> 
> The appended fixes it for me; I hope it's enough for the tracing
> people to come quickly to a proper fix (sorry, I didn't even know
> what the tracing features were only a few minutes ago!).

Thanks for looking into this.  I have sent a patch.

Stefan


signature.asc
Description: PGP signature

Re: [Qemu-devel] callout to *file in bdrv_co_get_block_status

2017-03-27 Thread Peter Lieven


Am 20.03.2017 um 17:56 schrieb Paolo Bonzini:


On 20/03/2017 17:43, Peter Lieven wrote:

Am 20.03.2017 um 15:05 schrieb Paolo Bonzini:

On 20/03/2017 14:35, Peter Lieven wrote:

Am 20.03.2017 um 14:23 schrieb Paolo Bonzini:

On 20/03/2017 14:13, Peter Lieven wrote:

Am 20.03.2017 um 13:47 schrieb Peter Lieven:

commit 5daa74a6ebce7543aaad178c4061dc087bb4c705
Author: Paolo Bonzini 
Date:   Wed Sep 4 19:00:38 2013 +0200

 block: look for zero blocks in bs->file

 Reviewed-by: Eric Blake 

 Signed-off-by: Paolo Bonzini 
 Signed-off-by: Stefan Hajnoczi 


It was introduced while introducing bdv_get_block_status. I don't know what the 
real

issue was that was addressed with this patch?

Is it possible that this optimization was added especially for RAW? I was 
believing that
raw would forward the get_block_status call to bs->file, but it looks it 
doesn't.
If this one here was for RAW would it be an option to move this callout to the 
raw-format driver
and remove it from the generic code?

It was meant for both raw and qcow2.

Okay, but as Fam mentioned qcow2 Metadata should know that a cluster is zero. 
Do you remember
what the issue was?

I said that already---preallocated metadata.  Also, at the time
pre-qcow2v3 was more important.

Yes, but Fam said that with preallocated metadata the clusters should be zero, 
or was that
not true before qcow2v3?

Zero clusters didn't exist before qcow2v3 I think.


Are you using libiscsi, block devices or files?

Its a mixture. raw with libiscsi or lvm and qcow2 and vmdk either with libnfs 
or on local storage.

I stumbled across the issue with lseek on a tmpfs because in the build process 
for our templates
I temporarily have vmdks on a tmpfs and it takes ages before qemu-img convert 
starts to run (it iterates
over every 64kb cluster with that callout to find_allocation and for some 
reason lseek is very slow on tmpfs).

Ok, thanks.  Perhaps it's worth benchmarking tmpfs specifically.  Apart
from the system call overhead (which does not really matter if you're
going to do a read), lseek on other filesystems should not be any slower
than read.


Okay, but the even the read is not really necessary if the metadata is correct?
Would it be an idea to introduce an inverse flag live BDRV_BLOCK_NOT_ZERO for
cases where we know that there is really DATA and thus can avoid the second 
callout?

Peter

Re: [Qemu-devel] GSoC 2017 Proposal: TCG performance enhancements

2017-03-27 Thread Alex Bennée


Richard Henderson  writes:

> On 03/26/2017 02:52 AM, Pranith Kumar wrote:
>> Hello,
>>

>
>> Please let me know if you have any comments or suggestions. Also please let 
>> me
>> know if there are other enhancements that are easily implementable to 
>> increase
>> TCG performance as part of this project or otherwise.
>
> I think it would be interesting to place TranslationBlock structures
> into the same memory block as code_gen_buffer, immediately before the
> code that implements the TB.
>
> Consider what happens within every TB:
>
> (1) We have one or more references to the TB address, via exit_tb.
>
> For aarch64, this will normally require 2-4 insns.
>
> # alpha-softmmu
> 0x7f75152114:  d0ffb320  adrp x0, #-0x99a000 (addr 0x7f747b8000)
> 0x7f75152118:  91004c00  add x0, x0, #0x13 (19)
> 0x7f7515211c:  17c3  b #-0xf4 (addr 0x7f75152028)
>
> # alpha-linux-user
> 0x00569500:  d2800260  mov x0, #0x13
> 0x00569504:  f2b59820  movk x0, #0xacc1, lsl #16
> 0x00569508:  f2c00fe0  movk x0, #0x7f, lsl #32
> 0x0056950c:  17df  b #-0x84 (addr 0x569488)
>
> We would reduce this to one insn, always, if the TB were close by,
> since the ADR instruction has a range of 1MB.

Having a TB address statically addressable from the generated code would
also be very handy for doing things like rough block execution counts
(or even precise if you want to go through the atomic penalty for it).

It would be nice for future work to be able to track where our hot-paths
are through generated code.

>
>
> (2) We have zero to two references to a linked TB, via goto_tb.
>
> Your stated goal above for eliminating the code_gen_buffer maximum of
> 128MB can be done in two ways.
>
> (2A) Raise the maximum to 2GB.  For this we would align an instruction
> pair, adrp+add, to compute the address; the following insn would
> branch.  The update code would write a new destination by modifing the
> adrp+add with a single 64-bit store.
>
> (2B) Eliminate the maximum altogether by referencing the destination
> directly in the TB.  This is the !USE_DIRECT_JUMP path.  It is
> normally not used on 64-bit targets because computing the full 64-bit
> address of the TB is harder, or just as hard, as computing the full
> 64-bit address of the destination.
>
> However, if the TB is nearby, aarch64 can load the address from
> TB.jmp_target_addr in one insn, with LDR (literal).  This pc-relative
> load also has a 1MB range.
>
> This has the side benefit that it is much quicker to re-link TBs, both
> in the computation of the code for the destination as well as
> re-flushing the icache.
>
>
> In addition, I strongly suspect the 1,342,177 entries (153MB) that we
> currently allocate for tcg_ctx.tb_ctx.tbs, given a 512MB
> code_gen_buffer, is excessive.
>
> If we co-allocate the TB and the code, then we get exactly the right
> number of TBs allocated with no further effort.
>
> There will be some additional memory wastage, since we'll want to keep
> the code and the data in different cache lines and that means padding,
> but I don't think that'll be significant.  Indeed, given the above
> over-allocation will probably still be a net savings.
>
>
> r~


--
Alex Bennée

[Qemu-devel] [PATCH] spapr: fix memory hot-unplugging

2017-03-27 Thread Laurent Vivier

If, once the kernel has booted, we try to remove a memory
hotplugged while the kernel was not started, QEMU crashes on
an assert:

qemu-system-ppc64: hw/virtio/vhost.c:651:
   vhost_commit: Assertion `r >= 0' failed.
...
#4  in vhost_commit
#5  in memory_region_transaction_commit
#6  in pc_dimm_memory_unplug
#7  in spapr_memory_unplug
#8  spapr_machine_device_unplug
#9  in hotplug_handler_unplug
#10 in spapr_lmb_release
#11 in detach
#12 in set_allocation_state
#13 in rtas_set_indicator
...

If we take a closer look to the guest kernel log, we can see when
we try to unplug the memory:

pseries-hotplug-mem: Attempting to hot-add 4 LMB(s)

What happens:

1- The kernel has ignored the memory hotplug event because
   it was not started when it was generated.

2- When we hot-unplug the memory,
   QEMU starts to remove the memory,
generates an hot-unplug event,
and signals the kernel of the incoming new event

3- as the kernel is started, on the QEMU signal, it reads
   the event list, decodes the hotplug event and tries to
   finish the hotplugging.

4- QEMU receives the hotplug notification while it
   is trying to hot-unplug the memory. This moves the memory
   DRC to an invalid state

This patch prevents this by not allowing to set the allocation
state to USABLE while the DRC is awaiting release and allocation.

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1432382

Signed-off-by: Laurent Vivier 
---
 hw/ppc/spapr_drc.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 150f6bf..377ea65 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -135,6 +135,12 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc,
 if (!drc->dev) {
 return RTAS_OUT_NO_SUCH_INDICATOR;
 }
+if (drc->awaiting_release && drc->awaiting_allocation) {
+/* kernel is acknowledging a previous hotplug event
+ * while we are already removing it.
+ */
+return RTAS_OUT_NO_SUCH_INDICATOR;
+}
 }
 
 if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) {
-- 
2.9.3

[Qemu-devel] [PATCH RFC v3 for-2.9 06/11] rbd: Clean up runtime_opts, fix -drive to reject filename

2017-03-27 Thread Markus Armbruster

runtime_opts is used for three different purposes:

* qemu_rbd_open() uses it to accept options it recognizes, such as
  "pool" and "image".  Other .bdrv_open() methods do it similarly.

* qemu_rbd_open() accepts additional list-valued options
  auth-supported and server, with the help of qemu_rbd_array_opts().
  The list elements are again dictionaries.  qemu_rbd_array_opts()
  uses runtime_opts to accept their members.  Thus, runtime_opts
  contains recognized sub-sub-options "auth", "host", "port" in
  addition to recognized options.  No other block driver does that.

* qemu_rbd_create() uses it to convert the QDict produced by
  qemu_rbd_parse_filename() to QemuOpts.  No other block driver does
  that.  The keys produced by qemu_rbd_parse_filename() are "pool",
  "image", "snapshot", "conf", "user" and "keyvalue-pairs".
  qemu_rbd_open() accepts these, so no additional ones here.

This is a confusing mess.  Dates back to commit 0f9d252.  First step
to clean it up is documenting runtime_opts.desc[]:

* Reorder entries to match the QAPI schema, like we do in other block
  drivers.

* Document why the schema's "server" and "auth-supported" aren't in
  .desc[].

* Document why "keyvalue-pairs", "host", "port" and "auth" are in
  .desc[], but not the schema.

* Delete "filename", because none of the three users actually uses it.
  This fixes -drive to reject parameter filename instead of silently
  ignoring it.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 block/rbd.c | 40 +---
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 2632533..b2afe07 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -294,21 +294,6 @@ static QemuOptsList runtime_opts = {
 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
 .desc = {
 {
-.name = "filename",
-.type = QEMU_OPT_STRING,
-.help = "Specification of the rbd image",
-},
-{
-.name = "password-secret",
-.type = QEMU_OPT_STRING,
-.help = "ID of secret providing the password",
-},
-{
-.name = "conf",
-.type = QEMU_OPT_STRING,
-.help = "Rados config file location",
-},
-{
 .name = "pool",
 .type = QEMU_OPT_STRING,
 .help = "Rados pool name",
@@ -319,6 +304,11 @@ static QemuOptsList runtime_opts = {
 .help = "Image name in the pool",
 },
 {
+.name = "conf",
+.type = QEMU_OPT_STRING,
+.help = "Rados config file location",
+},
+{
 .name = "snapshot",
 .type = QEMU_OPT_STRING,
 .help = "Ceph snapshot name",
@@ -329,6 +319,19 @@ static QemuOptsList runtime_opts = {
 .type = QEMU_OPT_STRING,
 .help = "Rados id name",
 },
+/*
+ * server.* and auth-supported.* extracted manually, see
+ * qemu_rbd_array_opts()
+ */
+{
+.name = "password-secret",
+.type = QEMU_OPT_STRING,
+.help = "ID of secret providing the password",
+},
+
+/*
+ * Keys for qemu_rbd_parse_filename(), not in the QAPI schema
+ */
 {
 /*
  * HACK: name starts with '=' so that qemu_opts_parse()
@@ -338,6 +341,13 @@ static QemuOptsList runtime_opts = {
 .type = QEMU_OPT_STRING,
 .help = "Legacy rados key/value option parameters",
 },
+
+/*
+ * The remainder aren't option keys, but option sub-sub-keys,
+ * so that qemu_rbd_array_opts() can abuse runtime_opts for
+ * its own purposes
+ * TODO clean this up
+ */
 {
 .name = "host",
 .type = QEMU_OPT_STRING,
-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 00/11] rbd: Clean up API and code

2017-03-27 Thread Markus Armbruster

This is RFC because it needs more testing.  I'm sending it out now in
the hope of getting some review while we test.

v3:
* PATCH 01-07 unchanged except for a doc tweak in PATCH 06
* PATCH 08-10 replace PATCH 9
* PATCH 8 becomes PATCH 11, rebased on top of 08-10, commit message
  updated, R-by dropped

Markus Armbruster (11):
  rbd: Reject -blockdev server.*.{numeric,to,ipv4,ipv6}
  rbd: Fix to cleanly reject -drive without pool or image
  rbd: Don't limit length of parameter values
  rbd: Clean up after the previous commit
  rbd: Don't accept -drive driver=rbd,keyvalue-pairs=...
  rbd: Clean up runtime_opts, fix -drive to reject filename
  rbd: Clean up qemu_rbd_create()'s detour through QemuOpts
  rbd: Revert -blockdev and -drive parameter auth-supported
  rbd: Revert -blockdev parameter password-secret
  Revert "rbd: add support for getting password from QCryptoSecret
object"
  rbd: Fix bugs around -drive parameter "server"

 block/rbd.c  | 356 ---
 qapi-schema.json |  21 ++-
 qapi/block-core.json |  29 +
 3 files changed, 95 insertions(+), 311 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 09/11] rbd: Revert -blockdev parameter password-secret

2017-03-27 Thread Markus Armbruster

This reverts a part of commit 8a47e8e.  We're having second thoughts
on the QAPI schema (and thus the external interface), and haven't
reached consensus, yet.  Issues include:

* BlockdevOptionsRbd member @password-secret isn't actually a
  password, it's a key generated by Ceph.

* We're not sure where member @password-secret belongs (see the
  previous commit).

* How @password-secret interacts with settings from a configuration
  file specified with @conf is undocumented.  I suspect it's untested,
  too.

Let's avoid painting ourselves into a corner now, and revert the
feature for 2.9.

Note that users can still configure an authentication key with a
configuration file.  They probably do that anyway if they use Ceph
outside QEMU as well.

Signed-off-by: Markus Armbruster 
---
 qapi/block-core.json | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6a7ca0b..2e60ab5 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2618,9 +2618,6 @@
 # @server: Monitor host address and port.  This maps
 #  to the "mon_host" Ceph option.
 #
-# @password-secret:The ID of a QCryptoSecret object providing
-#  the password for the login.
-#
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsRbd',
-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 04/11] rbd: Clean up after the previous commit

2017-03-27 Thread Markus Armbruster

This code in qemu_rbd_parse_filename()

found_str = qemu_rbd_next_tok(p, '\0', &p);
p = found_str;

has no effect.  Drop it, and simplify qemu_rbd_next_tok().

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 block/rbd.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 0fea348..182a5a3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -104,19 +104,17 @@ static char *qemu_rbd_next_tok(char *src, char delim, 
char **p)
 
 *p = NULL;
 
-if (delim != '\0') {
-for (end = src; *end; ++end) {
-if (*end == delim) {
-break;
-}
-if (*end == '\\' && end[1] != '\0') {
-end++;
-}
-}
+for (end = src; *end; ++end) {
 if (*end == delim) {
-*p = end + 1;
-*end = '\0';
+break;
 }
+if (*end == '\\' && end[1] != '\0') {
+end++;
+}
+}
+if (*end == delim) {
+*p = end + 1;
+*end = '\0';
 }
 return src;
 }
@@ -177,10 +175,6 @@ static void qemu_rbd_parse_filename(const char *filename, 
QDict *options,
 goto done;
 }
 
-found_str = qemu_rbd_next_tok(p, '\0', &p);
-
-p = found_str;
-
 /* The following are essentially all key/value pairs, and we treat
  * 'id' and 'conf' a bit special.  Key/value pairs may be in any order. */
 while (p) {
-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 03/11] rbd: Don't limit length of parameter values

2017-03-27 Thread Markus Armbruster

We laboriously enforce parameter values are between one and some
arbitrary limit in length.  Only RBD_MAX_IMAGE_NAME_SIZE comes from
librbd.h, and I'm not sure it applies.  Where the other limits come
from is unclear.

Drop the length checking.  The limits librbd actually imposes must be
checked by librbd anyway.

There's one minor complication: BDRVRBDState member name is a
fixed-size array.  Depends on the length limit.  Make it a pointer to
a dynamically allocated string.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 block/rbd.c | 91 ++---
 1 file changed, 14 insertions(+), 77 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 5ba2a87..0fea348 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -56,11 +56,6 @@
 
 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
 
-#define RBD_MAX_CONF_NAME_SIZE 128
-#define RBD_MAX_CONF_VAL_SIZE 512
-#define RBD_MAX_CONF_SIZE 1024
-#define RBD_MAX_POOL_NAME_SIZE 128
-#define RBD_MAX_SNAP_NAME_SIZE 128
 #define RBD_MAX_SNAPS 100
 
 /* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
@@ -99,16 +94,12 @@ typedef struct BDRVRBDState {
 rados_t cluster;
 rados_ioctx_t io_ctx;
 rbd_image_t image;
-char name[RBD_MAX_IMAGE_NAME_SIZE];
+char *name;
 char *snap;
 } BDRVRBDState;
 
-static char *qemu_rbd_next_tok(int max_len,
-   char *src, char delim,
-   const char *name,
-   char **p, Error **errp)
+static char *qemu_rbd_next_tok(char *src, char delim, char **p)
 {
-int l;
 char *end;
 
 *p = NULL;
@@ -127,15 +118,6 @@ static char *qemu_rbd_next_tok(int max_len,
 *end = '\0';
 }
 }
-l = strlen(src);
-if (l >= max_len) {
-error_setg(errp, "%s too long", name);
-return NULL;
-} else if (l == 0) {
-error_setg(errp, "%s too short", name);
-return NULL;
-}
-
 return src;
 }
 
@@ -159,7 +141,6 @@ static void qemu_rbd_parse_filename(const char *filename, 
QDict *options,
 char *p, *buf, *keypairs;
 char *found_str;
 size_t max_keypair_size;
-Error *local_err = NULL;
 
 if (!strstart(filename, "rbd:", &start)) {
 error_setg(errp, "File name must start with 'rbd:'");
@@ -171,11 +152,7 @@ static void qemu_rbd_parse_filename(const char *filename, 
QDict *options,
 keypairs = g_malloc0(max_keypair_size);
 p = buf;
 
-found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p,
-  '/', "pool name", &p, &local_err);
-if (local_err) {
-goto done;
-}
+found_str = qemu_rbd_next_tok(p, '/', &p);
 if (!p) {
 error_setg(errp, "Pool name is required");
 goto done;
@@ -184,27 +161,15 @@ static void qemu_rbd_parse_filename(const char *filename, 
QDict *options,
 qdict_put(options, "pool", qstring_from_str(found_str));
 
 if (strchr(p, '@')) {
-found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
-  '@', "object name", &p, &local_err);
-if (local_err) {
-goto done;
-}
+found_str = qemu_rbd_next_tok(p, '@', &p);
 qemu_rbd_unescape(found_str);
 qdict_put(options, "image", qstring_from_str(found_str));
 
-found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p,
-  ':', "snap name", &p, &local_err);
-if (local_err) {
-goto done;
-}
+found_str = qemu_rbd_next_tok(p, ':', &p);
 qemu_rbd_unescape(found_str);
 qdict_put(options, "snapshot", qstring_from_str(found_str));
 } else {
-found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
-  ':', "object name", &p, &local_err);
-if (local_err) {
-goto done;
-}
+found_str = qemu_rbd_next_tok(p, ':', &p);
 qemu_rbd_unescape(found_str);
 qdict_put(options, "image", qstring_from_str(found_str));
 }
@@ -212,11 +177,7 @@ static void qemu_rbd_parse_filename(const char *filename, 
QDict *options,
 goto done;
 }
 
-found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
-  '\0', "configuration", &p, &local_err);
-if (local_err) {
-goto done;
-}
+found_str = qemu_rbd_next_tok(p, '\0', &p);
 
 p = found_str;
 
@@ -224,12 +185,7 @@ static void qemu_rbd_parse_filename(const char *filename, 
QDict *options,
  * 'id' and 'conf' a bit special.  Key/value pairs may be in any order. */
 while (p) {
 char *name, *value;
-name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
- '=', "conf option name", &p, &local_err);
-if (local_err) {
-break;
-}
-
+name = qemu_rbd_next_tok(p, '=', &p);
 if (!p) {
 error_setg(errp, "c

Re: [Qemu-devel] [PATCH] Create libqemutrace.a for all trace.o

2017-03-27 Thread Stefan Hajnoczi

On Fri, Mar 24, 2017 at 04:28:32PM +, Xu, Anthony wrote:
> Create libqemutrace.a for all trace.o
> Currently all trace.o are linked into qemu-system, qemu-img, 
> qemu-nbd, qemu-io etc., even the corresponding components 
> are not included.
> Create a libqemutrace.a that the linker would only pull in .o 
> files containing symbols that are actually referenced by the 
> program.
> ./trace.o, ./qapi/trace.o and ./util/trace.o are added into 
> libqemuutil.a  to avoid recursive dependencies between 
> libqemuutil.a and libqemutrace.a.

Why would libqemutrace.a depend on libqemuutil.a?

Tracing code shouldn't call other QEMU code.  That would could create
infinite recursion when a trace event is fired.


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH RFC v3 for-2.9 08/11] rbd: Revert -blockdev and -drive parameter auth-supported

2017-03-27 Thread Markus Armbruster

This reverts half of commit 0a55679.  We're having second thoughts on
the QAPI schema (and thus the external interface), and haven't reached
consensus, yet.  Issues include:

* The implementation uses deprecated rados_conf_set() key
  "auth_supported".  No biggie.

* The implementation makes -drive silently ignore invalid parameters
  "auth" and "auth-supported.*.X" where X isn't "auth".  Fixable (in
  fact I'm going to fix similar bugs around parameter server), so
  again no biggie.

* BlockdevOptionsRbd member @password-secret applies only to
  authentication method cephx.  Should it be a variant member of
  RbdAuthMethod?

* BlockdevOptionsRbd member @user could apply to both methods cephx
  and none, but I'm not sure it's actually used with none.  If it
  isn't, should it be a variant member of RbdAuthMethod?

* The client offers a *set* of authentication methods, not a list.
  Should the methods be optional members of BlockdevOptionsRbd instead
  of members of list @auth-supported?  The latter begs the question
  what multiple entries for the same method mean.  Trivial question
  now that RbdAuthMethod contains nothing but @type, but less so when
  RbdAuthMethod acquires other members, such the ones discussed above.

* How BlockdevOptionsRbd member @auth-supported interacts with
  settings from a configuration file specified with @conf is
  undocumented.  I suspect it's untested, too.

Let's avoid painting ourselves into a corner now, and revert the
feature for 2.9.

Note that users can still configure authentication methods with a
configuration file.  They probably do that anyway if they use Ceph
outside QEMU as well.

qemu_rbd_array_opts()'s parameter @type now must be RBD_MON_HOST,
which is silly.  This will be cleaned up shortly.

Signed-off-by: Markus Armbruster 
---
 block/rbd.c  | 31 +++
 qapi/block-core.json | 24 
 2 files changed, 3 insertions(+), 52 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index cf0bab0..103ce44 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -320,8 +320,7 @@ static QemuOptsList runtime_opts = {
 .help = "Rados id name",
 },
 /*
- * server.* and auth-supported.* extracted manually, see
- * qemu_rbd_array_opts()
+ * server.* extracted manually, see qemu_rbd_array_opts()
  */
 {
 .name = "password-secret",
@@ -356,11 +355,6 @@ static QemuOptsList runtime_opts = {
 .name = "port",
 .type = QEMU_OPT_STRING,
 },
-{
-.name = "auth",
-.type = QEMU_OPT_STRING,
-.help = "Supported authentication method, either cephx or none",
-},
 { /* end of list */ }
 },
 };
@@ -512,7 +506,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
 }
 
 #define RBD_MON_HOST  0
-#define RBD_AUTH_SUPPORTED1
 
 static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type,
  Error **errp)
@@ -527,7 +520,7 @@ static char *qemu_rbd_array_opts(QDict *options, const char 
*prefix, int type,
 Error *local_err = NULL;
 int i;
 
-assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED);
+assert(type == RBD_MON_HOST);
 
 num_entries = qdict_array_entries(options, prefix);
 
@@ -573,10 +566,9 @@ static char *qemu_rbd_array_opts(QDict *options, const 
char *prefix, int type,
 value = strbuf;
 }
 } else {
-value = qemu_opt_get(opts, "auth");
+abort();
 }
 
-
 /* each iteration in the for loop will build upon the string, and if
  * rados_str is NULL then it is our first pass */
 if (rados_str) {
@@ -608,7 +600,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 QemuOpts *opts;
 Error *local_err = NULL;
 char *mon_host = NULL;
-char *auth_supported = NULL;
 int r;
 
 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
@@ -619,14 +610,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 return -EINVAL;
 }
 
-auth_supported = qemu_rbd_array_opts(options, "auth-supported.",
- RBD_AUTH_SUPPORTED, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-r = -EINVAL;
-goto failed_opts;
-}
-
 mon_host = qemu_rbd_array_opts(options, "server.",
RBD_MON_HOST, &local_err);
 if (local_err) {
@@ -678,13 +661,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 }
 
-if (auth_supported) {
-r = rados_conf_set(s->cluster, "auth_supported", auth_supported);
-if (r < 0) {
-goto failed_shutdown;
-}
-}
-
 if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
 r = -EIO;
 goto failed_shutdown;

[Qemu-devel] [PATCH RFC v3 for-2.9 01/11] rbd: Reject -blockdev server.*.{numeric, to, ipv4, ipv6}

2017-03-27 Thread Markus Armbruster

We use InetSocketAddress in the QAPI schema.  However, the code
doesn't use inet_connect_saddr(), but formats "host" and "port" into a
configuration string for rados_conf_set().  Thus, members "numeric",
"to", "ipv4" and "ipv6" are silently ignored.  Not nice.  Example:

-blockdev 
rbd,node-name=nn,pool=p,image=i,server.0.host=h0,server.0.port=12345,server.0.ipv4=off

Factor a suitable InetSocketAddressBase out of InetSocketAddress, and
use that.  "numeric", "to", "ipv4" and "ipv6" are now rejected.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 qapi-schema.json | 21 ++---
 qapi/block-core.json |  2 +-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 68a4327..b921994 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4051,19 +4051,27 @@
   'data': [ 'all', 'rx', 'tx' ] }
 
 ##
+# @InetSocketAddressBase:
+#
+# @host: host part of the address
+# @port: port part of the address
+##
+{ 'struct': 'InetSocketAddressBase',
+  'data': {
+'host': 'str',
+'port': 'str' } }
+
+##
 # @InetSocketAddress:
 #
 # Captures a socket address or address range in the Internet namespace.
 #
-# @host: host part of the address
-#
-# @port: port part of the address, or lowest port if @to is present
-#
 # @numeric: true if the host/port are guaranteed to be numeric,
 #   false if name resolution should be attempted. Defaults to false.
 #   (Since 2.9)
 #
-# @to: highest port to try
+# @to: If present, this is range of possible addresses, with port
+#  between @port and @to.
 #
 # @ipv4: whether to accept IPv4 addresses, default try both IPv4 and IPv6
 #
@@ -4072,9 +4080,8 @@
 # Since: 1.3
 ##
 { 'struct': 'InetSocketAddress',
+  'base': 'InetSocketAddressBase',
   'data': {
-'host': 'str',
-'port': 'str',
 '*numeric':  'bool',
 '*to': 'uint16',
 '*ipv4': 'bool',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0f132fc..5d2efe4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2652,7 +2652,7 @@
 '*conf': 'str',
 '*snapshot': 'str',
 '*user': 'str',
-'*server': ['InetSocketAddress'],
+'*server': ['InetSocketAddressBase'],
 '*auth-supported': ['RbdAuthMethod'],
 '*password-secret': 'str' } }
 
-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 05/11] rbd: Don't accept -drive driver=rbd, keyvalue-pairs=...

2017-03-27 Thread Markus Armbruster

The way we communicate extra key-value pairs from
qemu_rbd_parse_filename() to qemu_rbd_open() exposes option parameter
"keyvalue-pairs" on the command line.  It's not wanted there.  Hack:
rename the parameter to "=keyvalue-pairs" to make it inaccessible.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 block/rbd.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 182a5a3..2632533 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -215,7 +215,7 @@ static void qemu_rbd_parse_filename(const char *filename, 
QDict *options,
 }
 
 if (keypairs[0]) {
-qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs));
+qdict_put(options, "=keyvalue-pairs", qstring_from_str(keypairs));
 }
 
 
@@ -330,7 +330,11 @@ static QemuOptsList runtime_opts = {
 .help = "Rados id name",
 },
 {
-.name = "keyvalue-pairs",
+/*
+ * HACK: name starts with '=' so that qemu_opts_parse()
+ * can't set it
+ */
+.name = "=keyvalue-pairs",
 .type = QEMU_OPT_STRING,
 .help = "Legacy rados key/value option parameters",
 },
@@ -405,7 +409,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts 
*opts, Error **errp)
 conf   = qemu_opt_get(rbd_opts, "conf");
 clientname = qemu_opt_get(rbd_opts, "user");
 name   = qemu_opt_get(rbd_opts, "image");
-keypairs   = qemu_opt_get(rbd_opts, "keyvalue-pairs");
+keypairs   = qemu_opt_get(rbd_opts, "=keyvalue-pairs");
 
 ret = rados_create(&cluster, clientname);
 if (ret < 0) {
@@ -638,7 +642,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 snap   = qemu_opt_get(opts, "snapshot");
 clientname = qemu_opt_get(opts, "user");
 name   = qemu_opt_get(opts, "image");
-keypairs   = qemu_opt_get(opts, "keyvalue-pairs");
+keypairs   = qemu_opt_get(opts, "=keyvalue-pairs");
 
 if (!pool || !name) {
 error_setg(errp, "Parameters 'pool' and 'image' are required");
-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 10/11] Revert "rbd: add support for getting password from QCryptoSecret object"

2017-03-27 Thread Markus Armbruster

This reverts commit 60390a2192e7b38aee18db6ce7fb740498709737.

The commit's rationale

Currently RBD passwords must be provided on the command line
via

  $QEMU -drive file=rbd:pool/image:id=myname:\
   key=QVFDVm41aE82SHpGQWhBQXEwTkN2OGp0SmNJY0UrSE9CbE1RMUE=:\
   auth_supported=cephx

This is insecure because the key is visible in the OS process
listing.

is invalid.  You can easily avoid passing keys on the command line by
using "keyfile" instead of "key".  In fact, the Ceph documentation
calls use of key "not recommended".  But the most common way to
provide keys is a keyring.  The default keyrings should be just fine
for most users.  When they aren't, you can configure your own keyrings
with "keyring" or override the key with "keyfile".

The commit adds parameter password-secret to -drive.  Support for it
was included in -blockdev, but reverted in the previous commit due to
concerns about the QMP interface.  Revert it from -drive, too.

Cc: Daniel P. Berrange 
Signed-off-by: Markus Armbruster 
---
 block/rbd.c | 47 ---
 1 file changed, 47 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 103ce44..5a58d3e 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -16,7 +16,6 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
-#include "crypto/secret.h"
 #include "qemu/cutils.h"
 #include "qapi/qmp/qstring.h"
 
@@ -225,26 +224,6 @@ done:
 return;
 }
 
-
-static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
- Error **errp)
-{
-if (secretid == 0) {
-return 0;
-}
-
-gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
-errp);
-if (!secret) {
-return -1;
-}
-
-rados_conf_set(cluster, "key", secret);
-g_free(secret);
-
-return 0;
-}
-
 static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs,
  Error **errp)
 {
@@ -322,11 +301,6 @@ static QemuOptsList runtime_opts = {
 /*
  * server.* extracted manually, see qemu_rbd_array_opts()
  */
-{
-.name = "password-secret",
-.type = QEMU_OPT_STRING,
-.help = "ID of secret providing the password",
-},
 
 /*
  * Keys for qemu_rbd_parse_filename(), not in the QAPI schema
@@ -366,14 +340,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts 
*opts, Error **errp)
 int64_t objsize;
 int obj_order = 0;
 const char *pool, *name, *conf, *clientname, *keypairs;
-const char *secretid;
 rados_t cluster;
 rados_ioctx_t io_ctx;
 QDict *options = NULL;
 int ret = 0;
 
-secretid = qemu_opt_get(opts, "password-secret");
-
 /* Read out options */
 bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
  BDRV_SECTOR_SIZE);
@@ -426,11 +397,6 @@ static int qemu_rbd_create(const char *filename, QemuOpts 
*opts, Error **errp)
 goto shutdown;
 }
 
-if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
-ret = -EIO;
-goto shutdown;
-}
-
 ret = rados_connect(cluster);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "error connecting");
@@ -596,7 +562,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 {
 BDRVRBDState *s = bs->opaque;
 const char *pool, *snap, *conf, *clientname, *name, *keypairs;
-const char *secretid;
 QemuOpts *opts;
 Error *local_err = NULL;
 char *mon_host = NULL;
@@ -618,8 +583,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto failed_opts;
 }
 
-secretid = qemu_opt_get(opts, "password-secret");
-
 pool   = qemu_opt_get(opts, "pool");
 conf   = qemu_opt_get(opts, "conf");
 snap   = qemu_opt_get(opts, "snapshot");
@@ -661,11 +624,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 }
 
-if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
-r = -EIO;
-goto failed_shutdown;
-}
-
 /*
  * Fallback to more conservative semantics if setting cache
  * options fails. Ignore errors from setting rbd_cache because the
@@ -1105,11 +1063,6 @@ static QemuOptsList qemu_rbd_create_opts = {
 .type = QEMU_OPT_SIZE,
 .help = "RBD object size"
 },
-{
-.name = "password-secret",
-.type = QEMU_OPT_STRING,
-.help = "ID of secret providing the password",
-},
 { /* end of list */ }
 }
 };
-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 11/11] rbd: Fix bugs around -drive parameter "server"

2017-03-27 Thread Markus Armbruster

qemu_rbd_open() takes option parameters as a flattened QDict, with
keys of the form server.%d.host, server.%d.port, where %d counts up
from zero.

qemu_rbd_array_opts() extracts these values as follows.  First, it
calls qdict_array_entries() to find the list's length.  For each list
element, it formats the list's key prefix (e.g. "server.0."), then
creates a new QDict holding the options with that key prefix, then
converts that to a QemuOpts, so it can finally get the member values
from there.

If there's one surefire way to make code using QDict more awkward,
it's creating more of them and mixing in QemuOpts for good measure.

The extraction of keys starting with server.%d into another QDict
makes us ignore parameters like server.0.neither-host-nor-port
silently.

The conversion to QemuOpts abuses runtime_opts, as described a few
commits ago.

Rewrite to simply get the values straight from the options QDict.

Fixes -drive not to crash when server.*.* are present, but
server.*.host is absent.

Fixes -drive to reject invalid server.*.*.

Permits cleaning up runtime_opts.  Do that, and fix -drive to reject
bogus parameters host and port instead of silently ignoring them.

Signed-off-by: Markus Armbruster 
---
 block/rbd.c | 127 +++-
 1 file changed, 32 insertions(+), 95 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 5a58d3e..7c6f084 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -13,14 +13,13 @@
 
 #include "qemu/osdep.h"
 
+#include 
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
 #include "qemu/cutils.h"
 #include "qapi/qmp/qstring.h"
 
-#include 
-
 /*
  * When specifying the image filename use:
  *
@@ -299,7 +298,7 @@ static QemuOptsList runtime_opts = {
 .help = "Rados id name",
 },
 /*
- * server.* extracted manually, see qemu_rbd_array_opts()
+ * server.* extracted manually, see qemu_rbd_mon_host()
  */
 
 /*
@@ -314,21 +313,6 @@ static QemuOptsList runtime_opts = {
 .type = QEMU_OPT_STRING,
 .help = "Legacy rados key/value option parameters",
 },
-
-/*
- * The remainder aren't option keys, but option sub-sub-keys,
- * so that qemu_rbd_array_opts() can abuse runtime_opts for
- * its own purposes
- * TODO clean this up
- */
-{
-.name = "host",
-.type = QEMU_OPT_STRING,
-},
-{
-.name = "port",
-.type = QEMU_OPT_STRING,
-},
 { /* end of list */ }
 },
 };
@@ -471,89 +455,43 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
 qemu_aio_unref(acb);
 }
 
-#define RBD_MON_HOST  0
-
-static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type,
- Error **errp)
+static char *qemu_rbd_mon_host(QDict *options, Error **errp)
 {
-int num_entries;
-QemuOpts *opts = NULL;
-QDict *sub_options;
-const char *host;
-const char *port;
-char *str;
-char *rados_str = NULL;
-Error *local_err = NULL;
+const char **vals = g_new(const char *, qdict_size(options) + 1);
+char keybuf[32];
+const char *host, *port;
+char *rados_str;
 int i;
 
-assert(type == RBD_MON_HOST);
-
-num_entries = qdict_array_entries(options, prefix);
-
-if (num_entries < 0) {
-error_setg(errp, "Parse error on RBD QDict array");
-return NULL;
-}
-
-for (i = 0; i < num_entries; i++) {
-char *strbuf = NULL;
-const char *value;
-char *rados_str_tmp;
-
-str = g_strdup_printf("%s%d.", prefix, i);
-qdict_extract_subqdict(options, &sub_options, str);
-g_free(str);
-
-opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-qemu_opts_absorb_qdict(opts, sub_options, &local_err);
-QDECREF(sub_options);
-if (local_err) {
-error_propagate(errp, local_err);
-g_free(rados_str);
+for (i = 0;; i++) {
+sprintf(keybuf, "server.%d.host", i);
+host = qdict_get_try_str(options, keybuf);
+qdict_del(options, keybuf);
+sprintf(keybuf, "server.%d.port", i);
+port = qdict_get_try_str(options, keybuf);
+qdict_del(options, keybuf);
+if (!host && !port) {
+break;
+}
+if (!host) {
+error_setg(errp, "Parameter server.%d.host is missing", i);
 rados_str = NULL;
-goto exit;
+goto out;
 }
 
-if (type == RBD_MON_HOST) {
-host = qemu_opt_get(opts, "host");
-port = qemu_opt_get(opts, "port");
-
-value = host;
-if (port) {
-/* check for ipv6 */
-if (strchr(host, ':')) {
-strbuf = g_strdup_printf("[%s]:%s", host, port);
-} else {
-

[Qemu-devel] [PATCH RFC v3 for-2.9 02/11] rbd: Fix to cleanly reject -drive without pool or image

2017-03-27 Thread Markus Armbruster

qemu_rbd_open() neglects to check pool and image are present.
Reproducer:

$ qemu-system-x86_64 -nodefaults -drive if=none,driver=rbd,pool=p
Segmentation fault (core dumped)
$ qemu-system-x86_64 -nodefaults -drive if=none,driver=rbd,image=i
qemu-system-x86_64: -drive if=none,driver=rbd,image=i: error opening pool 
(null)

Doesn't affect -drive with file=..., because qemu_rbd_parse_filename()
always sets both pool and image.

Doesn't affect -blockdev, because pool and image are mandatory in the
QAPI schema.

Fix by adding the missing checks.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 block/rbd.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index ee13f3d..5ba2a87 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -711,6 +711,12 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 name   = qemu_opt_get(opts, "image");
 keypairs   = qemu_opt_get(opts, "keyvalue-pairs");
 
+if (!pool || !name) {
+error_setg(errp, "Parameters 'pool' and 'image' are required");
+r = -EINVAL;
+goto failed_opts;
+}
+
 r = rados_create(&s->cluster, clientname);
 if (r < 0) {
 error_setg_errno(errp, -r, "error initializing");
@@ -718,9 +724,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 
 s->snap = g_strdup(snap);
-if (name) {
-pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name);
-}
+pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name);
 
 /* try default location when conf=NULL, but ignore failure */
 r = rados_conf_read_file(s->cluster, conf);
-- 
2.7.4

[Qemu-devel] [PATCH RFC v3 for-2.9 07/11] rbd: Clean up qemu_rbd_create()'s detour through QemuOpts

2017-03-27 Thread Markus Armbruster

The conversion from QDict to QemuOpts is pointless.  Simply get the
stuff straight from the QDict.

Signed-off-by: Markus Armbruster 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
---
 block/rbd.c | 20 +---
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index b2afe07..cf0bab0 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -376,7 +376,6 @@ static int qemu_rbd_create(const char *filename, QemuOpts 
*opts, Error **errp)
 rados_t cluster;
 rados_ioctx_t io_ctx;
 QDict *options = NULL;
-QemuOpts *rbd_opts = NULL;
 int ret = 0;
 
 secretid = qemu_opt_get(opts, "password-secret");
@@ -407,19 +406,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts 
*opts, Error **errp)
 goto exit;
 }
 
-rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-qemu_opts_absorb_qdict(rbd_opts, options, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-ret = -EINVAL;
-goto exit;
-}
-
-pool   = qemu_opt_get(rbd_opts, "pool");
-conf   = qemu_opt_get(rbd_opts, "conf");
-clientname = qemu_opt_get(rbd_opts, "user");
-name   = qemu_opt_get(rbd_opts, "image");
-keypairs   = qemu_opt_get(rbd_opts, "=keyvalue-pairs");
+pool   = qdict_get_str(options, "pool");
+conf   = qdict_get_str(options, "conf");
+clientname = qdict_get_str(options, "user");
+name   = qdict_get_str(options, "image");
+keypairs   = qdict_get_str(options, "=keyvalue-pairs");
 
 ret = rados_create(&cluster, clientname);
 if (ret < 0) {
@@ -470,7 +461,6 @@ shutdown:
 
 exit:
 QDECREF(options);
-qemu_opts_del(rbd_opts);
 return ret;
 }
 
-- 
2.7.4

Re: [Qemu-devel] [PATCH 0/3] Add a tester for HMP commands

2017-03-27 Thread Dr. David Alan Gilbert

* Thomas Huth (th...@redhat.com) wrote:
> We currently do not test HMP commands automatically yet, so if they
> break, we do not notice this until somebody runs into the problem
> (like the "info qtree" problem that we recently had on qemu-system-ppc64).
> So let's add a simple tester that runs some HMP commands to check if they
> can crash or abort QEMU.
> 
> Note: Three boards are currently still blacklisted in the third patch.
> I've added the problems to our BiteSizeTasks wiki page, so I hope they
> will get fixed by GSoC students or somebody else soon. Once the problems
> are fixed, the blacklisting can be removed in the tester, too.

Queued

> Thomas Huth (3):
>   libqtest: Ignore QMP events when parsing the response for HMP commands
>   libqtest: Add a generic function to run a callback function for every
> machine
>   tests: Add a tester for HMP commands
> 
>  tests/Makefile.include |   2 +
>  tests/libqtest.c   |  36 +++
>  tests/libqtest.h   |   8 +++
>  tests/pc-cpu-test.c|  95 +++--
>  tests/qom-test.c   |  36 ++-
>  tests/test-hmp.c   | 160 
> +
>  6 files changed, 248 insertions(+), 89 deletions(-)
>  create mode 100644 tests/test-hmp.c
> 
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[Qemu-devel] [PATCH] debug: made printf always compile in debug output

2017-03-27 Thread Danil Antonov

>From cddb60744808eedbadebdc4f0258ee6db694c4a3 Mon Sep 17 00:00:00 2001
From: Danil Antonov 
Date: Mon, 27 Mar 2017 14:43:10 +0300
Subject: [PATCH] debug: made printf always compile in debug output

Wrapped printf calls inside debug macros (DPRINTF) in `if` statement.
This will ensure that printf function will always compile even if debug
output is turned off and, in turn, will prevent bitrot of the format
strings. Also updated some debug messages to remove compiler errors and
warnings.

Signed-off-by: Danil Antonov 
---
 A patch for "Bitrot prevention" in BiteSizedTasks.
 hw/acpi/pcihp.c   | 15 ++
 hw/acpi/piix4.c   | 16 ++-
 hw/arm/strongarm.c| 17 +++
 hw/arm/z2.c   | 16 +++
 hw/audio/lm4549.c | 18 +++-
 hw/block/pflash_cfi01.c   | 18 ++--
 hw/block/pflash_cfi02.c   | 19 ++--
 hw/char/ipoctal232.c  | 24 +---
 hw/char/serial.c  | 16 ++-
 hw/core/empty_slot.c  | 15 ++
 hw/display/sm501.c| 15 ++
 hw/display/ssd0303.c  | 26 ++---
 hw/display/ssd0323.c  | 28 +-
 hw/dma/i82374.c   | 18 +++-
 hw/dma/puv3_dma.c |  8 +++---
 hw/gpio/pl061.c   | 28 ++
 hw/gpio/puv3_gpio.c   |  8 +++---
 hw/i2c/bitbang_i2c.c  | 16 +++
 hw/i2c/pm_smbus.c | 15 ++
 hw/i2c/smbus.c| 28 +++---
 hw/i386/acpi-build.c  | 17 +++
 hw/i386/intel_iommu.c | 18 ++--
 hw/i386/pc.c  | 16 +++
 hw/i386/xen/xen_platform.c| 17 ++-
 hw/input/adb.c| 17 ++-
 hw/input/pckbd.c  | 17 +++
 hw/input/vmmouse.c| 15 ++
 hw/intc/arm_gicv3_kvm.c   | 16 ++-
 hw/intc/exynos4210_combiner.c | 18 ++--
 hw/intc/heathrow_pic.c| 17 ++-
 hw/intc/i8259.c   | 16 ++-
 hw/intc/ioapic.c  | 16 ++-
 hw/intc/puv3_intc.c   |  4 +--
 hw/ipack/tpci200.c| 16 +++
 hw/isa/apm.c  | 14 +
 hw/isa/vt82c686.c | 18 ++--
 hw/mips/gt64xxx_pci.c | 15 ++
 hw/misc/macio/cuda.c  | 15 ++
 hw/misc/puv3_pm.c |  8 +++---
 hw/net/dp8393x.c  | 15 ++
 hw/net/lan9118.c  | 20 +
 hw/net/mcf_fec.c  | 18 +++-
 hw/net/stellaris_enet.c   | 28 +++---
 hw/nvram/mac_nvram.c  | 16 +++
 hw/pci-bridge/dec.c   | 18 ++--
 hw/pci-host/apb.c | 16 +++
 hw/pci-host/bonito.c  | 19 +++-
 hw/pci-host/grackle.c | 16 +++
 hw/pci-host/uninorth.c| 16 +++
 hw/pci/msi.c  | 17 +++
 hw/pci/pci.c  | 16 +++
 hw/pci/pci_host.c | 16 +++
 hw/pci/pcie.c | 18 
 hw/pci/pcie_aer.c | 18 
 hw/s390x/s390-pci-bus.c   | 18 +++-
 hw/s390x/s390-pci-inst.c  | 18 +++-
 hw/scsi/lsi53c895a.c  | 26 ++---
 hw/scsi/scsi-disk.c   | 17 ++-
 hw/scsi/scsi-generic.c| 17 +--
 hw/sd/pl181.c | 16 +++
 hw/sd/sd.c| 14 +
 hw/sd/ssi-sd.c| 27 ++---
 hw/sparc64/sparc64.c  | 36 ---
 hw/sparc64/sun4u.c| 16 ++-
 hw/ssi/pl022.c| 28 ++
 hw/timer/exynos4210_mct.c | 43 +--
 hw/timer/exynos4210_pwm.c | 18 ++--
 hw/timer/exynos4210_rtc.c | 12 
 hw/timer/hpet.c   | 24 +---
 hw/timer/mc146818rtc.c| 33 -
 hw/timer/pl031.c  | 17 +--
 hw/timer/puv3_ost.c   |  4 +--
 hw/timer/sun4v-rtc.c  | 18 ++--
 hw/usb/dev-serial.c   | 17 +--
 hw/usb/dev-storage.c  | 17 +--
 hw/usb/hcd-ehci.h | 10 +++
 hw/usb/hcd-xhci.c | 19 +++-
 hw/virtio/virtio-bus.c| 15 ++
 hw/virtio/virtio-mmio.c   | 17 ++-
 include/hw/unicore32/puv3.h   | 15 ++
 include/hw/vfio/vfio-common.h | 17 +--
 kvm-all.c | 16 +--
 migration/block.c | 16 ++-
 page_cache.c  | 17 ++-
 target/i386/kvm.c | 18 ++--
 target/ppc/kvm.c  | 18 ++--
 target/s390x/kvm.c| 18 ++--
 target/sparc/ldst_helper.c| 67
+--
 target/unicore32/helper.c | 16 ++-
 target/unicore32/softmmu.c| 14 +

[Qemu-devel] [[RFC]PATCH:hw/sd:sd_init()] hw/sd : modified the sd_init() function

2017-03-27 Thread Tejaswini

From: Tejaswini Poluri 

Changed sd_init() to take SDstate by value and return state as success/failure
Edited the rest of the functions using sd_init() to accommodate the change

Signed-off-by: Tejaswini Poluri 
---
 hw/sd/milkymist-memcard.c |  3 +--
 hw/sd/omap_mmc.c  |  6 ++
 hw/sd/pl181.c |  4 ++--
 hw/sd/sd.c| 13 -
 hw/sd/ssi-sd.c|  3 +--
 include/hw/sd/sd.h|  2 +-
 6 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 1f2f0ed..68e50e5 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -259,8 +259,7 @@ static int milkymist_memcard_init(SysBusDevice *dev)
 /* FIXME use a qdev drive property instead of drive_get_next() */
 dinfo = drive_get_next(IF_SD);
 blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
-s->card = sd_init(blk, false);
-if (s->card == NULL) {
+if (sd_init(s->card, blk, false) < 0) {
 return -1;
 }
 
diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
index e934cd3..add48a6 100644
--- a/hw/sd/omap_mmc.c
+++ b/hw/sd/omap_mmc.c
@@ -593,8 +593,7 @@ struct omap_mmc_s *omap_mmc_init(hwaddr base,
 memory_region_add_subregion(sysmem, base, &s->iomem);
 
 /* Instantiate the storage */
-s->card = sd_init(blk, false);
-if (s->card == NULL) {
+if (sd_init(s->card, blk, false) < 0) {
 exit(1);
 }
 
@@ -620,8 +619,7 @@ struct omap_mmc_s *omap2_mmc_init(struct 
omap_target_agent_s *ta,
 omap_l4_attach(ta, 0, &s->iomem);
 
 /* Instantiate the storage */
-s->card = sd_init(blk, false);
-if (s->card == NULL) {
+if (sd_init(s->card, blk, false) < 0) {
 exit(1);
 }
 
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 82c63a4..c2e2459 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -502,8 +502,8 @@ static void pl181_realize(DeviceState *dev, Error **errp)
 
 /* FIXME use a qdev drive property instead of drive_get_next() */
 dinfo = drive_get_next(IF_SD);
-s->card = sd_init(dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, false);
-if (s->card == NULL) {
+if (sd_init(s->card, dinfo ?
+   blk_by_legacy_dinfo(dinfo) : NULL, false) < 0) {
 error_setg(errp, "sd_init failed");
 }
 }
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index ba47bff..b593939 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -562,7 +562,7 @@ static const VMStateDescription sd_vmstate = {
 };
 
 /* Legacy initialization function for use by non-qdevified callers */
-SDState *sd_init(BlockBackend *blk, bool is_spi)
+int sd_init(SDState *sd_state, BlockBackend *blk, bool is_spi)
 {
 Object *obj;
 DeviceState *dev;
@@ -573,16 +573,19 @@ SDState *sd_init(BlockBackend *blk, bool is_spi)
 qdev_prop_set_drive(dev, "drive", blk, &err);
 if (err) {
 error_report("sd_init failed: %s", error_get_pretty(err));
-return NULL;
+return -1;
 }
 qdev_prop_set_bit(dev, "spi", is_spi);
 object_property_set_bool(obj, true, "realized", &err);
 if (err) {
 error_report("sd_init failed: %s", error_get_pretty(err));
-return NULL;
+return -1;
 }
-
-return SD_CARD(dev);
+sd_state = SD_CARD(dev);
+if (!sd_state) {
+   return -1;
+   }
+return 0;
 }
 
 void sd_set_cb(SDState *sd, qemu_irq readonly, qemu_irq insert)
diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
index 24001dc..c70b32d 100644
--- a/hw/sd/ssi-sd.c
+++ b/hw/sd/ssi-sd.c
@@ -244,8 +244,7 @@ static void ssi_sd_realize(SSISlave *d, Error **errp)
 s->mode = SSI_SD_CMD;
 /* FIXME use a qdev drive property instead of drive_get_next() */
 dinfo = drive_get_next(IF_SD);
-s->sd = sd_init(dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, true);
-if (s->sd == NULL) {
+if (sd_init(s->sd, dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, true) < 0) {
 error_setg(errp, "Device initialization failed.");
 return;
 }
diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 96caefe..63741c0 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -115,7 +115,7 @@ typedef struct {
 } SDBusClass;
 
 /* Legacy functions to be used only by non-qdevified callers */
-SDState *sd_init(BlockBackend *bs, bool is_spi);
+int sd_init(SDState *sd, BlockBackend *bs, bool is_spi);
 int sd_do_command(SDState *sd, SDRequest *req,
   uint8_t *response);
 void sd_write_data(SDState *sd, uint8_t value);
-- 
2.7.4

Re: [Qemu-devel] [PATCH v2] hmp: gpa2hva and gpa2hpa hostaddr command

2017-03-27 Thread Dr. David Alan Gilbert

* Paolo Bonzini (pbonz...@redhat.com) wrote:
> These commands are useful when testing machine-check passthrough.
> gpa2hva is useful to inject a MADV_HWPOISON madvise from gdb, while
> gpa2hpa is useful to inject an error with the mce-inject kernel
> module.
> 
> Signed-off-by: Paolo Bonzini 

Reviewed-by: Dr. David Alan Gilbert 

And queued.

Dave
> ---
>  hmp-commands.hx |  32 ++
>  monitor.c   | 101 
> 
>  2 files changed, 133 insertions(+)
> 
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 8819281..0aca984 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -526,6 +526,38 @@ Dump 80 16 bit values at the start of the video memory.
>  ETEXI
>  
>  {
> +.name   = "gpa2hva",
> +.args_type  = "addr:l",
> +.params = "addr",
> +.help   = "print the host virtual address corresponding to a 
> guest physical address",
> +.cmd= hmp_gpa2hva,
> +},
> +
> +STEXI
> +@item gpa2hva @var{addr}
> +@findex gpa2hva
> +Print the host virtual address at which the guest's physical address 
> @var{addr}
> +is mapped.
> +ETEXI
> +
> +#ifdef CONFIG_LINUX
> +{
> +.name   = "gpa2hpa",
> +.args_type  = "addr:l",
> +.params = "addr",
> +.help   = "print the host physical address corresponding to a 
> guest physical address",
> +.cmd= hmp_gpa2hpa,
> +},
> +#endif
> +
> +STEXI
> +@item gpa2hpa @var{addr}
> +@findex gpa2hpa
> +Print the host physical address at which the guest's physical address 
> @var{addr}
> +is mapped.
> +ETEXI
> +
> +{
>  .name   = "p|print",
>  .args_type  = "fmt:/,val:l",
>  .params = "/fmt expr",
> diff --git a/monitor.c b/monitor.c
> index be282ec..216a97a 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -1421,6 +1421,107 @@ static void hmp_physical_memory_dump(Monitor *mon, 
> const QDict *qdict)
>  memory_dump(mon, count, format, size, addr, 1);
>  }
>  
> +static void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, Error **errp)
> +{
> +MemoryRegionSection mrs = memory_region_find(get_system_memory(),
> + addr, 1);
> +
> +if (!mrs.mr) {
> +error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, 
> addr);
> +return NULL;
> +}
> +
> +if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
> +error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", 
> addr);
> +memory_region_unref(mrs.mr);
> +return NULL;
> +}
> +
> +*p_mr = mrs.mr;
> +return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region);
> +}
> +
> +static void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
> +{
> +hwaddr addr = qdict_get_int(qdict, "addr");
> +Error *local_err = NULL;
> +MemoryRegion *mr;
> +void *ptr;
> +
> +ptr = gpa2hva(&mr, addr, &local_err);
> +if (local_err) {
> +error_report_err(local_err);
> +return;
> +}
> +
> +monitor_printf(mon, "Host virtual address for 0x%" HWADDR_PRIx
> +   " (%s) is %p\n",
> +   addr, mr->name, ptr);
> +
> +memory_region_unref(mr);
> +}
> +
> +#ifdef CONFIG_LINUX
> +static uint64_t vtop(void *ptr, Error **errp)
> +{
> +uint64_t pinfo;
> +uint64_t ret = -1;
> +uintptr_t addr = (uintptr_t) ptr;
> +uintptr_t pagesize = getpagesize();
> +off_t offset = addr / pagesize * sizeof(pinfo);
> +int fd;
> +
> +fd = open("/proc/self/pagemap", O_RDONLY);
> +if (fd == -1) {
> +error_setg_errno(errp, errno, "Cannot open /proc/self/pagemap");
> +return -1;
> +}
> +
> +/* Force copy-on-write if necessary.  */
> +atomic_add((uint8_t *)ptr, 0);
> +
> +if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
> +error_setg_errno(errp, errno, "Cannot read pagemap");
> +goto out;
> +}
> +if ((pinfo & (1ull << 63)) == 0) {
> +error_setg(errp, "Page not present");
> +goto out;
> +}
> +ret = ((pinfo & 0x007full) * pagesize) | (addr & (pagesize - 
> 1));
> +
> +out:
> +close(fd);
> +return ret;
> +}
> +
> +static void hmp_gpa2hpa(Monitor *mon, const QDict *qdict)
> +{
> +hwaddr addr = qdict_get_int(qdict, "addr");
> +Error *local_err = NULL;
> +MemoryRegion *mr;
> +void *ptr;
> +uint64_t physaddr;
> +
> +ptr = gpa2hva(&mr, addr, &local_err);
> +if (local_err) {
> +error_report_err(local_err);
> +return;
> +}
> +
> +physaddr = vtop(ptr, &local_err);
> +if (local_err) {
> +error_report_err(local_err);
> +} else {
> +monitor_printf(mon, "Host physical address for 0x%" HWADDR_PRIx
> +   " (%s) is 0x%" PRIx64 "\n",
> +   addr, mr->name, (uint64_t) physaddr);
> +}
> +
> +memory_re

Re: [Qemu-devel] [Qemu-block] [PATCH for-2.10 01/16] block: Add PreallocMode to BD.bdrv_truncate()

2017-03-27 Thread Max Reitz

On 23.03.2017 16:32, Stefan Hajnoczi wrote:
> On Wed, Mar 22, 2017 at 05:50:59PM +0100, Max Reitz wrote:
>> On 22.03.2017 17:28, Stefan Hajnoczi wrote:
>>> On Mon, Mar 20, 2017 at 04:07:16PM +0100, Max Reitz wrote:
 On 20.03.2017 11:18, Stefan Hajnoczi wrote:
> On Mon, Mar 13, 2017 at 10:39:46PM +0100, Max Reitz wrote:
>> diff --git a/block/iscsi.c b/block/iscsi.c
>> index ab559a6f71..5d6265c4a6 100644
>> --- a/block/iscsi.c
>> +++ b/block/iscsi.c
>> @@ -2060,11 +2060,16 @@ static void iscsi_reopen_commit(BDRVReopenState 
>> *reopen_state)
>>  }
>>  }
>>  
>> -static int iscsi_truncate(BlockDriverState *bs, int64_t offset, Error 
>> **errp)
>> +static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
>> +  PreallocMode prealloc, Error **errp)
>>  {
>>  IscsiLun *iscsilun = bs->opaque;
>>  Error *local_err = NULL;
>>  
>> +if (prealloc != PREALLOC_MODE_OFF) {
>> +return -ENOTSUP;
>> +}
>> +
>>  if (iscsilun->type != TYPE_DISK) {
>>  return -ENOTSUP;
>>  }
>
> Nevermind what I said about adding a BiteSizedTasks entry:
>
> The missing errp usage is not in qemu.git/master yet.  Please fix up
> your bdrv_truncate() errp patch to use errp in all cases, e.g.
> error_setg("Unable to truncate non-disk LUN").

 The thing is that I wasn't comfortable doing that for all block drivers.
 I mean, I can take another look but I'd rather have vague error messages
 ("truncation failed: #{strerror}") than outright wrong ones because I
 didn't know what error message to use.

 Of course you could argue that this may probably come out during review
 but that implies that every submaintainer for every block driver would
 actually come out for review...
>>>
>>> I'm worried about errp being set in only a subset of error cases.
>>>
>>> This is likely to cause bugs if callers use if (local_err).  Grepping
>>> through the codebase I can see instances of:
>>
>> Yes, but the generic bdrv_truncate() will always set errp if the driver
>> hasn't done so.
> 
> I prefer consistently setting errp to make patch review easy (no
> checking all callers to see how they behave), making it safe to
> copy-paste the code elsewhere, etc.

The thing is, there is only a single caller for all of the
BlockDriver.bdrv_truncate() functions and that is the generic
bdrv_truncate() function.

I mean, of course I can copy-paste my generic error message from the
global bdrv_truncate() function into every driver where I'm not sure
what error to emit, but I find that a bit... Well, it looks bad when you
just look at the code without the history behind it.

But since you insist, I'll do so gladly. :-)

Max

> It's safer to be consistent, can produce more detailed error messages,
> and the cost of writing error_setg() calls is low.  But it's a matter of
> style, you've pointed out the code is technically correct right now.
> 
> Stefan
> 




signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH for-2.9] i386: Don't override -cpu options on -cpu host/max

2017-03-27 Thread Eduardo Habkost

On Mon, Mar 27, 2017 at 01:10:59PM +0200, Igor Mammedov wrote:
> On Fri, 24 Mar 2017 17:36:45 -0300
> Eduardo Habkost  wrote:
> 
> > The existing code for "host" and "max" CPU models overrides every
> > single feature in the CPU object at realize time, even the ones
> > that were explicitly enabled or disabled by the user using
> > "feat=on" or "feat=off", while features set using +feat/-feat are
> > kept.
> > 
> > This means "-cpu host,+invtsc" works as expected, while
> > "-cpu host,invtsc=on" doesn't.
> > 
> > This was a known bug, already documented in a comment inside
> > x86_cpu_expand_features(). What makes this bug worse now is that
> > libvirt 3.0.0 and newer now use "feat=on|off" instead of
> > +feat/-feat when it detects a QEMU version that supports it (see
> > libvirt commit d47db7b16dd5422c7e487c8c8ee5b181a2f9cd66).
> > 
> > Change the feature property getter/setter to set a
> > env->user_features field, to keep track of features that were
> > explicitly changed using QOM properties. Then make the
> > max_features code not override user features when handling "-cpu
> > host" and "-cpu max".
> > 
> > This will also allow us to remove the plus_features/minus_features
> > hack in the future, but I plan to do that after 2.9.0 is
> > released.
> > 
> > Reported-by: Jiri Denemark 
> > Signed-off-by: Eduardo Habkost 
> > ---
> >  target/i386/cpu.h |  2 ++
> >  target/i386/cpu.c | 33 +
> >  2 files changed, 23 insertions(+), 12 deletions(-)
> > 
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 07401ad9fe..c4602ca80d 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1147,6 +1147,8 @@ typedef struct CPUX86State {
> >  uint32_t cpuid_vendor3;
> >  uint32_t cpuid_version;
> >  FeatureWordArray features;
> > +/* Features that were explicitly enabled/disabled */
> > +FeatureWordArray user_features;
> >  uint32_t cpuid_model[12];
> >  
> >  /* MTRRs */
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 7aa762245a..5f2addbf75 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -3373,15 +3373,20 @@ static void x86_cpu_expand_features(X86CPU *cpu, 
> > Error **errp)
> >  GList *l;
> >  Error *local_err = NULL;
> >  
> > -/*TODO: cpu->max_features incorrectly overwrites features
> > - * set using "feat=on|off". Once we fix this, we can convert
> > +/*TODO: Now cpu->max_features doesn't overwrite features
> > + * set using QOM properties, and we can convert
> >   * plus_features & minus_features to global properties
> >   * inside x86_cpu_parse_featurestr() too.
> >   */
> >  if (cpu->max_features) {
> >  for (w = 0; w < FEATURE_WORDS; w++) {
> > -env->features[w] =
> > -x86_cpu_get_supported_feature_word(w, cpu->migratable);
> > +/* Override only features that weren't not set explicitly
> > + * by the user.
> s/not// or if it was intended rephrase to avoid double negation.

I will fix that, thanks for spotting it.

> 
> > + */
> > +env->features[w] &= env->user_features[w];
> it probably should be assert to catch features not set via property,
> which shouldn't be there in the first place, I don't like silent
> filtering that happens here.

I wouldn't like to add an assert() so late in the 2.9 schedule.
But you are right that having anything present in
(env->features & ~env->user_features) would be a bug somewhere
else, and this line is not necessary.

> 
> > +env->features[w] |=
> > +x86_cpu_get_supported_feature_word(w, cpu->migratable) &
> > +~env->user_features[w];
> >  }
> >  }
> >  
> > @@ -3692,15 +3697,17 @@ static void x86_cpu_unrealizefn(DeviceState *dev, 
> > Error **errp)
> >  }
> >  
> >  typedef struct BitProperty {
> > -uint32_t *ptr;
> > +FeatureWord w;
> it would be better if this refactoring and related changes
> were in a separate patch, something along lines:
>  "x86/cpu: use FeatureWord instead of keeping a pointer to cpuid leaf"

I will do it in v2.

> 
> >  uint32_t mask;
> >  } BitProperty;
> >  
> >  static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name,
> >   void *opaque, Error **errp)
> >  {
> > +X86CPU *cpu = X86_CPU(obj);
> >  BitProperty *fp = opaque;
> > -bool value = (*fp->ptr & fp->mask) == fp->mask;
> > +uint32_t f = cpu->env.features[fp->w];
> > +bool value = (f & fp->mask) == fp->mask;
> >  visit_type_bool(v, name, &value, errp);
> >  }
> >  
> > @@ -3708,6 +3715,7 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor 
> > *v, const char *name,
> >   void *opaque, Error **errp)
> >  {
> >  DeviceState *dev = DEVICE(obj);
> > +X86CPU *cpu = X86_CPU(obj);
> >  BitProperty *fp = opaque;
> >  Error *local_err = NULL;
> >  bool value;
>

Re: [Qemu-devel] [PATCH 2/3] libqtest: Add a generic function to run a callback function for every machine

2017-03-27 Thread Dr. David Alan Gilbert

* Thomas Huth (th...@redhat.com) wrote:
> Some tests need to run single tests for every available machine of the
> current QEMU binary. To avoid code duplication, let's extract this
> code that deals with 'query-machines' into a separate function.
> 
> Signed-off-by: Thomas Huth 

Having queued it, it's failing my test.

The reason is that qom-tests.c has a blacklist which blacklists Xen;
however your new generic function doesn't have the blacklist, so when
you run HMP commands on all machines it tries to run it on Xen since
my PC has the Xen libraries installed (so builds with Xen support) but
isn't a Xen guest.

It fails with:
xen be core: xen be core: can't connect to xenstored
can't connect to xenstored
xen_init_pv: xen backend core setup failed
Broken pipe

I suggest you probably need to share the blacklist as well.

(Unqueued)

Dave

> ---
>  tests/libqtest.c| 30 +
>  tests/libqtest.h|  8 +
>  tests/pc-cpu-test.c | 95 
> -
>  tests/qom-test.c| 36 
>  4 files changed, 80 insertions(+), 89 deletions(-)
> 
> diff --git a/tests/libqtest.c b/tests/libqtest.c
> index c9b2d76..d8b8066 100644
> --- a/tests/libqtest.c
> +++ b/tests/libqtest.c
> @@ -938,3 +938,33 @@ bool qtest_big_endian(QTestState *s)
>  {
>  return s->big_endian;
>  }
> +
> +void qtest_cb_for_every_machine(void (*cb)(const char *machine))
> +{
> +QDict *response, *minfo;
> +QList *list;
> +const QListEntry *p;
> +QObject *qobj;
> +QString *qstr;
> +const char *mname;
> +
> +qtest_start("-machine none");
> +response = qmp("{ 'execute': 'query-machines' }");
> +g_assert(response);
> +list = qdict_get_qlist(response, "return");
> +g_assert(list);
> +
> +for (p = qlist_first(list); p; p = qlist_next(p)) {
> +minfo = qobject_to_qdict(qlist_entry_obj(p));
> +g_assert(minfo);
> +qobj = qdict_get(minfo, "name");
> +g_assert(qobj);
> +qstr = qobject_to_qstring(qobj);
> +g_assert(qstr);
> +mname = qstring_get_str(qstr);
> +cb(mname);
> +}
> +
> +qtest_end();
> +QDECREF(response);
> +}
> diff --git a/tests/libqtest.h b/tests/libqtest.h
> index 2c9962d..43ffadd 100644
> --- a/tests/libqtest.h
> +++ b/tests/libqtest.h
> @@ -917,4 +917,12 @@ void qmp_fd_send(int fd, const char *fmt, ...);
>  QDict *qmp_fdv(int fd, const char *fmt, va_list ap);
>  QDict *qmp_fd(int fd, const char *fmt, ...);
>  
> +/**
> + * qtest_cb_for_every_machine:
> + * @cb: Pointer to the callback function
> + *
> + *  Call a callback function for every name of all available machines.
> + */
> +void qtest_cb_for_every_machine(void (*cb)(const char *machine));
> +
>  #endif
> diff --git a/tests/pc-cpu-test.c b/tests/pc-cpu-test.c
> index c3a2633..c4211a4 100644
> --- a/tests/pc-cpu-test.c
> +++ b/tests/pc-cpu-test.c
> @@ -79,69 +79,46 @@ static void test_data_free(gpointer data)
>  g_free(pc);
>  }
>  
> -static void add_pc_test_cases(void)
> +static void add_pc_test_case(const char *mname)
>  {
> -QDict *response, *minfo;
> -QList *list;
> -const QListEntry *p;
> -QObject *qobj;
> -QString *qstr;
> -const char *mname;
>  char *path;
>  PCTestData *data;
>  
> -qtest_start("-machine none");
> -response = qmp("{ 'execute': 'query-machines' }");
> -g_assert(response);
> -list = qdict_get_qlist(response, "return");
> -g_assert(list);
> -
> -for (p = qlist_first(list); p; p = qlist_next(p)) {
> -minfo = qobject_to_qdict(qlist_entry_obj(p));
> -g_assert(minfo);
> -qobj = qdict_get(minfo, "name");
> -g_assert(qobj);
> -qstr = qobject_to_qstring(qobj);
> -g_assert(qstr);
> -mname = qstring_get_str(qstr);
> -if (!g_str_has_prefix(mname, "pc-")) {
> -continue;
> -}
> -data = g_malloc(sizeof(PCTestData));
> -data->machine = g_strdup(mname);
> -data->cpu_model = "Haswell"; /* 1.3+ theoretically */
> -data->sockets = 1;
> -data->cores = 3;
> -data->threads = 2;
> -data->maxcpus = data->sockets * data->cores * data->threads * 2;
> -if (g_str_has_suffix(mname, "-1.4") ||
> -(strcmp(mname, "pc-1.3") == 0) ||
> -(strcmp(mname, "pc-1.2") == 0) ||
> -(strcmp(mname, "pc-1.1") == 0) ||
> -(strcmp(mname, "pc-1.0") == 0) ||
> -(strcmp(mname, "pc-0.15") == 0) ||
> -(strcmp(mname, "pc-0.14") == 0) ||
> -(strcmp(mname, "pc-0.13") == 0) ||
> -(strcmp(mname, "pc-0.12") == 0) ||
> -(strcmp(mname, "pc-0.11") == 0) ||
> -(strcmp(mname, "pc-0.10") == 0)) {
> -path = g_strdup_printf("cpu/%s/init/%ux%ux%u&maxcpus=%u",
> -   mname, data->sockets, data->cores,
> -   data->threads, data->maxcpus)

[Qemu-devel] [PATCH] scsi-generic: Fill in opt_xfer_len in INQUIRY reply if it is zero

2017-03-27 Thread Fam Zheng

When opt_xfer_len is zero, Linux ignores max_xfer_len erroneously.

While that obviously should be fixed, we do older guests a favor to
always filling in a value.

Signed-off-by: Fam Zheng 
---
 hw/scsi/scsi-generic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 2933119..a55ff87 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -237,9 +237,8 @@ static void scsi_read_complete(void * opaque, int ret)
 assert(max_transfer);
 stl_be_p(&r->buf[8], max_transfer);
 /* Also take care of the opt xfer len. */
-if (ldl_be_p(&r->buf[12]) > max_transfer) {
-stl_be_p(&r->buf[12], max_transfer);
-}
+stl_be_p(&r->buf[12],
+ MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12])));
 }
 scsi_req_data(&r->req, len);
 scsi_req_unref(&r->req);
-- 
2.9.3

[Qemu-devel] [PATCH] i386: Allow monitor / mwait cpuid override

2017-03-27 Thread Alexander Graf

KVM allows trap and emulate (read: NOP) of the MONITOR and MWAIT
instructions. There is work undergoing to enable actual execution
of these inside of KVM, but nobody really wants to expose the feature
to the guest by default, as it would eat up all of the host CPU.

So today there is no streamlined way to actually notify the guest that
it's ok to execute MONITOR / MWAIT, even when we want to explicitly
leave the guest in guest context.

This patch adds a new -cpu parameter called "mwait" which - when
enabled - force enables the MONITOR / MWAIT CPUID flag, even when
the underlying accel framework does not explicitly advertise support.

With that in place, we can explicitly allow users to specify that
they want have the guest execute MONITOR / MWAIT in its idle loop.

Signed-off-by: Alexander Graf 
---
 target/i386/cpu.c | 5 +
 target/i386/cpu.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7aa7622..c44020b 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3460,6 +3460,10 @@ static int x86_cpu_filter_features(X86CPU *cpu)
 x86_cpu_get_supported_feature_word(w, false);
 uint32_t requested_features = env->features[w];
 env->features[w] &= host_feat;
+if (cpu->expose_monitor && (w == FEAT_1_ECX)) {
+/* Force monitor feature in */
+env->features[w] |= CPUID_EXT_MONITOR;
+}
 cpu->filtered_features[w] = requested_features & ~env->features[w];
 if (cpu->filtered_features[w]) {
 rv = 1;
@@ -3988,6 +3992,7 @@ static Property x86_cpu_properties[] = {
 DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
 DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
 DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
+DEFINE_PROP_BOOL("mwait", X86CPU, expose_monitor, false),
 DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0),
 DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false),
 DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 07401ad..7400d00 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1214,6 +1214,7 @@ struct X86CPU {
 bool check_cpuid;
 bool enforce_cpuid;
 bool expose_kvm;
+bool expose_monitor;
 bool migratable;
 bool max_features; /* Enable all supported features automatically */
 uint32_t apic_id;
-- 
1.8.5.6

1 2 3 >

1 - 100 of 278 matches

Mail list logo