[PATCH v3 0/5] block-copy: protect block-copy internal structures

2021-06-08 Thread Emanuele Giuseppe Esposito
This series of patches aims to reduce the usage of the
AioContext lock in block-copy by introducing finer-grained
locks, as a step towards making the block layer thread-safe.

This series depends on my previous series that brings thread safety
to the smaller APIs used by block-copy: ratelimit, progressmeter
and co-shared-resource.

What's missing for block-copy to be fully thread-safe is fixing
the CoSleep API to allow cross-thread sleep and wakeup.
Paolo is working on it.

Patch 1 introduces the .method field in BlockCopyState, to be used
instead of .use_copy_range, .copy_size and .zeroes.
Patches 2-3 provide comments and refactoring in preparation for
the lock added in patch 4 on BlockCopyTask, BlockCopyCallState and
BlockCopyState. Patch 5 uses load_acquire/store_release to make sure
the BlockCopyCallState OUT fields are updated before .finished is set to
true.

Based-on: <20210518094058.25952-1-eespo...@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito 
---
v3:
* Use a single lock instead of two [Paolo, Vladimir]
* Extend lock to protect also BdrvDirtyBitmap API [Vladimir]
* Drop patch 6 (set .method as atomic) since with current refactoring
  it can be simply included in the near critical sections protected by
  the lock

Emanuele Giuseppe Esposito (4):
  block-copy: improve comments of BlockCopyTask and BlockCopyState types
and functions
  block-copy: move progress_set_remaining in block_copy_task_end
  block-copy: add a CoMutex
  block-copy: atomic .cancelled and .finished fields in
BlockCopyCallState

Paolo Bonzini (1):
  block-copy: streamline choice of copy_range vs. read/write

 block/block-copy.c | 319 +
 1 file changed, 181 insertions(+), 138 deletions(-)

-- 
2.30.2




[PATCH v3 1/5] block-copy: streamline choice of copy_range vs. read/write

2021-06-08 Thread Emanuele Giuseppe Esposito
From: Paolo Bonzini 

Put the logic to determine the copy size in a separate function, so
that there is a simple state machine for the possible methods of
copying data from one BlockDriverState to the other.

Use .method instead of .copy_range as the in-out argument, and
also include .zeroes as an additional copy method.

While at it, store the common computation of block_copy_max_transfer
into a new field of BlockCopyState, and make sure that we always
obey max_transfer; that's more efficient even for the
COPY_RANGE_READ_WRITE case.
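
As a rough illustration of the resulting state machine (a sketch, not the
literal patch; the helper name below is hypothetical), .method falls back to
buffered copying after the first copy_range failure and is upgraded to full
copy_range chunks after the first success:

/* Hypothetical helper summarizing the .method transitions. */
static void block_copy_adjust_method(BlockCopyState *s, bool copy_range_ok)
{
    if (!copy_range_ok &&
        (s->method == COPY_RANGE_SMALL || s->method == COPY_RANGE_FULL)) {
        /* copy_range failed once: fall back to buffered read/write */
        s->method = COPY_READ_WRITE;
    } else if (copy_range_ok && s->method == COPY_RANGE_SMALL) {
        /* first successful copy_range: allow larger chunks from now on */
        s->method = COPY_RANGE_FULL;
    }
}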

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Paolo Bonzini 
---
 block/block-copy.c | 171 ++---
 1 file changed, 85 insertions(+), 86 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index 943e30b7e6..d58051288b 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -28,6 +28,14 @@
 #define BLOCK_COPY_MAX_WORKERS 64
 #define BLOCK_COPY_SLICE_TIME 1ULL /* ns */
 
+typedef enum {
+COPY_READ_WRITE_CLUSTER,
+COPY_READ_WRITE,
+COPY_WRITE_ZEROES,
+COPY_RANGE_SMALL,
+COPY_RANGE_FULL
+} BlockCopyMethod;
+
 static coroutine_fn int block_copy_task_entry(AioTask *task);
 
 typedef struct BlockCopyCallState {
@@ -64,8 +72,7 @@ typedef struct BlockCopyTask {
 BlockCopyCallState *call_state;
 int64_t offset;
 int64_t bytes;
-bool zeroes;
-bool copy_range;
+BlockCopyMethod method;
 QLIST_ENTRY(BlockCopyTask) list;
 CoQueue wait_queue; /* coroutines blocked on this task */
 } BlockCopyTask;
@@ -86,8 +93,8 @@ typedef struct BlockCopyState {
 BdrvDirtyBitmap *copy_bitmap;
 int64_t in_flight_bytes;
 int64_t cluster_size;
-bool use_copy_range;
-int64_t copy_size;
+BlockCopyMethod method;
+int64_t max_transfer;
 uint64_t len;
 QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
 QLIST_HEAD(, BlockCopyCallState) calls;
@@ -149,6 +156,24 @@ static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
 return true;
 }
 
+static int64_t block_copy_chunk_size(BlockCopyState *s)
+{
+switch (s->method) {
+case COPY_READ_WRITE_CLUSTER:
+return s->cluster_size;
+case COPY_READ_WRITE:
+case COPY_RANGE_SMALL:
+return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
+   s->max_transfer);
+case COPY_RANGE_FULL:
+return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
+   s->max_transfer);
+default:
+/* Cannot have COPY_WRITE_ZEROES here.  */
+abort();
+}
+}
+
 /*
  * Search for the first dirty area in offset/bytes range and create task at
  * the beginning of it.
@@ -158,8 +183,9 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
  int64_t offset, int64_t bytes)
 {
 BlockCopyTask *task;
-int64_t max_chunk = MIN_NON_ZERO(s->copy_size, call_state->max_chunk);
+int64_t max_chunk = block_copy_chunk_size(s);
 
+max_chunk = MIN_NON_ZERO(max_chunk, call_state->max_chunk);
 if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
offset, offset + bytes,
max_chunk, &offset, &bytes))
@@ -183,7 +209,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
 .call_state = call_state,
 .offset = offset,
 .bytes = bytes,
-.copy_range = s->use_copy_range,
+.method = s->method,
 };
 qemu_co_queue_init(&task->wait_queue);
 QLIST_INSERT_HEAD(&s->tasks, task, list);
@@ -267,28 +293,27 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
 .len = bdrv_dirty_bitmap_size(copy_bitmap),
 .write_flags = write_flags,
 .mem = shres_create(BLOCK_COPY_MAX_MEM),
+.max_transfer = QEMU_ALIGN_DOWN(block_copy_max_transfer(source, target)
+, cluster_size),
 };
 
-if (block_copy_max_transfer(source, target) < cluster_size) {
+if (s->max_transfer < cluster_size) {
 /*
  * copy_range does not respect max_transfer. We don't want to bother
  * with requests smaller than block-copy cluster size, so fallback to
  * buffered copying (read and write respect max_transfer on their
  * behalf).
  */
-s->use_copy_range = false;
-s->copy_size = cluster_size;
+s->method = COPY_READ_WRITE_CLUSTER;
 } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
 /* Compression supports only cluster-size writes and no copy-range. */
-s->use_copy_range = false;
-s->copy_size = cluster_size;
+s->method = COPY_READ_WRITE_CLUSTER;
 } else {
 /*
  * We enable copy-range, but keep small copy_size, until first
  * successful copy_range (look at block_copy_do_copy).
  */
-s->

[PATCH v3 4/5] block-copy: add a CoMutex

2021-06-08 Thread Emanuele Giuseppe Esposito
Add a CoMutex to protect concurrent access of block-copy
data structures.

This mutex also protects .copy_bitmap, because its thread-safe
API alone does not prevent two tasks from being assigned to the
same bitmap region.

.finished, .cancelled and reads of .ret and .error_is_read will be
protected in the following patch, because they are also used outside
coroutines.

Also set block_copy_task_create as coroutine_fn because:
1) it is static and only invoked by coroutine functions
2) this patch introduces and uses a CoMutex lock there
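
As a minimal sketch of the pattern (assuming the CoMutex/CoQueue API used in
the diff below; the wrapper name is made up), the important detail is that
qemu_co_queue_wait() releases the mutex while the coroutine sleeps and
re-acquires it before returning:

/* Sketch only: wait for a conflicting task while holding s->lock. */
static bool coroutine_fn wait_for_conflict(BlockCopyState *s,
                                           int64_t offset, int64_t bytes)
{
    BlockCopyTask *task;

    QEMU_LOCK_GUARD(&s->lock);                     /* take the CoMutex */
    task = find_conflicting_task_locked(s, offset, bytes);
    if (!task) {
        return false;                              /* guard releases the lock */
    }
    /* Drops s->lock while sleeping, takes it again before returning. */
    qemu_co_queue_wait(&task->wait_queue, &s->lock);
    return true;
}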

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/block-copy.c | 82 ++
 1 file changed, 54 insertions(+), 28 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index e2adb5b2ea..56f62913e4 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -61,6 +61,7 @@ typedef struct BlockCopyCallState {
 
 /* OUT parameters */
 bool cancelled;
+/* Fields protected by lock in BlockCopyState */
 bool error_is_read;
 int ret;
 } BlockCopyCallState;
@@ -78,7 +79,7 @@ typedef struct BlockCopyTask {
 int64_t bytes; /* only re-set in task_shrink, before running the task */
 BlockCopyMethod method; /* initialized in block_copy_dirty_clusters() */
 
-/* State */
+/* State. Protected by lock in BlockCopyState */
 CoQueue wait_queue; /* coroutines blocked on this task */
 
 /* To reference all call states from BlockCopyState */
@@ -99,7 +100,8 @@ typedef struct BlockCopyState {
 BdrvChild *source;
 BdrvChild *target;
 
-/* State */
+/* State. Protected by lock */
+CoMutex lock;
 int64_t in_flight_bytes;
 BlockCopyMethod method;
 QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
@@ -139,8 +141,10 @@ typedef struct BlockCopyState {
 bool skip_unallocated;
 } BlockCopyState;
 
-static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
-int64_t offset, int64_t bytes)
+/* Called with lock held */
+static BlockCopyTask *find_conflicting_task_locked(BlockCopyState *s,
+   int64_t offset,
+   int64_t bytes)
 {
 BlockCopyTask *t;
 
@@ -160,18 +164,22 @@ static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
 static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
  int64_t bytes)
 {
-BlockCopyTask *task = find_conflicting_task(s, offset, bytes);
+BlockCopyTask *task;
+
+QEMU_LOCK_GUARD(&s->lock);
+task = find_conflicting_task_locked(s, offset, bytes);
 
 if (!task) {
 return false;
 }
 
-qemu_co_queue_wait(&task->wait_queue, NULL);
+qemu_co_queue_wait(&task->wait_queue, &s->lock);
 
 return true;
 }
 
-static int64_t block_copy_chunk_size(BlockCopyState *s)
+/* Called with lock held */
+static int64_t block_copy_chunk_size_locked(BlockCopyState *s)
 {
 switch (s->method) {
 case COPY_READ_WRITE_CLUSTER:
@@ -193,14 +201,16 @@ static int64_t block_copy_chunk_size(BlockCopyState *s)
  * Search for the first dirty area in offset/bytes range and create task at
  * the beginning of it.
  */
-static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
- BlockCopyCallState *call_state,
- int64_t offset, int64_t bytes)
+static coroutine_fn BlockCopyTask *block_copy_task_create(BlockCopyState *s,
+BlockCopyCallState *call_state,
+int64_t offset, int64_t bytes)
 {
 BlockCopyTask *task;
-int64_t max_chunk = block_copy_chunk_size(s);
+int64_t max_chunk;
 
-max_chunk = MIN_NON_ZERO(max_chunk, call_state->max_chunk);
+QEMU_LOCK_GUARD(&s->lock);
+max_chunk = MIN_NON_ZERO(block_copy_chunk_size_locked(s),
+ call_state->max_chunk);
 if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
offset, offset + bytes,
max_chunk, &offset, &bytes))
@@ -212,7 +222,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
 bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
 
 /* region is dirty, so no existent tasks possible in it */
-assert(!find_conflicting_task(s, offset, bytes));
+assert(!find_conflicting_task_locked(s, offset, bytes));
 
 bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
 s->in_flight_bytes += bytes;
@@ -248,16 +258,19 @@ static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
 
 assert(new_bytes > 0 && new_bytes < task->bytes);
 
-task->s->in_flight_bytes -= task->bytes - new_bytes;
-bdrv_set_dirty_bitmap(task->s->copy_bitmap,
-  task->offset + new_bytes, task->bytes 

[PATCH v3 2/5] block-copy: improve comments of BlockCopyTask and BlockCopyState types and functions

2021-06-08 Thread Emanuele Giuseppe Esposito
As done for BlockCopyCallState, categorize the BlockCopyTask
and BlockCopyState fields into IN, State and OUT groups.
This is just to clarify which fields have to be protected with a lock.

.sleep_state is handled in the series "coroutine: new sleep/wake API"
and is thus left as a TODO here.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/block-copy.c | 47 ++
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index d58051288b..b3533a3003 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -56,25 +56,33 @@ typedef struct BlockCopyCallState {
 QLIST_ENTRY(BlockCopyCallState) list;
 
 /* State */
-int ret;
 bool finished;
-QemuCoSleep sleep;
-bool cancelled;
+QemuCoSleep sleep; /* TODO: protect API with a lock */
 
 /* OUT parameters */
+bool cancelled;
 bool error_is_read;
+int ret;
 } BlockCopyCallState;
 
 typedef struct BlockCopyTask {
 AioTask task;
 
+/*
+ * IN parameters. Initialized in block_copy_task_create()
+ * and never changed.
+ */
 BlockCopyState *s;
 BlockCopyCallState *call_state;
 int64_t offset;
-int64_t bytes;
-BlockCopyMethod method;
-QLIST_ENTRY(BlockCopyTask) list;
+int64_t bytes; /* only re-set in task_shrink, before running the task */
+BlockCopyMethod method; /* initialized in block_copy_dirty_clusters() */
+
+/* State */
 CoQueue wait_queue; /* coroutines blocked on this task */
+
+/* To reference all call states from BlockCopyState */
+QLIST_ENTRY(BlockCopyTask) list;
 } BlockCopyTask;
 
 static int64_t task_end(BlockCopyTask *task)
@@ -90,15 +98,25 @@ typedef struct BlockCopyState {
  */
 BdrvChild *source;
 BdrvChild *target;
-BdrvDirtyBitmap *copy_bitmap;
+
+/* State */
 int64_t in_flight_bytes;
-int64_t cluster_size;
 BlockCopyMethod method;
-int64_t max_transfer;
-uint64_t len;
 QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
 QLIST_HEAD(, BlockCopyCallState) calls;
+/* State fields that use a thread-safe API */
+BdrvDirtyBitmap *copy_bitmap;
+ProgressMeter *progress;
+SharedResource *mem;
+RateLimit rate_limit;
 
+/*
+ * IN parameters. Initialized in block_copy_state_new()
+ * and never changed.
+ */
+int64_t cluster_size;
+int64_t max_transfer;
+uint64_t len;
 BdrvRequestFlags write_flags;
 
 /*
@@ -114,14 +132,11 @@ typedef struct BlockCopyState {
  * In this case, block_copy() will query the source’s allocation status,
  * skip unallocated regions, clear them in the copy_bitmap, and invoke
  * block_copy_reset_unallocated() every time it does.
+ *
+ * This field is set in backup_run() before coroutines are run,
+ * therefore is an IN.
  */
 bool skip_unallocated;
-
-ProgressMeter *progress;
-
-SharedResource *mem;
-
-RateLimit rate_limit;
 } BlockCopyState;
 
 static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
-- 
2.30.2




[PATCH v3 5/5] block-copy: atomic .cancelled and .finished fields in BlockCopyCallState

2021-06-08 Thread Emanuele Giuseppe Esposito
By adding acquire/release pairs, we ensure that the .ret and .error_is_read
fields are written by block_copy_dirty_clusters() before .finished is set to
true.

The atomics are necessary here because the fields are also modified
concurrently outside coroutines.
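
A minimal sketch of the pairing, assuming the qatomic helpers used below: the
release store publishes the OUT fields, and the acquire load on the reader
side guarantees that they are visible afterwards:

/* Writer (coroutine side), sketch: */
call_state->ret = ret;                    /* write OUT fields first ...   */
call_state->error_is_read = error_is_read;
qatomic_store_release(&call_state->finished, true);   /* ... then publish */

/* Reader (possibly outside coroutines), sketch: */
if (qatomic_load_acquire(&call_state->finished)) {
    /* safe: .ret and .error_is_read were written before the release */
    int r = call_state->ret;
}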

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/block-copy.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index 56f62913e4..55b6ce6a57 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -56,11 +56,11 @@ typedef struct BlockCopyCallState {
 QLIST_ENTRY(BlockCopyCallState) list;
 
 /* State */
-bool finished;
+bool finished; /* atomic */
 QemuCoSleep sleep; /* TODO: protect API with a lock */
 
 /* OUT parameters */
-bool cancelled;
+bool cancelled; /* atomic */
 /* Fields protected by lock in BlockCopyState */
 bool error_is_read;
 int ret;
@@ -648,7 +648,8 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
 assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
 assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
 
-while (bytes && aio_task_pool_status(aio) == 0 && !call_state->cancelled) {
+while (bytes && aio_task_pool_status(aio) == 0 &&
+   !qatomic_read(&call_state->cancelled)) {
 BlockCopyTask *task;
 int64_t status_bytes;
 
@@ -758,7 +759,7 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
 do {
 ret = block_copy_dirty_clusters(call_state);
 
-if (ret == 0 && !call_state->cancelled) {
+if (ret == 0 && !qatomic_read(&call_state->cancelled)) {
 ret = block_copy_wait_one(call_state->s, call_state->offset,
   call_state->bytes);
 }
@@ -772,9 +773,9 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
  * 2. We have waited for some intersecting block-copy request
  *It may have failed and produced new dirty bits.
  */
-} while (ret > 0 && !call_state->cancelled);
+} while (ret > 0 && !qatomic_read(&call_state->cancelled));
 
-call_state->finished = true;
+qatomic_store_release(&call_state->finished, true);
 
 if (call_state->cb) {
 call_state->cb(call_state->cb_opaque);
@@ -837,35 +838,37 @@ void block_copy_call_free(BlockCopyCallState *call_state)
 return;
 }
 
-assert(call_state->finished);
+assert(qatomic_load_acquire(&call_state->finished));
 g_free(call_state);
 }
 
 bool block_copy_call_finished(BlockCopyCallState *call_state)
 {
-return call_state->finished;
+return qatomic_load_acquire(&call_state->finished);
 }
 
 bool block_copy_call_succeeded(BlockCopyCallState *call_state)
 {
-return call_state->finished && !call_state->cancelled &&
-call_state->ret == 0;
+return qatomic_load_acquire(&call_state->finished) &&
+   !qatomic_read(&call_state->cancelled) &&
+   call_state->ret == 0;
 }
 
 bool block_copy_call_failed(BlockCopyCallState *call_state)
 {
-return call_state->finished && !call_state->cancelled &&
-call_state->ret < 0;
+return qatomic_load_acquire(&call_state->finished) &&
+   !qatomic_read(&call_state->cancelled) &&
+   call_state->ret < 0;
 }
 
 bool block_copy_call_cancelled(BlockCopyCallState *call_state)
 {
-return call_state->cancelled;
+return qatomic_read(&call_state->cancelled);
 }
 
 int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
 {
-assert(call_state->finished);
+assert(qatomic_load_acquire(&call_state->finished));
 if (error_is_read) {
 *error_is_read = call_state->error_is_read;
 }
@@ -874,7 +877,7 @@ int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
 
 void block_copy_call_cancel(BlockCopyCallState *call_state)
 {
-call_state->cancelled = true;
+qatomic_set(&call_state->cancelled, true);
 block_copy_kick(call_state);
 }
 
-- 
2.30.2




[PATCH v3 3/5] block-copy: move progress_set_remaining in block_copy_task_end

2021-06-08 Thread Emanuele Giuseppe Esposito
Moving this call into block_copy_task_end() ensures that the progress
is updated in any case, even if there is an error.

It also helps with the next patch, allowing task_end to have only
one critical section.

Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/block-copy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index b3533a3003..e2adb5b2ea 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -263,6 +263,9 @@ static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
 bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
 }
 QLIST_REMOVE(task, list);
+progress_set_remaining(task->s->progress,
+   bdrv_get_dirty_count(task->s->copy_bitmap) +
+   task->s->in_flight_bytes);
 qemu_co_queue_restart_all(&task->wait_queue);
 }
 
@@ -647,9 +650,6 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
 }
 if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
 block_copy_task_end(task, 0);
-progress_set_remaining(s->progress,
-   bdrv_get_dirty_count(s->copy_bitmap) +
-   s->in_flight_bytes);
 trace_block_copy_skip_range(s, task->offset, task->bytes);
 offset = task_end(task);
 bytes = end - offset;
-- 
2.30.2




Re: [PATCH] s390x/kvm: remove unused gs handling

2021-06-08 Thread Cornelia Huck
On Wed, Jun 02 2021, Cornelia Huck  wrote:

> With commit 0280b3eb7c05 ("s390x/kvm: use cpu model for gscb on
> compat machines"), we removed any calls to kvm_s390_get_gs()
> in favour of a different mechanism.
>
> Let's remove the unused kvm_s390_get_gs(), and with it the now
> unneeded cap_gs as well.
>
> Signed-off-by: Cornelia Huck 
> ---
>  target/s390x/kvm-stub.c  |  5 -
>  target/s390x/kvm.c   | 10 +-
>  target/s390x/kvm_s390x.h |  1 -
>  3 files changed, 1 insertion(+), 15 deletions(-)

Queued to s390-next.




Re: [PATCH] configure: Check whether we can compile the s390-ccw bios with -msoft-float

2021-06-08 Thread Thomas Huth

On 25/05/2021 17.13, Philippe Mathieu-Daudé wrote:

On 5/25/21 4:40 PM, Thomas Huth wrote:

On 25/05/2021 16.31, Cornelia Huck wrote:

On Tue, 25 May 2021 16:20:32 +0200
Thomas Huth  wrote:


The -msoft-float switch is not available in older versions of Clang.
Since we rely on the compiler to not generate floating point
instructions
unexpectedly, we block those old compilers now via a test in the
configure
script. Note that for some weird reasons, the Clang compiler only
complains
about the missing soft-float support if no other flags are passed via
"-Wl,..." to the linker. So we have to use "compile_object" instead of
"compile_prog" for this check.


Ugh.


It's maybe better to use compile_object for testing -msoft-float anyway
since it could influence the way of linking against libraries (if I get
https://reviews.llvm.org/D72189 right).



Signed-off-by: Thomas Huth 
---
   configure | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 676239c697..673419ff31 100755
--- a/configure
+++ b/configure
@@ -5462,7 +5462,7 @@ if test "$cpu" = "s390x" ; then
     write_c_skeleton
     compile_prog "-march=z900" ""
     has_z900=$?
-  if [ $has_z900 = 0 ] || compile_prog "-march=z10" ""; then
+  if [ $has_z900 = 0 ] || compile_object "-march=z10 -msoft-float
-Werror"; then


Do you believe that we should have caught the various clang
specialties now?


Apart from one compiler warning that just popped up today, yes. I'm
seeing this compiler warning with Clang 12.0 now:

roms/SLOF/lib/libnet/ipv6.c:447:18: warning: variable length array
folded to constant array as an extension [-Wgnu-folding-constant]
     unsigned short raw[ip6size];
    ^

... not sure what to do with that one yet.


Fix it? =)

Same class as:
https://gitlab.freedesktop.org/slirp/libslirp/-/commit/6690d55626cc

-- >8 --
diff --git a/lib/libnet/ipv6.c b/lib/libnet/ipv6.c
index 6420004..b7daeda 100644
--- a/lib/libnet/ipv6.c
+++ b/lib/libnet/ipv6.c
@@ -441,10 +441,9 @@ static unsigned short ip6_checksum(struct ip6hdr *ip6h, unsigned char *packet,
  {
 int i;
 unsigned long checksum;
-   const int ip6size = sizeof(struct ip6hdr)/sizeof(unsigned short);
 union {
 struct ip6hdr ip6h;
-   unsigned short raw[ip6size];
+   unsigned short raw[sizeof(struct ip6hdr) / sizeof(unsigned short)];
 } pseudo;

 memcpy (&pseudo.ip6h, ip6h, sizeof(struct ip6hdr));
@@ -455,7 +454,7 @@ static unsigned short ip6_checksum(struct ip6hdr *ip6h, unsigned char *packet,
 for (checksum = 0, i = 0; i < bytes; i += 2)
 checksum += (packet[i] << 8) | packet[i + 1];

-   for (i = 0; i < ip6size; i++)
+   for (i = 0; i < sizeof(pseudo.raw); i++)
 checksum += pseudo.raw[i];

 checksum = (checksum >> 16) + (checksum & 0xffff);
---

Do you want me to send the fix since I have it ready?


Yes, please!

 Thanks,
  Thomas




Re: [PATCH v4 1/6] blkdebug: refactor removal of a suspended request

2021-06-08 Thread Emanuele Giuseppe Esposito




On 07/06/2021 11:23, Paolo Bonzini wrote:

On 04/06/21 18:16, Eric Blake wrote:
On Fri, Jun 04, 2021 at 12:07:36PM +0200, Emanuele Giuseppe Esposito 
wrote:

Extract to a separate function.  Do not rely on FOREACH_SAFE, which is
only "safe" if the *current* node is removed---not if another node is
removed.  Instead, just walk the entire list from the beginning when
asked to resume all suspended requests with a given tag.
-    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
+retry:
+    QLIST_FOREACH(r, &s->suspended_reqs, next) {
  if (!strcmp(r->tag, tag)) {
+    QLIST_REMOVE(r, next);


Isn't the whole point of using QLIST_FOREACH_SAFE the ability to call
QLIST_REMOVE on an element in that list while still iterating?


  qemu_coroutine_enter(r->co);
+    if (all) {
+    goto retry;
+    }
  return 0;


Oh, I see - you abandon the iteration in all control flow paths, so
the simpler loop is still okay.  Still, this confused me enough on
first read that it may be worth a comment, maybe:

/* No need for _SAFE, because iteration stops on first hit */


This is a bit confusing too because it sounds like not using _SAFE is an 
optimization, but it's actually wrong (see commit message).




What about:

/* No need for _SAFE, since a different coroutine can remove another
node (not the current one) from this list, and when the current one is
removed the iteration restarts from the beginning anyway. */


Alternatively, no comment at all.

Thank you,
Emanuele




Re: [PATCH] docs/tools/virtiofsd.rst: Do not hard-code the QEMU binary name

2021-06-08 Thread Stefan Hajnoczi
On Mon, Jun 07, 2021 at 07:42:50PM +0200, Thomas Huth wrote:
> In downstream, we want to use a different name for the QEMU binary,
> and some people might also use the docs for non-x86 binaries, that's
> why we already created the |qemu_system| placeholder in the past.
> Use it now in the virtiofsd doc, too.
> 
> Signed-off-by: Thomas Huth 
> ---
>  docs/tools/virtiofsd.rst | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)

Reviewed-by: Stefan Hajnoczi 




Re: [PATCH v16 04/99] qtest/arm-cpu-features: Use generic qtest_has_accel() to check for KVM

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/7/21 3:22 PM, Thomas Huth wrote:
> On 04/06/2021 17.51, Alex Bennée wrote:
>> From: Philippe Mathieu-Daudé 
>>
>> Use the recently added generic qtest_has_accel() method to
>> check if KVM is available.
>>
>> Suggested-by: Claudio Fontana 
>> Reviewed-by: Andrew Jones 
>> Reviewed-by: Alex Bennée 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> Signed-off-by: Alex Bennée 
>> Message-Id: <20210505125806.1263441-5-phi...@redhat.com>
>> ---
>>   tests/qtest/arm-cpu-features.c | 25 +
>>   1 file changed, 1 insertion(+), 24 deletions(-)
>>
>> diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
>> index 8252b85bb8..7f4b252127 100644
>> --- a/tests/qtest/arm-cpu-features.c
>> +++ b/tests/qtest/arm-cpu-features.c
>> @@ -26,21 +26,6 @@
>>   "  'arguments': { 'type': 'full', "
>>   #define QUERY_TAIL  "}}"
>>   -static bool kvm_enabled(QTestState *qts)
>> -{
>> -    QDict *resp, *qdict;
>> -    bool enabled;
>> -
>> -    resp = qtest_qmp(qts, "{ 'execute': 'query-kvm' }");
>> -    g_assert(qdict_haskey(resp, "return"));
>> -    qdict = qdict_get_qdict(resp, "return");
>> -    g_assert(qdict_haskey(qdict, "enabled"));
>> -    enabled = qdict_get_bool(qdict, "enabled");
>> -    qobject_unref(resp);
>> -
>> -    return enabled;
>> -}
>> -
>>   static QDict *do_query_no_props(QTestState *qts, const char *cpu_type)
>>   {
>>   return qtest_qmp(qts, QUERY_HEAD "'model': { 'name': %s }"
>> @@ -493,14 +478,6 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
>>     qts = qtest_init(MACHINE_KVM "-cpu max");
>>   -    /*
>> - * These tests target the 'host' CPU type, so KVM must be enabled.
>> - */
>> -    if (!kvm_enabled(qts)) {
>> -    qtest_quit(qts);
>> -    return;
>> -    }
>> -
>>   /* Enabling and disabling kvm-no-adjvtime should always work. */
>>   assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime");
>>   assert_set_feature(qts, "host", "kvm-no-adjvtime", true);
>> @@ -624,7 +601,7 @@ int main(int argc, char **argv)
>>    * order avoid attempting to run an AArch32 QEMU with KVM on
>>    * AArch64 hosts. That won't work and isn't easy to detect.
>>    */
>> -    if (g_str_equal(qtest_get_arch(), "aarch64")) {
>> +    if (g_str_equal(qtest_get_arch(), "aarch64") && qtest_has_accel("kvm")) {
>>   qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
>>   NULL, test_query_cpu_model_expansion_kvm);
> 
> I think this is wrong: query-kvm checks whether kvm is *enabled*, while
> your new function only checks whether kvm has been built into the
> binary. There is still the possibility that kvm has been built into the
> binary, but is not available on the host, so in that case the test will
> fail now.
> 
> Thus please drop / rework this patch.

Indeed, this is unfortunate :(
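
One possible rework, sketched under the assumption that the removed
kvm_enabled() QMP helper is kept, is to gate the test on both the build-time
and the runtime check:

/* Sketch: require KVM to be both built in and actually usable. */
if (g_str_equal(qtest_get_arch(), "aarch64") && qtest_has_accel("kvm")) {
    QTestState *qts = qtest_init(MACHINE_KVM "-cpu max");
    bool usable = kvm_enabled(qts);   /* QMP query-kvm, as in the old helper */

    qtest_quit(qts);
    if (usable) {
        qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
                            NULL, test_query_cpu_model_expansion_kvm);
    }
}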




Re: [PATCH v16 06/99] qtest/arm-cpu-features: Remove TCG fallback to KVM specific tests

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/7/21 3:28 PM, Thomas Huth wrote:
> On 04/06/2021 17.51, Alex Bennée wrote:
>> From: Philippe Mathieu-Daudé 
>>
>> sve_tests_sve_off_kvm() and test_query_cpu_model_expansion_kvm()
>> tests are now only being run if KVM is available. Drop the TCG
>> fallback.
>>
>> Suggested-by: Andrew Jones 
>> Reviewed-by: Andrew Jones 
>> Reviewed-by: Alex Bennée 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> Signed-off-by: Alex Bennée 
>> Message-Id: <20210505125806.1263441-7-phi...@redhat.com>
>> ---
>>   tests/qtest/arm-cpu-features.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
>> index 66300c3bc2..b1d406542f 100644
>> --- a/tests/qtest/arm-cpu-features.c
>> +++ b/tests/qtest/arm-cpu-features.c
>> @@ -21,7 +21,7 @@
>>   #define SVE_MAX_VQ 16
>>     #define MACHINE "-machine virt,gic-version=max -accel tcg "
>> -#define MACHINE_KVM "-machine virt,gic-version=max -accel kvm -accel tcg "
>> +#define MACHINE_KVM "-machine virt,gic-version=max -accel kvm "
> 
> Same comment as with patch 04/99: I think this is wrong. You're mixing
> up whether an accelerator has been built into the binary with the fact
> whether an accelerator is available and usable. There are plenty of
> cases where e.g. kvm is built into the binary but not usable during
> runtime, e.g. because:
> 1) The kernel does not support it
> 2) The current host CPU does not support it
> 3) There are problems with the permission to /dev/kvm
> etc.

Yes.

> I think we either need the fallback mechanism to tcg

No, this is precisely what we want to test.

> or you need to
> properly check whether KVM is usable, too.

Yes.




Re: [RFC PATCH 0/7] Support protection keys in an AMD EPYC-Milan VM

2021-06-08 Thread David Edmondson
On Thursday, 2021-05-20 at 15:56:40 +01, David Edmondson wrote:

> AMD EPYC-Milan CPUs introduced support for protection keys, previously
> available only with Intel CPUs.
>
> AMD chose to place the XSAVE state component for the protection keys
> at a different offset in the XSAVE state area than that chosen by
> Intel.
>
> To accommodate this, modify QEMU to behave appropriately on AMD
> systems, allowing a VM to properly take advantage of the new feature.
>
> Further, avoid manipulating XSAVE state components that are not
> present on AMD systems.
>
> The code in patch 6 that changes the CPUID 0x0d leaf is mostly dumped
> somewhere that seemed to work - I'm not sure where it really belongs.

Ping - any thoughts about this approach?

> David Edmondson (7):
>   target/i386: Declare constants for XSAVE offsets
>   target/i386: Use constants for XSAVE offsets
>   target/i386: Clarify the padding requirements of X86XSaveArea
>   target/i386: Prepare for per-vendor X86XSaveArea layout
>   target/i386: Introduce AMD X86XSaveArea sub-union
>   target/i386: Adjust AMD XSAVE PKRU area offset in CPUID leaf 0xd
>   target/i386: Manipulate only AMD XSAVE state on AMD
>
>  target/i386/cpu.c| 19 +
>  target/i386/cpu.h| 80 
>  target/i386/kvm/kvm.c| 57 +
>  target/i386/tcg/fpu_helper.c | 20 ++---
>  target/i386/xsave_helper.c   | 70 +++
>  5 files changed, 152 insertions(+), 94 deletions(-)
>
> -- 
> 2.30.2

dme.
-- 
You know your green from your red.



Re: [PATCH v16 08/99] qtest/migration-test: Skip tests if KVM not builtin on s390x/ppc64

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/4/21 11:11 PM, Richard Henderson wrote:
> On 6/4/21 8:51 AM, Alex Bennée wrote:
>>   if (g_str_equal(qtest_get_arch(), "ppc64") &&
>>   (access("/sys/module/kvm_hv", F_OK) ||
>> - access("/dev/kvm", R_OK | W_OK))) {
>> + access("/dev/kvm", R_OK | W_OK) || !qtest_has_accel("kvm"))) {
>>   g_test_message("Skipping test: kvm_hv not available");
>>   return g_test_run();
>>   }
>> @@ -1398,7 +1398,7 @@ int main(int argc, char **argv)
>>    */
>>   if (g_str_equal(qtest_get_arch(), "s390x")) {
>>   #if defined(HOST_S390X)
>> -    if (access("/dev/kvm", R_OK | W_OK)) {
>> +    if (access("/dev/kvm", R_OK | W_OK) || !qtest_has_accel("kvm")) {
>>   g_test_message("Skipping test: kvm not available");
> 
> I would have sorted the kvm test first.

access() is a simple syscall from the qtest, while qtest_has_accel()
spawns a whole QEMU process to execute the QMP request.

> For s390x, we has the HOST test, but ppc doesn't.  So we're doing the
> access() on any host, e.g. x86_64, where kvm cannot true for this test.

Hmm I suppose the issue you described predate this patch?




Re: [PATCH 1/6] hyper-v: Overlay abstraction for synic event and msg pages

2021-06-08 Thread Alexander Graf



On 24.05.21 21:54, Siddharth Chandrasekaran wrote:

Capture overlay page semantic variables into 'struct overlay_page' and
add methods that operate over it. Adapt existing synic event and message
pages to use these methods to setup and manage overlays.

Since all overlay pages use bit 0 of the GPA to indicate if the overlay
is enabled, the checks for this bit is moved into the unified overlaying
method hyperv_overlay_update() so the caller does not need to care about
it.

Signed-off-by: Siddharth Chandrasekaran 


Reviewed-by: Alexander Graf 


Alex







Re: [PATCH 4/6] kvm/i386: Avoid multiple calls to check_extension(KVM_CAP_HYPERV)

2021-06-08 Thread Alexander Graf



On 24.05.21 21:54, Siddharth Chandrasekaran wrote:

KVM_CAP_HYPERV is a VM-level capability and can be cached at kvm_arch_init()
instead of performing an ioctl each time in hyperv_enabled(), which is
called for each vCPU. Apart from that, this variable will come in handy
in a subsequent patch.
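
A rough sketch of the caching idea (the variable name is hypothetical; the
actual patch may store it elsewhere):

static int hyperv_cap;   /* hypothetical cache, filled once per VM */

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    /* ... existing init ... */
    hyperv_cap = kvm_check_extension(s, KVM_CAP_HYPERV);
    /* ... */
    return 0;
}

static bool hyperv_enabled(X86CPU *cpu)
{
    /* no per-vCPU ioctl any more, just reuse the cached value */
    return hyperv_cap > 0 /* && the usual hv_* property checks */;
}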

Signed-off-by: Siddharth Chandrasekaran 


Reviewed-by: Alexander Graf 


Alex







Re: [PATCH 2/6] hyper-v: Use -1 as invalid overlay address

2021-06-08 Thread Alexander Graf



On 24.05.21 21:54, Siddharth Chandrasekaran wrote:

When managing overlay pages, we used hwaddr 0 to signal an invalid
address (to disable a page). Although unlikely, 0 _could_ be a valid
overlay offset as Hyper-V TLFS does not specify anything about it.

Use -1 as the invalid address indicator as it can never be a valid
address.

Signed-off-by: Siddharth Chandrasekaran 


Reviewed-by: Alexander Graf 


Alex







Re: [PATCH] docs/tools/virtiofsd: Fix bad rst syntax

2021-06-08 Thread Stefan Hajnoczi
On Mon, Jun 07, 2021 at 08:00:15PM +0200, Thomas Huth wrote:
> For literal blocks, there has to be an empty line after the two colons,
> and the block itself should be indented.
> 
> Signed-off-by: Thomas Huth 
> ---
>  docs/tools/virtiofsd.rst | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)

Reviewed-by: Stefan Hajnoczi 




[PATCH 2/7] Fix the qemu crash when guest shutdown during checkpoint

2021-06-08 Thread Zhang Chen
From: "Rao, Lei" 

This patch fixes the following:
qemu-system-x86_64: invalid runstate transition: 'colo' ->'shutdown'
Aborted (core dumped)

Signed-off-by: Lei Rao 
Reviewed-by: Li Zhijian 
Reviewed-by: Zhang Chen 
Reviewed-by: Lukas Straub 
Tested-by: Lukas Straub 
Signed-off-by: Zhang Chen 
---
 softmmu/runstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index ce8977c6a2..15640572c0 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -126,6 +126,7 @@ static const RunStateTransition runstate_transitions_def[] = {
 { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
 
 { RUN_STATE_COLO, RUN_STATE_RUNNING },
+{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
 
 { RUN_STATE_RUNNING, RUN_STATE_DEBUG },
 { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
-- 
2.25.1




[PATCH 3/7] Optimize the function of filter_send

2021-06-08 Thread Zhang Chen
From: "Rao, Lei" 

The iov size has already been calculated in filter_send(), so we can
directly return that size. This way, there is no need to repeat the
calculation in filter_redirector_receive_iov().

Signed-off-by: Lei Rao 
Reviewed-by: Li Zhijian 
Reviewed-by: Zhang Chen 
Reviewed-by: Lukas Straub 
Tested-by: Lukas Straub 
Signed-off-by: Zhang Chen 
---
 net/filter-mirror.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index f8e65007c0..f20240cc9f 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -88,7 +88,7 @@ static int filter_send(MirrorState *s,
 goto err;
 }
 
-return 0;
+return size;
 
 err:
 return ret < 0 ? ret : -EIO;
@@ -159,7 +159,7 @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
 int ret;
 
 ret = filter_send(s, iov, iovcnt);
-if (ret) {
+if (ret < 0) {
 error_report("filter mirror send failed(%s)", strerror(-ret));
 }
 
@@ -182,10 +182,10 @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
 
 if (qemu_chr_fe_backend_connected(&s->chr_out)) {
 ret = filter_send(s, iov, iovcnt);
-if (ret) {
+if (ret < 0) {
 error_report("filter redirector send failed(%s)", strerror(-ret));
 }
-return iov_size(iov, iovcnt);
+return ret;
 } else {
 return 0;
 }
-- 
2.25.1




[PATCH 0/7] COLO proxy patch

2021-06-08 Thread Zhang Chen
Hi Jason,
Please help me merge the COLO proxy patch to net branch.

Thanks
Chen

Rao, Lei (7):
  Remove some duplicate trace code.
  Fix the qemu crash when guest shutdown during checkpoint
  Optimize the function of filter_send
  Remove migrate_set_block_enabled in checkpoint
  Add a function named packet_new_nocopy for COLO.
  Add the function of colo_compare_cleanup
  Fixed calculation error of pkt->header_size in fill_pkt_tcp_info()

 migration/colo.c  |  6 --
 migration/migration.c |  4 
 net/colo-compare.c| 25 +++--
 net/colo-compare.h|  1 +
 net/colo.c| 25 +
 net/colo.h|  1 +
 net/filter-mirror.c   |  8 
 net/filter-rewriter.c |  3 +--
 net/net.c |  4 
 softmmu/runstate.c|  1 +
 10 files changed, 44 insertions(+), 34 deletions(-)

-- 
2.25.1




Re: [PATCH 3/6] kvm/i386: Stop using cpu->kvm_msr_buf in kvm_put_one_msr()

2021-06-08 Thread Alexander Graf



On 24.05.21 21:54, Siddharth Chandrasekaran wrote:

kvm_put_one_msr() zeros cpu->kvm_msr_buf and uses it to set one MSR to
KVM. It is pretty wasteful as cpu->kvm_msr_buf is 4096 bytes long;
instead use a local buffer to avoid memset.

Also, expose this method from kvm_i386.h as hyperv.c needs to set MSRs
in a subsequent patch.
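
A rough sketch of the idea (not necessarily the exact patch): build a
one-entry kvm_msrs structure on the stack instead of zeroing the 4 KiB
cpu->kvm_msr_buf:

/* Sketch: set a single MSR using a small on-stack buffer. */
static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[1];
    } msr_data = {
        .info.nmsrs = 1,
        .entries[0] = { .index = index, .data = value },
    };

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
}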

Signed-off-by: Siddharth Chandrasekaran 


Reviewed-by: Alexander Graf 


Alex







[PATCH 4/7] Remove migrate_set_block_enabled in checkpoint

2021-06-08 Thread Zhang Chen
From: "Rao, Lei" 

We can detect disk migration in migrate_prepare: if disk migration
is enabled in COLO mode, we directly report an error, and there
is no need to disable block migration at every checkpoint.

Signed-off-by: Lei Rao 
Signed-off-by: Zhang Chen 
Reviewed-by: Li Zhijian 
Reviewed-by: Zhang Chen 
Reviewed-by: Lukas Straub 
Tested-by: Lukas Straub 
---
 migration/colo.c  | 6 --
 migration/migration.c | 4 
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index e498fdb125..79fa1f6619 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -435,12 +435,6 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
 if (failover_get_state() != FAILOVER_STATUS_NONE) {
 goto out;
 }
-
-/* Disable block migration */
-migrate_set_block_enabled(false, &local_err);
-if (local_err) {
-goto out;
-}
 qemu_mutex_lock_iothread();
 
 #ifdef CONFIG_REPLICATION
diff --git a/migration/migration.c b/migration/migration.c
index 1885860d7b..d059cf70b9 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2214,6 +2214,10 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
 }
 
 if (blk || blk_inc) {
+if (migrate_colo_enabled()) {
+error_setg(errp, "No disk migration is required in COLO mode");
+return false;
+}
 if (migrate_use_block() || migrate_use_block_incremental()) {
 error_setg(errp, "Command options are incompatible with "
"current migration capabilities");
-- 
2.25.1




[PATCH 1/7] Remove some duplicate trace code.

2021-06-08 Thread Zhang Chen
From: "Rao, Lei" 

The same trace code already exists in colo_compare_packet_payload().

Signed-off-by: Lei Rao 
Reviewed-by: Li Zhijian 
Reviewed-by: Zhang Chen 
Reviewed-by: Lukas Straub 
Tested-by: Lukas Straub 
Signed-off-by: Zhang Chen 
---
 net/colo-compare.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 9d1ad99941..c142c08dc6 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -590,19 +590,6 @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
 uint16_t offset = ppkt->vnet_hdr_len;
 
 trace_colo_compare_main("compare other");
-if (trace_event_get_state_backends(TRACE_COLO_COMPARE_IP_INFO)) {
-char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
-
-strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
-strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
-strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
-strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
-
-trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
-   pri_ip_dst, spkt->size,
-   sec_ip_src, sec_ip_dst);
-}
-
 if (ppkt->size != spkt->size) {
 trace_colo_compare_main("Other: payload size of packets are 
different");
 return -1;
-- 
2.25.1




Re: [PATCH v4 0/2] Gitlab: Add issue templates

2021-06-08 Thread Stefan Hajnoczi
On Mon, Jun 07, 2021 at 11:31:53AM -0400, John Snow wrote:
> Add "Bug" and "Feature Request" templates to the Gitlab interface to
> help improve the quality of newly reported issues.
> 
> To see what this looks like, I've temporarily allowed my Gitlab fork to
> diverge with these files merged.  See my fork's "new issue" page to see
> it in action: https://gitlab.com/jsnow/qemu/-/issues/new?issue
> 
> (It's outdated a bit for V4, but you get the idea.)
> 
> These patches do not add a "default" template, the user still has to
> select one from the list. I recommend that someone with permissions
> updates the default template:
> 
> 1. https://gitlab.com/qemu-project/qemu/edit
> 2. ctrl+f "Default description template for issues"
> 3. Update the default to the (suggested) below:
> 
> ```
> 
> ```
> 
> We can use this cover letter to discuss/review the wording on that
> default template which exists outside of repository data.
> 
> V4:
>  - Change the "build on master" to be more of a nudge than a mandate,
>with improved instructions (stefanha, danpb)
> 
> V3:
>  - Add pointer to https://www.qemu.org/download/#source
>  - Add pointer to https://www.qemu.org/contribute/security-process/
>  - Remove blurb covering tracing instructions.
> 
> V2:
> - Updated both templates based on feedback from Peter, Daniel, and
>   Thomas.
> 
> John Snow (2):
>   GitLab: Add "Bug" issue reporting template
>   GitLab: Add "Feature Request" issue template.
> 
>  .gitlab/issue_templates/bug.md | 64 ++
>  .gitlab/issue_templates/feature_request.md | 32 +++
>  2 files changed, 96 insertions(+)
>  create mode 100644 .gitlab/issue_templates/bug.md
>  create mode 100644 .gitlab/issue_templates/feature_request.md
> 
> -- 
> 2.31.1
> 
> 

Reviewed-by: Stefan Hajnoczi 




[PATCH 6/7] Add the function of colo_compare_cleanup

2021-06-08 Thread Zhang Chen
From: "Rao, Lei" 

This patch fixes the following:
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x7f6ae4559859 in __GI_abort () at abort.c:79
#2  0x559aaa386720 in error_exit (err=16, msg=0x559aaa5973d0 <__func__.16227> "qemu_mutex_destroy") at util/qemu-thread-posix.c:36
#3  0x559aaa3868c5 in qemu_mutex_destroy (mutex=0x559aabffe828) at util/qemu-thread-posix.c:69
#4  0x559aaa2f93a8 in char_finalize (obj=0x559aabffe800) at chardev/char.c:285
#5  0x559aaa23318a in object_deinit (obj=0x559aabffe800, type=0x559aabfd7d20) at qom/object.c:606
#6  0x559aaa2331b8 in object_deinit (obj=0x559aabffe800, type=0x559aabfd9060) at qom/object.c:610
#7  0x559aaa233200 in object_finalize (data=0x559aabffe800) at qom/object.c:620
#8  0x559aaa234202 in object_unref (obj=0x559aabffe800) at qom/object.c:1074
#9  0x559aaa2356b6 in object_finalize_child_property (obj=0x559aac0dac10, name=0x559aac778760 "compare0-0", opaque=0x559aabffe800) at qom/object.c:1584
#10 0x559aaa232f70 in object_property_del_all (obj=0x559aac0dac10) at qom/object.c:557
#11 0x559aaa2331ed in object_finalize (data=0x559aac0dac10) at qom/object.c:619
#12 0x559aaa234202 in object_unref (obj=0x559aac0dac10) at qom/object.c:1074
#13 0x559aaa2356b6 in object_finalize_child_property (obj=0x559aac0c75c0, name=0x559aac0dadc0 "chardevs", opaque=0x559aac0dac10) at qom/object.c:1584
#14 0x559aaa233071 in object_property_del_child (obj=0x559aac0c75c0, child=0x559aac0dac10, errp=0x0) at qom/object.c:580
#15 0x559aaa233155 in object_unparent (obj=0x559aac0dac10) at qom/object.c:599
#16 0x559aaa2fb721 in qemu_chr_cleanup () at chardev/char.c:1159
#17 0x559aa9f9b110 in main (argc=54, argv=0x7ffeb62fa998, envp=0x7ffeb62fab50) at vl.c:4539

When a chardev is cleaned up, chr_write_lock needs to be destroyed. But
the colo-compare module is not cleaned up properly before that when the
guest powers off, and it is still holding chr_write_lock at that point,
which makes qemu crash. So we call the new colo_compare_cleanup() before
qemu_chr_cleanup() to fix the bug.

Signed-off-by: Lei Rao 
Reviewed-by: Zhang Chen 
Reviewed-by: Lukas Straub 
Tested-by: Lukas Straub 
Signed-off-by: Zhang Chen 
---
 net/colo-compare.c | 10 ++
 net/colo-compare.h |  1 +
 net/net.c  |  4 
 3 files changed, 15 insertions(+)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index c142c08dc6..5b538f4e0b 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -1402,6 +1402,16 @@ static void colo_compare_init(Object *obj)
  compare_set_vnet_hdr);
 }
 
+void colo_compare_cleanup(void)
+{
+CompareState *tmp = NULL;
+CompareState *n = NULL;
+
+QTAILQ_FOREACH_SAFE(tmp, &net_compares, next, n) {
+object_unparent(OBJECT(tmp));
+}
+}
+
 static void colo_compare_finalize(Object *obj)
 {
 CompareState *s = COLO_COMPARE(obj);
diff --git a/net/colo-compare.h b/net/colo-compare.h
index 22ddd512e2..b055270da2 100644
--- a/net/colo-compare.h
+++ b/net/colo-compare.h
@@ -20,5 +20,6 @@
 void colo_notify_compares_event(void *opaque, int event, Error **errp);
 void colo_compare_register_notifier(Notifier *notify);
 void colo_compare_unregister_notifier(Notifier *notify);
+void colo_compare_cleanup(void);
 
 #endif /* QEMU_COLO_COMPARE_H */
diff --git a/net/net.c b/net/net.c
index 2a472604ec..76bbb7c31b 100644
--- a/net/net.c
+++ b/net/net.c
@@ -52,6 +52,7 @@
 #include "qapi/error.h"
 #include "qapi/opts-visitor.h"
 #include "sysemu/runstate.h"
+#include "net/colo-compare.h"
 #include "net/filter.h"
 #include "qapi/string-output-visitor.h"
 
@@ -1402,6 +1403,9 @@ void net_cleanup(void)
 {
 NetClientState *nc;
 
+/*cleanup colo compare module for COLO*/
+colo_compare_cleanup();
+
 /* We may del multiple entries during qemu_del_net_client(),
  * so QTAILQ_FOREACH_SAFE() is also not safe here.
  */
-- 
2.25.1




[PATCH 5/7] Add a function named packet_new_nocopy for COLO.

2021-06-08 Thread Zhang Chen
From: "Rao, Lei" 

Use packet_new_nocopy() instead of packet_new() in the
filter-rewriter module. This saves one memory copy in the
processing of each network packet.
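
A short usage sketch of the ownership contract (names as in this patch): the
caller hands the buffer over and must not free it; packet_destroy() releases
it later:

uint8_t *buf = g_malloc(size);
/* ... receive the packet into buf ... */
Packet *pkt = packet_new_nocopy(buf, size, vnet_hdr_len);
/* do NOT g_free(buf) here -- pkt->data now owns it */

/* ... when the packet is done ... */
packet_destroy(pkt, NULL);   /* frees pkt->data, i.e. the original buf */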

Signed-off-by: Lei Rao 
Signed-off-by: Zhang Chen 
Reviewed-by: Zhang Chen 
---
 net/colo.c| 25 +
 net/colo.h|  1 +
 net/filter-rewriter.c |  3 +--
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/net/colo.c b/net/colo.c
index ef00609848..3a3e6e89a0 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -157,19 +157,28 @@ void connection_destroy(void *opaque)
 
 Packet *packet_new(const void *data, int size, int vnet_hdr_len)
 {
-Packet *pkt = g_slice_new(Packet);
+Packet *pkt = g_slice_new0(Packet);
 
 pkt->data = g_memdup(data, size);
 pkt->size = size;
 pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 pkt->vnet_hdr_len = vnet_hdr_len;
-pkt->tcp_seq = 0;
-pkt->tcp_ack = 0;
-pkt->seq_end = 0;
-pkt->header_size = 0;
-pkt->payload_size = 0;
-pkt->offset = 0;
-pkt->flags = 0;
+
+return pkt;
+}
+
+/*
+ * packet_new_nocopy will not copy data, so the caller can't release
+ * the data. And it will be released in packet_destroy.
+ */
+Packet *packet_new_nocopy(void *data, int size, int vnet_hdr_len)
+{
+Packet *pkt = g_slice_new0(Packet);
+
+pkt->data = data;
+pkt->size = size;
+pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+pkt->vnet_hdr_len = vnet_hdr_len;
 
 return pkt;
 }
diff --git a/net/colo.h b/net/colo.h
index 573ab91785..d91cd245c4 100644
--- a/net/colo.h
+++ b/net/colo.h
@@ -101,6 +101,7 @@ bool connection_has_tracked(GHashTable *connection_track_table,
 ConnectionKey *key);
 void connection_hashtable_reset(GHashTable *connection_track_table);
 Packet *packet_new(const void *data, int size, int vnet_hdr_len);
+Packet *packet_new_nocopy(void *data, int size, int vnet_hdr_len);
 void packet_destroy(void *opaque, void *user_data);
 void packet_destroy_partial(void *opaque, void *user_data);
 
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index 10fe3939b1..cb3a96cde1 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -270,8 +270,7 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
 vnet_hdr_len = nf->netdev->vnet_hdr_len;
 }
 
-pkt = packet_new(buf, size, vnet_hdr_len);
-g_free(buf);
+pkt = packet_new_nocopy(buf, size, vnet_hdr_len);
 
 /*
  * if we get tcp packet
-- 
2.25.1




[PATCH 7/7] Fixed calculation error of pkt->header_size in fill_pkt_tcp_info()

2021-06-08 Thread Zhang Chen
From: "Rao, Lei" 

The data pointer has already skipped vnet_hdr_len in
parse_packet_early(), so we must not subtract vnet_hdr_len again
when calculating pkt->header_size in fill_pkt_tcp_info(). Otherwise,
it causes network packet comparison errors and greatly increases
the frequency of checkpoints.
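
A worked example with hypothetical sizes makes the off-by-vnet_hdr_len
visible (12-byte vnet header, 14-byte Ethernet header, 20-byte IP header,
20-byte TCP header, i.e. th_off == 5):

  transport_header - data  = 14 + 20       = 34   (vnet header already skipped)
  header_size (new code)   = 34 + (5 << 2) = 54   correct
  header_size (old code)   = 54 - 12       = 42   12 bytes short, so the
                                                   "payload" also covers part
                                                   of the TCP header and the
                                                   comparison keeps failing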

Signed-off-by: Lei Rao 
Signed-off-by: Zhang Chen 
Reviewed-by: Li Zhijian 
Reviewed-by: Zhang Chen 
Reviewed-by: Lukas Straub 
Tested-by: Lukas Straub 
---
 net/colo-compare.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 5b538f4e0b..b100e7b51f 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -211,7 +211,7 @@ static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
 pkt->tcp_ack = ntohl(tcphd->th_ack);
 *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
 pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
-   + (tcphd->th_off << 2) - pkt->vnet_hdr_len;
+   + (tcphd->th_off << 2);
 pkt->payload_size = pkt->size - pkt->header_size;
 pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
 pkt->flags = tcphd->th_flags;
-- 
2.25.1




RE: [PATCH V7 0/6] Passthrough specific network traffic in COLO

2021-06-08 Thread Zhang, Chen
Hi Jason and Markus,

No news for a while.
If you have time, please give me more comments for this series.

Thanks
Chen


> -Original Message-
> From: Zhang, Chen 
> Sent: Wednesday, May 26, 2021 10:54 AM
> To: Jason Wang ; qemu-dev  de...@nongnu.org>; Eric Blake ; Dr. David Alan
> Gilbert ; Markus Armbruster ;
> Daniel P. Berrangé ; Gerd Hoffmann
> ; Li Zhijian 
> Cc: Zhang Chen ; Zhang, Chen
> ; Lukas Straub 
> Subject: [PATCH V7 0/6] Passthrough specific network traffic in COLO
> 
> Some real user scenarios do not need to monitor all traffic, and the
> qemu net-filter also needs finer-grained flow control. This series
> gives the user the ability to pass through specific kinds of COLO
> network streams.
> 
> For example, a Windows guest user may want to enable Windows remote
> desktop to reach the guest (UDP/TCP 3389). This case mixes UDP and TCP,
> and the TCP payload always differs because of the real desktop display
> data (guest time, mouse position and so on).
> 
> Another case is a real user application that actively transmits
> information including the guest time: the primary guest sends data with
> time 10:01.000 while the secondary guest sends data with time 10:01.001,
> which always triggers a COLO checkpoint (live migration) and degrades
> guest performance.
> 
>   V7:
> - Keep some data structure stay in .c (patch 4/6).
> - Fix mutex init issue (patch 5/6).
> - Make the IPFlowSpec 'protocol' field optional (patch 1/6).
> - Add compare_passthrough_find function in net.c (patch 6/6).
> 
>   V6:
> - Change QAPI IPFlowSpec protocol from enum to str.
> - Use getprotobyname to handle the protocols.
> - Optimize code in net.
> 
>   V5:
> - Squash original 1-3 QAPI patches together.
> - Rename some data structures to avoid misunderstanding.
> - Reuse InetSocketAddressBase in IPFlowSpec.
> - Add new function in util/qemu-sockets.c to parse
>   InetSocketAddressBase.
> - Update HMP command define to reuse current code.
> - Add more comments.
> 
>   V4:
> - Fix QAPI code conflict for V6.0 merged patches.
> - Note this feature for V6.1.
> 
>   V3:
> - Add COLO passthrough list lock.
> - Add usage demo and more comments.
> 
>   V2:
> - Add the n-tuple support.
> - Add some qapi definitions.
> - Support multi colo-compare objects.
> - Support setup each rules for each objects individually.
> - Clean up COLO compare definition to .h file.
> - Rebase HMP command for stable tree.
> - Add redundant rules check.
> 
> 
> Zhang Chen (6):
>   qapi/net: Add IPFlowSpec and QMP command for COLO passthrough
>   util/qemu-sockets.c: Add inet_parse_base to handle
> InetSocketAddressBase
>   hmp-commands: Add new HMP command for COLO passthrough
>   net/colo-compare: Move data structure and define to .h file.
>   net/colo-compare: Add passthrough list to CompareState
>   net/net.c: Add handler for COLO passthrough connection
> 
>  hmp-commands.hx|  26 +++
>  include/monitor/hmp.h  |   2 +
>  include/qemu/sockets.h |   1 +
>  monitor/hmp-cmds.c |  82 
>  net/colo-compare.c | 160 +--
>  net/colo-compare.h |  98 
>  net/net.c  | 168 +
>  qapi/net.json  |  68 +
>  util/qemu-sockets.c|  14 
>  9 files changed, 510 insertions(+), 109 deletions(-)
> 
> --
> 2.25.1




Re: [PATCH v16 09/99] qtest/bios-tables-test: Rename tests not TCG specific

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/7/21 3:39 PM, Thomas Huth wrote:
> On 04/06/2021 17.51, Alex Bennée wrote:
>> From: Philippe Mathieu-Daudé 
>>
>> Various tests don't require TCG, but have '_tcg' in their name.
>> As this is misleading, remove 'tcg' from their name.
>>
>> Reported-by: Igor Mammedov 
>> Reviewed-by: Igor Mammedov 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> Signed-off-by: Alex Bennée 
>> Message-Id: <20210505125806.1263441-10-phi...@redhat.com>
>> ---
>>   tests/qtest/bios-tables-test.c | 142 -
>>   1 file changed, 71 insertions(+), 71 deletions(-)
> [...]
>> @@ -1255,7 +1255,7 @@ static void test_acpi_microvm_rtc_tcg(void)
>>   free_test_data(&data);
>>   }
>>   -static void test_acpi_microvm_pcie_tcg(void)
>> +static void test_acpi_microvm_pcie(void)
>>   {
>>   test_data data;
> 
> This change is wrong: test_acpi_microvm_pcie_tcg() uses data.tcg_only =
> true, so the _tcg suffix indeed makes sense here.

I supposed I messed while rebasing...




Re: [PULL 00/11] Trivial branch for 6.1 patches

2021-06-08 Thread Peter Maydell
On Mon, 7 Jun 2021 at 20:00, Laurent Vivier  wrote:
>
> The following changes since commit 6f398e533f5e259b4f937f4aa9de970f7201d166:
>
>   Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210604' 
> into staging (2021-06-05 11:25:52 +0100)
>
> are available in the Git repository at:
>
>   git://github.com/vivier/qemu.git tags/trivial-branch-for-6.1-pull-request
>
> for you to fetch changes up to df77d45a51412ca84abd7f1490b48c1bccf07057:
>
>   vhost-vdpa: Remove redundant declaration of address_space_memory 
> (2021-06-05 21:33:46 +0200)
>
> 
> Trivial branch pull request 20210607
>
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/6.1
for any user-visible changes.

-- PMM



Re: [PATCH v5 10/11] block: use int64_t instead of int in driver discard handlers

2021-06-08 Thread Vladimir Sementsov-Ogievskiy

07.06.2021 21:13, Eric Blake wrote:

On Wed, May 05, 2021 at 10:50:00AM +0300, Vladimir Sementsov-Ogievskiy wrote:

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.

We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).

So, convert driver discard handlers bytes parameter to int64_t.

The only caller of all updated functions is bdrv_co_pdiscard in
block/io.c. It is already prepared to work with 64bit requests, but
passes at most max(bs->bl.max_pdiscard, INT_MAX) to the driver.

Let's look at all updated functions:

backup-top: pass to bdrv_co_pdiscard which is 64bit


and to backup_top_cbw, but that is also 64-bit



blkdebug: all calculations are still OK, thanks to
   bdrv_check_qiov_request().
   both rule_check and bdrv_co_pdiscard are 64bit

blklogwrites: pass to blk_loc_writes_co_log which is 64bit

blkreply, copy-on-read, filter-compress: pass to bdrv_co_pdiscard, OK


blkreplay



file-posix: one handler calls raw_account_discard(), which is 64bit, and
   both handlers call raw_do_pdiscard(). Update raw_do_pdiscard(), which
   passes to RawPosixAIOData::aio_nbytes, which is 64bit (and calls
   raw_account_discard())

gluster: somehow, third argument of glfs_discard_async is size_t.
   Let's set max_pdiscard accordingly.

iscsi: iscsi_allocmap_set_invalid is 64bit,
   !is_byte_request_lun_aligned is 64bit.
   list.num is uint32_t. Let's clarify max_pdiscard and
   pdiscard_alignment.


The patch tweaks max_pdiscard, but doesn't change pdiscard_alignment.



mirror_top, preallocate: pass to bdrv_mirror_top_do_write() which is
   64bit


file is mirror.c, not mirror-top.c.  But it matches the BlockDriver
bdrv_mirror_top name.  preallocate does not call
bdrv_mirror_top_do_write, so it's probably worth separating that line
out.



nbd: protocol limitation. max_pdiscard is already set strictly enough,
   keep it as is for now.

nvmd: buf.nlb is uint32_t and we do shift. So, add corresponding limits
   to nvme_refresh_limits().


nvme



qcow2: calculations are still OK, thanks to bdrv_check_qiov_request(),
   qcow2_cluster_discard() is 64bit.

raw-format: raw_adjust_offset() is 64bit, bdrv_co_pdiscard too.

sheepdog: the format is deprecated. Don't care and just make the old
   INT_MAX limit explicit

throttle: pass to bdrv_co_pdiscard() which is 64bit and to
   throttle_group_co_io_limits_intercept() which is 64bit as well.

test-block-iothread: bytes argument is unused

Great! Now all drivers are prepared to 64bit discard requests or has
explicit max_pdiscard limit.


are prepared to handle 64-bit discard requests, or else have explicit
max_pdiscard limits.



Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/block_int.h|  2 +-
  block/backup-top.c   |  2 +-
  block/blkdebug.c |  2 +-
  block/blklogwrites.c |  4 ++--
  block/blkreplay.c|  2 +-
  block/copy-on-read.c |  2 +-
  block/file-posix.c   |  7 ---
  block/filter-compress.c  |  2 +-
  block/gluster.c  |  7 +--
  block/iscsi.c| 10 +-
  block/mirror.c   |  2 +-
  block/nbd.c  |  6 --
  block/nvme.c | 14 +-
  block/preallocate.c  |  2 +-
  block/qcow2.c|  2 +-
  block/raw-format.c   |  2 +-
  block/sheepdog.c | 15 ++-
  block/throttle.c |  2 +-
  tests/unit/test-block-iothread.c |  2 +-
  block/trace-events   |  4 ++--
  20 files changed, 61 insertions(+), 30 deletions(-)




+++ b/block/gluster.c
@@ -891,6 +891,7 @@ out:
  static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
  {
  bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
+bs->bl.max_pdiscard = SIZE_MAX;


We probably want this to be MIN(GLUSTER_MAX_TRANSFER, SIZE_MAX). Also,
do we want to round it down to alignment boundaries?


I don't think so... We just call the glfs_discard_async() function, which is not part
of QEMU. So we shouldn't assume any extra restrictions beyond the argument
types, I think. bytes is size_t, so the maximum is SIZE_MAX.




+++ b/block/iscsi.c
@@ -1141,7 +1141,8 @@ iscsi_getlength(BlockDriverState *bs)
  }
  
  static int

-coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
+   int64_t bytes)
  {
  IscsiLun *iscsilun = bs->opaque;
  struct IscsiTask iTask;


Did you want to add some sort of assert(bytes / iscsilun->block_size
<= UINT32_MAX), or a comment that we are relying on bl.max_pdiscard?
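
For illustration only (not part of the posted patch), the guard being
discussed could look roughly like this inside iscsi_co_pdiscard(),
relying on bl.max_pdiscard to keep the quotient in range:

    /* bl.max_pdiscard guarantees this fits into list.num (uint32_t) */
    assert(bytes / iscsilun->block_size <= UINT32_MAX);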


Yes, will add, we are storing it to list.num which is

Re: [PATCH v16 00/99] arm tcg/kvm refactor and split with kvm only support

2021-06-08 Thread Philippe Mathieu-Daudé
Hi Alex,

On 6/4/21 5:51 PM, Alex Bennée wrote:
> Hi,
> 
> I have picked up the baton from Claudio to try and get the ARM
> re-factoring across the line. Most of the patches from Claudio remain
> unchanged and have just had minor fixups from re-basing against the
> moving target. I've done my best to make sure any fixes that have been
> made in the meantime weren't lost.
> 
> I've included Phillipe's qtest_has_accel v7 patches (I had problems
> with v8) to aid in my aarch64 testing. I'm expecting them to be
> up-streamed by Phillipe in due course. I've also nabbed one of
> Phillipe's Kconfig tweaks to allow for target specific expression of
> some config variables.
> 
> The main thing that enables the --disable-tcg build is the addition of
> --with-devices-FOO configure option which is a mechanism to override
> the existing default device configurations. The two that I've been
> testing are a 64 bit only build on x86:
> 
>   '../../configure' '--without-default-features' \
>  '--target-list=arm-softmmu,aarch64-softmmu' \
>  '--with-devices-aarch64=../../configs/aarch64-softmmu/64bit-only.mak'
> 
> which results in the aarch64-softmmu build only supporting sbsa-ref,
> virt and xlnx-versal-virt.
> 
> The second is a KVM only cross build:
> 
>   '../../configure' '--disable-docs' \
> '--target-list=aarch64-softmmu' \
> '--enable-kvm' '--disable-tcg' \
> '--cross-prefix=aarch64-linux-gnu-' \
> '--with-devices-aarch64=../../configs/aarch64-softmmu/virt-only.mak'
> 
> Finally I've made a few minor Kconfig and testing tweaks before adding
> some gitlab coverage. As a result I was able to drop the Revert: idau
> patch because I can properly build an image without stray devices in
> the qtree.
> 
> The following need review:
> 
>  - gitlab: defend the new stripped down arm64 configs
>  - tests/qtest: make xlnx-can-test conditional on being configured
>  - tests/qtest: split the cdrom-test into arm/aarch64
>  - hw/arm: add dependency on OR_IRQ for XLNX_VERSAL
>  - target/arm: move CONFIG_V7M out of default-devices

Without using --with-devices-aarch64, I'm getting:

FAILED: libqemu-aarch64-softmmu.fa.p/target_arm_tcg_sysemu_m_helper.c.o
cc -Ilibqemu-aarch64-softmmu.fa.p -I. -I../.. -Itarget/arm
-I../../target/arm -I../../capstone/include/capstone -Iqapi -Itrace -Iui
-Iui/shader -I/usr/include/pixman-1 -I/usr/include/spice-server
-I/usr/include/spice-1 -I/usr/include/glib-2.0
-I/usr/lib/aarch64-linux-gnu/glib-2.0/include -fdiagnostics-color=auto
-pipe -Wall -Winvalid-pch -Werror -std=gnu99 -O2 -g -isystem
/home/phil/qemu/linux-headers -isystem linux-headers -iquote . -iquote
/home/phil/qemu -iquote /home/phil/qemu/include -iquote
/home/phil/qemu/disas/libvixl -pthread -U_FORTIFY_SOURCE
-D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64
-D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef
-Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common
-fwrapv -Wold-style-declaration -Wold-style-definition -Wtype-limits
-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers
-Wempty-body -Wnested-externs -Wendif-labels -Wexpansion-to-defined
-Wimplicit-fallthrough=2 -Wno-missing-include-dirs
-Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE
-isystem../../linux-headers -isystemlinux-headers -DNEED_CPU_H
'-DCONFIG_TARGET="aarch64-softmmu-config-target.h"'
'-DCONFIG_DEVICES="aarch64-softmmu-config-devices.h"' -MD -MQ
libqemu-aarch64-softmmu.fa.p/target_arm_tcg_sysemu_m_helper.c.o -MF
libqemu-aarch64-softmmu.fa.p/target_arm_tcg_sysemu_m_helper.c.o.d -o
libqemu-aarch64-softmmu.fa.p/target_arm_tcg_sysemu_m_helper.c.o -c
../../target/arm/tcg/sysemu/m_helper.c
In file included from ../../target/arm/tcg/sysemu/m_helper.c:12:
/home/phil/qemu/include/exec/helper-proto.h:41:10: fatal error:
trace/generated-helpers.h: No such file or directory
   41 | #include "trace/generated-helpers.h"
  |  ^~~
compilation terminated.

I get this with both --enable-xen and --disable-xen.

Is that expected?

Thanks,

Phil.




Re: [PATCH v16 00/99] arm tcg/kvm refactor and split with kvm only support

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/8/21 10:44 AM, Philippe Mathieu-Daudé wrote:
> Hi Alex,
> 
> On 6/4/21 5:51 PM, Alex Bennée wrote:
>> Hi,
>>
>> I have picked up the baton from Claudio to try and get the ARM
>> re-factoring across the line. Most of the patches from Claudio remain
>> unchanged and have just had minor fixups from re-basing against the
>> moving target. I've done my best to make sure any fixes that have been
>> made in the meantime weren't lost.
>>
>> I've included Phillipe's qtest_has_accel v7 patches (I had problems
>> with v8) to aid in my aarch64 testing. I'm expecting them to be
>> up-streamed by Phillipe in due course. I've also nabbed one of
>> Phillipe's Kconfig tweaks to allow for target specific expression of
>> some config variables.
>>
>> The main thing that enables the --disable-tcg build is the addition of
>> --with-devices-FOO configure option which is a mechanism to override
>> the existing default device configurations. The two that I've been
>> testing are a 64 bit only build on x86:
>>
>>   '../../configure' '--without-default-features' \
>>  '--target-list=arm-softmmu,aarch64-softmmu' \
>>  '--with-devices-aarch64=../../configs/aarch64-softmmu/64bit-only.mak'
>>
>> which results in the aarch64-softmmu build only supporting sbsa-ref,
>> virt and xlnx-versal-virt.
>>
>> The second is a KVM only cross build:
>>
>>   '../../configure' '--disable-docs' \
>> '--target-list=aarch64-softmmu' \
>> '--enable-kvm' '--disable-tcg' \
>> '--cross-prefix=aarch64-linux-gnu-' \
>> '--with-devices-aarch64=../../configs/aarch64-softmmu/virt-only.mak'
>>
>> Finally I've made a few minor Kconfig and testing tweaks before adding
>> some gitlab coverage. As a result I was able to drop the Revert: idau
>> patch because I can properly build an image without stray devices in
>> the qtree.
>>
>> The following need review:
>>
>>  - gitlab: defend the new stripped down arm64 configs
>>  - tests/qtest: make xlnx-can-test conditional on being configured
>>  - tests/qtest: split the cdrom-test into arm/aarch64
>>  - hw/arm: add dependency on OR_IRQ for XLNX_VERSAL
>>  - target/arm: move CONFIG_V7M out of default-devices
> 
> Without using --with-devices-aarch64, I'm getting:
> 
> FAILED: libqemu-aarch64-softmmu.fa.p/target_arm_tcg_sysemu_m_helper.c.o
> [...]
> In file included from ../../target/arm/tcg/sysemu/m_helper.c:12:
> /home/phil/qemu/include/exec/helper-proto.h:41:10: fatal error:
> trace/generated-helpers.h: No such file or directory
>41 | #include "trace/generated-helpers.h"
>   |  ^~~
> compilation terminated.
> 
> both --enable-xen and --disable-xen.
> 
> Is that expected?

IIRC (this was looong ago), this configuration was previously not
buildable, and Kconfig errored out with something like:
"dependency not satisfied: ARM_V7M select TCG"




Re: [PATCH 5/6] kvm/i386: Add support for user space MSR filtering

2021-06-08 Thread Alexander Graf



On 24.05.21 22:01, Siddharth Chandrasekaran wrote:

Check and enable user space MSR filtering capability and handle new exit
reason KVM_EXIT_X86_WRMSR. This will be used in a follow up patch to
implement hyper-v overlay pages.

Signed-off-by: Siddharth Chandrasekaran 


This patch will break bisection, because we're no longer handling the 
writes in kernel space after this, but we also don't have user space 
handling available yet, right? It might be better to move all logic in 
this patch that sets up the filter for Hyper-V MSRs into the next one.



---
  target/i386/kvm/kvm.c | 72 +++
  1 file changed, 72 insertions(+)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 362f04ab3f..3591f8cecc 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -117,6 +117,8 @@ static bool has_msr_ucode_rev;
  static bool has_msr_vmx_procbased_ctls2;
  static bool has_msr_perf_capabs;
  static bool has_msr_pkrs;
+static bool has_msr_filtering;
+static bool msr_filters_active;
  
  static uint32_t has_architectural_pmu_version;

  static uint32_t num_architectural_pmu_gp_counters;
@@ -2138,6 +2140,57 @@ static void register_smram_listener(Notifier *n, void 
*unused)
   &smram_address_space, 1);
  }
  
+static void kvm_set_msr_filter_range(struct kvm_msr_filter_range *range, uint32_t flags,

+ uint32_t base, uint32_t nmsrs, ...)
+{
+int i, filter_to_userspace;
+va_list ap;
+
+range->flags = flags;
+range->nmsrs = nmsrs;
+range->base = base;
+
+va_start(ap, nmsrs);
+for (i = 0; i < nmsrs; i++) {
+filter_to_userspace = va_arg(ap, int);
+if (!filter_to_userspace) {
+range->bitmap[i / 8] = 1 << (i % 8);
+}
+}
+va_end(ap);
+}
+
+static int kvm_set_msr_filters(KVMState *s)
+{
+int r, nmsrs, nfilt = 0, bitmap_pos = 0;
+struct kvm_msr_filter filter = { };
+struct kvm_msr_filter_range *range;
+uint8_t bitmap_buf[KVM_MSR_FILTER_MAX_RANGES * 8] = {0};
+
+filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+
+if (has_hyperv) {
+/* Hyper-V overlay page MSRs */


I think you want to extend this comment and indicate in a human readable 
form that you set the filter on WRMSR to trap HV_X64_MSR_GUEST_OS_ID and 
HV_X64_MSR_HYPERCALL into user space here.
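
Something along these lines would do (illustrative wording only, not the
actual respin):

    /*
     * Hyper-V overlay page MSRs: trap WRMSR to HV_X64_MSR_GUEST_OS_ID and
     * HV_X64_MSR_HYPERCALL into user space, so QEMU can (re)map the
     * hypercall overlay page itself.
     */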



+nmsrs = 2;
+range = &filter.ranges[nfilt++];
+range->bitmap = &bitmap_buf[bitmap_pos];
+kvm_set_msr_filter_range(range, KVM_MSR_FILTER_WRITE,
+ HV_X64_MSR_GUEST_OS_ID, nmsrs,
+ true, /* HV_X64_MSR_GUEST_OS_ID */
+ true  /* HV_X64_MSR_HYPERCALL */);
+bitmap_pos += ROUND_UP(nmsrs, 8) / 8;
+assert(bitmap_pos < sizeof(bitmap_buf));
+}
+
+r = kvm_vm_ioctl(s, KVM_X86_SET_MSR_FILTER, &filter);
+if (r != 0) {
+error_report("kvm: failed to set MSR filters");
+return -1;
+}
+
+return 0;
+}
+
  int kvm_arch_init(MachineState *ms, KVMState *s)
  {
  uint64_t identity_base = 0xfffbc000;
@@ -2269,6 +2322,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
  }
  }
  
+has_msr_filtering = kvm_check_extension(s, KVM_CAP_X86_USER_SPACE_MSR) &&

+kvm_check_extension(s, KVM_CAP_X86_MSR_FILTER);
+if (has_msr_filtering) {
+ret = kvm_vm_enable_cap(s, KVM_CAP_X86_USER_SPACE_MSR, 0,
+KVM_MSR_EXIT_REASON_FILTER);
+if (ret == 0) {
+ret = kvm_set_msr_filters(s);
+msr_filters_active = (ret == 0);
+}
+}
+
  return 0;
  }
  
@@ -4542,6 +4606,11 @@ static bool host_supports_vmx(void)

  return ecx & CPUID_EXT_VMX;
  }
  
+static int kvm_handle_wrmsr(X86CPU *cpu, struct kvm_run *run)

+{
+return 0;


The default handler should always set run->msr.error = 1 to mimic the 
existing behavior.



+}
+
  #define VMX_INVALID_GUEST_STATE 0x8021
  
  int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)

@@ -4600,6 +4669,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
  ioapic_eoi_broadcast(run->eoi.vector);
  ret = 0;
  break;
+case KVM_EXIT_X86_WRMSR:
+ret = kvm_handle_wrmsr(cpu, run);


Please provide a default RDMSR handler as well here.


Alex


+break;
  default:
  fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
  ret = -1;









Re: [PATCH v2] hw/intc/arm_gicv3_cpuif: Tolerate spurious EOIR writes

2021-06-08 Thread Peter Maydell
On Fri, 4 Jun 2021 at 14:07, Jean-Philippe Brucker
 wrote:
>
> Commit 382c7160d1cd ("hw/intc/arm_gicv3_cpuif: Fix EOIR write access
> check logic") added an assert_not_reached() if the guest writes the EOIR
> register while no interrupt is active.
>
> It turns out some software does this: EDK2, in
> GicV3ExitBootServicesEvent(), unconditionally write EOIR for all
> interrupts that it manages. This now causes QEMU to abort when running
> UEFI on a VM with GICv3. Although it is UNPREDICTABLE behavior and EDK2
> does need fixing, the punishment seems a little harsh, especially since
> icc_eoir_write() already tolerates writes of nonexistent interrupt
> numbers. Display a guest error and tolerate spurious EOIR writes.
>
> Fixes: 382c7160d1cd ("hw/intc/arm_gicv3_cpuif: Fix EOIR write access check 
> logic")
> Signed-off-by: Jean-Philippe Brucker 
> ---
> v2: Added qemu_log_mask() (so I didn't keep the Reviewed-by tag)
> v1: 
> https://lore.kernel.org/qemu-devel/20210603110012.1182530-1-jean-phili...@linaro.org/



Applied to target-arm.next, thanks.

-- PMM
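
For readers following the thread, the shape of the fix is roughly this
(a sketch, not the applied patch; placement and wording are assumptions):

    /* where icc_eoir_write() used to hit the unreachable assertion */
    qemu_log_mask(LOG_GUEST_ERROR,
                  "%s: EOIR write but no interrupt is active\n", __func__);
    return;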



Re: [PATCH 0/3] target/arm: Decode fixes for aarch64

2021-06-08 Thread Peter Maydell
On Fri, 4 Jun 2021 at 19:36, Richard Henderson
 wrote:
>
> A couple of printfs left over from the beginning of time,
> and asserts that are reachable because of lack of decode.
>
>
> r~
>
>
> Richard Henderson (3):
>   target/arm: Diagnose UNALLOCATED in disas_simd_two_reg_misc_fp16
>   target/arm: Remove fprintf from disas_simd_mod_imm
>   target/arm: Diagnose UNALLOCATED in disas_simd_three_reg_same_fp16
>
>  target/arm/translate-a64.c | 87 +++---
>  1 file changed, 52 insertions(+), 35 deletions(-)



Applied to target-arm.next, thanks.

-- PMM



Re: [PATCH 6/6] hyper-v: Handle hypercall code page as an overlay page

2021-06-08 Thread Alexander Graf



On 24.05.21 22:02, Siddharth Chandrasekaran wrote:

Hypercall code page is specified in the Hyper-V TLFS to be an overlay
page, ie., guest chooses a GPA and the host _places_ a page at that
location, making it visible to the guest and the existing page becomes
inaccessible. Similarly when disabled, the host should _remove_ the
overlay and the old page should become visible to the guest.

Until now, KVM patched the hypercall code directly into the guest
chosen GPA which is incorrect; instead, use the new user space MSR
filtering feature to trap hypercall page MSR writes, overlay it as
requested and then invoke a KVM_SET_MSR from user space to bounce back
control KVM. This bounce back is needed as KVM may have to write data
into the newly overlaid page.

Signed-off-by: Siddharth Chandrasekaran 
---
  hw/hyperv/hyperv.c | 10 -
  include/hw/hyperv/hyperv.h |  5 +++
  target/i386/kvm/hyperv.c   | 84 ++
  target/i386/kvm/hyperv.h   |  4 ++
  target/i386/kvm/kvm.c  | 26 +++-
  5 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c
index ac45e8e139..aa5ac5226e 100644
--- a/hw/hyperv/hyperv.c
+++ b/hw/hyperv/hyperv.c
@@ -36,6 +36,7 @@ struct SynICState {
  OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC)
  
  static bool synic_enabled;

+struct hyperv_overlay_page hcall_page;
  
  static void alloc_overlay_page(struct hyperv_overlay_page *overlay,

 Object *owner, const char *name)
@@ -50,7 +51,7 @@ static void alloc_overlay_page(struct hyperv_overlay_page 
*overlay,
   * This method must be called with iothread lock taken as it modifies
   * the memory hierarchy.
   */
-static void hyperv_overlay_update(struct hyperv_overlay_page *overlay, hwaddr 
addr)
+void hyperv_overlay_update(struct hyperv_overlay_page *overlay, hwaddr addr)
  {
  if (addr != HYPERV_INVALID_OVERLAY_GPA) {
  /* check if overlay page is enabled */
@@ -70,6 +71,13 @@ static void hyperv_overlay_update(struct hyperv_overlay_page 
*overlay, hwaddr ad
  }
  }
  
+void hyperv_overlay_init(void)

+{
+memory_region_init_ram(&hcall_page.mr, NULL, "hyperv.hcall_page",
+   qemu_real_host_page_size, &error_abort);
+hcall_page.addr = HYPERV_INVALID_OVERLAY_GPA;
+}
+
  static void synic_update(SynICState *synic, bool enable,
   hwaddr msg_page_addr, hwaddr event_page_addr)
  {
diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h
index d989193e84..f31a81 100644
--- a/include/hw/hyperv/hyperv.h
+++ b/include/hw/hyperv/hyperv.h
@@ -85,6 +85,11 @@ static inline uint32_t hyperv_vp_index(CPUState *cs)
  return cs->cpu_index;
  }
  
+extern struct hyperv_overlay_page hcall_page;

+
+void hyperv_overlay_init(void);
+void hyperv_overlay_update(struct hyperv_overlay_page *page, hwaddr addr);
+
  void hyperv_synic_add(CPUState *cs);
  void hyperv_synic_reset(CPUState *cs);
  void hyperv_synic_update(CPUState *cs, bool enable,
diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c
index f49ed2621d..01c9c2468c 100644
--- a/target/i386/kvm/hyperv.c
+++ b/target/i386/kvm/hyperv.c
@@ -16,6 +16,76 @@
  #include "hyperv.h"
  #include "hw/hyperv/hyperv.h"
  #include "hyperv-proto.h"
+#include "kvm_i386.h"
+
+struct x86_hv_overlay {
+struct hyperv_overlay_page *page;
+uint32_t msr;
+hwaddr gpa;
+};
+
+static void async_overlay_update(CPUState *cs, run_on_cpu_data data)
+{
+X86CPU *cpu = X86_CPU(cs);
+struct x86_hv_overlay *overlay = data.host_ptr;
+
+qemu_mutex_lock_iothread();
+hyperv_overlay_update(overlay->page, overlay->gpa);
+qemu_mutex_unlock_iothread();
+
+/**
+ * Call KVM so it can keep a copy of the MSR data and do other post-overlay
+ * actions such as filling the overlay page contents before returning to
+ * guest. This works because MSR filtering is inactive for KVM_SET_MSRS
+ */
+kvm_put_one_msr(cpu, overlay->msr, overlay->gpa);
+
+g_free(overlay);
+}
+
+static void do_overlay_update(X86CPU *cpu, struct hyperv_overlay_page *page,
+  uint32_t msr, uint64_t data)
+{
+struct x86_hv_overlay *overlay = g_malloc(sizeof(struct x86_hv_overlay));
+
+*overlay = (struct x86_hv_overlay) {
+.page = page,
+.msr = msr,
+.gpa = data
+};
+
+/**
+ * This will run in this cpu thread before it returns to KVM, but in a
+ * safe environment (i.e. when all cpus are quiescent) -- this is
+ * necessary because memory hierarchy is being changed
+ */
+async_safe_run_on_cpu(CPU(cpu), async_overlay_update,
+  RUN_ON_CPU_HOST_PTR(overlay));
+}
+
+static void overlay_update(X86CPU *cpu, uint32_t msr, uint64_t data)
+{
+switch (msr) {
+case HV_X64_MSR_GUEST_OS_ID:
+/**
+ * When GUEST_OS_ID is cleared, hypercall overlay should be removed;
+ * otherwise i

Re: [PATCH v3 03/28] tcg: Re-order tcg_region_init vs tcg_prologue_init

2021-06-08 Thread Alex Bennée


Richard Henderson  writes:

> Instead of delaying tcg_region_init until after tcg_prologue_init
> is complete, do tcg_region_init first and let tcg_prologue_init
> shrink the first region by the size of the generated prologue.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alex Bennée 


-- 
Alex Bennée



Re: [PATCH v16 02/99] accel: Introduce 'query-accels' QMP command

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/7/21 3:07 PM, Thomas Huth wrote:
> On 04/06/2021 17.51, Alex Bennée wrote:
>> From: Philippe Mathieu-Daudé 
>>
>> Introduce the 'query-accels' QMP command which returns a list
>> of built-in accelerator names.
>>
>> - Accelerator is a QAPI enum of all existing accelerators,
>>
>> - AcceleratorInfo is a QAPI structure providing accelerator
>>    specific information. Currently the common structure base
>>    provides the name of the accelerator, while the specific
>>    part is empty, but each accelerator can expand it.
>>
>> - 'query-accels' QMP command returns a list of @AcceleratorInfo
>>
>> For example on a KVM-only build we get:
>>
>>  { "execute": "query-accels" }
>>  {
>>  "return": [
>>  {
>>  "name": "qtest"
>>  },
>>  {
>>  "name": "kvm"
>>  }
>>  ]
>>  }
>>
>> Note that we can't make the enum values or union branches conditional
>> because of target-specific poisoning of accelerator definitions.
>>
>> Reviewed-by: Eric Blake 
>> Reviewed-by: Alex Bennée 
>> Tested-by: Alex Bennée 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> Signed-off-by: Alex Bennée 
>> Message-Id: <20210505125806.1263441-3-phi...@redhat.com>
>> ---
> [...]
>> +static const bool accel_builtin_list[ACCELERATOR__MAX] = {
>> +    [ACCELERATOR_QTEST] = true,
>> +#ifdef CONFIG_TCG
>> +    [ACCELERATOR_TCG] = true,
>> +#endif
>> +#ifdef CONFIG_KVM
>> +    [ACCELERATOR_KVM] = true,
>> +#endif
>> +#ifdef CONFIG_HAX
>> +    [ACCELERATOR_HAX] = true,
>> +#endif
>> +#ifdef CONFIG_HVF
>> +    [ACCELERATOR_HVF] = true,
>> +#endif
>> +#ifdef CONFIG_WHPX
>> +    [ACCELERATOR_WHPX] = true,
>> +#endif
>> +#ifdef CONFIG_XEN_BACKEND
>> +    [ACCELERATOR_XEN] = true,
>> +#endif
> 
> Nit: Use alphabetical order here, too, just like you did in the enum?

This has been drastically simplified by Markus using target-specific
machine code in v8.




Re: [PATCH v1 1/1] tests/data/acpi/virt: add IORT files for ITS

2021-06-08 Thread Peter Maydell
On Thu, 3 Jun 2021 at 15:51, Shashi Mallela  wrote:
>
> Added expected IORT files applicable with latest
> GICv3 ITS changes.
>
> Signed-off-by: Shashi Mallela 

I assume this is intended to fix the 'make check' failure with
your ITS patches? This needs to be part of the ITS patch series,
and it has to be a multi-step process. This is documented in
a comment at the top of tests/qtest/bios-tables-test.c --
basically you start with a commit that says "temporarily
differences in these tables are OK", then you have the commits
that add the changes to the system that cause the tables to
change (ie the patch which enables the ITS on the virt board),
and then after that you update the expected tables and remove
the bit that says "differences are OK".

thanks
-- PMM
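
For reference, step one of that process usually amounts to listing the
tables that are about to change in tests/qtest/bios-tables-test-allowed-diff.h
(a sketch only; the exact file names depend on the series):

    /* List of comma-separated changed AML files to ignore */
    "tests/data/acpi/virt/IORT",

and the final patch of the series empties that list again once the expected
binaries have been updated.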



[PATCH v4 01/26] s390x/tcg: Fix FP CONVERT TO (LOGICAL) FIXED NaN handling

2021-06-08 Thread David Hildenbrand
In case we encounter a NaN, we have to return the smallest possible
number, corresponding to either 0 or the maximum negative number. This
seems to differ from IEEE handling as implemented in softfloat, whereby
we return the biggest possible number.

While at it, use float32_to_uint64() in the CLGEB handler.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/fpu_helper.c | 41 +++
 target/s390x/vec_fpu_helper.c |  8 +--
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index f155bc048c..13af158748 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -509,6 +509,9 @@ uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, 
uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float32_is_any_nan(v2)) {
+return INT64_MIN;
+}
 return ret;
 }
 
@@ -520,6 +523,9 @@ uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, 
uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float64_is_any_nan(v2)) {
+return INT64_MIN;
+}
 return ret;
 }
 
@@ -532,6 +538,9 @@ uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, 
uint64_t l, uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float128_is_any_nan(v2)) {
+return INT64_MIN;
+}
 return ret;
 }
 
@@ -543,6 +552,9 @@ uint64_t HELPER(cfeb)(CPUS390XState *env, uint64_t v2, 
uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float32_is_any_nan(v2)) {
+return INT32_MIN;
+}
 return ret;
 }
 
@@ -554,6 +566,9 @@ uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, 
uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float64_is_any_nan(v2)) {
+return INT32_MIN;
+}
 return ret;
 }
 
@@ -566,6 +581,9 @@ uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, 
uint64_t l, uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float128_is_any_nan(v2)) {
+return INT32_MIN;
+}
 return ret;
 }
 
@@ -573,12 +591,12 @@ uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, 
uint64_t l, uint32_t m34)
 uint64_t HELPER(clgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
 int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
-uint64_t ret;
-
-v2 = float32_to_float64(v2, &env->fpu_status);
-ret = float64_to_uint64(v2, &env->fpu_status);
+uint64_t ret = float32_to_uint64(v2, &env->fpu_status);
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float32_is_any_nan(v2)) {
+return 0;
+}
 return ret;
 }
 
@@ -590,6 +608,9 @@ uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, 
uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float64_is_any_nan(v2)) {
+return 0;
+}
 return ret;
 }
 
@@ -601,6 +622,9 @@ uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, 
uint64_t l, uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float128_is_any_nan(make_float128(h, l))) {
+return 0;
+}
 return ret;
 }
 
@@ -612,6 +636,9 @@ uint64_t HELPER(clfeb)(CPUS390XState *env, uint64_t v2, 
uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float32_is_any_nan(v2)) {
+return 0;
+}
 return ret;
 }
 
@@ -623,6 +650,9 @@ uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, 
uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float64_is_any_nan(v2)) {
+return 0;
+}
 return ret;
 }
 
@@ -634,6 +664,9 @@ uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, 
uint64_t l, uint32_t m34)
 
 s390_restore_bfp_rounding_mode(env, old_mode);
 handle_exceptions(env, xxc_from_m34(m34), GETPC());
+if (float128_is_any_nan(make_float128(h, l))) {
+return 0;
+}
 return ret;
 }
 
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index c1564e819b..56765918d2 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -326,7 +326,9 @@ void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, 
CPUS390XState *env,
 
 static uint64_t vcgd64(uint64_t a, float_status *s)
 {
-return float64_to_int64(a, s);
+const uint64_t tmp

[PATCH v4 00/26] s390x/tcg: Implement Vector enhancements facility and switch to z14

2021-06-08 Thread David Hildenbrand
@Conny, I think this should be good to go.


This series adds support for the "Vector enhancements facility" and bumps
the qemu CPU model to a stripped-down z14.

I tested most vector FP instructions by generating random instructions
and vectors, comparing the result with results on actual hardware. I did
not test instructions/instruction variants with (partial) undeterministic
behavior and exception handling.

Linux also seems to boot/work fine with it. However, while testing this
series I noticed that Linux checks for the wrong facility bit - see [1].
I tested by temporarily faking availability of the "wrong" facility bit.

[1] https://lkml.kernel.org/r/20210503121244.25232-1-da...@redhat.com

v3 -> v4:
- "s390x/tcg: Implement VECTOR FP (MAXIMUM|MINIMUM)"
-- Move "const bool ..." into respective blocks
- "linux-user: elf: s390x: Prepare for Vector enhancements facility"
-- Add all currently defined Linux HWCAP.

v2 -> v3:
- "s390x/tcg: Fix FP CONVERT TO (LOGICAL) FIXED NaN handling"
-- Keep proper signal handling in the VECTOR variants
- "s390x/tcg: Simplify vop64_2() handling"
-- uint64_t se -> bool se
- "s390x/tcg: Implement VECTOR FP (MAXIMUM|MINIMUM)"
-- Drop special cases when both values are infinity
-- Simplify cases when both values are zero
-- Simplify dcmask handling
-- uint64_t se -> bool se
- "linux-user: elf: Prepare for Vector enhancements facility"
-- Added to properly indicate the new elf capability

v1 -> v2:
- Too much changed to spell it out explicitly. Mostly addressed feedback
  from Richard, a couple of bugfixes found while testing, and some
  simplifications/cleanups.
- Rebased on top of Richard's softfloat rework

Cc: qemu-s3...@nongnu.org
Cc: Cornelia Huck 
Cc: Halil Pasic 
Cc: Christian Borntraeger 
Cc: Thomas Huth 
Cc: Richard Henderson 
Cc: Laurent Vivier 


David Hildenbrand (26):
  s390x/tcg: Fix FP CONVERT TO (LOGICAL) FIXED NaN handling
  s390x/tcg: Fix instruction name for VECTOR FP LOAD
(LENGTHENED|ROUNDED)
  s390x/tcg: Simplify vop64_3() handling
  s390x/tcg: Simplify vop64_2() handling
  s390x/tcg: Simplify vfc64() handling
  s390x/tcg: Simplify vftci64() handling
  s390x/tcg: Simplify vfma64() handling
  s390x/tcg: Simplify vfll32() handling
  s390x/tcg: Simplify vflr64() handling
  s390x/tcg: Simplify wfc64() handling
  s390x/tcg: Implement VECTOR BIT PERMUTE
  s390x/tcg: Implement VECTOR MULTIPLY SUM LOGICAL
  s390x/tcg: Implement 32/128 bit for VECTOR FP
(ADD|DIVIDE|MULTIPLY|SUBTRACT)
  s390x/tcg: Implement 32/128 bit for VECTOR (LOAD FP INTEGER|FP SQUARE
ROOT)
  s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE *
  s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE (AND SIGNAL)
SCALAR
  s390x/tcg: Implement 64 bit for VECTOR FP LOAD LENGTHENED
  s390x/tcg: Implement 128 bit for VECTOR FP LOAD ROUNDED
  s390x/tcg: Implement 32/128 bit for VECTOR FP PERFORM SIGN OPERATION
  s390x/tcg: Implement 32/128 bit for VECTOR FP TEST DATA CLASS
IMMEDIATE
  s390x/tcg: Implement 32/128 bit for VECTOR FP MULTIPLY AND
(ADD|SUBTRACT)
  s390x/tcg: Implement VECTOR FP NEGATIVE MULTIPLY AND (ADD|SUBTRACT)
  s390x/tcg: Implement VECTOR FP (MAXIMUM|MINIMUM)
  linux-user: elf: s390x: Prepare for Vector enhancements facility
  s390x/tcg: We support Vector enhancements facility
  s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2

 hw/s390x/s390-virtio-ccw.c  |3 +
 include/elf.h   |7 +
 linux-user/elfload.c|1 +
 target/s390x/cpu_models.c   |4 +-
 target/s390x/fpu_helper.c   |   41 +-
 target/s390x/gen-features.c |   14 +-
 target/s390x/helper.h   |   70 +-
 target/s390x/insn-data.def  |   16 +-
 target/s390x/internal.h |9 +
 target/s390x/translate_vx.c.inc |  633 ++
 target/s390x/vec_fpu_helper.c   | 1079 ++-
 target/s390x/vec_helper.c   |   22 +
 12 files changed, 1428 insertions(+), 471 deletions(-)

-- 
2.31.1




[PATCH v4 03/26] s390x/tcg: Simplify vop64_3() handling

2021-06-08 Thread David Hildenbrand
Let's simplify, reworking our handler generation, passing the whole "m5"
register content and not providing specialized handlers for "se", and
reading/writing proper float64 values using new helpers.

Suggested-by: Richard Henderson 
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  4 --
 target/s390x/translate_vx.c.inc | 11 ++--
 target/s390x/vec_fpu_helper.c   | 94 +
 3 files changed, 30 insertions(+), 79 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index d4e4f3388f..2344f81273 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -247,7 +247,6 @@ DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, 
cptr, env, i32)
 
 /* === Vector Floating-Point Instructions */
 DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
-DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
@@ -271,7 +270,6 @@ DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
-DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
@@ -279,7 +277,6 @@ DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
-DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
@@ -287,7 +284,6 @@ DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, 
cptr, cptr, cptr, en
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
-DEF_HELPER_FLAGS_5(gvec_vfs64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
 
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index eb767f5288..2d3fbdfab2 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2443,7 +2443,6 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
 {
 const uint8_t fpf = get_field(s, m4);
 const uint8_t m5 = get_field(s, m5);
-const bool se = extract32(m5, 3, 1);
 gen_helper_gvec_3_ptr *fn;
 
 if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
@@ -2453,22 +2452,22 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps 
*o)
 
 switch (s->fields.op2) {
 case 0xe3:
-fn = se ? gen_helper_gvec_vfa64s : gen_helper_gvec_vfa64;
+fn = gen_helper_gvec_vfa64;
 break;
 case 0xe5:
-fn = se ? gen_helper_gvec_vfd64s : gen_helper_gvec_vfd64;
+fn = gen_helper_gvec_vfd64;
 break;
 case 0xe7:
-fn = se ? gen_helper_gvec_vfm64s : gen_helper_gvec_vfm64;
+fn = gen_helper_gvec_vfm64;
 break;
 case 0xe2:
-fn = se ? gen_helper_gvec_vfs64s : gen_helper_gvec_vfs64;
+fn = gen_helper_gvec_vfs64;
 break;
 default:
 g_assert_not_reached();
 }
 gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
-   get_field(s, v3), cpu_env, 0, fn);
+   get_field(s, v3), cpu_env, m5, fn);
 return DISAS_NEXT;
 }
 
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 56765918d2..280ee0f1ea 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -78,6 +78,16 @@ static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, 
uint8_t vec_exc,
 }
 }
 
+static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
+{
+return make_float64(s390_vec_read_element64(v, enr));
+}
+
+static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
+{
+return s390_vec_write_element64(v, enr, data);
+}
+
 typedef uint64_t

[PATCH v4 08/26] s390x/tcg: Simplify vfll32() handling

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  1 -
 target/s390x/translate_vx.c.inc |  6 +-
 target/s390x/vec_fpu_helper.c   | 21 +
 3 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 3c87593553..63039c8d73 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -262,7 +262,6 @@ DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 4b5bf0a7e3..5ff59984b5 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2570,18 +2570,14 @@ static DisasJumpType op_vfll(DisasContext *s, DisasOps 
*o)
 {
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
-gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfll32;
 
 if (fpf != FPF_SHORT || extract32(m4, 0, 3)) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
-if (extract32(m4, 3, 1)) {
-fn = gen_helper_gvec_vfll32s;
-}
 gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
-   0, fn);
+   m4, gen_helper_gvec_vfll32);
 return DISAS_NEXT;
 }
 
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 23b38df158..7bd3e44acc 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -287,9 +287,10 @@ DEF_GVEC_VFC(vfce, eq)
 DEF_GVEC_VFC(vfch, lt)
 DEF_GVEC_VFC(vfche, le)
 
-static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
-   bool s, uintptr_t retaddr)
+void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
 {
+const bool s = extract32(simd_data(desc), 3, 1);
 uint8_t vxc, vec_exc = 0;
 S390Vector tmp = {};
 int i;
@@ -306,20 +307,8 @@ static void vfll32(S390Vector *v1, const S390Vector *v2, 
CPUS390XState *env,
 break;
 }
 }
-handle_ieee_exc(env, vxc, vec_exc, retaddr);
-*v1 = tmp;
-}
-
-void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
- uint32_t desc)
-{
-vfll32(v1, v2, env, false, GETPC());
-}
-
-void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env,
-  uint32_t desc)
-{
-vfll32(v1, v2, env, true, GETPC());
+handle_ieee_exc(env, vxc, vec_exc, GETPC());
+*(S390Vector *)v1 = tmp;
 }
 
 static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
-- 
2.31.1




[PATCH v4 05/26] s390x/tcg: Simplify vfc64() handling

2021-06-08 Thread David Hildenbrand
Pass the m5 field via simd_data() and don't provide specialized handlers
for single-element variants.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  6 ---
 target/s390x/translate_vx.c.inc | 45 +---
 target/s390x/vec_fpu_helper.c   | 94 +
 3 files changed, 38 insertions(+), 107 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 4788c1ddaf..02a16924a7 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -250,17 +250,11 @@ DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, 
cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
-DEF_HELPER_FLAGS_5(gvec_vfce64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32)
-DEF_HELPER_5(gvec_vfce64s_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
-DEF_HELPER_FLAGS_5(gvec_vfch64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32)
-DEF_HELPER_5(gvec_vfch64s_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
-DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
-DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 280d45bb19..604ae11024 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2497,7 +2497,6 @@ static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
 const uint8_t fpf = get_field(s, m4);
 const uint8_t m5 = get_field(s, m5);
 const uint8_t m6 = get_field(s, m6);
-const bool se = extract32(m5, 3, 1);
 const bool cs = extract32(m6, 0, 1);
 gen_helper_gvec_3_ptr *fn;
 
@@ -2506,37 +2505,21 @@ static DisasJumpType op_vfc(DisasContext *s, DisasOps 
*o)
 return DISAS_NORETURN;
 }
 
-if (cs) {
-switch (s->fields.op2) {
-case 0xe8:
-fn = se ? gen_helper_gvec_vfce64s_cc : gen_helper_gvec_vfce64_cc;
-break;
-case 0xeb:
-fn = se ? gen_helper_gvec_vfch64s_cc : gen_helper_gvec_vfch64_cc;
-break;
-case 0xea:
-fn = se ? gen_helper_gvec_vfche64s_cc : gen_helper_gvec_vfche64_cc;
-break;
-default:
-g_assert_not_reached();
-}
-} else {
-switch (s->fields.op2) {
-case 0xe8:
-fn = se ? gen_helper_gvec_vfce64s : gen_helper_gvec_vfce64;
-break;
-case 0xeb:
-fn = se ? gen_helper_gvec_vfch64s : gen_helper_gvec_vfch64;
-break;
-case 0xea:
-fn = se ? gen_helper_gvec_vfche64s : gen_helper_gvec_vfche64;
-break;
-default:
-g_assert_not_reached();
-}
+switch (s->fields.op2) {
+case 0xe8:
+fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64;
+break;
+case 0xeb:
+fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64;
+break;
+case 0xea:
+fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64;
+break;
+default:
+g_assert_not_reached();
 }
-gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
-   get_field(s, v3), cpu_env, 0, fn);
+gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
+   cpu_env, m5, fn);
 if (cs) {
 set_cc_static(s);
 }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index ab23a597da..01ee41d154 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -239,8 +239,8 @@ static int vfc64(S390Vector *v1, const S390Vector *v2, 
const S390Vector *v3,
 int i;
 
 for (i = 0; i < 2; i++) {
-const float64 a = s390_vec_read_element64(v2, i);
-const float64 b = s390_vec_read_element64(v3, i);
+const float64 a = s390_vec_read_float64(v2, i);
+const float64 b = s390_vec_read_float64(v3, i);
 
 /* swap the order of the parameters, so we can use existing functions 
*/
 if (fn(b, a, &env->fpu_status)) {
@@ -261,77 +261,31 @@ static int vfc64(S390Vector *v1, const S390Vector *v2, 
const S390Vector *v3,
 return 3;
 }
 
-void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3,
-   

[PATCH v4 04/26] s390x/tcg: Simplify vop64_2() handling

2021-06-08 Thread David Hildenbrand
Let's rework our macros and simplify. We still need helper functions in
most cases due to the different parameter types.

Next, we'll only have 32/128bit variants for vfi and vfsq, so special
case the others.

Note that for vfsq, the XxC and erm passed in the simd_data() will never be
set, resulting in the same behavior.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |   6 -
 target/s390x/translate_vx.c.inc |  18 ++-
 target/s390x/vec_fpu_helper.c   | 190 +---
 3 files changed, 58 insertions(+), 156 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 2344f81273..4788c1ddaf 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -262,16 +262,11 @@ DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, 
ptr, cptr, cptr, env, i3
 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
@@ -282,7 +277,6 @@ DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, 
cptr, cptr, cptr, en
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 2d3fbdfab2..280d45bb19 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2558,19 +2558,19 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps 
*o)
 
 switch (s->fields.op2) {
 case 0xc3:
-fn = se ? gen_helper_gvec_vcdg64s : gen_helper_gvec_vcdg64;
+fn = gen_helper_gvec_vcdg64;
 break;
 case 0xc1:
-fn = se ? gen_helper_gvec_vcdlg64s : gen_helper_gvec_vcdlg64;
+fn = gen_helper_gvec_vcdlg64;
 break;
 case 0xc2:
-fn = se ? gen_helper_gvec_vcgd64s : gen_helper_gvec_vcgd64;
+fn = gen_helper_gvec_vcgd64;
 break;
 case 0xc0:
-fn = se ? gen_helper_gvec_vclgd64s : gen_helper_gvec_vclgd64;
+fn = gen_helper_gvec_vclgd64;
 break;
 case 0xc7:
-fn = se ? gen_helper_gvec_vfi64s : gen_helper_gvec_vfi64;
+fn = gen_helper_gvec_vfi64;
 break;
 case 0xc5:
 fn = se ? gen_helper_gvec_vflr64s : gen_helper_gvec_vflr64;
@@ -2681,18 +2681,14 @@ static DisasJumpType op_vfsq(DisasContext *s, DisasOps 
*o)
 {
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
-gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfsq64;
 
 if (fpf != FPF_LONG || extract32(m4, 0, 3)) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
-if (extract32(m4, 3, 1)) {
-fn = gen_helper_gvec_vfsq64s;
-}
-gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
-   0, fn);
+gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4,
+   gen_helper_gvec_vfsq64);
 return DISAS_NEXT;
 }
 
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 280ee0f1ea..ab23a597da 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -88,7 +88,7 @@ static void s390_vec_write_float64(S390Vector *v, uint8_t 
enr, float64 data)
 return s390_vec_write_element64(v, enr, data);
 }
 
-typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s);
+typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XS

[PATCH v4 02/26] s390x/tcg: Fix instruction name for VECTOR FP LOAD (LENGTHENED|ROUNDED)

2021-06-08 Thread David Hildenbrand
Let's use the correct name.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/insn-data.def | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 0bb1886a2e..35a0086a85 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1245,9 +1245,9 @@
 F(0xe7e5, VFD, VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
 /* VECTOR LOAD FP INTEGER */
 F(0xe7c7, VFI, VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
-/* VECTOR LOAD LENGTHENED */
+/* VECTOR FP LOAD LENGTHENED */
 F(0xe7c4, VFLL,VRR_a, V,   0, 0, 0, 0, vfll, 0, IF_VEC)
-/* VECTOR LOAD ROUNDED */
+/* VECTOR FP LOAD ROUNDED */
 F(0xe7c5, VFLR,VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 /* VECTOR FP MULTIPLY */
 F(0xe7e7, VFM, VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
-- 
2.31.1




[PATCH v4 26/26] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2

2021-06-08 Thread David Hildenbrand
TCG implements everything we need to run basic z14 OS+software.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 hw/s390x/s390-virtio-ccw.c  |  3 +++
 target/s390x/cpu_models.c   |  4 ++--
 target/s390x/gen-features.c | 15 +--
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 7af27ca305..e4b18aef49 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -802,7 +802,10 @@ DEFINE_CCW_MACHINE(6_1, "6.1", true);
 
 static void ccw_machine_6_0_instance_options(MachineState *machine)
 {
+static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 };
+
 ccw_machine_6_1_instance_options(machine);
+s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat);
 }
 
 static void ccw_machine_6_0_class_options(MachineClass *mc)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 050dcf2d42..94090a6e22 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -90,8 +90,8 @@ static S390CPUDef s390_cpu_defs[] = {
 CPUDEF_INIT(0x8562, 15, 1, 47, 0x0800U, "gen15b", "IBM z15 T02 GA1"),
 };
 
-#define QEMU_MAX_CPU_TYPE 0x2964
-#define QEMU_MAX_CPU_GEN 13
+#define QEMU_MAX_CPU_TYPE 0x3906
+#define QEMU_MAX_CPU_GEN 14
 #define QEMU_MAX_CPU_EC_GA 2
 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
 static S390FeatBitmap qemu_max_cpu_feat;
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 219b1f9420..242c95ede4 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -706,23 +706,25 @@ static uint16_t qemu_V4_1[] = {
 S390_FEAT_VECTOR,
 };
 
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V6_0[] = {
 S390_FEAT_ACCESS_EXCEPTION_FS_INDICATION,
 S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2,
 S390_FEAT_ESOP,
 };
 
-/* add all new definitions before this point */
-static uint16_t qemu_MAX[] = {
-/* generates a dependency warning, leave it out for now */
-S390_FEAT_MSA_EXT_5,
-/* features introduced after the z13 */
+static uint16_t qemu_LATEST[] = {
 S390_FEAT_INSTRUCTION_EXEC_PROT,
 S390_FEAT_MISC_INSTRUCTION_EXT2,
 S390_FEAT_MSA_EXT_8,
 S390_FEAT_VECTOR_ENH,
 };
 
+/* add all new definitions before this point */
+static uint16_t qemu_MAX[] = {
+/* generates a dependency warning, leave it out for now */
+S390_FEAT_MSA_EXT_5,
+};
+
 /** END FEATURE DEFS **/
 
 #define _YEARS  "2016"
@@ -839,6 +841,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
 QEMU_FEAT_INITIALIZER(V3_1),
 QEMU_FEAT_INITIALIZER(V4_0),
 QEMU_FEAT_INITIALIZER(V4_1),
+QEMU_FEAT_INITIALIZER(V6_0),
 QEMU_FEAT_INITIALIZER(LATEST),
 QEMU_FEAT_INITIALIZER(MAX),
 };
-- 
2.31.1




[PATCH v4 06/26] s390x/tcg: Simplify vftci64() handling

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  1 -
 target/s390x/translate_vx.c.inc |  7 ++-
 target/s390x/vec_fpu_helper.c   | 29 +++--
 3 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 02a16924a7..e832680236 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -273,7 +273,6 @@ DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, 
cptr, cptr, cptr, en
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
-DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 604ae11024..1404471881 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2680,17 +2680,14 @@ static DisasJumpType op_vftci(DisasContext *s, DisasOps 
*o)
 const uint16_t i3 = get_field(s, i3);
 const uint8_t fpf = get_field(s, m4);
 const uint8_t m5 = get_field(s, m5);
-gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vftci64;
 
 if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
-if (extract32(m5, 3, 1)) {
-fn = gen_helper_gvec_vftci64s;
-}
-gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, i3, fn);
+gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
+   deposit32(m5, 4, 12, i3), gen_helper_gvec_vftci64);
 set_cc_static(s);
 return DISAS_NEXT;
 }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 01ee41d154..2ced6fcfaf 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -413,13 +413,15 @@ void HELPER(gvec_vfms64s)(void *v1, const void *v2, const 
void *v3,
 vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC());
 }
 
-static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
-   bool s, uint16_t i3)
+void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
+  uint32_t desc)
 {
+const uint16_t i3 = extract32(simd_data(desc), 4, 12);
+const bool s = extract32(simd_data(desc), 3, 1);
 int i, match = 0;
 
 for (i = 0; i < 2; i++) {
-float64 a = s390_vec_read_element64(v2, i);
+const float64 a = s390_vec_read_float64(v2, i);
 
 if (float64_dcmask(env, a) & i3) {
 match++;
@@ -432,20 +434,11 @@ static int vftci64(S390Vector *v1, const S390Vector *v2, 
CPUS390XState *env,
 }
 }
 
-if (match) {
-return s || match == 2 ? 0 : 1;
+if (match == 2 || (s && match)) {
+env->cc_op = 0;
+} else if (match) {
+env->cc_op = 1;
+} else {
+env->cc_op = 3;
 }
-return 3;
-}
-
-void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
-  uint32_t desc)
-{
-env->cc_op = vftci64(v1, v2, env, false, simd_data(desc));
-}
-
-void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env,
-   uint32_t desc)
-{
-env->cc_op = vftci64(v1, v2, env, true, simd_data(desc));
 }
-- 
2.31.1




[PATCH v4 09/26] s390x/tcg: Simplify vflr64() handling

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  1 -
 target/s390x/translate_vx.c.inc |  3 +--
 target/s390x/vec_fpu_helper.c   | 29 +++--
 3 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 63039c8d73..0cfb82ee8a 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -263,7 +263,6 @@ DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, 
cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
-DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 5ff59984b5..91e2967c49 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2531,7 +2531,6 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
 const uint8_t erm = get_field(s, m5);
-const bool se = extract32(m4, 3, 1);
 gen_helper_gvec_2_ptr *fn;
 
 if (fpf != FPF_LONG || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
@@ -2556,7 +2555,7 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
 fn = gen_helper_gvec_vfi64;
 break;
 case 0xc5:
-fn = se ? gen_helper_gvec_vflr64s : gen_helper_gvec_vflr64;
+fn = gen_helper_gvec_vflr64;
 break;
 default:
 g_assert_not_reached();
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 7bd3e44acc..7ca9c892f7 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -311,9 +311,12 @@ void HELPER(gvec_vfll32)(void *v1, const void *v2, 
CPUS390XState *env,
 *(S390Vector *)v1 = tmp;
 }
 
-static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
-   bool s, bool XxC, uint8_t erm, uintptr_t retaddr)
+void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
 {
+const uint8_t erm = extract32(simd_data(desc), 4, 4);
+const bool s = extract32(simd_data(desc), 3, 1);
+const bool XxC = extract32(simd_data(desc), 2, 1);
 uint8_t vxc, vec_exc = 0;
 S390Vector tmp = {};
 int i, old_mode;
@@ -332,26 +335,8 @@ static void vflr64(S390Vector *v1, const S390Vector *v2, 
CPUS390XState *env,
 }
 }
 s390_restore_bfp_rounding_mode(env, old_mode);
-handle_ieee_exc(env, vxc, vec_exc, retaddr);
-*v1 = tmp;
-}
-
-void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
- uint32_t desc)
-{
-const uint8_t erm = extract32(simd_data(desc), 4, 4);
-const bool XxC = extract32(simd_data(desc), 2, 1);
-
-vflr64(v1, v2, env, false, XxC, erm, GETPC());
-}
-
-void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env,
-  uint32_t desc)
-{
-const uint8_t erm = extract32(simd_data(desc), 4, 4);
-const bool XxC = extract32(simd_data(desc), 2, 1);
-
-vflr64(v1, v2, env, true, XxC, erm, GETPC());
+handle_ieee_exc(env, vxc, vec_exc, GETPC());
+*(S390Vector *)v1 = tmp;
 }
 
 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
-- 
2.31.1




[PATCH v4 07/26] s390x/tcg: Simplify vfma64() handling

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  2 --
 target/s390x/translate_vx.c.inc |  8 +++
 target/s390x/vec_fpu_helper.c   | 42 +
 3 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index e832680236..3c87593553 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -267,9 +267,7 @@ DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
-DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
-DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 1404471881..4b5bf0a7e3 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2589,7 +2589,6 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
 {
 const uint8_t m5 = get_field(s, m5);
 const uint8_t fpf = get_field(s, m6);
-const bool se = extract32(m5, 3, 1);
 gen_helper_gvec_4_ptr *fn;
 
 if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
@@ -2598,13 +2597,12 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps 
*o)
 }
 
 if (s->fields.op2 == 0x8f) {
-fn = se ? gen_helper_gvec_vfma64s : gen_helper_gvec_vfma64;
+fn = gen_helper_gvec_vfma64;
 } else {
-fn = se ? gen_helper_gvec_vfms64s : gen_helper_gvec_vfms64;
+fn = gen_helper_gvec_vfms64;
 }
 gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
-   get_field(s, v3), get_field(s, v4), cpu_env,
-   0, fn);
+   get_field(s, v3), get_field(s, v4), cpu_env, m5, fn);
 return DISAS_NEXT;
 }
 
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 2ced6fcfaf..23b38df158 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -374,12 +374,12 @@ static void vfma64(S390Vector *v1, const S390Vector *v2, 
const S390Vector *v3,
 int i;
 
 for (i = 0; i < 2; i++) {
-const uint64_t a = s390_vec_read_element64(v2, i);
-const uint64_t b = s390_vec_read_element64(v3, i);
-const uint64_t c = s390_vec_read_element64(v4, i);
-uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status);
+const float64 a = s390_vec_read_float64(v2, i);
+const float64 b = s390_vec_read_float64(v3, i);
+const float64 c = s390_vec_read_float64(v4, i);
+const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
 
-s390_vec_write_element64(&tmp, i, ret);
+s390_vec_write_float64(&tmp, i, ret);
 vxc = check_ieee_exc(env, i, false, &vec_exc);
 if (s || vxc) {
 break;
@@ -389,29 +389,21 @@ static void vfma64(S390Vector *v1, const S390Vector *v2, 
const S390Vector *v3,
 *v1 = tmp;
 }
 
-void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3,
- const void *v4, CPUS390XState *env, uint32_t desc)
-{
-vfma64(v1, v2, v3, v4, env, false, 0, GETPC());
-}
-
-void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3,
- const void *v4, CPUS390XState *env, uint32_t desc)
-{
-vfma64(v1, v2, v3, v4, env, true, 0, GETPC());
+#define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \
+void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
+   const void *v4, CPUS390XState *env, \
+   uint32_t desc) \
+{ \
+const bool se = extract32(simd_data(desc), 3, 1); \
+ \
+vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \
 }
 
-void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3,
- const void *v4, CPUS390XState *env, uint32_t desc)
-{
-vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC());
-}
+#define DEF_GVEC_VFMA(NAME, FLAGS) \
+DEF_GVEC_VFMA_B(NAME, FLAGS, 64)
 
-void HELPER(gvec_vfms64s)(void *v1, const void *v2, const voi

[PATCH v4 11/26] s390x/tcg: Implement VECTOR BIT PERMUTE

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  1 +
 target/s390x/insn-data.def  |  2 ++
 target/s390x/translate_vx.c.inc |  8 
 target/s390x/vec_helper.c   | 22 ++
 4 files changed, 33 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 0cfb82ee8a..e99c9643eb 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -126,6 +126,7 @@ DEF_HELPER_FLAGS_1(stck, TCG_CALL_NO_RWG_SE, i64, env)
 DEF_HELPER_FLAGS_3(probe_write_access, TCG_CALL_NO_WG, void, env, i64, i64)
 
 /* === Vector Support Instructions === */
+DEF_HELPER_FLAGS_4(gvec_vbperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(vll, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
 DEF_HELPER_FLAGS_4(gvec_vpk16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vpk32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 35a0086a85..1634a6bc5a 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -989,6 +989,8 @@
 
 /* === Vector Support Instructions === */
 
+/* VECTOR BIT PERMUTE */
+E(0xe785, VBPERM,  VRR_c, VE,  0, 0, 0, 0, vbperm, 0, 0, IF_VEC)
 /* VECTOR GATHER ELEMENT */
 E(0xe713, VGEF,VRV,   V,   la2, 0, 0, 0, vge, 0, ES_32, IF_VEC)
 E(0xe712, VGEG,VRV,   V,   la2, 0, 0, 0, vge, 0, ES_64, IF_VEC)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 91e2967c49..96283d4ddb 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -327,6 +327,14 @@ static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, 
TCGv_i64 al, TCGv_i64 ah,
 tcg_temp_free_i64(bh);
 }
 
+static DisasJumpType op_vbperm(DisasContext *s, DisasOps *o)
+{
+gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), get_field(s, v3), 0,
+   gen_helper_gvec_vbperm);
+
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
 {
 const uint8_t es = s->insn->data;
diff --git a/target/s390x/vec_helper.c b/target/s390x/vec_helper.c
index 986e7cc825..599bab06bd 100644
--- a/target/s390x/vec_helper.c
+++ b/target/s390x/vec_helper.c
@@ -19,6 +19,28 @@
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
 
+void HELPER(gvec_vbperm)(void *v1, const void *v2, const void *v3,
+ uint32_t desc)
+{
+S390Vector tmp = {};
+uint16_t result = 0;
+int i;
+
+for (i = 0; i < 16; i++) {
+const uint8_t bit_nr = s390_vec_read_element8(v3, i);
+uint16_t bit;
+
+if (bit_nr >= 128) {
+continue;
+}
+bit = (s390_vec_read_element8(v2, bit_nr / 8)
+   >> (7 - (bit_nr % 8))) & 1;
+result |= (bit << (15 - i));
+}
+s390_vec_write_element16(&tmp, 3, result);
+*(S390Vector *)v1 = tmp;
+}
+
 void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
 {
 if (likely(bytes >= 16)) {
-- 
2.31.1




[PATCH v4 10/26] s390x/tcg: Simplify wfc64() handling

2021-06-08 Thread David Hildenbrand
... and prepare for 32/128 bit support.
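
As an illustration only (this expansion is implied by the hunk below and is
not itself part of the patch), the new DEF_GVEC_WFC macro generates the same
helper body that is being removed, e.g. for the existing 64-bit case:

void HELPER(gvec_wfc64)(const void *v1, const void *v2,
                        CPUS390XState *env, uint32_t desc)
{
    env->cc_op = wfc64(v1, v2, env, false, GETPC());
}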

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/vec_fpu_helper.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 7ca9c892f7..4af59ea66c 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -201,8 +201,8 @@ static int wfc64(const S390Vector *v1, const S390Vector *v2,
  CPUS390XState *env, bool signal, uintptr_t retaddr)
 {
 /* only the zero-indexed elements are compared */
-const float64 a = s390_vec_read_element64(v1, 0);
-const float64 b = s390_vec_read_element64(v2, 0);
+const float64 a = s390_vec_read_float64(v1, 0);
+const float64 b = s390_vec_read_float64(v2, 0);
 uint8_t vxc, vec_exc = 0;
 int cmp;
 
@@ -217,17 +217,18 @@ static int wfc64(const S390Vector *v1, const S390Vector 
*v2,
 return float_comp_to_cc(env, cmp);
 }
 
-void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env,
-uint32_t desc)
-{
-env->cc_op = wfc64(v1, v2, env, false, GETPC());
+#define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
+void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \
+   CPUS390XState *env, uint32_t desc) \
+{ \
+env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \
 }
 
-void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env,
-uint32_t desc)
-{
-env->cc_op = wfc64(v1, v2, env, true, GETPC());
-}
+#define DEF_GVEC_WFC(NAME, SIGNAL) \
+ DEF_GVEC_WFC_B(NAME, SIGNAL, 64)
+
+DEF_GVEC_WFC(wfc, false)
+DEF_GVEC_WFC(wfk, true)
 
 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
-- 
2.31.1




[PATCH v4 14/26] s390x/tcg: Implement 32/128 bit for VECTOR (LOAD FP INTEGER|FP SQUARE ROOT)

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  4 ++
 target/s390x/translate_vx.c.inc | 74 ++---
 target/s390x/vec_fpu_helper.c   | 46 +++-
 3 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 2d5e382e61..28797a6ccc 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -265,7 +265,9 @@ DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_4(gvec_vfi32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfi128, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
@@ -273,7 +275,9 @@ DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, 
cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq128, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 0fbd914b40..6241279e68 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2654,35 +2654,63 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps 
*o)
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
 const uint8_t erm = get_field(s, m5);
-gen_helper_gvec_2_ptr *fn;
+gen_helper_gvec_2_ptr *fn = NULL;
 
-if (fpf != FPF_LONG || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
-gen_program_exception(s, PGM_SPECIFICATION);
-return DISAS_NORETURN;
-}
 
 switch (s->fields.op2) {
 case 0xc3:
-fn = gen_helper_gvec_vcdg64;
+if (fpf == FPF_LONG) {
+fn = gen_helper_gvec_vcdg64;
+}
 break;
 case 0xc1:
-fn = gen_helper_gvec_vcdlg64;
+if (fpf == FPF_LONG) {
+fn = gen_helper_gvec_vcdlg64;
+}
 break;
 case 0xc2:
-fn = gen_helper_gvec_vcgd64;
+if (fpf == FPF_LONG) {
+fn = gen_helper_gvec_vcgd64;
+}
 break;
 case 0xc0:
-fn = gen_helper_gvec_vclgd64;
+if (fpf == FPF_LONG) {
+fn = gen_helper_gvec_vclgd64;
+}
 break;
 case 0xc7:
-fn = gen_helper_gvec_vfi64;
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfi32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfi64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfi128;
+}
+break;
+default:
+break;
+}
 break;
 case 0xc5:
-fn = gen_helper_gvec_vflr64;
+if (fpf == FPF_LONG) {
+fn = gen_helper_gvec_vflr64;
+}
 break;
 default:
 g_assert_not_reached();
 }
+
+if (!fn || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
 gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
deposit32(m4, 4, 4, erm), fn);
 return DISAS_NEXT;
@@ -2780,14 +2808,32 @@ static DisasJumpType op_vfsq(DisasContext *s, DisasOps 
*o)
 {
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
+gen_helper_gvec_2_ptr *fn = NULL;
+
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfsq32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfsq64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfsq12

[PATCH v4 16/26] s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE (AND SIGNAL) SCALAR

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  4 +++
 target/s390x/translate_vx.c.inc | 38 ++--
 target/s390x/vec_fpu_helper.c   | 44 -
 3 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 146836126c..dca436f710 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -250,8 +250,12 @@ DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_4(gvec_wfc32, void, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfk32, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfc128, void, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfk128, void, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfce32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfce32_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 6f6ef6b6b8..822a9d0513 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2598,19 +2598,41 @@ static DisasJumpType op_wfc(DisasContext *s, DisasOps 
*o)
 {
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
+gen_helper_gvec_2_ptr *fn = NULL;
 
-if (fpf != FPF_LONG || m4) {
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_wfk32;
+if (s->fields.op2 == 0xcb) {
+fn = gen_helper_gvec_wfc32;
+}
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_wfk64;
+if (s->fields.op2 == 0xcb) {
+fn = gen_helper_gvec_wfc64;
+}
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_wfk128;
+if (s->fields.op2 == 0xcb) {
+fn = gen_helper_gvec_wfc128;
+}
+}
+break;
+default:
+break;
+};
+
+if (!fn || m4) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
-if (s->fields.op2 == 0xcb) {
-gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
-   cpu_env, 0, gen_helper_gvec_wfc64);
-} else {
-gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
-   cpu_env, 0, gen_helper_gvec_wfk64);
-}
+gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, 0, fn);
 set_cc_static(s);
 return DISAS_NEXT;
 }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 67dcd8b50a..fba5261ac4 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -307,6 +307,26 @@ DEF_GVEC_VOP3(vfs, sub)
 DEF_GVEC_VOP3(vfd, div)
 DEF_GVEC_VOP3(vfm, mul)
 
+static int wfc32(const S390Vector *v1, const S390Vector *v2,
+ CPUS390XState *env, bool signal, uintptr_t retaddr)
+{
+/* only the zero-indexed elements are compared */
+const float32 a = s390_vec_read_float32(v1, 0);
+const float32 b = s390_vec_read_float32(v2, 0);
+uint8_t vxc, vec_exc = 0;
+int cmp;
+
+if (signal) {
+cmp = float32_compare(a, b, &env->fpu_status);
+} else {
+cmp = float32_compare_quiet(a, b, &env->fpu_status);
+}
+vxc = check_ieee_exc(env, 0, false, &vec_exc);
+handle_ieee_exc(env, vxc, vec_exc, retaddr);
+
+return float_comp_to_cc(env, cmp);
+}
+
 static int wfc64(const S390Vector *v1, const S390Vector *v2,
  CPUS390XState *env, bool signal, uintptr_t retaddr)
 {
@@ -327,6 +347,26 @@ static int wfc64(const S390Vector *v1, const S390Vector 
*v2,
 return float_comp_to_cc(env, cmp);
 }
 
+static int wfc128(const S390Vector *v1, const S390Vector *v2,
+  CPUS390XState *env, bool signal, uintptr_t retaddr)
+{
+/* only the zero-indexed elements are compared */
+const float128 a = s390_vec_read_float128(v1);
+const float128 b = s390_vec_read_float128(v2);
+uint8_t vxc, vec_exc = 0;
+int cmp;
+
+if (signal) {
+cmp = float128_compare(a, b, &env->fpu_status);
+} else {
+cmp = float128_compare_quiet(a, b, &env->fpu_status);
+}
+vxc = check_ieee_exc(env, 0, false, &vec_exc);
+handle_ieee_exc(env, vxc, vec_exc, retaddr);
+
+return float_comp_to_cc(env, cmp);
+}
+
 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
 void HELPER(gvec_##NAME##BITS)(const void

[PATCH v4 13/26] s390x/tcg: Implement 32/128 bit for VECTOR FP (ADD|DIVIDE|MULTIPLY|SUBTRACT)

2021-06-08 Thread David Hildenbrand
In the 128-bit case there is always a single element. Add new helpers for
reading/writing 32 and 128 bit floats.
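
The exact definitions of the new accessors are not shown in the quoted diff;
one plausible sketch, assuming they sit on top of the existing 64-bit element
accessors and softfloat's make_float128():

static inline float128 s390_vec_read_float128(const S390Vector *v)
{
    /* element 0 holds the high doubleword, element 1 the low doubleword */
    return make_float128(s390_vec_read_element64(v, 0),
                         s390_vec_read_element64(v, 1));
}

static inline void s390_vec_write_float128(S390Vector *v, float128 data)
{
    s390_vec_write_element64(v, 0, data.high);
    s390_vec_write_element64(v, 1, data.low);
}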

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  8 
 target/s390x/translate_vx.c.inc | 85 +
 target/s390x/vec_fpu_helper.c   | 74 ++--
 3 files changed, 153 insertions(+), 14 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index e99c9643eb..2d5e382e61 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -247,7 +247,9 @@ DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, 
cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
 
 /* === Vector Floating-Point Instructions */
+DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfa128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
@@ -260,15 +262,21 @@ DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, 
ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 6e75b40eb8..0fbd914b40 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2501,29 +2501,94 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps 
*o)
 {
 const uint8_t fpf = get_field(s, m4);
 const uint8_t m5 = get_field(s, m5);
-gen_helper_gvec_3_ptr *fn;
-
-if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
-gen_program_exception(s, PGM_SPECIFICATION);
-return DISAS_NORETURN;
-}
+gen_helper_gvec_3_ptr *fn = NULL;
 
 switch (s->fields.op2) {
 case 0xe3:
-fn = gen_helper_gvec_vfa64;
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfa32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfa64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfa128;
+}
+break;
+default:
+break;
+}
 break;
 case 0xe5:
-fn = gen_helper_gvec_vfd64;
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfd32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfd64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfd128;
+}
+break;
+default:
+break;
+}
 break;
 case 0xe7:
-fn = gen_helper_gvec_vfm64;
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfm32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfm64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfm128;
+ 

[PATCH v4 12/26] s390x/tcg: Implement VECTOR MULTIPLY SUM LOGICAL

2021-06-08 Thread David Hildenbrand
Fortunately, we only need the Doubleword implementation.
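
For orientation, a plain-C sketch of what the doubleword variant computes,
mirroring the TCG sequence in the hunk below (illustration only; it assumes
a compiler with unsigned __int128 support and is not part of the patch):

static inline unsigned __int128 vmsl64(uint64_t v2_0, uint64_t v2_1,
                                       uint64_t v3_0, uint64_t v3_1,
                                       unsigned __int128 v4,
                                       bool even_x2, bool odd_x2)
{
    /* 64x64 -> 128 bit products of the even and the odd elements */
    unsigned __int128 even = (unsigned __int128)v2_0 * v3_0;
    unsigned __int128 odd = (unsigned __int128)v2_1 * v3_1;

    /* optionally double each product, as selected by the m6 bits */
    if (even_x2) {
        even += even;
    }
    if (odd_x2) {
        odd += odd;
    }
    /* sum of both products plus the 128-bit third operand */
    return even + odd + v4;
}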

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/insn-data.def  |  2 ++
 target/s390x/translate_vx.c.inc | 50 +
 2 files changed, 52 insertions(+)

diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 1634a6bc5a..1a3ae7e7e7 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1151,6 +1151,8 @@
 F(0xe7a7, VMO, VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
 /* VECTOR MULTIPLY LOGICAL ODD */
 F(0xe7a5, VMLO,VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
+/* VECTOR MULTIPLY SUM LOGICAL */
+F(0xe7b8, VMSL,VRR_d, VE,  0, 0, 0, 0, vmsl, 0, IF_VEC)
 /* VECTOR NAND */
 F(0xe76e, VNN, VRR_c, VE,  0, 0, 0, 0, vnn, 0, IF_VEC)
 /* VECTOR NOR */
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 96283d4ddb..6e75b40eb8 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -1779,6 +1779,56 @@ static DisasJumpType op_vm(DisasContext *s, DisasOps *o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vmsl(DisasContext *s, DisasOps *o)
+{
+TCGv_i64 l1, h1, l2, h2;
+
+if (get_field(s, m4) != ES_64) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+l1 = tcg_temp_new_i64();
+h1 = tcg_temp_new_i64();
+l2 = tcg_temp_new_i64();
+h2 = tcg_temp_new_i64();
+
+/* Multiply both even elements from v2 and v3 */
+read_vec_element_i64(l1, get_field(s, v2), 0, ES_64);
+read_vec_element_i64(h1, get_field(s, v3), 0, ES_64);
+tcg_gen_mulu2_i64(l1, h1, l1, h1);
+/* Shift result left by one (x2) if requested */
+if (extract32(get_field(s, m6), 3, 1)) {
+tcg_gen_add2_i64(l1, h1, l1, h1, l1, h1);
+}
+
+/* Multiply both odd elements from v2 and v3 */
+read_vec_element_i64(l2, get_field(s, v2), 1, ES_64);
+read_vec_element_i64(h2, get_field(s, v3), 1, ES_64);
+tcg_gen_mulu2_i64(l2, h2, l2, h2);
+/* Shift result left by one (x2) if requested */
+if (extract32(get_field(s, m6), 2, 1)) {
+tcg_gen_add2_i64(l2, h2, l2, h2, l2, h2);
+}
+
+/* Add both intermediate results */
+tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);
+/* Add whole v4 */
+read_vec_element_i64(h2, get_field(s, v4), 0, ES_64);
+read_vec_element_i64(l2, get_field(s, v4), 1, ES_64);
+tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);
+
+/* Store final result into v1. */
+write_vec_element_i64(h1, get_field(s, v1), 0, ES_64);
+write_vec_element_i64(l1, get_field(s, v1), 1, ES_64);
+
+tcg_temp_free_i64(l1);
+tcg_temp_free_i64(h1);
+tcg_temp_free_i64(l2);
+tcg_temp_free_i64(h2);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vnn(DisasContext *s, DisasOps *o)
 {
 gen_gvec_fn_3(nand, ES_8, get_field(s, v1),
-- 
2.31.1




[PATCH v4 19/26] s390x/tcg: Implement 32/128 bit for VECTOR FP PERFORM SIGN OPERATION

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/translate_vx.c.inc | 106 ++--
 1 file changed, 73 insertions(+), 33 deletions(-)

diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index e94c9f9d86..4d1ccb4159 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2842,48 +2842,88 @@ static DisasJumpType op_vfpso(DisasContext *s, DisasOps 
*o)
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
 const uint8_t m5 = get_field(s, m5);
+const bool se = extract32(m4, 3, 1);
 TCGv_i64 tmp;
 
-if (fpf != FPF_LONG || extract32(m4, 0, 3) || m5 > 2) {
+if ((fpf != FPF_LONG && !s390_has_feat(S390_FEAT_VECTOR_ENH)) ||
+extract32(m4, 0, 3) || m5 > 2) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
-if (extract32(m4, 3, 1)) {
-tmp = tcg_temp_new_i64();
-read_vec_element_i64(tmp, v2, 0, ES_64);
-switch (m5) {
-case 0:
-/* sign bit is inverted (complement) */
-tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
-break;
-case 1:
-/* sign bit is set to one (negative) */
-tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
-break;
-case 2:
-/* sign bit is set to zero (positive) */
-tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
-break;
+switch (fpf) {
+case FPF_SHORT:
+if (!se) {
+switch (m5) {
+case 0:
+/* sign bit is inverted (complement) */
+gen_gvec_fn_2i(xori, ES_32, v1, v2, 1ull << 31);
+break;
+case 1:
+/* sign bit is set to one (negative) */
+gen_gvec_fn_2i(ori, ES_32, v1, v2, 1ull << 31);
+break;
+case 2:
+/* sign bit is set to zero (positive) */
+gen_gvec_fn_2i(andi, ES_32, v1, v2, (1ull << 31) - 1);
+break;
+}
+return DISAS_NEXT;
 }
-write_vec_element_i64(tmp, v1, 0, ES_64);
-tcg_temp_free_i64(tmp);
-} else {
-switch (m5) {
-case 0:
-/* sign bit is inverted (complement) */
-gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
-break;
-case 1:
-/* sign bit is set to one (negative) */
-gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
-break;
-case 2:
-/* sign bit is set to zero (positive) */
-gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
-break;
+break;
+case FPF_LONG:
+if (!se) {
+switch (m5) {
+case 0:
+/* sign bit is inverted (complement) */
+gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
+break;
+case 1:
+/* sign bit is set to one (negative) */
+gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
+break;
+case 2:
+/* sign bit is set to zero (positive) */
+gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
+break;
+}
+return DISAS_NEXT;
 }
+break;
+case FPF_EXT:
+/* Only a single element. */
+break;
+default:
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
 }
+
+/* With a single element, we are only interested in bit 0. */
+tmp = tcg_temp_new_i64();
+read_vec_element_i64(tmp, v2, 0, ES_64);
+switch (m5) {
+case 0:
+/* sign bit is inverted (complement) */
+tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
+break;
+case 1:
+/* sign bit is set to one (negative) */
+tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
+break;
+case 2:
+/* sign bit is set to zero (positive) */
+tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
+break;
+}
+write_vec_element_i64(tmp, v1, 0, ES_64);
+
+if (fpf == FPF_EXT) {
+read_vec_element_i64(tmp, v2, 1, ES_64);
+write_vec_element_i64(tmp, v1, 1, ES_64);
+}
+
+tcg_temp_free_i64(tmp);
+
 return DISAS_NEXT;
 }
 
-- 
2.31.1




[PATCH v4 15/26] s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE *

2021-06-08 Thread David Hildenbrand
In addition to the 32/128-bit variants, we also have to support the
"Signal-on-QNaN (SQ)" bit.

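As a sketch only (not part of the patch, and the helper name is made up for
illustration), the SQ bit boils down to picking the signaling rather than
the quiet softfloat comparison, the same kind of entry points already used
by wfc64(); a signaling compare raises the IEEE invalid exception for quiet
NaN operands, the quiet variant does not:

static bool vfce32_eq(float32 a, float32 b, bool sq, float_status *s)
{
    FloatRelation cmp = sq ? float32_compare(a, b, s)
                           : float32_compare_quiet(a, b, s);

    return cmp == float_relation_equal;
}
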
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   | 12 +++
 target/s390x/translate_vx.c.inc | 57 -
 target/s390x/vec_fpu_helper.c   | 64 +++--
 3 files changed, 121 insertions(+), 12 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 28797a6ccc..146836126c 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -252,12 +252,24 @@ DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, 
cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_5(gvec_vfce32_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_5(gvec_vfce128_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_5(gvec_vfch32_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_5(gvec_vfch128_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_5(gvec_vfche32_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_5(gvec_vfche128_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 6241279e68..6f6ef6b6b8 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2621,26 +2621,65 @@ static DisasJumpType op_vfc(DisasContext *s, DisasOps 
*o)
 const uint8_t m5 = get_field(s, m5);
 const uint8_t m6 = get_field(s, m6);
 const bool cs = extract32(m6, 0, 1);
-gen_helper_gvec_3_ptr *fn;
-
-if (fpf != FPF_LONG || extract32(m5, 0, 3) || extract32(m6, 1, 3)) {
-gen_program_exception(s, PGM_SPECIFICATION);
-return DISAS_NORETURN;
-}
+const bool sq = extract32(m5, 2, 1);
+gen_helper_gvec_3_ptr *fn = NULL;
 
 switch (s->fields.op2) {
 case 0xe8:
-fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64;
+switch (fpf) {
+case FPF_SHORT:
+fn = cs ? gen_helper_gvec_vfce32_cc : gen_helper_gvec_vfce32;
+break;
+case FPF_LONG:
+fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64;
+break;
+case FPF_EXT:
+fn = cs ? gen_helper_gvec_vfce128_cc : gen_helper_gvec_vfce128;
+break;
+default:
+break;
+}
 break;
 case 0xeb:
-fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64;
+switch (fpf) {
+case FPF_SHORT:
+fn = cs ? gen_helper_gvec_vfch32_cc : gen_helper_gvec_vfch32;
+break;
+case FPF_LONG:
+fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64;
+break;
+case FPF_EXT:
+fn = cs ? gen_helper_gvec_vfch128_cc : gen_helper_gvec_vfch128;
+break;
+default:
+break;
+}
 break;
 case 0xea:
-fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64;
+switch (fpf) {
+case FPF_SHORT:
+fn = cs ? gen_helper_gvec_vfche32_cc : gen_helper_gvec_vfche32;
+break;
+case FPF_LONG:
+fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64;
+break;
+case FPF_EXT:
+fn = cs ? gen_helper_gvec_vfche128_cc : gen_helper_gvec_vfche128;
+break;
+default:
+break;
+}
 break;
 default:
 g_assert_not_reached();
 }
+
+if (!fn || extract32(m5, 0, 2) || extract32(m6, 1, 3) ||
+(!s390_has_feat(S390_FEAT_VECTOR_ENH) && (fpf != FPF_LONG || sq))) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS

[PATCH v4 23/26] s390x/tcg: Implement VECTOR FP (MAXIMUM|MINIMUM)

2021-06-08 Thread David Hildenbrand
For the IEEE functions, we can reuse the softfloat implementations. For the
other functions, implement them generically for 32/64/128 bit, carefully
taking care of all the weird special cases according to the tables defined
in the PoP.
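
As a sketch under the assumption that the IEEE MaxNum flavour is selected
(the dispatch on the M6 mode and the non-IEEE flavours from the PoP tables
are omitted, and the helper name is purely illustrative), the 64-bit
per-element operation can simply defer to softfloat:

static float64 vfmax64_ieee(float64 a, float64 b, CPUS390XState *env)
{
    /* IEEE 754-2008 maxNum(): prefers the number if one operand is a QNaN */
    return float64_maxnum(a, b, &env->fpu_status);
}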

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |   6 +
 target/s390x/insn-data.def  |   4 +
 target/s390x/internal.h |   9 +
 target/s390x/translate_vx.c.inc |  44 +
 target/s390x/vec_fpu_helper.c   | 328 
 5 files changed, 391 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 913967ce4e..ba045f559d 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -291,6 +291,12 @@ DEF_HELPER_FLAGS_4(gvec_vflr128, TCG_CALL_NO_WG, void, 
ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_5(gvec_vfmax32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_5(gvec_vfmax64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_5(gvec_vfmax128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_5(gvec_vfmin32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_5(gvec_vfmin64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_5(gvec_vfmin128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
 DEF_HELPER_FLAGS_6(gvec_vfma32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 19b02dffca..3e5594210c 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1253,6 +1253,10 @@
 F(0xe7c4, VFLL,VRR_a, V,   0, 0, 0, 0, vfll, 0, IF_VEC)
 /* VECTOR FP LOAD ROUNDED */
 F(0xe7c5, VFLR,VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP MAXIMUM */
+F(0xe7ef, VFMAX,   VRR_c, VE,  0, 0, 0, 0, vfmax, 0, IF_VEC)
+/* VECTOR FP MINIMUM */
+F(0xe7ee, VFMIN,   VRR_c, VE,  0, 0, 0, 0, vfmax, 0, IF_VEC)
 /* VECTOR FP MULTIPLY */
 F(0xe7e7, VFM, VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
 /* VECTOR FP MULTIPLY AND ADD */
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 11515bb617..d62dfc4dc6 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -288,6 +288,15 @@ uint8_t s390_softfloat_exc_to_ieee(unsigned int exc);
 int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3);
 void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode);
 int float_comp_to_cc(CPUS390XState *env, int float_compare);
+
+#define DCMASK_ZERO 0x0c00
+#define DCMASK_NORMAL   0x0300
+#define DCMASK_SUBNORMAL0x00c0
+#define DCMASK_INFINITY 0x0030
+#define DCMASK_QUIET_NAN0x000c
+#define DCMASK_SIGNALING_NAN0x0003
+#define DCMASK_NAN  0x000f
+#define DCMASK_NEGATIVE 0x0555
 uint16_t float32_dcmask(CPUS390XState *env, float32 f1);
 uint16_t float64_dcmask(CPUS390XState *env, float64 f1);
 uint16_t float128_dcmask(CPUS390XState *env, float128 f1);
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 200d83e783..a9d51b1f4c 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2814,6 +2814,50 @@ static DisasJumpType op_vfll(DisasContext *s, DisasOps 
*o)
 return DISAS_NEXT;
 }
 
+static DisasJumpType op_vfmax(DisasContext *s, DisasOps *o)
+{
+const uint8_t fpf = get_field(s, m4);
+const uint8_t m6 = get_field(s, m6);
+const uint8_t m5 = get_field(s, m5);
+gen_helper_gvec_3_ptr *fn;
+
+if (m6 == 5 || m6 == 6 || m6 == 7 || m6 > 13) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+switch (fpf) {
+case FPF_SHORT:
+if (s->fields.op2 == 0xef) {
+fn = gen_helper_gvec_vfmax32;
+} else {
+fn = gen_helper_gvec_vfmin32;
+}
+break;
+case FPF_LONG:
+if (s->fields.op2 == 0xef) {
+fn = gen_helper_gvec_vfmax64;
+} else {
+fn = gen_helper_gvec_vfmin64;
+}
+break;
+case FPF_EXT:
+if (s->fields.op2 == 0xef) {
+fn = gen_helper_gvec_vfmax128;
+} else {
+fn = gen_helper_gvec_vfmin128;
+}
+break;
+default:
+gen_program_exception(s, PGM_SPECIFICATION);
+return DISAS_NORETURN;
+}
+
+gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
+   cpu_env, deposit32(m5, 4, 4, m6), fn);
+return DISAS_NEXT;
+}
+
 static DisasJumpType op_vf

[PATCH v4 21/26] s390x/tcg: Implement 32/128 bit for VECTOR FP MULTIPLY AND (ADD|SUBTRACT)

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  4 +++
 target/s390x/translate_vx.c.inc | 47 -
 target/s390x/vec_fpu_helper.c   | 44 +-
 3 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index bae73b9a56..2366756063 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -291,8 +291,12 @@ DEF_HELPER_FLAGS_4(gvec_vflr128, TCG_CALL_NO_WG, void, 
ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_FLAGS_6(gvec_vfma32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq128, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 765f75df9c..17d41b178f 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2818,18 +2818,51 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps 
*o)
 {
 const uint8_t m5 = get_field(s, m5);
 const uint8_t fpf = get_field(s, m6);
-gen_helper_gvec_4_ptr *fn;
+gen_helper_gvec_4_ptr *fn = NULL;
 
-if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+if (s->fields.op2 == 0x8f) {
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfma32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfma64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfma128;
+}
+break;
+default:
+break;
+}
+} else {
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfms32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfms64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfms128;
+}
+break;
+default:
+break;
+}
+}
+
+if (!fn || extract32(m5, 0, 3)) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
-if (s->fields.op2 == 0x8f) {
-fn = gen_helper_gvec_vfma64;
-} else {
-fn = gen_helper_gvec_vfms64;
-}
 gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
get_field(s, v3), get_field(s, v4), cpu_env, m5, fn);
 return DISAS_NEXT;
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 6984f770ff..29ccc608dc 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -582,6 +582,30 @@ void HELPER(gvec_vflr128)(void *v1, const void *v2, 
CPUS390XState *env,
 s390_vec_write_float64(v1, 0, ret);
 }
 
+static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
+   uintptr_t retaddr)
+{
+uint8_t vxc, vec_exc = 0;
+S390Vector tmp = {};
+int i;
+
+for (i = 0; i < 4; i++) {
+const float32 a = s390_vec_read_float32(v2, i);
+const float32 b = s390_vec_read_float32(v3, i);
+const float32 c = s390_vec_read_float32(v4, i);
+float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status);
+
+s390_vec_write_float32(&tmp, i, ret);
+vxc = check_ieee_exc(env, i, false, &vec_exc);
+if (s || vxc) {
+break;
+}
+}
+handle_ieee_exc(env, vxc, vec_exc, retaddr);
+*v1 = tmp;
+}
+
 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
const S390Vector *v4, CPUS390XState *env, bool s, int flags,
uintptr_t retaddr)
@@ -606,6 +630,22 @@ static void vfma64(S390Vector *v1, const S390Vector *v2, 
const S390Vector *v3,
 *v1 = tmp;
 }
 
+static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector

[PATCH v4 17/26] s390x/tcg: Implement 64 bit for VECTOR FP LOAD LENGTHENED

2021-06-08 Thread David Hildenbrand
64 bit -> 128 bit, there is only a single final element.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  1 +
 target/s390x/translate_vx.c.inc | 19 ---
 target/s390x/vec_fpu_helper.c   | 13 +
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index dca436f710..b5ba159402 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -285,6 +285,7 @@ DEF_HELPER_FLAGS_4(gvec_vfi32, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi128, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfll64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 822a9d0513..472afca45e 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2781,14 +2781,27 @@ static DisasJumpType op_vfll(DisasContext *s, DisasOps 
*o)
 {
 const uint8_t fpf = get_field(s, m3);
 const uint8_t m4 = get_field(s, m4);
+gen_helper_gvec_2_ptr *fn = NULL;
 
-if (fpf != FPF_SHORT || extract32(m4, 0, 3)) {
+switch (fpf) {
+case FPF_SHORT:
+fn = gen_helper_gvec_vfll32;
+break;
+case FPF_LONG:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vfll64;
+}
+break;
+default:
+break;
+}
+
+if (!fn || extract32(m4, 0, 3)) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
-gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
-   m4, gen_helper_gvec_vfll32);
+gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
 return DISAS_NEXT;
 }
 
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index fba5261ac4..75e3212582 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -522,6 +522,19 @@ void HELPER(gvec_vfll32)(void *v1, const void *v2, 
CPUS390XState *env,
 *(S390Vector *)v1 = tmp;
 }
 
+void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+/* load from even element */
+const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
+ &env->fpu_status);
+uint8_t vxc, vec_exc = 0;
+
+vxc = check_ieee_exc(env, 0, false, &vec_exc);
+handle_ieee_exc(env, vxc, vec_exc, GETPC());
+s390_vec_write_float128(v1, ret);
+}
+
 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
  uint32_t desc)
 {
-- 
2.31.1




[PATCH v4 18/26] s390x/tcg: Implement 128 bit for VECTOR FP LOAD ROUNDED

2021-06-08 Thread David Hildenbrand
128 bit -> 64 bit, there is only a single element to process.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  1 +
 target/s390x/translate_vx.c.inc | 11 ++-
 target/s390x/vec_fpu_helper.c   | 19 +++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index b5ba159402..02e6967ae6 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -287,6 +287,7 @@ DEF_HELPER_FLAGS_4(gvec_vfi128, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vflr128, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 472afca45e..e94c9f9d86 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2759,8 +2759,17 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps 
*o)
 }
 break;
 case 0xc5:
-if (fpf == FPF_LONG) {
+switch (fpf) {
+case FPF_LONG:
 fn = gen_helper_gvec_vflr64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vflr128;
+}
+break;
+default:
+break;
 }
 break;
 default:
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 75e3212582..0fb82bd18f 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -563,6 +563,25 @@ void HELPER(gvec_vflr64)(void *v1, const void *v2, 
CPUS390XState *env,
 *(S390Vector *)v1 = tmp;
 }
 
+void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
+  uint32_t desc)
+{
+const uint8_t erm = extract32(simd_data(desc), 4, 4);
+const bool XxC = extract32(simd_data(desc), 2, 1);
+uint8_t vxc, vec_exc = 0;
+int old_mode;
+float64 ret;
+
+old_mode = s390_swap_bfp_rounding_mode(env, erm);
+ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
+vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
+s390_restore_bfp_rounding_mode(env, old_mode);
+handle_ieee_exc(env, vxc, vec_exc, GETPC());
+
+/* place at even element, odd element is unpredictable */
+s390_vec_write_float64(v1, 0, ret);
+}
+
 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
const S390Vector *v4, CPUS390XState *env, bool s, int flags,
uintptr_t retaddr)
-- 
2.31.1




[PATCH v4 24/26] linux-user: elf: s390x: Prepare for Vector enhancements facility

2021-06-08 Thread David Hildenbrand
Let's check for S390_FEAT_VECTOR_ENH and set HWCAP_S390_VXRS_EXT
accordingly. Add all missing HWCAP definitions from upstream Linux.
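
For reference, a userspace check of the new bit could look like this
(illustration only; the constant mirrors the value added to include/elf.h
below, the name used by the C library may differ):

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP_S390_VXRS_EXT 8192

int main(void)
{
    unsigned long hwcap = getauxval(AT_HWCAP);

    printf("vector enhancements facility: %s\n",
           (hwcap & HWCAP_S390_VXRS_EXT) ? "yes" : "no");
    return 0;
}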

Cc: Laurent Vivier 
Acked-by: Laurent Vivier 
Signed-off-by: David Hildenbrand 
---
 include/elf.h| 7 +++
 linux-user/elfload.c | 1 +
 2 files changed, 8 insertions(+)

diff --git a/include/elf.h b/include/elf.h
index 033bcc9576..811bf4a1cb 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -605,6 +605,13 @@ typedef struct {
 #define HWCAP_S390_HIGH_GPRS512
 #define HWCAP_S390_TE   1024
 #define HWCAP_S390_VXRS 2048
+#define HWCAP_S390_VXRS_BCD 4096
+#define HWCAP_S390_VXRS_EXT 8192
+#define HWCAP_S390_GS   16384
+#define HWCAP_S390_VXRS_EXT232768
+#define HWCAP_S390_VXRS_PDE 65536
+#define HWCAP_S390_SORT 131072
+#define HWCAP_S390_DFLT 262144
 
 /* M68K specific definitions. */
 /* We use the top 24 bits to encode information about the
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 17ab06f612..4b0172339e 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1376,6 +1376,7 @@ static uint32_t get_elf_hwcap(void)
 hwcap |= HWCAP_S390_ETF3EH;
 }
 GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS);
+GET_FEATURE(S390_FEAT_VECTOR_ENH, HWCAP_S390_VXRS_EXT);
 
 return hwcap;
 }
-- 
2.31.1




[PULL 06/12] machine: move dies from X86MachineState to CpuTopology

2021-06-08 Thread Paolo Bonzini
In order to make SMP configuration a Machine property, we need a getter as
well as a setter.  To simplify the implementation, put everything that the
getter needs in the CpuTopology struct.
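
For context, dies is the topology level that -smp already parses for x86
machines, e.g. (illustrative invocation, not taken from this patch):

    qemu-system-x86_64 -smp 12,sockets=1,dies=2,cores=3,threads=2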

Signed-off-by: Paolo Bonzini 
---
 hw/core/machine.c |  1 +
 hw/i386/pc.c  |  4 +---
 hw/i386/x86.c | 15 +++
 include/hw/boards.h   |  1 +
 include/hw/i386/pc.h  |  1 -
 include/hw/i386/x86.h |  1 -
 6 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 55b9bc7817..d776c8cf20 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -970,6 +970,7 @@ static void machine_initfn(Object *obj)
 ms->smp.cpus = mc->default_cpus;
 ms->smp.max_cpus = mc->default_cpus;
 ms->smp.cores = 1;
+ms->smp.dies = 1;
 ms->smp.threads = 1;
 ms->smp.sockets = 1;
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c6d8d0d84d..92958e9ad7 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -712,8 +712,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
  */
 void pc_smp_parse(MachineState *ms, QemuOpts *opts)
 {
-X86MachineState *x86ms = X86_MACHINE(ms);
-
 if (opts) {
 unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
 unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
@@ -769,7 +767,7 @@ void pc_smp_parse(MachineState *ms, QemuOpts *opts)
 ms->smp.cores = cores;
 ms->smp.threads = threads;
 ms->smp.sockets = sockets;
-x86ms->smp_dies = dies;
+ms->smp.dies = dies;
 }
 
 if (ms->smp.cpus > 1) {
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index ed796fe6ba..2a99942016 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -64,7 +64,7 @@ inline void init_topo_info(X86CPUTopoInfo *topo_info,
 {
 MachineState *ms = MACHINE(x86ms);
 
-topo_info->dies_per_pkg = x86ms->smp_dies;
+topo_info->dies_per_pkg = ms->smp.dies;
 topo_info->cores_per_die = ms->smp.cores;
 topo_info->threads_per_core = ms->smp.threads;
 }
@@ -293,7 +293,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
 
 init_topo_info(&topo_info, x86ms);
 
-env->nr_dies = x86ms->smp_dies;
+env->nr_dies = ms->smp.dies;
 
 /*
  * If APIC ID is not set,
@@ -301,13 +301,13 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
  */
 if (cpu->apic_id == UNASSIGNED_APIC_ID) {
 int max_socket = (ms->smp.max_cpus - 1) /
-smp_threads / smp_cores / x86ms->smp_dies;
+smp_threads / smp_cores / ms->smp.dies;
 
 /*
  * die-id was optional in QEMU 4.0 and older, so keep it optional
  * if there's only one die per socket.
  */
-if (cpu->die_id < 0 && x86ms->smp_dies == 1) {
+if (cpu->die_id < 0 && ms->smp.dies == 1) {
 cpu->die_id = 0;
 }
 
@@ -322,9 +322,9 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
 if (cpu->die_id < 0) {
 error_setg(errp, "CPU die-id is not set");
 return;
-} else if (cpu->die_id > x86ms->smp_dies - 1) {
+} else if (cpu->die_id > ms->smp.dies - 1) {
 error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u",
-   cpu->die_id, x86ms->smp_dies - 1);
+   cpu->die_id, ms->smp.dies - 1);
 return;
 }
 if (cpu->core_id < 0) {
@@ -477,7 +477,7 @@ const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState 
*ms)
  &topo_info, &topo_ids);
 ms->possible_cpus->cpus[i].props.has_socket_id = true;
 ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id;
-if (x86ms->smp_dies > 1) {
+if (ms->smp.dies > 1) {
 ms->possible_cpus->cpus[i].props.has_die_id = true;
 ms->possible_cpus->cpus[i].props.die_id = topo_ids.die_id;
 }
@@ -1252,7 +1252,6 @@ static void x86_machine_initfn(Object *obj)
 
 x86ms->smm = ON_OFF_AUTO_AUTO;
 x86ms->acpi = ON_OFF_AUTO_AUTO;
-x86ms->smp_dies = 1;
 x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
 x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
 x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 3d55d2bd62..87ae5cc300 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -282,6 +282,7 @@ typedef struct DeviceMemoryState {
  */
 typedef struct CpuTopology {
 unsigned int cpus;
+unsigned int dies;
 unsigned int cores;
 unsigned int threads;
 unsigned int sockets;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 1522a3359a..4c2ca6d36a 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -19,7 +19,6 @@
  * PCMachineState:
  * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
  * @boot_cpus: number of present VCPUs
- * @smp_dies: number of dies per one package
  */
 typedef struct PCMachineState {
   

[PULL 05/12] qemu-option: remove now-dead code

2021-06-08 Thread Paolo Bonzini
-M was the sole user of qemu_opts_set and qemu_opts_set_defaults;
remove them and the arguments that they used.

Signed-off-by: Paolo Bonzini 
---
 include/qemu/option.h   |  3 ---
 tests/unit/test-qemu-opts.c | 35 -
 util/qemu-option.c  | 51 -
 3 files changed, 10 insertions(+), 79 deletions(-)

diff --git a/include/qemu/option.h b/include/qemu/option.h
index fffb03d848..306bf07575 100644
--- a/include/qemu/option.h
+++ b/include/qemu/option.h
@@ -119,7 +119,6 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char 
*id,
int fail_if_exists, Error **errp);
 void qemu_opts_reset(QemuOptsList *list);
 void qemu_opts_loc_restore(QemuOpts *opts);
-bool qemu_opts_set(QemuOptsList *list, const char *name, const char *value, 
Error **errp);
 const char *qemu_opts_id(QemuOpts *opts);
 void qemu_opts_set_id(QemuOpts *opts, char *id);
 void qemu_opts_del(QemuOpts *opts);
@@ -130,8 +129,6 @@ QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const 
char *params,
   bool permit_abbrev);
 QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
   bool permit_abbrev, Error **errp);
-void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
-int permit_abbrev);
 QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
Error **errp);
 QDict *qemu_opts_to_qdict_filtered(QemuOpts *opts, QDict *qdict,
diff --git a/tests/unit/test-qemu-opts.c b/tests/unit/test-qemu-opts.c
index 6568e31a72..828d40e928 100644
--- a/tests/unit/test-qemu-opts.c
+++ b/tests/unit/test-qemu-opts.c
@@ -410,40 +410,6 @@ static void test_qemu_opts_reset(void)
 g_assert(opts == NULL);
 }
 
-static void test_qemu_opts_set(void)
-{
-QemuOptsList *list;
-QemuOpts *opts;
-const char *opt;
-
-list = qemu_find_opts("opts_list_04");
-g_assert(list != NULL);
-g_assert(QTAILQ_EMPTY(&list->head));
-g_assert_cmpstr(list->name, ==, "opts_list_04");
-
-/* should not find anything at this point */
-opts = qemu_opts_find(list, NULL);
-g_assert(opts == NULL);
-
-/* implicitly create opts and set str3 value */
-qemu_opts_set(list, "str3", "value", &error_abort);
-g_assert(!QTAILQ_EMPTY(&list->head));
-
-/* get the just created opts */
-opts = qemu_opts_find(list, NULL);
-g_assert(opts != NULL);
-
-/* check the str3 value */
-opt = qemu_opt_get(opts, "str3");
-g_assert_cmpstr(opt, ==, "value");
-
-qemu_opts_del(opts);
-
-/* should not find anything at this point */
-opts = qemu_opts_find(list, NULL);
-g_assert(opts == NULL);
-}
-
 static int opts_count_iter(void *opaque, const char *name, const char *value,
Error **errp)
 {
@@ -1041,7 +1007,6 @@ int main(int argc, char *argv[])
 g_test_add_func("/qemu-opts/opt_get_size", test_qemu_opt_get_size);
 g_test_add_func("/qemu-opts/opt_unset", test_qemu_opt_unset);
 g_test_add_func("/qemu-opts/opts_reset", test_qemu_opts_reset);
-g_test_add_func("/qemu-opts/opts_set", test_qemu_opts_set);
 g_test_add_func("/qemu-opts/opts_parse/general", test_opts_parse);
 g_test_add_func("/qemu-opts/opts_parse/bool", test_opts_parse_bool);
 g_test_add_func("/qemu-opts/opts_parse/number", test_opts_parse_number);
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 4944015a25..ee78e42216 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -479,19 +479,14 @@ int qemu_opt_unset(QemuOpts *opts, const char *name)
 }
 }
 
-static QemuOpt *opt_create(QemuOpts *opts, const char *name, char *value,
-   bool prepend)
+static QemuOpt *opt_create(QemuOpts *opts, const char *name, char *value)
 {
 QemuOpt *opt = g_malloc0(sizeof(*opt));
 
 opt->name = g_strdup(name);
 opt->str = value;
 opt->opts = opts;
-if (prepend) {
-QTAILQ_INSERT_HEAD(&opts->head, opt, next);
-} else {
-QTAILQ_INSERT_TAIL(&opts->head, opt, next);
-}
+QTAILQ_INSERT_TAIL(&opts->head, opt, next);
 
 return opt;
 }
@@ -518,7 +513,7 @@ static bool opt_validate(QemuOpt *opt, Error **errp)
 bool qemu_opt_set(QemuOpts *opts, const char *name, const char *value,
   Error **errp)
 {
-QemuOpt *opt = opt_create(opts, name, g_strdup(value), false);
+QemuOpt *opt = opt_create(opts, name, g_strdup(value));
 
 if (!opt_validate(opt, errp)) {
 qemu_opt_del(opt);
@@ -662,15 +657,6 @@ void qemu_opts_loc_restore(QemuOpts *opts)
 loc_restore(&opts->loc);
 }
 
-bool qemu_opts_set(QemuOptsList *list, const char *name, const char *value, 
Error **errp)
-{
-QemuOpts *opts;
-
-assert(list->merge_lists);
-opts = qemu_opts_create(list, NULL, 0, &error_abort);
-return qemu_opt_set(opts, name, value, errp);
-}
-
 const char *qemu_opts_id(QemuOpts *opts)
 {
 retu

[PATCH v4 25/26] s390x/tcg: We support Vector enhancements facility

2021-06-08 Thread David Hildenbrand
Everything is wired up and all new instructions are implemented.

Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/gen-features.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index a6ec918e90..219b1f9420 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -720,6 +720,7 @@ static uint16_t qemu_MAX[] = {
 S390_FEAT_INSTRUCTION_EXEC_PROT,
 S390_FEAT_MISC_INSTRUCTION_EXT2,
 S390_FEAT_MSA_EXT_8,
+S390_FEAT_VECTOR_ENH,
 };
 
 /** END FEATURE DEFS **/
-- 
2.31.1




[PATCH v4 20/26] s390x/tcg: Implement 32/128 bit for VECTOR FP TEST DATA CLASS IMMEDIATE

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  2 ++
 target/s390x/translate_vx.c.inc | 23 ++--
 target/s390x/vec_fpu_helper.c   | 47 +
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 02e6967ae6..bae73b9a56 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -299,7 +299,9 @@ DEF_HELPER_FLAGS_4(gvec_vfsq128, TCG_CALL_NO_WG, void, ptr, 
cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
i32)
+DEF_HELPER_4(gvec_vftci32, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vftci128, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 4d1ccb4159..765f75df9c 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2965,14 +2965,33 @@ static DisasJumpType op_vftci(DisasContext *s, DisasOps 
*o)
 const uint16_t i3 = get_field(s, i3);
 const uint8_t fpf = get_field(s, m4);
 const uint8_t m5 = get_field(s, m5);
+gen_helper_gvec_2_ptr *fn = NULL;
 
-if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+switch (fpf) {
+case FPF_SHORT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vftci32;
+}
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vftci64;
+break;
+case FPF_EXT:
+if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+fn = gen_helper_gvec_vftci128;
+}
+break;
+default:
+break;
+}
+
+if (!fn || extract32(m5, 0, 3)) {
 gen_program_exception(s, PGM_SPECIFICATION);
 return DISAS_NORETURN;
 }
 
 gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
-   deposit32(m5, 4, 12, i3), gen_helper_gvec_vftci64);
+   deposit32(m5, 4, 12, i3), fn);
 set_cc_static(s);
 return DISAS_NEXT;
 }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 0fb82bd18f..6984f770ff 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -622,6 +622,36 @@ void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, 
const void *v3,   \
 DEF_GVEC_VFMA(vfma, 0)
 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
 
+void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
+  uint32_t desc)
+{
+uint16_t i3 = extract32(simd_data(desc), 4, 12);
+bool s = extract32(simd_data(desc), 3, 1);
+int i, match = 0;
+
+for (i = 0; i < 4; i++) {
+float32 a = s390_vec_read_float32(v2, i);
+
+if (float32_dcmask(env, a) & i3) {
+match++;
+s390_vec_write_element32(v1, i, -1u);
+} else {
+s390_vec_write_element32(v1, i, 0);
+}
+if (s) {
+break;
+}
+}
+
+if (match == 4 || (s && match)) {
+env->cc_op = 0;
+} else if (match) {
+env->cc_op = 1;
+} else {
+env->cc_op = 3;
+}
+}
+
 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
   uint32_t desc)
 {
@@ -651,3 +681,20 @@ void HELPER(gvec_vftci64)(void *v1, const void *v2, 
CPUS390XState *env,
 env->cc_op = 3;
 }
 }
+
+void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
+   uint32_t desc)
+{
+const float128 a = s390_vec_read_float128(v2);
+uint16_t i3 = extract32(simd_data(desc), 4, 12);
+
+if (float128_dcmask(env, a) & i3) {
+env->cc_op = 0;
+s390_vec_write_element64(v1, 0, -1ull);
+s390_vec_write_element64(v1, 1, -1ull);
+} else {
+env->cc_op = 3;
+s390_vec_write_element64(v1, 0, 0);
+s390_vec_write_element64(v1, 1, 0);
+}
+}
-- 
2.31.1




[PULL 04/12] vl: switch -M parsing to keyval

2021-06-08 Thread Paolo Bonzini
Switch from QemuOpts to keyval.  This enables the introduction
of non-scalar machine properties, and JSON syntax in the future.

For JSON syntax to be supported right now, we would have to
consider what would happen if string-based dictionaries (produced by
-M key=val) were to be merged with strongly-typed dictionaries
(produced by -M {'key': 123}).

The simplest way out is to never enter the situation, and only allow one
-M option when JSON syntax is in use.  However, we want options such as
-smp to become syntactic sugar for -M, and this is a problem; as soon
as -smp becomes a shortcut for -M, QEMU would forbid using -M '{}'
together with -smp.  Therefore, allowing JSON syntax right now for -M
would be a forward-compatibility nightmare and it would be impossible
anyway to introduce -M incrementally in tools.

Instead, support for JSON syntax is delayed until after the main
options are converted to QOM compound properties.  These include -boot,
-acpitable, -smbios, -m, -semihosting-config, -rtc and -fw_cfg.  Once JSON
syntax is introduced, these options will _also_ be forbidden together
with -M '{...}'.

Signed-off-by: Paolo Bonzini 
---
 softmmu/vl.c | 303 ---
 1 file changed, 140 insertions(+), 163 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 326c1e9080..c3686a6722 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -145,6 +145,8 @@ static const char *cpu_option;
 static const char *mem_path;
 static const char *incoming;
 static const char *loadvm;
+static const char *accelerators;
+static QDict *machine_opts_dict;
 static QTAILQ_HEAD(, ObjectOption) object_opts = 
QTAILQ_HEAD_INITIALIZER(object_opts);
 static ram_addr_t maxram_size;
 static uint64_t ram_slots;
@@ -235,21 +237,6 @@ static QemuOptsList qemu_option_rom_opts = {
 },
 };
 
-static QemuOptsList qemu_machine_opts = {
-.name = "machine",
-.implied_opt_name = "type",
-.merge_lists = true,
-.head = QTAILQ_HEAD_INITIALIZER(qemu_machine_opts.head),
-.desc = {
-/*
- * no elements => accept any
- * sanity checking will happen later
- * when setting machine properties
- */
-{ }
-},
-};
-
 static QemuOptsList qemu_accel_opts = {
 .name = "accel",
 .implied_opt_name = "accel",
@@ -498,16 +485,6 @@ static QemuOptsList qemu_action_opts = {
 },
 };
 
-/**
- * Get machine options
- *
- * Returns: machine options (never null).
- */
-static QemuOpts *qemu_get_machine_opts(void)
-{
-return qemu_find_opts_singleton("machine");
-}
-
 const char *qemu_get_vm_name(void)
 {
 return qemu_name;
@@ -815,33 +792,6 @@ static MachineClass *find_default_machine(GSList *machines)
 return default_machineclass;
 }
 
-static int machine_help_func(QemuOpts *opts, MachineState *machine)
-{
-ObjectProperty *prop;
-ObjectPropertyIterator iter;
-
-if (!qemu_opt_has_help_opt(opts)) {
-return 0;
-}
-
-object_property_iter_init(&iter, OBJECT(machine));
-while ((prop = object_property_iter_next(&iter))) {
-if (!prop->set) {
-continue;
-}
-
-printf("%s.%s=%s", MACHINE_GET_CLASS(machine)->name,
-   prop->name, prop->type);
-if (prop->description) {
-printf(" (%s)\n", prop->description);
-} else {
-printf("\n");
-}
-}
-
-return 1;
-}
-
 static void version(void)
 {
 printf("QEMU emulator version " QEMU_FULL_VERSION "\n"
@@ -1546,33 +1496,31 @@ static gint machine_class_cmp(gconstpointer a, 
gconstpointer b)
   object_class_get_name(OBJECT_CLASS(mc1)));
 }
 
-static MachineClass *machine_parse(const char *name, GSList *machines)
+static void machine_help_func(const QDict *qdict)
 {
-MachineClass *mc;
-GSList *el;
+GSList *machines, *el;
+const char *type = qdict_get_try_str(qdict, "type");
 
-if (is_help_option(name)) {
-printf("Supported machines are:\n");
-machines = g_slist_sort(machines, machine_class_cmp);
-for (el = machines; el; el = el->next) {
-MachineClass *mc = el->data;
-if (mc->alias) {
-printf("%-20s %s (alias of %s)\n", mc->alias, mc->desc, 
mc->name);
-}
-printf("%-20s %s%s%s\n", mc->name, mc->desc,
-   mc->is_default ? " (default)" : "",
-   mc->deprecation_reason ? " (deprecated)" : "");
+machines = object_class_get_list(TYPE_MACHINE, false);
+if (type) {
+ObjectClass *machine_class = OBJECT_CLASS(find_machine(type, 
machines));
+if (machine_class) {
+type_print_class_properties(object_class_get_name(machine_class));
+return;
 }
-exit(0);
 }
 
-mc = find_machine(name, machines);
-if (!mc) {
-error_report("unsupported machine type");
-error_printf("Use -machine help to list supported machines\n");
-exit(1);
+printf("

[PULL 02/12] keyval: introduce keyval_merge

2021-06-08 Thread Paolo Bonzini
This patch introduces a function that merges two keyval-produced
(or keyval-like) QDicts.  It can be used to emulate the behavior of
.merge_lists = true QemuOpts groups, merging -readconfig sections and
command-line options in a single QDict, and also to implement -set.
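
As a rough usage sketch (hypothetical helper and keys, not taken from this
patch), merging options parsed later into options parsed earlier gives the
"later value wins" behaviour of a .merge_lists = true group:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "qapi/qmp/qdict.h"
    #include "qemu/option.h"

    static QDict *merge_machine_options(void)
    {
        /* e.g. keys coming from a -readconfig [machine] section... */
        QDict *base = keyval_parse("type=q35,usb=off", NULL, NULL,
                                   &error_abort);
        /* ...and keys from a later -M option on the command line */
        QDict *override = keyval_parse("usb=on,mem-merge=off", NULL, NULL,
                                       &error_abort);

        /* scalar values from @override replace those already in @base */
        keyval_merge(base, override, &error_abort);
        qobject_unref(override);

        return base;    /* now: type=q35, usb=on, mem-merge=off */
    }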

Signed-off-by: Paolo Bonzini 
---
 include/qemu/option.h|  1 +
 tests/unit/test-keyval.c | 56 
 util/keyval.c| 47 +
 3 files changed, 104 insertions(+)

diff --git a/include/qemu/option.h b/include/qemu/option.h
index f73e0dc7d9..d89c66145a 100644
--- a/include/qemu/option.h
+++ b/include/qemu/option.h
@@ -149,5 +149,6 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst, 
QemuOptsList *list);
 
 QDict *keyval_parse(const char *params, const char *implied_key,
 bool *help, Error **errp);
+void keyval_merge(QDict *old, const QDict *new, Error **errp);
 
 #endif
diff --git a/tests/unit/test-keyval.c b/tests/unit/test-keyval.c
index e20c07cf3e..254b51e98c 100644
--- a/tests/unit/test-keyval.c
+++ b/tests/unit/test-keyval.c
@@ -747,6 +747,59 @@ static void test_keyval_visit_any(void)
 visit_free(v);
 }
 
+static void test_keyval_merge_success(void)
+{
+QDict *old = keyval_parse("opt1=abc,opt2.sub1=def,opt2.sub2=ghi,opt3=xyz",
+  NULL, NULL, &error_abort);
+QDict *new = keyval_parse("opt1=ABC,opt2.sub2=GHI,opt2.sub3=JKL",
+  NULL, NULL, &error_abort);
+QDict *combined = 
keyval_parse("opt1=ABC,opt2.sub1=def,opt2.sub2=GHI,opt2.sub3=JKL,opt3=xyz",
+   NULL, NULL, &error_abort);
+Error *err = NULL;
+
+keyval_merge(old, new, &err);
+g_assert(!err);
+g_assert(qobject_is_equal(QOBJECT(combined), QOBJECT(old)));
+qobject_unref(old);
+qobject_unref(new);
+qobject_unref(combined);
+}
+
+static void test_keyval_merge_list(void)
+{
+QDict *old = keyval_parse("opt1.0=abc,opt2.0=xyz",
+  NULL, NULL, &error_abort);
+QDict *new = keyval_parse("opt1.0=def",
+  NULL, NULL, &error_abort);
+QDict *combined = keyval_parse("opt1.0=abc,opt1.1=def,opt2.0=xyz",
+   NULL, NULL, &error_abort);
+Error *err = NULL;
+
+keyval_merge(old, new, &err);
+g_assert(!err);
+g_assert(qobject_is_equal(QOBJECT(combined), QOBJECT(old)));
+qobject_unref(old);
+qobject_unref(new);
+qobject_unref(combined);
+}
+
+static void test_keyval_merge_conflict(void)
+{
+QDict *old = keyval_parse("opt2.sub1=def,opt2.sub2=ghi",
+  NULL, NULL, &error_abort);
+QDict *new = keyval_parse("opt2=ABC",
+  NULL, NULL, &error_abort);
+Error *err = NULL;
+
+keyval_merge(new, old, &err);
+error_free_or_abort(&err);
+keyval_merge(old, new, &err);
+error_free_or_abort(&err);
+
+qobject_unref(old);
+qobject_unref(new);
+}
+
 int main(int argc, char *argv[])
 {
 g_test_init(&argc, &argv, NULL);
@@ -760,6 +813,9 @@ int main(int argc, char *argv[])
 g_test_add_func("/keyval/visit/optional", test_keyval_visit_optional);
 g_test_add_func("/keyval/visit/alternate", test_keyval_visit_alternate);
 g_test_add_func("/keyval/visit/any", test_keyval_visit_any);
+g_test_add_func("/keyval/merge/success", test_keyval_merge_success);
+g_test_add_func("/keyval/merge/list", test_keyval_merge_list);
+g_test_add_func("/keyval/merge/conflict", test_keyval_merge_conflict);
 g_test_run();
 return 0;
 }
diff --git a/util/keyval.c b/util/keyval.c
index be34928813..0797f36e1d 100644
--- a/util/keyval.c
+++ b/util/keyval.c
@@ -310,6 +310,53 @@ static char *reassemble_key(GSList *key)
 return g_string_free(s, FALSE);
 }
 
+/* Merge two dictionaries.  */
+static void keyval_do_merge(QDict *old, const QDict *new, GString *str, Error 
**errp)
+{
+size_t save_len = str->len;
+const QDictEntry *ent;
+QObject *old_value;
+
+for (ent = qdict_first(new); ent; ent = qdict_next(new, ent)) {
+old_value = qdict_get(old, ent->key);
+if (old_value) {
+if (qobject_type(old_value) != qobject_type(ent->value)) {
+error_setg(errp, "Parameter '%s%s' used inconsistently", 
str->str, ent->key);
+return;
+} else if (qobject_type(ent->value) == QTYPE_QDICT) {
+/* Merge sub-dictionaries.  */
+g_string_append(str, ent->key);
+g_string_append_c(str, '.');
+keyval_do_merge(qobject_to(QDict, old_value),
+qobject_to(QDict, ent->value),
+str, errp);
+g_string_truncate(str, save_len);
+continue;
+} else if (qobject_type(ent->value) == QTYPE_QLIST) {
+/* Append to old list.  */
+QList *old = qob

[PATCH v4 22/26] s390x/tcg: Implement VECTOR FP NEGATIVE MULTIPLY AND (ADD|SUBTRACT)

2021-06-08 Thread David Hildenbrand
Reviewed-by: Richard Henderson 
Signed-off-by: David Hildenbrand 
---
 target/s390x/helper.h   |  6 +
 target/s390x/insn-data.def  |  4 
 target/s390x/translate_vx.c.inc | 39 +++--
 target/s390x/vec_fpu_helper.c   |  2 ++
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 2366756063..913967ce4e 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -297,6 +297,12 @@ DEF_HELPER_FLAGS_6(gvec_vfma128, TCG_CALL_NO_WG, void, 
ptr, cptr, cptr, cptr, en
 DEF_HELPER_FLAGS_6(gvec_vfms32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfnma32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfnma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfnma128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfnms32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfnms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfnms128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, 
env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq128, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 1a3ae7e7e7..19b02dffca 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1259,6 +1259,10 @@
 F(0xe78f, VFMA,VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
 /* VECTOR FP MULTIPLY AND SUBTRACT */
 F(0xe78e, VFMS,VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP NEGATIVE MULTIPLY AND ADD */
+F(0xe79f, VFNMA,   VRR_e, VE,  0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT */
+F(0xe79e, VFNMS,   VRR_e, VE,   0, 0, 0, 0, vfma, 0, IF_VEC)
 /* VECTOR FP PERFORM SIGN OPERATION */
 F(0xe7cc, VFPSO,   VRR_a, V,   0, 0, 0, 0, vfpso, 0, IF_VEC)
 /* VECTOR FP SQUARE ROOT */
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 17d41b178f..200d83e783 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2820,7 +2820,8 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
 const uint8_t fpf = get_field(s, m6);
 gen_helper_gvec_4_ptr *fn = NULL;
 
-if (s->fields.op2 == 0x8f) {
+switch (s->fields.op2) {
+case 0x8f:
 switch (fpf) {
 case FPF_SHORT:
 if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
@@ -2838,7 +2839,8 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
 default:
 break;
 }
-} else {
+break;
+case 0x8e:
 switch (fpf) {
 case FPF_SHORT:
 if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
@@ -2856,6 +2858,39 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps 
*o)
 default:
 break;
 }
+break;
+case 0x9f:
+switch (fpf) {
+case FPF_SHORT:
+fn = gen_helper_gvec_vfnma32;
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfnma64;
+break;
+case FPF_EXT:
+fn = gen_helper_gvec_vfnma128;
+break;
+default:
+break;
+}
+break;
+case 0x9e:
+switch (fpf) {
+case FPF_SHORT:
+fn = gen_helper_gvec_vfnms32;
+break;
+case FPF_LONG:
+fn = gen_helper_gvec_vfnms64;
+break;
+case FPF_EXT:
+fn = gen_helper_gvec_vfnms128;
+break;
+default:
+break;
+}
+break;
+default:
+g_assert_not_reached();
 }
 
 if (!fn || extract32(m5, 0, 3)) {
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 29ccc608dc..dc9bcc90a7 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -663,6 +663,8 @@ void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, 
const void *v3,   \
 
 DEF_GVEC_VFMA(vfma, 0)
 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
+DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
+DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
 
 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
   uint32_t desc)
-- 
2.31.1




[PULL 07/12] machine: move common smp_parse code to caller

2021-06-08 Thread Paolo Bonzini
Most of smp_parse and pc_smp_parse is guarded by an "if (opts)"
conditional, and the rest is common to both functions.  Move the
conditional and the common code to the caller, machine_smp_parse.

Move the replay_add_blocker call after all errors are checked for.
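
The hunk that updates the caller is cut off in the diff below, so as a rough
sketch only (details may differ from the actual commit), machine_smp_parse()
presumably ends up along these lines after the move:

    bool machine_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp)
    {
        MachineClass *mc = MACHINE_GET_CLASS(ms);

        if (opts) {
            mc->smp_parse(ms, opts);
        }

        /* sanity-check smp_cpus and max_cpus against mc */
        if (ms->smp.cpus < mc->min_cpus) {
            error_setg(errp, "Invalid SMP CPUs %d. The min CPUs "
                       "supported by machine '%s' is %d",
                       ms->smp.cpus, mc->name, mc->min_cpus);
            return false;
        }
        if (ms->smp.max_cpus > mc->max_cpus) {
            error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
                       "supported by machine '%s' is %d",
                       ms->smp.max_cpus, mc->name, mc->max_cpus);
            return false;
        }

        /* replay_add_blocker runs only once all errors have been checked */
        if (ms->smp.cpus > 1) {
            Error *blocker = NULL;
            error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
            replay_add_blocker(blocker);
        }
        return true;
    }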

Signed-off-by: Paolo Bonzini 
---
 hw/core/machine.c | 112 ++--
 hw/i386/pc.c  | 116 +-
 2 files changed, 110 insertions(+), 118 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index d776c8cf20..1016ec9e1c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -741,67 +741,59 @@ void machine_set_cpu_numa_node(MachineState *machine,
 
 static void smp_parse(MachineState *ms, QemuOpts *opts)
 {
-if (opts) {
-unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
-unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
-unsigned cores   = qemu_opt_get_number(opts, "cores", 0);
-unsigned threads = qemu_opt_get_number(opts, "threads", 0);
-
-/* compute missing values, prefer sockets over cores over threads */
-if (cpus == 0 || sockets == 0) {
-cores = cores > 0 ? cores : 1;
-threads = threads > 0 ? threads : 1;
-if (cpus == 0) {
-sockets = sockets > 0 ? sockets : 1;
-cpus = cores * threads * sockets;
-} else {
-ms->smp.max_cpus =
-qemu_opt_get_number(opts, "maxcpus", cpus);
-sockets = ms->smp.max_cpus / (cores * threads);
-}
-} else if (cores == 0) {
-threads = threads > 0 ? threads : 1;
-cores = cpus / (sockets * threads);
-cores = cores > 0 ? cores : 1;
-} else if (threads == 0) {
-threads = cpus / (cores * sockets);
-threads = threads > 0 ? threads : 1;
-} else if (sockets * cores * threads < cpus) {
-error_report("cpu topology: "
- "sockets (%u) * cores (%u) * threads (%u) < "
- "smp_cpus (%u)",
- sockets, cores, threads, cpus);
-exit(1);
-}
-
-ms->smp.max_cpus =
-qemu_opt_get_number(opts, "maxcpus", cpus);
-
-if (ms->smp.max_cpus < cpus) {
-error_report("maxcpus must be equal to or greater than smp");
-exit(1);
+unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
+unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
+unsigned cores   = qemu_opt_get_number(opts, "cores", 0);
+unsigned threads = qemu_opt_get_number(opts, "threads", 0);
+
+/* compute missing values, prefer sockets over cores over threads */
+if (cpus == 0 || sockets == 0) {
+cores = cores > 0 ? cores : 1;
+threads = threads > 0 ? threads : 1;
+if (cpus == 0) {
+sockets = sockets > 0 ? sockets : 1;
+cpus = cores * threads * sockets;
+} else {
+ms->smp.max_cpus =
+qemu_opt_get_number(opts, "maxcpus", cpus);
+sockets = ms->smp.max_cpus / (cores * threads);
 }
+} else if (cores == 0) {
+threads = threads > 0 ? threads : 1;
+cores = cpus / (sockets * threads);
+cores = cores > 0 ? cores : 1;
+} else if (threads == 0) {
+threads = cpus / (cores * sockets);
+threads = threads > 0 ? threads : 1;
+} else if (sockets * cores * threads < cpus) {
+error_report("cpu topology: "
+"sockets (%u) * cores (%u) * threads (%u) < "
+"smp_cpus (%u)",
+sockets, cores, threads, cpus);
+exit(1);
+}
 
-if (sockets * cores * threads != ms->smp.max_cpus) {
-error_report("Invalid CPU topology: "
- "sockets (%u) * cores (%u) * threads (%u) "
- "!= maxcpus (%u)",
- sockets, cores, threads,
- ms->smp.max_cpus);
-exit(1);
-}
+ms->smp.max_cpus =
+qemu_opt_get_number(opts, "maxcpus", cpus);
 
-ms->smp.cpus = cpus;
-ms->smp.cores = cores;
-ms->smp.threads = threads;
-ms->smp.sockets = sockets;
+if (ms->smp.max_cpus < cpus) {
+error_report("maxcpus must be equal to or greater than smp");
+exit(1);
 }
 
-if (ms->smp.cpus > 1) {
-Error *blocker = NULL;
-error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
-replay_add_blocker(blocker);
+if (sockets * cores * threads != ms->smp.max_cpus) {
+error_report("Invalid CPU topology: "
+"sockets (%u) * cores (%u) * threads (%u) "
+"!= maxcpus (%u)",
+sockets, cores, threads,
+ms->smp.max_cpus);
+exit(1);

[PULL 01/12] qom: export more functions for use with non-UserCreatable objects

2021-06-08 Thread Paolo Bonzini
Machines and accelerators are not user-creatable but they are going
to share similar command-line parsing machinery.  Export functions
that will be used with -machine and -accel in softmmu/vl.c.

Signed-off-by: Paolo Bonzini 
---
 include/qom/object.h| 23 
 qom/object_interfaces.c | 58 +
 2 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/include/qom/object.h b/include/qom/object.h
index 6721cd312e..faae0d841f 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -861,6 +861,29 @@ static void do_qemu_init_ ## type_array(void)  
 \
 }   \
 type_init(do_qemu_init_ ## type_array)
 
+/**
+ * type_print_class_properties:
+ * @type: a QOM class name
+ *
+ * Print the object's class properties to stdout or the monitor.
+ * Return whether an object was found.
+ */
+bool type_print_class_properties(const char *type);
+
+/**
+ * object_set_properties_from_keyval:
+ * @obj: a QOM object
+ * @qdict: a dictionary with the properties to be set
+ * @from_json: true if leaf values of @qdict are typed, false if they
+ * are strings
+ * @errp: pointer to error object
+ *
+ * For each key in the dictionary, parse the value string if needed,
+ * then set the corresponding property in @obj.
+ */
+void object_set_properties_from_keyval(Object *obj, const QDict *qdict,
+   bool from_json, Error **errp);
+
 /**
  * object_class_dynamic_cast_assert:
  * @klass: The #ObjectClass to attempt to cast.
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
index 4479ee693a..ad9b56b59a 100644
--- a/qom/object_interfaces.c
+++ b/qom/object_interfaces.c
@@ -42,6 +42,44 @@ bool user_creatable_can_be_deleted(UserCreatable *uc)
 }
 }
 
+static void object_set_properties_from_qdict(Object *obj, const QDict *qdict,
+ Visitor *v, Error **errp)
+{
+const QDictEntry *e;
+Error *local_err = NULL;
+
+if (!visit_start_struct(v, NULL, NULL, 0, &local_err)) {
+goto out;
+}
+for (e = qdict_first(qdict); e; e = qdict_next(qdict, e)) {
+if (!object_property_set(obj, e->key, v, &local_err)) {
+break;
+}
+}
+if (!local_err) {
+visit_check_struct(v, &local_err);
+}
+visit_end_struct(v, NULL);
+
+out:
+if (local_err) {
+error_propagate(errp, local_err);
+}
+}
+
+void object_set_properties_from_keyval(Object *obj, const QDict *qdict,
+   bool from_json, Error **errp)
+{
+Visitor *v;
+if (from_json) {
+v = qobject_input_visitor_new(QOBJECT(qdict));
+} else {
+v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+}
+object_set_properties_from_qdict(obj, qdict, v, errp);
+visit_free(v);
+}
+
 Object *user_creatable_add_type(const char *type, const char *id,
 const QDict *qdict,
 Visitor *v, Error **errp)
@@ -49,7 +87,6 @@ Object *user_creatable_add_type(const char *type, const char 
*id,
 ERRP_GUARD();
 Object *obj;
 ObjectClass *klass;
-const QDictEntry *e;
 Error *local_err = NULL;
 
 if (id != NULL && !id_wellformed(id)) {
@@ -78,18 +115,7 @@ Object *user_creatable_add_type(const char *type, const 
char *id,
 
 assert(qdict);
 obj = object_new(type);
-if (!visit_start_struct(v, NULL, NULL, 0, &local_err)) {
-goto out;
-}
-for (e = qdict_first(qdict); e; e = qdict_next(qdict, e)) {
-if (!object_property_set(obj, e->key, v, &local_err)) {
-break;
-}
-}
-if (!local_err) {
-visit_check_struct(v, &local_err);
-}
-visit_end_struct(v, NULL);
+object_set_properties_from_qdict(obj, qdict, v, &local_err);
 if (local_err) {
 goto out;
 }
@@ -178,7 +204,7 @@ static void user_creatable_print_types(void)
 g_slist_free(list);
 }
 
-static bool user_creatable_print_type_properites(const char *type)
+bool type_print_class_properties(const char *type)
 {
 ObjectClass *klass;
 ObjectPropertyIterator iter;
@@ -224,7 +250,7 @@ bool user_creatable_print_help(const char *type, QemuOpts 
*opts)
 }
 
 if (qemu_opt_has_help_opt(opts)) {
-return user_creatable_print_type_properites(type);
+return type_print_class_properties(type);
 }
 
 return false;
@@ -234,7 +260,7 @@ static void user_creatable_print_help_from_qdict(QDict 
*args)
 {
 const char *type = qdict_get_try_str(args, "qom-type");
 
-if (!type || !user_creatable_print_type_properites(type)) {
+if (!type || !type_print_class_properties(type)) {
 user_creatable_print_types();
 }
 }
-- 
2.31.1





[PULL 08/12] machine: add error propagation to mc->smp_parse

2021-06-08 Thread Paolo Bonzini
Clean up the smp_parse functions to use Error** instead of exiting.

Signed-off-by: Paolo Bonzini 
---
 hw/core/machine.c| 34 +++---
 hw/i386/pc.c | 28 ++--
 include/hw/boards.h  |  2 +-
 include/hw/i386/pc.h |  2 --
 4 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1016ec9e1c..5a9c97ccc5 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -739,7 +739,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
 }
 }
 
-static void smp_parse(MachineState *ms, QemuOpts *opts)
+static void smp_parse(MachineState *ms, QemuOpts *opts, Error **errp)
 {
 unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
 unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
@@ -766,28 +766,28 @@ static void smp_parse(MachineState *ms, QemuOpts *opts)
 threads = cpus / (cores * sockets);
 threads = threads > 0 ? threads : 1;
 } else if (sockets * cores * threads < cpus) {
-error_report("cpu topology: "
-"sockets (%u) * cores (%u) * threads (%u) < "
-"smp_cpus (%u)",
-sockets, cores, threads, cpus);
-exit(1);
+error_setg(errp, "cpu topology: "
+   "sockets (%u) * cores (%u) * threads (%u) < "
+   "smp_cpus (%u)",
+   sockets, cores, threads, cpus);
+return;
 }
 
 ms->smp.max_cpus =
 qemu_opt_get_number(opts, "maxcpus", cpus);
 
 if (ms->smp.max_cpus < cpus) {
-error_report("maxcpus must be equal to or greater than smp");
-exit(1);
+error_setg(errp, "maxcpus must be equal to or greater than smp");
+return;
 }
 
 if (sockets * cores * threads != ms->smp.max_cpus) {
-error_report("Invalid CPU topology: "
-"sockets (%u) * cores (%u) * threads (%u) "
-"!= maxcpus (%u)",
-sockets, cores, threads,
-ms->smp.max_cpus);
-exit(1);
+error_setg(errp, "Invalid CPU topology: "
+   "sockets (%u) * cores (%u) * threads (%u) "
+   "!= maxcpus (%u)",
+   sockets, cores, threads,
+   ms->smp.max_cpus);
+return;
 }
 
 ms->smp.cpus = cpus;
@@ -1126,9 +1126,13 @@ MemoryRegion *machine_consume_memdev(MachineState 
*machine,
 bool machine_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp)
 {
 MachineClass *mc = MACHINE_GET_CLASS(ms);
+ERRP_GUARD();
 
 if (opts) {
-mc->smp_parse(ms, opts);
+mc->smp_parse(ms, opts, errp);
+if (*errp) {
+return false;
+}
 }
 
 /* sanity-check smp_cpus and max_cpus against mc */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e206ac85f3..cce275dcb1 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -710,7 +710,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
  * This function is very similar to smp_parse()
  * in hw/core/machine.c but includes CPU die support.
  */
-void pc_smp_parse(MachineState *ms, QemuOpts *opts)
+static void pc_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp)
 {
 unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
 unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
@@ -738,28 +738,28 @@ void pc_smp_parse(MachineState *ms, QemuOpts *opts)
 threads = cpus / (cores * dies * sockets);
 threads = threads > 0 ? threads : 1;
 } else if (sockets * dies * cores * threads < cpus) {
-error_report("cpu topology: "
-"sockets (%u) * dies (%u) * cores (%u) * threads (%u) 
< "
-"smp_cpus (%u)",
-sockets, dies, cores, threads, cpus);
-exit(1);
+error_setg(errp, "cpu topology: "
+   "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < "
+   "smp_cpus (%u)",
+   sockets, dies, cores, threads, cpus);
+return;
 }
 
 ms->smp.max_cpus =
 qemu_opt_get_number(opts, "maxcpus", cpus);
 
 if (ms->smp.max_cpus < cpus) {
-error_report("maxcpus must be equal to or greater than smp");
-exit(1);
+error_setg(errp, "maxcpus must be equal to or greater than smp");
+return;
 }
 
 if (sockets * dies * cores * threads != ms->smp.max_cpus) {
-error_report("Invalid CPU topology deprecated: "
-"sockets (%u) * dies (%u) * cores (%u) * threads (%u) "
-"!= maxcpus (%u)",
-sockets, dies, cores, threads,
-ms->smp.max_cpus);
-exit(1);
+error_setg(errp, "Invalid CPU topology deprecated: "
+   "sockets (%u) * dies (%u) * cores (%u) * threads (%u) "
+   "!= maxcpus (%u)",
+   

[PULL 00/12] Machine and OS X changes for 2021-06-08

2021-06-08 Thread Paolo Bonzini
The following changes since commit 6f398e533f5e259b4f937f4aa9de970f7201d166:

  Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210604' into 
staging (2021-06-05 11:25:52 +0100)

are available in the Git repository at:

  https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to 8f9f729185e3ac8d3c5a65d81eb9e74e229901ea:

  vnc: avoid deprecation warnings for SASL on OS X (2021-06-07 10:20:23 -0400)


* introduce "-M smp" (myself)
* avoid deprecation warnings for SASL on macOS 10.11 or newer.


Paolo Bonzini (12):
  qom: export more functions for use with non-UserCreatable objects
  keyval: introduce keyval_merge
  keyval: introduce keyval_parse_into
  vl: switch -M parsing to keyval
  qemu-option: remove now-dead code
  machine: move dies from X86MachineState to CpuTopology
  machine: move common smp_parse code to caller
  machine: add error propagation to mc->smp_parse
  machine: pass QAPI struct to mc->smp_parse
  machine: reject -smp dies!=1 for non-PC machines
  machine: add smp compound property
  vnc: avoid deprecation warnings for SASL on OS X

 hw/core/machine.c   | 184 ++--
 hw/i386/pc.c| 108 +++---
 hw/i386/x86.c   |  15 +-
 include/hw/boards.h |   4 +-
 include/hw/i386/pc.h|   3 -
 include/hw/i386/x86.h   |   1 -
 include/qemu/option.h   |   6 +-
 include/qom/object.h|  23 +++
 qapi/machine.json   |  27 
 qom/object_interfaces.c |  58 +---
 softmmu/vl.c| 336 ++--
 tests/qtest/numa-test.c |  22 +--
 tests/unit/test-keyval.c|  56 
 tests/unit/test-qemu-opts.c |  35 -
 ui/vnc-auth-sasl.c  |  20 +++
 ui/vnc-auth-sasl.h  |   1 +
 ui/vnc.c|  10 +-
 util/keyval.c   |  90 ++--
 util/qemu-option.c  |  51 ++-
 19 files changed, 607 insertions(+), 443 deletions(-)
-- 
2.31.1




[PULL 10/12] machine: reject -smp dies!=1 for non-PC machines

2021-06-08 Thread Paolo Bonzini
Signed-off-by: Paolo Bonzini 
---
 hw/core/machine.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ad8341a31..ffc076ae84 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -746,6 +746,10 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 unsigned cores   = config->has_cores ? config->cores : 0;
 unsigned threads = config->has_threads ? config->threads : 0;
 
+if (config->has_dies && config->dies != 0 && config->dies != 1) {
+error_setg(errp, "dies not supported by this machine's CPU topology");
+}
+
 /* compute missing values, prefer sockets over cores over threads */
 if (cpus == 0 || sockets == 0) {
 cores = cores > 0 ? cores : 1;
-- 
2.31.1





[PULL 11/12] machine: add smp compound property

2021-06-08 Thread Paolo Bonzini
Make -smp syntactic sugar for a compound property "-machine
smp.{cores,threads,cpu,...}".  machine_smp_parse is replaced by the
setter for the property.

numa-test will now cover the new syntax, while other tests
still use -smp.

Signed-off-by: Paolo Bonzini 
---
 hw/core/machine.c   | 108 +---
 include/hw/boards.h |   1 -
 softmmu/vl.c|  33 +---
 tests/qtest/numa-test.c |  22 
 4 files changed, 95 insertions(+), 69 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index ffc076ae84..c6ae89efec 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -19,6 +19,7 @@
 #include "hw/loader.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-common.h"
+#include "qapi/qapi-visit-machine.h"
 #include "qapi/visitor.h"
 #include "hw/sysbus.h"
 #include "sysemu/cpus.h"
@@ -798,6 +799,57 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 ms->smp.sockets = sockets;
 }
 
+static void machine_get_smp(Object *obj, Visitor *v, const char *name,
+void *opaque, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+SMPConfiguration *config = &(SMPConfiguration){
+.has_cores = true, .cores = ms->smp.cores,
+.has_sockets = true, .sockets = ms->smp.sockets,
+.has_dies = true, .dies = ms->smp.dies,
+.has_threads = true, .threads = ms->smp.threads,
+.has_cpus = true, .cpus = ms->smp.cpus,
+.has_maxcpus = true, .maxcpus = ms->smp.max_cpus,
+};
+if (!visit_type_SMPConfiguration(v, name, &config, &error_abort)) {
+return;
+}
+}
+
+static void machine_set_smp(Object *obj, Visitor *v, const char *name,
+void *opaque, Error **errp)
+{
+MachineClass *mc = MACHINE_GET_CLASS(obj);
+MachineState *ms = MACHINE(obj);
+SMPConfiguration *config;
+ERRP_GUARD();
+
+if (!visit_type_SMPConfiguration(v, name, &config, errp)) {
+return;
+}
+
+mc->smp_parse(ms, config, errp);
+if (errp) {
+goto out_free;
+}
+
+/* sanity-check smp_cpus and max_cpus against mc */
+if (ms->smp.cpus < mc->min_cpus) {
+error_setg(errp, "Invalid SMP CPUs %d. The min CPUs "
+   "supported by machine '%s' is %d",
+   ms->smp.cpus,
+   mc->name, mc->min_cpus);
+} else if (ms->smp.max_cpus > mc->max_cpus) {
+error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
+   "supported by machine '%s' is %d",
+   current_machine->smp.max_cpus,
+   mc->name, mc->max_cpus);
+}
+
+out_free:
+qapi_free_SMPConfiguration(config);
+}
+
 static void machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -837,6 +889,12 @@ static void machine_class_init(ObjectClass *oc, void *data)
 object_class_property_set_description(oc, "dumpdtb",
 "Dump current dtb to a file and quit");
 
+object_class_property_add(oc, "smp", "SMPConfiguration",
+machine_get_smp, machine_set_smp,
+NULL, NULL);
+object_class_property_set_description(oc, "smp",
+"CPU topology");
+
 object_class_property_add(oc, "phandle-start", "int",
 machine_get_phandle_start, machine_set_phandle_start,
 NULL, NULL);
@@ -1125,56 +1183,6 @@ MemoryRegion *machine_consume_memdev(MachineState 
*machine,
 return ret;
 }
 
-bool machine_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp)
-{
-MachineClass *mc = MACHINE_GET_CLASS(ms);
-ERRP_GUARD();
-
-if (opts) {
-SMPConfiguration config = {
-.has_cpus = !!qemu_opt_get(opts, "cpus"),
-.cpus = qemu_opt_get_number(opts, "cpus", 0),
-.has_sockets = !!qemu_opt_get(opts, "sockets"),
-.sockets = qemu_opt_get_number(opts, "sockets", 0),
-.has_dies = !!qemu_opt_get(opts, "dies"),
-.dies = qemu_opt_get_number(opts, "dies", 0),
-.has_cores = !!qemu_opt_get(opts, "cores"),
-.cores = qemu_opt_get_number(opts, "cores", 0),
-.has_threads = !!qemu_opt_get(opts, "threads"),
-.threads = qemu_opt_get_number(opts, "threads", 0),
-.has_maxcpus = !!qemu_opt_get(opts, "maxcpus"),
-.maxcpus = qemu_opt_get_number(opts, "maxcpus", 0),
-};
-
-mc->smp_parse(ms, &config, errp);
-if (*errp) {
-return false;
-}
-}
-
-/* sanity-check smp_cpus and max_cpus against mc */
-if (ms->smp.cpus < mc->min_cpus) {
-error_setg(errp, "Invalid SMP CPUs %d. The min CPUs "
-   "supported by machine '%s' is %d",
-   ms->smp.cpus,
-   mc->name, mc->min_cpus);
-return false;
-} else if (ms->smp.max_cpus > mc->max_cpus) {
-error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
-   "supported by 

[PULL 12/12] vnc: avoid deprecation warnings for SASL on OS X

2021-06-08 Thread Paolo Bonzini
Apple has deprecated sasl.h functions in OS X 10.11.  Therefore,
all files that use SASL API need to disable -Wdeprecated-declarations.
Remove the only use that is outside vnc-auth-sasl.c and add the
relevant #pragma GCC diagnostic there.

Signed-off-by: Paolo Bonzini 
Message-Id: <20210604120915.286195-1-pbonz...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 ui/vnc-auth-sasl.c | 20 
 ui/vnc-auth-sasl.h |  1 +
 ui/vnc.c   | 10 ++
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index df7dc08e9f..cf65a0b161 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -28,10 +28,30 @@
 #include "vnc.h"
 #include "trace.h"
 
+/*
+ * Apple has deprecated sasl.h functions in OS X 10.11.  Therefore,
+ * files that use SASL API need to disable -Wdeprecated-declarations.
+ */
+#ifdef CONFIG_DARWIN
+#pragma GCC diagnostic warning "-Wdeprecated-declarations"
+#endif
+
 /* Max amount of data we send/recv for SASL steps to prevent DOS */
 #define SASL_DATA_MAX_LEN (1024 * 1024)
 
 
+bool vnc_sasl_server_init(Error **errp)
+{
+int saslErr = sasl_server_init(NULL, "qemu");
+
+if (saslErr != SASL_OK) {
+error_setg(errp, "Failed to initialize SASL auth: %s",
+   sasl_errstring(saslErr, NULL, NULL));
+return false;
+}
+return true;
+}
+
 void vnc_sasl_client_cleanup(VncState *vs)
 {
 if (vs->sasl.conn) {
diff --git a/ui/vnc-auth-sasl.h b/ui/vnc-auth-sasl.h
index 1bfb86c6f5..367b8672cc 100644
--- a/ui/vnc-auth-sasl.h
+++ b/ui/vnc-auth-sasl.h
@@ -63,6 +63,7 @@ struct VncDisplaySASL {
 char *authzid;
 };
 
+bool vnc_sasl_server_init(Error **errp);
 void vnc_sasl_client_cleanup(VncState *vs);
 
 size_t vnc_client_read_sasl(VncState *vs);
diff --git a/ui/vnc.c b/ui/vnc.c
index b3d4d7b9a5..f0a1550d58 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -4154,14 +4154,8 @@ void vnc_display_open(const char *id, Error **errp)
 trace_vnc_auth_init(vd, 1, vd->ws_auth, vd->ws_subauth);
 
 #ifdef CONFIG_VNC_SASL
-if (sasl) {
-int saslErr = sasl_server_init(NULL, "qemu");
-
-if (saslErr != SASL_OK) {
-error_setg(errp, "Failed to initialize SASL auth: %s",
-   sasl_errstring(saslErr, NULL, NULL));
-goto fail;
-}
+if (sasl && !vnc_sasl_server_init(errp)) {
+goto fail;
 }
 #endif
 vd->lock_key_sync = lock_key_sync;
-- 
2.31.1




[PULL 03/12] keyval: introduce keyval_parse_into

2021-06-08 Thread Paolo Bonzini
Allow parsing multiple keyval sequences into the same dictionary.
This will be used to simplify the parsing of the -M command line
option, which is currently a .merge_lists = true QemuOpts group.
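
As a rough sketch of the intended use (hypothetical helper, not part of this
patch), every -M argument can now be accumulated into one dictionary, with
"type" as the implied key, instead of relying on a merge_lists QemuOpts
group:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "qapi/qmp/qdict.h"
    #include "qemu/option.h"

    static QDict *collect_machine_args(const char *const *optargs, int n)
    {
        QDict *machine_opts = qdict_new();
        bool help = false;
        int i;

        for (i = 0; i < n; i++) {
            /* "type" is the implied key of -M, as with the QemuOpts parser */
            keyval_parse_into(machine_opts, optargs[i], "type", &help,
                              &error_fatal);
        }

        return machine_opts;
    }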

Signed-off-by: Paolo Bonzini 
---
 include/qemu/option.h |  2 ++
 util/keyval.c | 43 +++
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/include/qemu/option.h b/include/qemu/option.h
index d89c66145a..fffb03d848 100644
--- a/include/qemu/option.h
+++ b/include/qemu/option.h
@@ -147,6 +147,8 @@ void qemu_opts_print_help(QemuOptsList *list, bool 
print_caption);
 void qemu_opts_free(QemuOptsList *list);
 QemuOptsList *qemu_opts_append(QemuOptsList *dst, QemuOptsList *list);
 
+QDict *keyval_parse_into(QDict *qdict, const char *params, const char 
*implied_key,
+ bool *p_help, Error **errp);
 QDict *keyval_parse(const char *params, const char *implied_key,
 bool *help, Error **errp);
 void keyval_merge(QDict *old, const QDict *new, Error **errp);
diff --git a/util/keyval.c b/util/keyval.c
index 0797f36e1d..1ffd6e1204 100644
--- a/util/keyval.c
+++ b/util/keyval.c
@@ -478,13 +478,14 @@ static QObject *keyval_listify(QDict *cur, GSList 
*key_of_cur, Error **errp)
  * If @p_help is not NULL, store whether help is requested there.
  * If @p_help is NULL and help is requested, fail.
  *
- * On success, return a dictionary of the parsed keys and values.
- * On failure, store an error through @errp and return NULL.
+ * On success, return @dict, now filled with the parsed keys and values.
+ *
+ * On failure, store an error through @errp and return NULL.  Any keys
+ * and values parsed so far will be in @dict nevertheless.
  */
-QDict *keyval_parse(const char *params, const char *implied_key,
-bool *p_help, Error **errp)
+QDict *keyval_parse_into(QDict *qdict, const char *params, const char 
*implied_key,
+ bool *p_help, Error **errp)
 {
-QDict *qdict = qdict_new();
 QObject *listified;
 const char *s;
 bool help = false;
@@ -493,7 +494,6 @@ QDict *keyval_parse(const char *params, const char 
*implied_key,
 while (*s) {
 s = keyval_parse_one(qdict, s, implied_key, &help, errp);
 if (!s) {
-qobject_unref(qdict);
 return NULL;
 }
 implied_key = NULL;
@@ -503,15 +503,42 @@ QDict *keyval_parse(const char *params, const char 
*implied_key,
 *p_help = help;
 } else if (help) {
 error_setg(errp, "Help is not available for this option");
-qobject_unref(qdict);
 return NULL;
 }
 
 listified = keyval_listify(qdict, NULL, errp);
 if (!listified) {
-qobject_unref(qdict);
 return NULL;
 }
 assert(listified == QOBJECT(qdict));
 return qdict;
 }
+
+/*
+ * Parse @params in QEMU's traditional KEY=VALUE,... syntax.
+ *
+ * If @implied_key, the first KEY= can be omitted.  @implied_key is
+ * implied then, and VALUE can't be empty or contain ',' or '='.
+ *
+ * A parameter "help" or "?" without a value isn't added to the
+ * resulting dictionary, but instead is interpreted as help request.
+ * All other options are parsed and returned normally so that context
+ * specific help can be printed.
+ *
+ * If @p_help is not NULL, store whether help is requested there.
+ * If @p_help is NULL and help is requested, fail.
+ *
+ * On success, return a dictionary of the parsed keys and values.
+ * On failure, store an error through @errp and return NULL.
+ */
+QDict *keyval_parse(const char *params, const char *implied_key,
+bool *p_help, Error **errp)
+{
+QDict *qdict = qdict_new();
+QDict *ret = keyval_parse_into(qdict, params, implied_key, p_help, errp);
+
+if (!ret) {
+qobject_unref(qdict);
+}
+return ret;
+}
-- 
2.31.1





[PULL 09/12] machine: pass QAPI struct to mc->smp_parse

2021-06-08 Thread Paolo Bonzini
As part of converting -smp to a property with a QAPI type, define
the struct and use it to do the actual parsing.  machine_smp_parse
takes care of doing the QemuOpts->QAPI conversion by hand, for now.

Signed-off-by: Paolo Bonzini 
---
 hw/core/machine.c   | 33 +++--
 hw/i386/pc.c| 18 --
 include/hw/boards.h |  2 +-
 qapi/machine.json   | 27 +++
 4 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 5a9c97ccc5..9ad8341a31 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -739,12 +739,12 @@ void machine_set_cpu_numa_node(MachineState *machine,
 }
 }
 
-static void smp_parse(MachineState *ms, QemuOpts *opts, Error **errp)
+static void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp)
 {
-unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
-unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
-unsigned cores   = qemu_opt_get_number(opts, "cores", 0);
-unsigned threads = qemu_opt_get_number(opts, "threads", 0);
+unsigned cpus= config->has_cpus ? config->cpus : 0;
+unsigned sockets = config->has_sockets ? config->sockets : 0;
+unsigned cores   = config->has_cores ? config->cores : 0;
+unsigned threads = config->has_threads ? config->threads : 0;
 
 /* compute missing values, prefer sockets over cores over threads */
 if (cpus == 0 || sockets == 0) {
@@ -754,8 +754,7 @@ static void smp_parse(MachineState *ms, QemuOpts *opts, 
Error **errp)
 sockets = sockets > 0 ? sockets : 1;
 cpus = cores * threads * sockets;
 } else {
-ms->smp.max_cpus =
-qemu_opt_get_number(opts, "maxcpus", cpus);
+ms->smp.max_cpus = config->has_maxcpus ? config->maxcpus : cpus;
 sockets = ms->smp.max_cpus / (cores * threads);
 }
 } else if (cores == 0) {
@@ -773,8 +772,7 @@ static void smp_parse(MachineState *ms, QemuOpts *opts, 
Error **errp)
 return;
 }
 
-ms->smp.max_cpus =
-qemu_opt_get_number(opts, "maxcpus", cpus);
+ms->smp.max_cpus = config->has_maxcpus ? config->maxcpus : cpus;
 
 if (ms->smp.max_cpus < cpus) {
 error_setg(errp, "maxcpus must be equal to or greater than smp");
@@ -1129,7 +1127,22 @@ bool machine_smp_parse(MachineState *ms, QemuOpts *opts, 
Error **errp)
 ERRP_GUARD();
 
 if (opts) {
-mc->smp_parse(ms, opts, errp);
+SMPConfiguration config = {
+.has_cpus = !!qemu_opt_get(opts, "cpus"),
+.cpus = qemu_opt_get_number(opts, "cpus", 0),
+.has_sockets = !!qemu_opt_get(opts, "sockets"),
+.sockets = qemu_opt_get_number(opts, "sockets", 0),
+.has_dies = !!qemu_opt_get(opts, "dies"),
+.dies = qemu_opt_get_number(opts, "dies", 0),
+.has_cores = !!qemu_opt_get(opts, "cores"),
+.cores = qemu_opt_get_number(opts, "cores", 0),
+.has_threads = !!qemu_opt_get(opts, "threads"),
+.threads = qemu_opt_get_number(opts, "threads", 0),
+.has_maxcpus = !!qemu_opt_get(opts, "maxcpus"),
+.maxcpus = qemu_opt_get_number(opts, "maxcpus", 0),
+};
+
+mc->smp_parse(ms, &config, errp);
 if (*errp) {
 return false;
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index cce275dcb1..8e1220db72 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -710,13 +710,13 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int 
level)
  * This function is very similar to smp_parse()
  * in hw/core/machine.c but includes CPU die support.
  */
-static void pc_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp)
+static void pc_smp_parse(MachineState *ms, SMPConfiguration *config, Error 
**errp)
 {
-unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
-unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
-unsigned dies = qemu_opt_get_number(opts, "dies", 1);
-unsigned cores   = qemu_opt_get_number(opts, "cores", 0);
-unsigned threads = qemu_opt_get_number(opts, "threads", 0);
+unsigned cpus= config->has_cpus ? config->cpus : 0;
+unsigned sockets = config->has_sockets ? config->sockets : 0;
+unsigned dies= config->has_dies ? config->dies : 1;
+unsigned cores   = config->has_cores ? config->cores : 0;
+unsigned threads = config->has_threads ? config->threads : 0;
 
 /* compute missing values, prefer sockets over cores over threads */
 if (cpus == 0 || sockets == 0) {
@@ -726,8 +726,7 @@ static void pc_smp_parse(MachineState *ms, QemuOpts *opts, 
Error **errp)
 sockets = sockets > 0 ? sockets : 1;
 cpus = cores * threads * dies * sockets;
 } else {
-ms->smp.max_cpus =
-qemu_opt_get_number(opts, "maxcpus", cpus);
+ms->smp.max_cpus = config->has_maxcpus ? config->maxcpus : 

Re: [PATCH v16 03/99] qtest: Add qtest_has_accel() method

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/7/21 3:16 PM, Thomas Huth wrote:
> On 04/06/2021 17.51, Alex Bennée wrote:
>> From: Philippe Mathieu-Daudé 
>>
>> Introduce the qtest_has_accel() method which allows a runtime
>> query on whether a QEMU instance has an accelerator built-in.
>>
>> Reviewed-by: Eric Blake 
>> Reviewed-by: Alex Bennée 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> Signed-off-by: Alex Bennée 
>> Message-Id: <20210505125806.1263441-4-phi...@redhat.com>
>> ---
>>   tests/qtest/libqos/libqtest.h |  8 
>>   tests/qtest/libqtest.c        | 29 +
>>   2 files changed, 37 insertions(+)

>>   +bool qtest_has_accel(const char *accel_name)
>> +{
>> +    bool has_accel = false;
>> +    QDict *response;
>> +    QList *accels;
>> +    QListEntry *accel;
>> +    QTestState *qts;
>> +
>> +    qts = qtest_initf("-accel qtest -machine none");
>> +    response = qtest_qmp(qts, "{'execute': 'query-accels'}");
>> +    accels = qdict_get_qlist(response, "return");
>> +
>> +    QLIST_FOREACH_ENTRY(accels, accel) {
>> +        QDict *accel_dict = qobject_to(QDict, qlist_entry_obj(accel));
>> +        const char *name = qdict_get_str(accel_dict, "name");
>> +
>> +        if (g_str_equal(name, accel_name)) {
>> +            has_accel = true;
>> +            break;
>> +        }
>> +    }
>> +    qobject_unref(response);
>> +
>> +    qtest_quit(qts);
>> +
>> +    return has_accel;
>> +}
> 
> This spawns a new instance of QEMU each time the function is called -
> which could slow down testing quite a bit if a test calls this function
> quite often. Would it be feasible to cache this information, so that you
> only have to run a new instance of QEMU once?

Good idea!
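
Something like this could cache the result (illustrative sketch only, not
the final implementation):

    static GPtrArray *qtest_get_accel_names(void)
    {
        static GPtrArray *accel_names;
        QDict *response;
        QList *accels;
        QListEntry *accel;
        QTestState *qts;

        if (accel_names) {
            return accel_names;
        }

        /* query QEMU once and keep the names around for later calls */
        accel_names = g_ptr_array_new_with_free_func(g_free);
        qts = qtest_initf("-accel qtest -machine none");
        response = qtest_qmp(qts, "{'execute': 'query-accels'}");
        accels = qdict_get_qlist(response, "return");

        QLIST_FOREACH_ENTRY(accels, accel) {
            QDict *accel_dict = qobject_to(QDict, qlist_entry_obj(accel));

            g_ptr_array_add(accel_names,
                            g_strdup(qdict_get_str(accel_dict, "name")));
        }
        qobject_unref(response);
        qtest_quit(qts);

        return accel_names;
    }

    bool qtest_has_accel(const char *accel_name)
    {
        GPtrArray *names = qtest_get_accel_names();
        guint i;

        for (i = 0; i < names->len; i++) {
            if (g_str_equal(g_ptr_array_index(names, i), accel_name)) {
                return true;
            }
        }
        return false;
    }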




Re: [PULL 00/12] Machine and OS X changes for 2021-06-08

2021-06-08 Thread Daniel P . Berrangé
On Tue, Jun 08, 2021 at 11:40:05AM +0200, Paolo Bonzini wrote:
> The following changes since commit 6f398e533f5e259b4f937f4aa9de970f7201d166:
> 
>   Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210604' 
> into staging (2021-06-05 11:25:52 +0100)
> 
> are available in the Git repository at:
> 
>   https://gitlab.com/bonzini/qemu.git tags/for-upstream
> 
> for you to fetch changes up to 8f9f729185e3ac8d3c5a65d81eb9e74e229901ea:
> 
>   vnc: avoid deprecation warnings for SASL on OS X (2021-06-07 10:20:23 -0400)
> 
> 
> * introduce "-M smp" (myself)
> * avoid deprecation warnings for SASL on macOS 10.11 or newer.
> 
> 
> Paolo Bonzini (12):
>   qom: export more functions for use with non-UserCreatable objects
>   keyval: introduce keyval_merge
>   keyval: introduce keyval_parse_into
>   vl: switch -M parsing to keyval
>   qemu-option: remove now-dead code
>   machine: move dies from X86MachineState to CpuTopology
>   machine: move common smp_parse code to caller
>   machine: add error propagation to mc->smp_parse
>   machine: pass QAPI struct to mc->smp_parse
>   machine: reject -smp dies!=1 for non-PC machines
>   machine: add smp compound property
>   vnc: avoid deprecation warnings for SASL on OS X

None of these changes have any reviewed-by tags. Was this really meant
to be sent as a PULL before getting reviews ?

Regards,
Daniel
-- 
|: https://berrange.com          -o-  https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org           -o-  https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-  https://www.instagram.com/dberrange :|




Re: [PATCH v3 13/33] block/nbd: introduce nbd_client_connection_release()

2021-06-08 Thread Vladimir Sementsov-Ogievskiy

03.06.2021 00:27, Eric Blake wrote:

On Fri, Apr 16, 2021 at 11:08:51AM +0300, Vladimir Sementsov-Ogievskiy wrote:

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/nbd.c | 43 ++-
  1 file changed, 26 insertions(+), 17 deletions(-)


Commit message said what, but not why.  Presumably this is one more
bit of refactoring to make the upcoming file split in a later patch
easier.  But patch 12/33 said it was the last step before a new file,
and this patch isn't yet at a new file.  Missing some continuity in
your commit messages?



diff --git a/block/nbd.c b/block/nbd.c
index 21a4039359..8531d019b2 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -118,7 +118,7 @@ typedef struct BDRVNBDState {
  NBDClientConnection *conn;
  } BDRVNBDState;
  
-static void nbd_free_connect_thread(NBDClientConnection *conn);

+static void nbd_client_connection_release(NBDClientConnection *conn);


Is it necessary for a forward declaration, or can you just implement
the new function prior to its users?



Actually, otherwise we'd need a forward declaration for 
nbd_client_connection_do_free() instead. Anyway, this all doesn't make much 
sense before the move to a separate file.


--
Best regards,
Vladimir



Re: [PATCH v4 0/8] GICv3 LPI and ITS feature implementation

2021-06-08 Thread Peter Maydell
On Wed, 2 Jun 2021 at 19:00, Shashi Mallela  wrote:
>
> This patchset implements qemu device model for enabling physical
> LPI support and ITS functionality in GIC as per GICv3 specification.
> Both flat table and 2 level tables are implemented.The ITS commands
> for adding/deleting ITS table entries,trigerring LPI interrupts are
> implemented.Translated LPI interrupt ids are processed by redistributor
> to determine priority and set pending state appropriately before
> forwarding the same to cpu interface.
> The ITS feature support has been added to sbsa-ref platform as well as
> virt platform,wherein the emulated functionality co-exists with kvm
> kernel functionality.
>
> Changes in v4:
>  - review comments addressed
>  - redesigned the lpi pending priority determination logic to scan
>LPI pending table and config table right after lpi pending
>statechanges(SET/RESET) through gicv3_redist_update_lpi() call to
>determinethe highest priority lpi among the active lpis and save
>the details.The high priority interrupt determination logic in
>redistributor now usesthe saved high priority lpi details
>(alongside other interrupt types) instead of calling
>gicv3_redist_update_lpi() everytime(as in v3).This
>significantly reduces the call overhead associated with
>address_space_read of lpi config and pending tables.
>Testing with this new design showed no boot delays.
>  - profiled execution of gicv3_redist_update_lpi() using perf and
>framegraph to confirm execution is within normal limits.
>Also,specifically measured execution time to be an average of 175us
>with linux distro testing.
>  - All kvm_unit_tests PASS

This still fails to build with clang, in the same way as v3 failed.
Also (as noted in my other email), you need to integrate the updates
to the ACPI table test data into this series; 'make' and 'make check'
should work at every step of the patch series.

thanks
-- PMM



Re: [PATCH v4 1/8] hw/intc: GICv3 ITS initial framework

2021-06-08 Thread Peter Maydell
On Wed, 2 Jun 2021 at 19:00, Shashi Mallela  wrote:
>
> Added register definitions relevant to ITS,implemented overall
> ITS device framework with stubs for ITS control and translater
> regions read/write,extended ITS common to handle mmio init between
> existing kvm device and newer qemu device.
>
> Signed-off-by: Shashi Mallela 
> ---
>  hw/intc/arm_gicv3_its.c| 240 +
>  hw/intc/arm_gicv3_its_common.c |   8 +-
>  hw/intc/arm_gicv3_its_kvm.c|   2 +-
>  hw/intc/gicv3_internal.h   |  88 +++--
>  hw/intc/meson.build|   1 +
>  include/hw/intc/arm_gicv3_its_common.h |   9 +-
>  6 files changed, 331 insertions(+), 17 deletions(-)
>  create mode 100644 hw/intc/arm_gicv3_its.c> @@ -129,7 +132,6 @@ static void 
> gicv3_its_common_reset(DeviceState *dev)

>  s->cbaser = 0;
>  s->cwriter = 0;
>  s->creadr = 0;
> -s->iidr = 0;

You don't need to delete this -- leave it for the benefit of the KVM code.

Otherwise
Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH v3 09/33] block/nbd: bs-independent interface for nbd_co_establish_connection()

2021-06-08 Thread Vladimir Sementsov-Ogievskiy

02.06.2021 22:14, Eric Blake wrote:

On Fri, Apr 16, 2021 at 11:08:47AM +0300, Vladimir Sementsov-Ogievskiy wrote:

We are going to split connection code to separate file. Now we are


to a separate


ready to give nbd_co_establish_connection() clean and bs-independent
interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Roman Kagan 
---
  block/nbd.c | 49 +++--
  1 file changed, 31 insertions(+), 18 deletions(-)




-static int coroutine_fn
-nbd_co_establish_connection(BlockDriverState *bs, Error **errp)
+/*
+ * Get a new connection in context of @thr:
+ *   if thread is running, wait for completion


if the thread is running,...


+ *   if thread is already succeeded in background, and user didn't get the


if the thread already succeeded in the background,...


+ * result, just return it now
+ *   otherwise if thread is not running, start a thread and wait for completion


otherwise, the thread is not running, so start...


+ */
+static coroutine_fn QIOChannelSocket *
+nbd_co_establish_connection(NBDConnectThread *thr, Error **errp)
  {
+QIOChannelSocket *sioc = NULL;
  QemuThread thread;
-BDRVNBDState *s = bs->opaque;
-NBDConnectThread *thr = s->connect_thread;
-
-assert(!s->sioc);
  
  qemu_mutex_lock(&thr->mutex);
  
+/*

+ * Don't call nbd_co_establish_connection() in several coroutines in
+ * parallel. Only one call at once is supported.
+ */
+assert(!thr->wait_co);
+
  if (!thr->running) {
  if (thr->sioc) {
  /* Previous attempt finally succeeded in background */
-goto out;
+sioc = g_steal_pointer(&thr->sioc);
+qemu_mutex_unlock(&thr->mutex);


Worth using QEMU_LOCK_GUARD() here?


Refactored together with other critical sections in patch 15
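
For reference, a scoped guard over this early-return path would look
roughly like the sketch below; QEMU_LOCK_GUARD() itself keeps the mutex to
the end of the scope, which doesn't fit a function that later yields with
the mutex dropped:

    WITH_QEMU_LOCK_GUARD(&thr->mutex) {
        if (!thr->running && thr->sioc) {
            /* Previous attempt finally succeeded in background */
            return g_steal_pointer(&thr->sioc);
        }
    }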




+
+return sioc;
  }
+
  thr->running = true;
  error_free(thr->err);
  thr->err = NULL;


Reviewed-by: Eric Blake 




--
Best regards,
Vladimir



Re: [PATCH v3 17/33] nbd/client-connection: implement connection retry

2021-06-08 Thread Vladimir Sementsov-Ogievskiy

11.05.2021 23:54, Roman Kagan wrote:

On Fri, Apr 16, 2021 at 11:08:55AM +0300, Vladimir Sementsov-Ogievskiy wrote:

Add an option for thread to retry connection until success. We'll use
nbd/client-connection both for reconnect and for initial connection in
nbd_open(), so we need a possibility to use same NBDClientConnection
instance to connect once in nbd_open() and then use retry semantics for
reconnect.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/nbd.h |  2 ++
  nbd/client-connection.c | 55 +
  2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 5d86e6a393..5bb54d831c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -409,6 +409,8 @@ const char *nbd_err_lookup(int err);
  /* nbd/client-connection.c */
  typedef struct NBDClientConnection NBDClientConnection;
  
+void nbd_client_connection_enable_retry(NBDClientConnection *conn);

+
  NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
 bool do_negotiation,
 const char *export_name,
diff --git a/nbd/client-connection.c b/nbd/client-connection.c
index ae4a77f826..002bd91f42 100644
--- a/nbd/client-connection.c
+++ b/nbd/client-connection.c
@@ -36,6 +36,8 @@ struct NBDClientConnection {
  NBDExportInfo initial_info;
  bool do_negotiation;
  
+bool do_retry;

+
  /*
   * Result of last attempt. Valid in FAIL and SUCCESS states.
   * If you want to steal error, don't forget to set pointer to NULL.
@@ -52,6 +54,15 @@ struct NBDClientConnection {
  Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
  };
  
+/*

+ * The function isn't protected by any mutex, so call it when thread is not
+ * running.
+ */
+void nbd_client_connection_enable_retry(NBDClientConnection *conn)
+{
+conn->do_retry = true;
+}
+
  NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
 bool do_negotiation,
 const char *export_name,
@@ -144,24 +155,37 @@ static void *connect_thread_func(void *opaque)
  NBDClientConnection *conn = opaque;
  bool do_free;
  int ret;
+uint64_t timeout = 1;
+uint64_t max_timeout = 16;
+
+while (true) {
+conn->sioc = qio_channel_socket_new();
+
+error_free(conn->err);
+conn->err = NULL;
+conn->updated_info = conn->initial_info;
+
+ret = nbd_connect(conn->sioc, conn->saddr,
+  conn->do_negotiation ? &conn->updated_info : NULL,
+  conn->tlscreds, &conn->ioc, &conn->err);
+conn->updated_info.x_dirty_bitmap = NULL;
+conn->updated_info.name = NULL;
+
+if (ret < 0) {
+object_unref(OBJECT(conn->sioc));
+conn->sioc = NULL;
+if (conn->do_retry) {
+sleep(timeout);
+if (timeout < max_timeout) {
+timeout *= 2;
+}
+continue;
+}
+}


How is it supposed to get canceled?



The next commit does it
 

-conn->sioc = qio_channel_socket_new();
-
-error_free(conn->err);
-conn->err = NULL;
-conn->updated_info = conn->initial_info;
-
-ret = nbd_connect(conn->sioc, conn->saddr,
-  conn->do_negotiation ? &conn->updated_info : NULL,
-  conn->tlscreds, &conn->ioc, &conn->err);
-if (ret < 0) {
-object_unref(OBJECT(conn->sioc));
-conn->sioc = NULL;
+break;
  }
  
-conn->updated_info.x_dirty_bitmap = NULL;

-conn->updated_info.name = NULL;
-
  WITH_QEMU_LOCK_GUARD(&conn->mutex) {
  assert(conn->running);
  conn->running = false;
@@ -172,7 +196,6 @@ static void *connect_thread_func(void *opaque)
  do_free = conn->detached;
  }
  
-

  if (do_free) {
  nbd_client_connection_do_free(conn);
  }
--
2.29.2




--
Best regards,
Vladimir



Re: [PATCH v4 2/8] hw/intc: GICv3 ITS register definitions added

2021-06-08 Thread Peter Maydell
On Wed, 2 Jun 2021 at 19:00, Shashi Mallela  wrote:
>
> Defined descriptors for ITS device table,collection table and ITS
> command queue entities.Implemented register read/write functions,
> extract ITS table parameters and command queue parameters,extended
> gicv3 common to capture qemu address space(which host the ITS table
> platform memories required for subsequent ITS processing) and
> initialize the same in ITS device.
>
> Signed-off-by: Shashi Mallela 

> @@ -41,7 +192,73 @@ static MemTxResult its_writel(GICv3ITSState *s, hwaddr 
> offset,
>uint64_t value, MemTxAttrs attrs)
>  {
>  MemTxResult result = MEMTX_OK;
> +int index;
>
> +switch (offset) {
> +case GITS_CTLR:
> +s->ctlr |= (value & ~(s->ctlr));
> +
> +if (s->ctlr & ITS_CTLR_ENABLED) {
> +extract_table_params(s);
> +extract_cmdq_params(s);
> +s->creadr = 0;
> +}
> +break;
> +case GITS_CBASER:
> +/*
> + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is
> + * already enabled
> + */
> +if (!(s->ctlr & ITS_CTLR_ENABLED)) {
> +s->cbaser = deposit64(s->cbaser, 0, 32, value);
> +s->creadr = 0;
> +}
> +break;
> +case GITS_CBASER + 4:
> +/*
> + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is
> + * already enabled
> + */
> +if (!(s->ctlr & ITS_CTLR_ENABLED)) {
> +s->cbaser = deposit64(s->cbaser, 32, 32, value);
> +}
> +break;
> +case GITS_CWRITER:
> +s->cwriter = deposit64(s->cwriter, 0, 32,
> +   (value & ~R_GITS_CWRITER_RETRY_MASK));
> +break;
> +case GITS_CWRITER + 4:
> +s->cwriter = deposit64(s->cwriter, 32, 32,
> +   (value & ~R_GITS_CWRITER_RETRY_MASK));

The RETRY bit is at the bottom of the 64-bit register, so you
don't want to mask with it when we're writing the top 32 bits
(otherwise you incorrectly clear bit 33 of the full 64-bit register).
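
Something like this (untested) should do it:

    case GITS_CWRITER:
        s->cwriter = deposit64(s->cwriter, 0, 32,
                               (value & ~R_GITS_CWRITER_RETRY_MASK));
        break;
    case GITS_CWRITER + 4:
        /* RETRY is in the low word, so the high word is written unmasked */
        s->cwriter = deposit64(s->cwriter, 32, 32, value);
        break;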

> +break;
> +case GITS_BASER ... GITS_BASER + 0x3f:
> +/*
> + * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is
> + * already enabled
> + */
> +if (!(s->ctlr & ITS_CTLR_ENABLED)) {
> +index = (offset - GITS_BASER) / 8;
> +
> +if (offset & 7) {
> +s->baser[index] = deposit64(s->baser[index], 32, 32,
> +(value & ~GITS_BASER_VAL_MASK));
> +} else {
> +s->baser[index] = deposit64(s->baser[index], 0, 32,
> +(value & ~GITS_BASER_VAL_MASK));
> +}

This has two problems:
(1) same as above, you're masking a 32-bit half-value with a MASK
constant that's for the full 64-bit value
(2) here (unlike with CWRITER) we don't want to clear the non-writeable
bits but leave them alone.

Something like this should work:

   if (offset & 7) {
   value <<= 32;
   value &= ~GITS_BASER_VAL_MASK;
   s->baser[index] &= GITS_BASER_VAL_MASK |
MAKE_64BIT_MASK(0, 32);
   s->baser[index] |= value;
} else {
   value &= ~GITS_BASER_VAL_MASK;
   s->baser[index] &= GITS_BASER_VAL_MASK |
MAKE_64BIT_MASK(32, 32);
   s->baser[index] |= value;
}

> +}
> +break;
> +case GITS_IIDR:
> +case GITS_IDREGS ... GITS_IDREGS + 0x2f:
> +/* RO registers, ignore the write */
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "%s: invalid guest write to RO register at offset "
> +  TARGET_FMT_plx "\n", __func__, offset);
> +break;
> +default:
> +result = MEMTX_ERROR;
> +break;
> +}
>  return result;
>  }

> @@ -57,7 +322,42 @@ static MemTxResult its_writell(GICv3ITSState *s, hwaddr 
> offset,
> uint64_t value, MemTxAttrs attrs)
>  {
>  MemTxResult result = MEMTX_OK;
> +int index;
>
> +switch (offset) {
> +case GITS_BASER ... GITS_BASER + 0x3f:
> +/*
> + * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is
> + * already enabled
> + */
> +if (!(s->ctlr & ITS_CTLR_ENABLED)) {
> +index = (offset - GITS_BASER) / 8;
> +s->baser[index] |= (value & ~GITS_BASER_VAL_MASK);

This will allow the guest to write a 1 to a writeable bit,
but will not allow it to write a 0 again...
 s->baser[index] &= GITS_BASER_VAL_MASK;
 s->baser[index] |= (value & ~GITS_BASER_VAL_MASK);

Why VAL_MASK, by the way? The mask is defining the set of read-only bits,
so RO_MASK seems like a clearer name.

> +}
> +break;
> +case GIT

Re: [PATCH v3 17/33] nbd/client-connection: implement connection retry

2021-06-08 Thread Vladimir Sementsov-Ogievskiy

03.06.2021 20:49, Vladimir Sementsov-Ogievskiy wrote:

03.06.2021 19:17, Eric Blake wrote:

On Fri, Apr 16, 2021 at 11:08:55AM +0300, Vladimir Sementsov-Ogievskiy wrote:

Add an option for thread to retry connection until success. We'll use


for a thread to retry connection until it succeeds.


nbd/client-connection both for reconnect and for initial connection in
nbd_open(), so we need a possibility to use same NBDClientConnection
instance to connect once in nbd_open() and then use retry semantics for
reconnect.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/nbd.h |  2 ++
  nbd/client-connection.c | 55 +
  2 files changed, 41 insertions(+), 16 deletions(-)

+++ b/nbd/client-connection.c
@@ -36,6 +36,8 @@ struct NBDClientConnection {
  NBDExportInfo initial_info;
  bool do_negotiation;
+    bool do_retry;
+
  /*
   * Result of last attempt. Valid in FAIL and SUCCESS states.
   * If you want to steal error, don't forget to set pointer to NULL.
@@ -52,6 +54,15 @@ struct NBDClientConnection {
  Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
  };
+/*
+ * The function isn't protected by any mutex, so call it when thread is not


so only call it when the thread is not yet running

or maybe even

only call it when the client connection attempt has not yet started


+ * running.
+ */
+void nbd_client_connection_enable_retry(NBDClientConnection *conn)
+{
+    conn->do_retry = true;
+}
+
  NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
 bool do_negotiation,
 const char *export_name,
@@ -144,24 +155,37 @@ static void *connect_thread_func(void *opaque)
  NBDClientConnection *conn = opaque;
  bool do_free;
  int ret;
+    uint64_t timeout = 1;
+    uint64_t max_timeout = 16;
+
+    while (true) {
+    conn->sioc = qio_channel_socket_new();
+
+    error_free(conn->err);
+    conn->err = NULL;
+    conn->updated_info = conn->initial_info;
+
+    ret = nbd_connect(conn->sioc, conn->saddr,
+  conn->do_negotiation ? &conn->updated_info : NULL,
+  conn->tlscreds, &conn->ioc, &conn->err);
+    conn->updated_info.x_dirty_bitmap = NULL;
+    conn->updated_info.name = NULL;


I'm not quite sure I follow the allocation here: if we passed in
&conn->updated_info which got modified in-place by nbd_connect, then
are we risking a memory leak by ignoring the x_dirty_bitmap and name
set by that call?


Yes, that looks strange :\. I'll check when I prepare the new version and 
either fix it or leave a comment here.


x_dirty_bitmap and name are not set by nbd_connect(); they are IN parameters of 
nbd_receive_negotiate(). Their allocations are owned by conn->initial_info. So, 
here we've copied the pointers into conn->updated_info, and then we zero them out 
when they are no longer needed (and, in fact, so that we don't keep them around and 
don't end up returning our internal allocations to the user). I'll add a comment here.
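
Something like:

    /*
     * x_dirty_bitmap and name are in-parameters of nbd_receive_negotiate()
     * and their allocations are owned by conn->initial_info.  Drop the
     * pointers we copied into updated_info so that we never hand our
     * internal allocations back to the caller.
     */
    conn->updated_info.x_dirty_bitmap = NULL;
    conn->updated_info.name = NULL;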






+
+    if (ret < 0) {
+    object_unref(OBJECT(conn->sioc));
+    conn->sioc = NULL;
+    if (conn->do_retry) {
+    sleep(timeout);


This is a bare sleep in a function not marked as coroutine_fn.  Do we
need to instead use coroutine sleep for better response to an early
exit if initialization is taking too long?


We are in a separate, hand-created thread, which knows nothing about 
coroutines, iothreads, aio contexts etc. I think a bare sleep is what should 
be used here.




+    if (timeout < max_timeout) {
+    timeout *= 2;
+    }
+    continue;
+    }
+    }
-    conn->sioc = qio_channel_socket_new();
-
-    error_free(conn->err);
-    conn->err = NULL;
-    conn->updated_info = conn->initial_info;
-
-    ret = nbd_connect(conn->sioc, conn->saddr,
-  conn->do_negotiation ? &conn->updated_info : NULL,
-  conn->tlscreds, &conn->ioc, &conn->err);
-    if (ret < 0) {
-    object_unref(OBJECT(conn->sioc));
-    conn->sioc = NULL;
+    break;
  }
-    conn->updated_info.x_dirty_bitmap = NULL;
-    conn->updated_info.name = NULL;
-
  WITH_QEMU_LOCK_GUARD(&conn->mutex) {
  assert(conn->running);
  conn->running = false;
@@ -172,7 +196,6 @@ static void *connect_thread_func(void *opaque)
  do_free = conn->detached;
  }
-
  if (do_free) {
  nbd_client_connection_do_free(conn);


Spurious hunk?



Will drop.




--
Best regards,
Vladimir



Re: [PATCH v4 3/8] hw/intc: GICv3 ITS command queue framework

2021-06-08 Thread Peter Maydell
On Wed, 2 Jun 2021 at 19:00, Shashi Mallela  wrote:
>
> Added functionality to trigger ITS command queue processing on
> write to CWRITE register and process each command queue entry to
> identify the command type and handle commands like MAPD,MAPC,SYNC.
>
> Signed-off-by: Shashi Mallela 
> ---
>  hw/intc/arm_gicv3_its.c  | 295 +++
>  hw/intc/gicv3_internal.h |  37 +
>  2 files changed, 332 insertions(+)

> +if ((icid > s->ct.max_collids) || (rdbase > s->gicv3->num_cpu)) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "ITS MAPC: invalid collection table attributes "
> +  "icid %d rdbase %lu\n",  icid, rdbase);
> +/*
> + * in this implementation,in case of error

Still missing space after comma.

> + * we ignore this command and move onto the next
> + * command in the queue
> + */
> +} else {
> +res = update_cte(s, icid, valid, rdbase);
> +}
> +
> +return res;
> +}


> +} else {
> +/*
> + * in this implementation,in case of dma read/write error
> + * we stall the command processing
> + */

Ditto.

> +s->creadr = FIELD_DP64(s->creadr, GITS_CREADR, STALLED, 1);
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "%s: %x cmd processing failed!!\n", __func__, cmd);

The double-exclamation marks are unnecessary :-)

> +break;
> +}
> +}
> +}

Otherwise

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [RFC PATCH 00/11] hw/nvme: reimplement all multi-aio commands with custom aiocbs

2021-06-08 Thread Stefan Hajnoczi
On Fri, Jun 04, 2021 at 08:52:26AM +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> This series reimplements flush, dsm, copy, zone reset and format nvm to
> allow cancellation. I posted an RFC back in March ("hw/block/nvme:
> convert ad-hoc aio tracking to aiocb") and I've applied some feedback
> from Stefan and reimplemented the remaining commands.
> 
> The basic idea is to define custom AIOCBs for these commands. The custom
> AIOCB takes care of issuing all the "nested" AIOs one by one instead of
> blindly sending them off simultaneously without tracking the returned
> aiocbs.
> 
> I've kept the RFC since I'm still new to using the block layer like
> this. I was hoping that Stefan could find some time to look over this -
> this is a huge series, so I don't expect non-nvme folks to spend a large
> amount of time on it, but I would really like feedback on my approach in
> the reimplementation of flush and format. Those commands are special in
> that may issue AIOs to multiple namespaces and thus, to multiple block
> backends. Since this device does not support iothreads, I've opted for
> simply always returning the main loop aio context, but I wonder if this
> is acceptable or not. It might be the case that this should contain an
> assert of some kind, in case someone starts adding iothread support.

This approach looks fine to me. Vladimir mentioned coroutines, which
have simpler code for sequential I/O, but don't support cancellation.
Since cancellation is the point of this series I think sticking to the
aio approach makes sense.
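
Roughly, the pattern being discussed looks like this (a loose sketch with
invented names such as nvme_seq_issue_one(), not the series' actual code):

    typedef struct NvmeSeqAIOCB {
        BlockAIOCB common;
        BlockAIOCB *aiocb;   /* the single nested request in flight */
        int idx;             /* next sub-operation to issue */
        int nr_ops;
    } NvmeSeqAIOCB;

    static void nvme_seq_cb(void *opaque, int ret)
    {
        NvmeSeqAIOCB *iocb = opaque;

        iocb->aiocb = NULL;

        if (ret < 0 || iocb->idx == iocb->nr_ops) {
            iocb->common.cb(iocb->common.opaque, ret);
            qemu_aio_unref(iocb);
            return;
        }

        /* nvme_seq_issue_one() (invented) submits the next blk_aio_*()
         * with nvme_seq_cb as its completion and returns its aiocb */
        iocb->aiocb = nvme_seq_issue_one(iocb, iocb->idx++);
    }

    static void nvme_seq_cancel(BlockAIOCB *acb)
    {
        NvmeSeqAIOCB *iocb = container_of(acb, NvmeSeqAIOCB, common);

        /* stop issuing further nested AIOs and cancel the in-flight one */
        iocb->idx = iocb->nr_ops;
        if (iocb->aiocb) {
            blk_aio_cancel_async(iocb->aiocb);
        }
    }

nvme_seq_cancel() would then be wired up as the .cancel_async handler of
the AIOCBInfo.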

Regarding coroutine cancellation, it's hard to add since there is
already a lot of coroutine code that's not written with cancellation in
mind.

I think I would approach it by adding a .cancel_cb() field to Coroutine
that does nothing by default (because existing code doesn't support
cancellation and we must wait for the operation to complete). Cases that
do support cancellation would install a .cancel_cb() across the yield that
causes the operation the coroutine is waiting on to complete early.
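
To make the idea concrete (the field and function names here are made up):

    /* assuming Coroutine gains cancel_cb/cancel_opaque fields */
    static void coroutine_fn my_op_wait(MyOpState *s)
    {
        Coroutine *co = qemu_coroutine_self();

        co->cancel_cb = my_op_wake_early; /* completes the pending op early */
        co->cancel_opaque = s;
        qemu_coroutine_yield();
        co->cancel_cb = NULL;
    }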

An alternative approach is to re-enter the coroutine, but this requires
all yield points in QEMU to check for cancellation. I don't think this
is practical because converting all the code would be hard.

Anyway, the aio approach looks fine.

Stefan




Re: [PATCH v8 02/12] accel: Introduce 'query-accels' QMP command

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/3/21 7:19 PM, Alex Bennée wrote:
> 
> Philippe Mathieu-Daudé  writes:
> 
>> Introduce the 'query-accels' QMP command which returns a list
>> of built-in accelerator names.
>>
>> - Accelerator is a QAPI enum of all existing accelerators,
>>
>> - AcceleratorInfo is a QAPI structure providing accelerator
>>   specific information. Currently the common structure base
>>   provides the name of the accelerator, while the specific
>>   part is empty, but each accelerator can expand it.
>>
>> - 'query-accels' QMP command returns a list of @AcceleratorInfo
>>
>> For example on a KVM-only build we get:
>>
>> { "execute": "query-accels" }
>> {
>> "return": [
>> {
>> "name": "qtest"
>> },
>> {
>> "name": "kvm"
>> }
>> ]
>> }
>>
>> Reviewed-by: Eric Blake 
>> Reviewed-by: Alex Bennée 
>> Tested-by: Alex Bennée 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>> v8:
>> - Include code snippet from Markus adding to machine-target.json
>>   to be able to use enum values or union branches conditional.
>> - Use accel_find() on enum to be sure the accelerator is enabled
>>   at runtime (chat with jsnow / eblake).
> 
> Hmm something broke because now I get:
> 
>  /usr/lib/x86_64-linux-gnu/libpixman-1.so -lgthread-2.0 -lglib-2.0 -lstdc++ 
> -Wl,--end-group
> /usr/bin/ld: libqemu-aarch64_be-linux-user.fa.p/accel_accel-qmp.c.o: in 
> function `qmp_query_accels':
> /home/alex/lsrc/qemu.git/builds/arm.all/../../accel/accel-qmp.c:15: undefined 
> reference to `Accelerator_lookup'
> collect2: error: ld returned 1 exit status
> [1327/1413] Linking target qemu-io

Sorry, I missed that for user-mode; it will be fixed in v9.




Re: [PATCH 8/9] virtiofsd: Optionally fill lo_inode.fhandle

2021-06-08 Thread Dr. David Alan Gilbert
* Max Reitz (mre...@redhat.com) wrote:
> When the inode_file_handles option is set, try to generate a file handle
> for new inodes instead of opening an O_PATH FD.
> 
> Being able to open these again will require CAP_DAC_READ_SEARCH, so the
> description text tells the user they will also need to specify
> -o modcaps=+dac_read_search.
> 
> Generating a file handle returns the mount ID it is valid for.  Opening
> it will require an FD instead.  We have mount_fds to map an ID to an FD.
> get_file_handle() fills the hash map by opening the file we have
> generated a handle for.  To verify that the resulting FD indeed
> represents the handle's mount ID, we use statx().  Therefore, using file
> handles requires statx() support.
> 
> Signed-off-by: Max Reitz 
> ---
>  tools/virtiofsd/helper.c  |   3 +
>  tools/virtiofsd/passthrough_ll.c  | 170 --
>  tools/virtiofsd/passthrough_seccomp.c |   1 +
>  3 files changed, 165 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
> index 5e98ed702b..954f8639e6 100644
> --- a/tools/virtiofsd/helper.c
> +++ b/tools/virtiofsd/helper.c
> @@ -186,6 +186,9 @@ void fuse_cmdline_help(void)
> "   to virtiofsd from guest 
> applications.\n"
> "   default: no_allow_direct_io\n"
> "-o announce_submounts  Announce sub-mount points to the 
> guest\n"
> +   "-o inode_file_handles  Use file handles to reference 
> inodes\n"
> +   "   instead of O_PATH file 
> descriptors\n"
> +   "   (requires -o 
> modcaps=+dac_read_search)\n"
> );
>  }
>  
> diff --git a/tools/virtiofsd/passthrough_ll.c 
> b/tools/virtiofsd/passthrough_ll.c
> index 793d2c333e..d01f9d3a59 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -190,6 +190,7 @@ struct lo_data {
>  /* An O_PATH file descriptor to /proc/self/fd/ */
>  int proc_self_fd;
>  int user_killpriv_v2, killpriv_v2;
> +int inode_file_handles;
>  };
>  
>  /**
> @@ -244,6 +245,10 @@ static const struct fuse_opt lo_opts[] = {
>  { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 
> },
>  { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
>  { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
> +{ "inode_file_handles", offsetof(struct lo_data, inode_file_handles), 1 
> },
> +{ "no_inode_file_handles",
> +  offsetof(struct lo_data, inode_file_handles),
> +  0 },
>  FUSE_OPT_END
>  };
>  static bool use_syslog = false;
> @@ -315,6 +320,108 @@ static int temp_fd_steal(TempFd *temp_fd)
>  }
>  }
>  
> +/**
> + * Generate a file handle for the given dirfd/name combination.
> + *
> + * If mount_fds does not yet contain an entry for the handle's mount
> + * ID, (re)open dirfd/name in O_RDONLY mode and add it to mount_fds
> + * as the FD for that mount ID.  (That is the file that we have
> + * generated a handle for, so it should be representative for the
> + * mount ID.  However, to be sure (and to rule out races), we use
> + * statx() to verify that our assumption is correct.)
> + */
> +static struct lo_fhandle *get_file_handle(struct lo_data *lo,
> +  int dirfd, const char *name)
> +{
> +/* We need statx() to verify the mount ID */
> +#if defined(CONFIG_STATX) && defined(STATX_MNT_ID)
> +struct lo_fhandle *fh;
> +int ret;
> +
> +if (!lo->use_statx || !lo->inode_file_handles) {
> +return NULL;
> +}
> +
> +fh = g_new0(struct lo_fhandle, 1);
> +
> +fh->handle.handle_bytes = sizeof(fh->padding) - sizeof(fh->handle);
> +ret = name_to_handle_at(dirfd, name, &fh->handle, &fh->mount_id,
> +AT_EMPTY_PATH);
> +if (ret < 0) {
> +goto fail;
> +}
> +
> +if (pthread_rwlock_rdlock(&mount_fds_lock)) {
> +goto fail;
> +}
> +if (!g_hash_table_contains(mount_fds, GINT_TO_POINTER(fh->mount_id))) {
> +struct statx stx;
> +int fd;
> +
> +pthread_rwlock_unlock(&mount_fds_lock);
> +
> +if (name[0]) {
> +fd = openat(dirfd, name, O_RDONLY);

But can't that be a device file or other special file that you must not
open?

Dave

> +} else {
> +char procname[64];
> +snprintf(procname, sizeof(procname), "%i", dirfd);
> +fd = openat(lo->proc_self_fd, procname, O_RDONLY);
> +}
> +if (fd < 0) {
> +goto fail;
> +}
> +
> +ret = statx(fd, "", AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
> +STATX_MNT_ID, &stx);
> +if (ret < 0) {
> +if (errno == ENOSYS) {
> +lo->use_statx = false;
> +fuse_log(FUSE_LOG_WARNING,
> + "st

Re: [PATCH v4 4/8] hw/intc: GICv3 ITS Command processing

2021-06-08 Thread Peter Maydell
On Wed, 2 Jun 2021 at 19:00, Shashi Mallela  wrote:
>
> Added ITS command queue handling for MAPTI,MAPI commands,handled ITS
> translation which triggers an LPI via INT command as well as write
> to GITS_TRANSLATER register,defined enum to differentiate between ITS
> command interrupt trigger and GITS_TRANSLATER based interrupt trigger.
> Each of these commands make use of other functionalities implemented to
> get device table entry,collection table entry or interrupt translation
> table entry required for their processing.
>
> Signed-off-by: Shashi Mallela 
> ---
>  hw/intc/arm_gicv3_its.c| 334 +
>  hw/intc/gicv3_internal.h   |  12 ++
>  include/hw/intc/arm_gicv3_common.h |   2 +
>  3 files changed, 348 insertions(+)
>
> diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
> index 6551c577b3..82bb5b84ef 100644
> --- a/hw/intc/arm_gicv3_its.c
> +++ b/hw/intc/arm_gicv3_its.c
> @@ -28,6 +28,13 @@ struct GICv3ITSClass {
>  void (*parent_reset)(DeviceState *dev);
>  };
>
> +typedef enum ItsCmdType {
> +NONE = 0, /* internal indication for GITS_TRANSLATER write */
> +CLEAR = 1,
> +DISCARD = 2,
> +INT = 3,
> +} ItsCmdType;
> +
>  static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz)
>  {
>  uint64_t result = 0;
> @@ -49,6 +56,315 @@ static uint64_t baser_base_addr(uint64_t value, uint32_t 
> page_sz)
>  return result;
>  }
>
> +static bool get_cte(GICv3ITSState *s, uint16_t icid, uint64_t *cte,
> +MemTxResult *res)
> +{
> +AddressSpace *as = &s->gicv3->dma_as;
> +uint64_t l2t_addr;
> +uint64_t value;
> +bool valid_l2t;
> +uint32_t l2t_id;
> +uint32_t max_l2_entries;
> +bool status = false;
> +
> +if (s->ct.indirect) {
> +l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE);
> +
> +value = address_space_ldq_le(as,
> + s->ct.base_addr +
> + (l2t_id * L1TABLE_ENTRY_SIZE),
> + MEMTXATTRS_UNSPECIFIED, res);
> +
> +if (*res == MEMTX_OK) {
> +valid_l2t = (value >> VALID_SHIFT) & VALID_MASK;

VALID_MASK should be the mask in its shifted location
(for consistency with how the FIELD macros do it). Then
this is just
   valid_l2t = (value & VALID_MASK) != 0;

> +
> +if (valid_l2t) {
> +max_l2_entries = s->ct.page_sz / s->ct.entry_sz;
> +
> +l2t_addr = value & ((1ULL << 51) - 1);
> +
> +*cte =  address_space_ldq_le(as, l2t_addr +
> +((icid % max_l2_entries) * 
> GITS_CTE_SIZE),
> +MEMTXATTRS_UNSPECIFIED, res);
> +   }
> +   }
> +} else {
> +/* Flat level table */
> +*cte =  address_space_ldq_le(as, s->ct.base_addr +
> + (icid * GITS_CTE_SIZE),
> +  MEMTXATTRS_UNSPECIFIED, res);
> +}
> +
> +if (*cte & VALID_MASK) {
> +status = true;
> +}
> +
> +return status;

You don't need the 'status' variable, you can just
 return (*cte & VALID_MASK) != 0;

(Looks like this code is already assuming VALID_MASK is the mask
in its shifted location, and so inconsistent with your current definition ?)

> +static bool get_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte,
> +uint16_t *icid, uint32_t *pIntid, MemTxResult *res)
> +{
> +AddressSpace *as = &s->gicv3->dma_as;
> +uint64_t itt_addr;
> +bool status = false;
> +uint64_t itel = 0;
> +uint32_t iteh = 0;
> +
> +itt_addr = (dte >> 6ULL) & ITTADDR_MASK;
> +itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */
> +
> +itel = address_space_ldq_le(as, itt_addr + (eventid * sizeof(uint64_t)),
> +MEMTXATTRS_UNSPECIFIED, res);
> +
> +if (*res == MEMTX_OK) {
> +iteh = address_space_ldl_le(as, itt_addr + ((eventid +
> +sizeof(uint64_t)) * sizeof(uint32_t)),
> +MEMTXATTRS_UNSPECIFIED, res);
> +
> +if (*res == MEMTX_OK) {
> +if (itel & VALID_MASK) {
> +if ((itel >> ITE_ENTRY_INTTYPE_SHIFT) & GITS_TYPE_PHYSICAL) {
> +*pIntid = (itel >> ITE_ENTRY_INTID_SHIFT) &
> +   ITE_ENTRY_INTID_MASK;

More _MASK constants that don't have the same semantics as the
registerfields versions. Please can you change all of these ?

> +*icid = iteh & ITE_ENTRY_ICID_MASK;
> +status = true;
> +}
> +}
> +}
> +}
> +return status;
> +}
> +
> +if ((devid > s->dt.max_devids) || !dte_valid || !ite_valid ||
> +!cte_valid || (eventid > max_eventid)) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "%s: invalid interrupt 

Re: [PATCH v16 04/99] qtest/arm-cpu-features: Use generic qtest_has_accel() to check for KVM

2021-06-08 Thread Philippe Mathieu-Daudé
On 6/8/21 10:22 AM, Philippe Mathieu-Daudé wrote:
> On 6/7/21 3:22 PM, Thomas Huth wrote:
>> On 04/06/2021 17.51, Alex Bennée wrote:
>>> From: Philippe Mathieu-Daudé 
>>>
>>> Use the recently added generic qtest_has_accel() method to
>>> check if KVM is available.
>>>
>>> Suggested-by: Claudio Fontana 
>>> Reviewed-by: Andrew Jones 
>>> Reviewed-by: Alex Bennée 
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>> Signed-off-by: Alex Bennée 
>>> Message-Id: <20210505125806.1263441-5-phi...@redhat.com>
>>> ---
>>>   tests/qtest/arm-cpu-features.c | 25 +
>>>   1 file changed, 1 insertion(+), 24 deletions(-)
>>>
>>> diff --git a/tests/qtest/arm-cpu-features.c
>>> b/tests/qtest/arm-cpu-features.c
>>> index 8252b85bb8..7f4b252127 100644
>>> --- a/tests/qtest/arm-cpu-features.c
>>> +++ b/tests/qtest/arm-cpu-features.c
>>> @@ -26,21 +26,6 @@
>>>   "  'arguments': { 'type': 'full', "
>>>   #define QUERY_TAIL  "}}"
>>>   -static bool kvm_enabled(QTestState *qts)
>>> -{
>>> -    QDict *resp, *qdict;
>>> -    bool enabled;
>>> -
>>> -    resp = qtest_qmp(qts, "{ 'execute': 'query-kvm' }");
>>> -    g_assert(qdict_haskey(resp, "return"));
>>> -    qdict = qdict_get_qdict(resp, "return");
>>> -    g_assert(qdict_haskey(qdict, "enabled"));
>>> -    enabled = qdict_get_bool(qdict, "enabled");
>>> -    qobject_unref(resp);
>>> -
>>> -    return enabled;
>>> -}
>>> -
>>>   static QDict *do_query_no_props(QTestState *qts, const char *cpu_type)
>>>   {
>>>   return qtest_qmp(qts, QUERY_HEAD "'model': { 'name': %s }"
>>> @@ -493,14 +478,6 @@ static void
>>> test_query_cpu_model_expansion_kvm(const void *data)
>>>     qts = qtest_init(MACHINE_KVM "-cpu max");
>>>   -    /*
>>> - * These tests target the 'host' CPU type, so KVM must be enabled.
>>> - */
>>> -    if (!kvm_enabled(qts)) {
>>> -    qtest_quit(qts);
>>> -    return;
>>> -    }
>>> -
>>>   /* Enabling and disabling kvm-no-adjvtime should always work. */
>>>   assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime");
>>>   assert_set_feature(qts, "host", "kvm-no-adjvtime", true);
>>> @@ -624,7 +601,7 @@ int main(int argc, char **argv)
>>>    * order avoid attempting to run an AArch32 QEMU with KVM on
>>>    * AArch64 hosts. That won't work and isn't easy to detect.
>>>    */
>>> -    if (g_str_equal(qtest_get_arch(), "aarch64")) {
>>> +    if (g_str_equal(qtest_get_arch(), "aarch64") &&
>>> qtest_has_accel("kvm")) {
>>>   qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
>>>   NULL, test_query_cpu_model_expansion_kvm);
>>
>> I think this is wrong: query-kvm checks whether kvm is *enabled*, while
>> your new function only checks whether kvm has been built into the
>> binary. There is still the possibility that kvm has been built into the
>> binary, but is not available on the host, so in that case the test will
>> fail now.

Not enough coffee earlier. I think this is a documentation problem,
query-kvm returns a list of *runtime* accelerators:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg811144.html

IIUC what Paolo said, if something asks for an accelerator that
is not present at build-time, then this is a configuration problem,
not relevant for the management interface.

>>
>> Thus please drop / rework this patch.
> 
> Indeed, this is unfortunate :(
>




Re: [PATCH 5/6] kvm/i386: Add support for user space MSR filtering

2021-06-08 Thread Siddharth Chandrasekaran
On Tue, Jun 08, 2021 at 10:48:53AM +0200, Alexander Graf wrote:
> On 24.05.21 22:01, Siddharth Chandrasekaran wrote:
> > Check and enable user space MSR filtering capability and handle new exit
> > reason KVM_EXIT_X86_WRMSR. This will be used in a follow up patch to
> > implement hyper-v overlay pages.
> > 
> > Signed-off-by: Siddharth Chandrasekaran 
> 
> This patch will break bisection, because we're no longer handling the writes
> in kernel space after this, but we also don't have user space handling
> available yet, right? It might be better to move all logic in this patch
> that sets up the filter for Hyper-V MSRs into the next one.

Yes, that's correct. I'll just bounce back all reads/writes to KVM. That
should maintain the existing behaviour.

> > ---
> >   target/i386/kvm/kvm.c | 72 +++
> >   1 file changed, 72 insertions(+)
> > 
> > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > index 362f04ab3f..3591f8cecc 100644
> > --- a/target/i386/kvm/kvm.c
> > +++ b/target/i386/kvm/kvm.c
> > @@ -117,6 +117,8 @@ static bool has_msr_ucode_rev;
> >   static bool has_msr_vmx_procbased_ctls2;
> >   static bool has_msr_perf_capabs;
> >   static bool has_msr_pkrs;
> > +static bool has_msr_filtering;
> > +static bool msr_filters_active;
> >   static uint32_t has_architectural_pmu_version;
> >   static uint32_t num_architectural_pmu_gp_counters;
> > @@ -2138,6 +2140,57 @@ static void register_smram_listener(Notifier *n, 
> > void *unused)
> >&smram_address_space, 1);
> >   }
> > +static void kvm_set_msr_filter_range(struct kvm_msr_filter_range *range, 
> > uint32_t flags,
> > + uint32_t base, uint32_t nmsrs, ...)
> > +{
> > +int i, filter_to_userspace;
> > +va_list ap;
> > +
> > +range->flags = flags;
> > +range->nmsrs = nmsrs;
> > +range->base = base;
> > +
> > +va_start(ap, nmsrs);
> > +for (i = 0; i < nmsrs; i++) {
> > +filter_to_userspace = va_arg(ap, int);
> > +if (!filter_to_userspace) {
> > +range->bitmap[i / 8] = 1 << (i % 8);
> > +}
> > +}
> > +va_end(ap);
> > +}
> > +
> > +static int kvm_set_msr_filters(KVMState *s)
> > +{
> > +int r, nmsrs, nfilt = 0, bitmap_pos = 0;
> > +struct kvm_msr_filter filter = { };
> > +struct kvm_msr_filter_range *range;
> > +uint8_t bitmap_buf[KVM_MSR_FILTER_MAX_RANGES * 8] = {0};
> > +
> > +filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
> > +
> > +if (has_hyperv) {
> > +/* Hyper-V overlay page MSRs */
> 
> I think you want to extend this comment and indicate in a human readable
> form that you set the filter on WRMSR to trap HV_X64_MSR_GUEST_OS_ID and
> HV_X64_MSR_HYPERCALL into user space here.

Sure.
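
Maybe something along these lines:

    /*
     * Hyper-V overlay page MSRs: ask KVM to exit to user space on WRMSR
     * to HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL so the hypercall
     * page can be handled as an overlay page; reads stay in KVM.
     */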

> > +nmsrs = 2;
> > +range = &filter.ranges[nfilt++];
> > +range->bitmap = &bitmap_buf[bitmap_pos];
> > +kvm_set_msr_filter_range(range, KVM_MSR_FILTER_WRITE,
> > + HV_X64_MSR_GUEST_OS_ID, nmsrs,
> > + true, /* HV_X64_MSR_GUEST_OS_ID */
> > + true  /* HV_X64_MSR_HYPERCALL */);
> > +bitmap_pos += ROUND_UP(nmsrs, 8) / 8;
> > +assert(bitmap_pos < sizeof(bitmap_buf));
> > +}
> > +
> > +r = kvm_vm_ioctl(s, KVM_X86_SET_MSR_FILTER, &filter);
> > +if (r != 0) {
> > +error_report("kvm: failed to set MSR filters");
> > +return -1;
> > +}
> > +
> > +return 0;
> > +}
> > +
> >   int kvm_arch_init(MachineState *ms, KVMState *s)
> >   {
> >   uint64_t identity_base = 0xfffbc000;
> > @@ -2269,6 +2322,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
> >   }
> >   }
> > +has_msr_filtering = kvm_check_extension(s, KVM_CAP_X86_USER_SPACE_MSR) 
> > &&
> > +kvm_check_extension(s, KVM_CAP_X86_MSR_FILTER);
> > +if (has_msr_filtering) {
> > +ret = kvm_vm_enable_cap(s, KVM_CAP_X86_USER_SPACE_MSR, 0,
> > +KVM_MSR_EXIT_REASON_FILTER);
> > +if (ret == 0) {
> > +ret = kvm_set_msr_filters(s);
> > +msr_filters_active = (ret == 0);
> > +}
> > +}
> > +
> >   return 0;
> >   }
> > @@ -4542,6 +4606,11 @@ static bool host_supports_vmx(void)
> >   return ecx & CPUID_EXT_VMX;
> >   }
> > +static int kvm_handle_wrmsr(X86CPU *cpu, struct kvm_run *run)
> > +{
> > +return 0;
> 
> The default handler should always set run->msr.error = 1 to mimic the
> existing behavior.

Will do, thanks.
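
i.e. something like:

    static int kvm_handle_wrmsr(X86CPU *cpu, struct kvm_run *run)
    {
        /* no user space handling for this MSR: flag an error so KVM
         * keeps the existing behaviour for the write */
        run->msr.error = 1;
        return 0;
    }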

> > +}
> > +
> >   #define VMX_INVALID_GUEST_STATE 0x8021
> >   int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
> > @@ -4600,6 +4669,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
> > *run)
> >   ioapic_eoi_broadcast(run->eoi.vector);
> >   ret = 0;
> >   break;
> > +case KVM_EXIT_X86_WRMSR:
> > +ret = kvm_handle_wrmsr(cpu,

Re: [PATCH 6/6] hyper-v: Handle hypercall code page as an overlay page

2021-06-08 Thread Siddharth Chandrasekaran
On Tue, Jun 08, 2021 at 11:02:45AM +0200, Alexander Graf wrote:
> On 24.05.21 22:02, Siddharth Chandrasekaran wrote:
> > Hypercall code page is specified in the Hyper-V TLFS to be an overlay
> > page, ie., guest chooses a GPA and the host _places_ a page at that
> > location, making it visible to the guest and the existing page becomes
> > inaccessible. Similarly when disabled, the host should _remove_ the
> > overlay and the old page should become visible to the guest.
> > 
> > Until now, KVM patched the hypercall code directly into the guest
> > chosen GPA which is incorrect; instead, use the new user space MSR
> > filtering feature to trap hypercall page MSR writes, overlay it as
> > requested and then invoke a KVM_SET_MSR from user space to bounce back
> > control KVM. This bounce back is needed as KVM may have to write data
> > into the newly overlaid page.
> > 
> > Signed-off-by: Siddharth Chandrasekaran 
> > ---
> >   hw/hyperv/hyperv.c | 10 -
> >   include/hw/hyperv/hyperv.h |  5 +++
> >   target/i386/kvm/hyperv.c   | 84 ++
> >   target/i386/kvm/hyperv.h   |  4 ++
> >   target/i386/kvm/kvm.c  | 26 +++-
> >   5 files changed, 127 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c
> > index ac45e8e139..aa5ac5226e 100644
> > --- a/hw/hyperv/hyperv.c
> > +++ b/hw/hyperv/hyperv.c
> > @@ -36,6 +36,7 @@ struct SynICState {
> >   OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC)
> >   static bool synic_enabled;
> > +struct hyperv_overlay_page hcall_page;
> >   static void alloc_overlay_page(struct hyperv_overlay_page *overlay,
> >  Object *owner, const char *name)
> > @@ -50,7 +51,7 @@ static void alloc_overlay_page(struct hyperv_overlay_page 
> > *overlay,
> >* This method must be called with iothread lock taken as it modifies
> >* the memory hierarchy.
> >*/
> > -static void hyperv_overlay_update(struct hyperv_overlay_page *overlay, 
> > hwaddr addr)
> > +void hyperv_overlay_update(struct hyperv_overlay_page *overlay, hwaddr 
> > addr)
> >   {
> >   if (addr != HYPERV_INVALID_OVERLAY_GPA) {
> >   /* check if overlay page is enabled */
> > @@ -70,6 +71,13 @@ static void hyperv_overlay_update(struct 
> > hyperv_overlay_page *overlay, hwaddr ad
> >   }
> >   }
> > +void hyperv_overlay_init(void)
> > +{
> > +memory_region_init_ram(&hcall_page.mr, NULL, "hyperv.hcall_page",
> > +   qemu_real_host_page_size, &error_abort);
> > +hcall_page.addr = HYPERV_INVALID_OVERLAY_GPA;
> > +}
> > +
> >   static void synic_update(SynICState *synic, bool enable,
> >hwaddr msg_page_addr, hwaddr event_page_addr)
> >   {
> > diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h
> > index d989193e84..f31a81 100644
> > --- a/include/hw/hyperv/hyperv.h
> > +++ b/include/hw/hyperv/hyperv.h
> > @@ -85,6 +85,11 @@ static inline uint32_t hyperv_vp_index(CPUState *cs)
> >   return cs->cpu_index;
> >   }
> > +extern struct hyperv_overlay_page hcall_page;
> > +
> > +void hyperv_overlay_init(void);
> > +void hyperv_overlay_update(struct hyperv_overlay_page *page, hwaddr addr);
> > +
> >   void hyperv_synic_add(CPUState *cs);
> >   void hyperv_synic_reset(CPUState *cs);
> >   void hyperv_synic_update(CPUState *cs, bool enable,
> > diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c
> > index f49ed2621d..01c9c2468c 100644
> > --- a/target/i386/kvm/hyperv.c
> > +++ b/target/i386/kvm/hyperv.c
> > @@ -16,6 +16,76 @@
> >   #include "hyperv.h"
> >   #include "hw/hyperv/hyperv.h"
> >   #include "hyperv-proto.h"
> > +#include "kvm_i386.h"
> > +
> > +struct x86_hv_overlay {
> > +struct hyperv_overlay_page *page;
> > +uint32_t msr;
> > +hwaddr gpa;
> > +};
> > +
> > +static void async_overlay_update(CPUState *cs, run_on_cpu_data data)
> > +{
> > +X86CPU *cpu = X86_CPU(cs);
> > +struct x86_hv_overlay *overlay = data.host_ptr;
> > +
> > +qemu_mutex_lock_iothread();
> > +hyperv_overlay_update(overlay->page, overlay->gpa);
> > +qemu_mutex_unlock_iothread();
> > +
> > +/**
> > + * Call KVM so it can keep a copy of the MSR data and do other 
> > post-overlay
> > + * actions such as filling the overlay page contents before returning 
> > to
> > + * guest. This works because MSR filtering is inactive for KVM_SET_MSRS
> > + */
> > +kvm_put_one_msr(cpu, overlay->msr, overlay->gpa);
> > +
> > +g_free(overlay);
> > +}
> > +
> > +static void do_overlay_update(X86CPU *cpu, struct hyperv_overlay_page 
> > *page,
> > +  uint32_t msr, uint64_t data)
> > +{
> > +struct x86_hv_overlay *overlay = g_malloc(sizeof(struct 
> > x86_hv_overlay));
> > +
> > +*overlay = (struct x86_hv_overlay) {
> > +.page = page,
> > +.msr = msr,
> > +.gpa = data
> > +};
> > +
> > +/**
> > + * This will run in thi

Re: [PATCH v4 5/8] hw/intc: GICv3 ITS Feature enablement

2021-06-08 Thread Peter Maydell
On Wed, 2 Jun 2021 at 19:00, Shashi Mallela  wrote:
>
> Added properties to enable ITS feature and define qemu system
> address space memory in gicv3 common,setup distributor and
> redistributor registers to indicate LPI support.
>
> Signed-off-by: Shashi Mallela 
> ---
>  hw/intc/arm_gicv3_common.c | 12 
>  hw/intc/arm_gicv3_dist.c   |  7 +--
>  hw/intc/arm_gicv3_its.c|  9 -
>  hw/intc/arm_gicv3_redist.c | 14 +++---
>  hw/intc/gicv3_internal.h   | 17 +
>  include/hw/intc/arm_gicv3_common.h |  1 +
>  6 files changed, 54 insertions(+), 6 deletions(-)


> @@ -386,7 +388,8 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr 
> offset,
>  bool sec_extn = !(s->gicd_ctlr & GICD_CTLR_DS);
>
>  *data = (1 << 25) | (1 << 24) | (sec_extn << 10) |
> -(0xf << 19) | itlinesnumber;
> +(s->lpi_enable << GICD_TYPER_LPIS_OFFSET) |
> +(GICD_TYPER_IDBITS << GICD_TYPER_IDBITS_OFFSET) | itlinesnumber;
>  return MEMTX_OK;
>  }
>  case GICD_IIDR:

This change is doing two things at once:
(1) setting the LPI enable bit
(2) changing from (0xf << 19) to something using symbolic constants.

If you want to do (2) as a cleanup I don't object, but please put
it in its own patch as it is unrelated to this one.

> diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
> index 82bb5b84ef..0a978cf55b 100644
> --- a/hw/intc/arm_gicv3_its.c
> +++ b/hw/intc/arm_gicv3_its.c
> @@ -294,6 +294,7 @@ static MemTxResult process_mapti(GICv3ITSState *s, 
> uint64_t value,
>  uint64_t itel = 0;
>  uint32_t iteh = 0;
>  uint32_t int_spurious = INTID_SPURIOUS;
> +uint64_t idbits;
>
>  devid = (value >> DEVID_SHIFT) & DEVID_MASK;
>  offset += NUM_BYTES_IN_DW;
> @@ -330,7 +331,13 @@ static MemTxResult process_mapti(GICv3ITSState *s, 
> uint64_t value,
>  max_eventid = (1UL << (((dte >> 1U) & SIZE_MASK) + 1));
>
>  if (!ignore_pInt) {
> -max_Intid = (1UL << (FIELD_EX64(s->typer, GITS_TYPER, IDBITS) + 1));
> +idbits = MIN(FIELD_EX64(s->gicv3->cpu->gicr_propbaser, 
> GICR_PROPBASER,
> +IDBITS), GICD_TYPER_IDBITS);
> +
> +if (idbits < GICR_PROPBASER_IDBITS_THRESHOLD) {
> +return res;
> +}
> +max_Intid = (1ULL << (idbits + 1));
>  }
>

This change should be folded into the patch where you add
this process_mapti() code, so it is correct from the start.

>  if ((devid > s->dt.max_devids) || (icid > s->ct.max_collids) ||
> diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
> index 8645220d61..fb9a4ee3cc 100644
> --- a/hw/intc/arm_gicv3_redist.c
> +++ b/hw/intc/arm_gicv3_redist.c
> @@ -244,14 +244,21 @@ static MemTxResult gicr_readl(GICv3CPUState *cs, hwaddr 
> offset,
>  static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset,
> uint64_t value, MemTxAttrs attrs)
>  {
> +

Stray new blank line.

>  switch (offset) {
>  case GICR_CTLR:
>  /* For our implementation, GICR_TYPER.DPGS is 0 and so all
>   * the DPG bits are RAZ/WI. We don't do anything asynchronously,
> - * so UWP and RWP are RAZ/WI. And GICR_TYPER.LPIS is 0 (we don't
> - * implement LPIs) so Enable_LPIs is RES0. So there are no writable
> - * bits for us.
> + * so UWP and RWP are RAZ/WI. GICR_TYPER.LPIS is 1 (we
> + * implement LPIs) so Enable_LPIs is programmable.
>   */
> +if (cs->gicr_typer & GICR_TYPER_PLPIS) {
> +if (value & GICR_CTLR_ENABLE_LPIS) {
> +cs->gicr_ctlr |= GICR_CTLR_ENABLE_LPIS;
> +} else {
> +cs->gicr_ctlr &= ~GICR_CTLR_ENABLE_LPIS;
> +}
> +}
>  return MEMTX_OK;
>  case GICR_STATUSR:
>  /* RAZ/WI for our implementation */
> @@ -395,6 +402,7 @@ static MemTxResult gicr_readll(GICv3CPUState *cs, hwaddr 
> offset,
>  static MemTxResult gicr_writell(GICv3CPUState *cs, hwaddr offset,
>  uint64_t value, MemTxAttrs attrs)
>  {
> +
>  switch (offset) {
>  case GICR_PROPBASER:
>  cs->gicr_propbaser = value;

Another stray new blank line.

thanks
-- PMM



Re: [PATCH v4 8/8] hw/arm/virt: add ITS support in virt GIC

2021-06-08 Thread Peter Maydell
On Wed, 2 Jun 2021 at 19:00, Shashi Mallela  wrote:
>
> Included creation of ITS as part of virt platform GIC
> initialization.This Emulated ITS model now co-exists with kvm


Still missing space after '.'.

> ITS and is enabled in absence of kvm irq kernel support in a
> platform.
>
> Signed-off-by: Shashi Mallela 
> ---

I gave you a reviewed-by tag on this patch in v3; please don't
drop reviewed-by tags unless you make changes to a patch;
they help reviewers know which parts of the series they don't
need to look at again.

For the record,

Otherwise,
Reviewed-by: Peter Maydell 


thanks
-- PMM


