Re: [PATCH] net/filter: Optimize filter_send to coroutine

2021-12-24 Thread lizhij...@fujitsu.com


On 24/12/2021 10:37, Rao, Lei wrote:
> This patch is to improve the logic of QEMU main thread sleep code in
> qemu_chr_write_buffer() where it can be blocked and can't run other
> coroutines during COLO IO stress test.
>
> Our approach is to put filter_send() in a coroutine. In this way,
> filter_send() will call qemu_coroutine_yield() in qemu_co_sleep_ns(),
> so that it can be scheduled out and QEMU main thread has opportunity to
> run other tasks.
>
> Signed-off-by: Lei Rao 
> Signed-off-by: Zhang Chen 
> ---
>   net/filter-mirror.c | 67 -
>   1 file changed, 54 insertions(+), 13 deletions(-)
>
> diff --git a/net/filter-mirror.c b/net/filter-mirror.c
> index f20240cc9f..1e9f8b6216 100644
> --- a/net/filter-mirror.c
> +++ b/net/filter-mirror.c
> @@ -20,6 +20,7 @@
>   #include "chardev/char-fe.h"
>   #include "qemu/iov.h"
>   #include "qemu/sockets.h"
> +#include "block/aio-wait.h"
>   
>   #define TYPE_FILTER_MIRROR "filter-mirror"
>   typedef struct MirrorState MirrorState;
> @@ -42,20 +43,21 @@ struct MirrorState {
>   bool vnet_hdr;
>   };
>   
> -static int filter_send(MirrorState *s,
> -   const struct iovec *iov,
> -   int iovcnt)
> +typedef struct FilterSendCo {
> +MirrorState *s;
> +char *buf;
> +ssize_t size;
> +bool done;
> +int ret;
> +} FilterSendCo;
> +
> +static int _filter_send(MirrorState *s,
> +   char *buf,
> +   ssize_t size)
>   {
>   NetFilterState *nf = NETFILTER(s);
>   int ret = 0;
> -ssize_t size = 0;
>   uint32_t len = 0;
> -char *buf;
> -
> -size = iov_size(iov, iovcnt);
> -if (!size) {
> -return 0;
> -}
>   
>   len = htonl(size);
>   ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
> @@ -80,10 +82,7 @@ static int filter_send(MirrorState *s,
>   }
>   }
>   
> -buf = g_malloc(size);
> -iov_to_buf(iov, iovcnt, 0, buf, size);
>   ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
> -g_free(buf);
>   if (ret != size) {
>   goto err;
>   }
> @@ -94,6 +93,48 @@ err:
>   return ret < 0 ? ret : -EIO;
>   }
>   
> +static void coroutine_fn filter_send_co(void *opaque)
> +{
> +FilterSendCo *data = opaque;
> +
> +data->ret = _filter_send(data->s, data->buf, data->size);
> +data->done = true;
> +g_free(data->buf);
> +aio_wait_kick();
> +}
> +
> +static int filter_send(MirrorState *s,
> +   const struct iovec *iov,
> +   int iovcnt)
> +{
> +ssize_t size = iov_size(iov, iovcnt);
> +char *buf = NULL;
> +
> +if (!size) {
> +return 0;
> +}
> +
> +buf = g_malloc(size);
> +iov_to_buf(iov, iovcnt, 0, buf, size);
> +
> +FilterSendCo data = {
> +.s = s,
> +.size = size,
> +.buf = buf,
> +.ret = 0,
> +};
> +
> +Coroutine *co = qemu_coroutine_create(filter_send_co, &data);

BTW, does qemu/old gcc complaint such coding style ?

int a;
a = foo()
int b = a;



> +qemu_coroutine_enter(co);
> +
> +while (!data.done) {
> +aio_poll(qemu_get_aio_context(), true);
> +}
> +
> +return data.ret;
> +
redundant  newline

Otherwise,
Reviewed-by: Li Zhijian 



> +}
> +
>   static void redirector_to_filter(NetFilterState *nf,
>const uint8_t *buf,
>int len)


QEMU CI failure of cross-i386-* targets (meson picks wrong glib for native target)

2021-12-24 Thread Alessandro Di Federico via
Hi Paolo, I'm trying to get the QEMU CI run successfully for the
idef-parser patchset. However I'm facing an issue I haven't been able
to work around with meson. Maybe you can help?

The failing tests are cross-i386-*

https://gitlab.com/carl.cudig/qemu/-/jobs/1437392669
https://gitlab.com/carl.cudig/qemu/-/jobs/1437392673
https://gitlab.com/carl.cudig/qemu/-/jobs/1437392671

I think the problem is that we're adding a new build-time dependency:
glib. However glib is also a run-time dependency and, AFAIU, when cross
compiling for x86 on a x86-64 machine, if you create a native
executable, meson picks up the x86 version of glib (as opposed to the
x86-64).

I've been fiddling with this for a while, unsuccessfully.

Relevant portion of the code:


https://gitlab.com/carl.cudig/qemu/-/commit/c020958c37fa723f7e933a58b1bb1c3668ff4cff#8145a41027f26ff426d5a2c8b00c56f227943165_198_202

Happy holidays!

-- 
Alessandro Di Federico
rev.ng Labs Srl



Re: [PATCH v7] isa-applesmc: provide OSK forwarding on Apple hosts

2021-12-24 Thread Vladislav Yaroshchuk
ping:
https://patchew.org/QEMU/20211216104621.85108-1-yaroshchuk2...@gmail.com/


Re: [PATCH v3 6/8] migration: Dump sub-cmd name in loadvm_process_command tp

2021-12-24 Thread David Edmondson
On Friday, 2021-12-24 at 14:49:58 +08, Peter Xu wrote:

> It'll be easier to read the name rather than index of sub-cmd when debugging.
>
> Signed-off-by: Peter Xu 

Reviewed-by: David Edmondson 

> ---
>  migration/savevm.c | 3 ++-
>  migration/trace-events | 2 +-
>  2 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 0bef031acb..7f7af6f750 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2272,12 +2272,13 @@ static int loadvm_process_command(QEMUFile *f)
>  return qemu_file_get_error(f);
>  }
>
> -trace_loadvm_process_command(cmd, len);
>  if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
>  error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
>  return -EINVAL;
>  }
>
> +trace_loadvm_process_command(mig_cmd_args[cmd].name, len);
> +
>  if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
>  error_report("%s received with bad length - expecting %zu, got %d",
>   mig_cmd_args[cmd].name,
> diff --git a/migration/trace-events b/migration/trace-events
> index b48d873b8a..d63a5915f5 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -22,7 +22,7 @@ loadvm_postcopy_handle_resume(void) ""
>  loadvm_postcopy_ram_handle_discard(void) ""
>  loadvm_postcopy_ram_handle_discard_end(void) ""
>  loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) 
> "%s: %ud"
> -loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d"
> +loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d"
>  loadvm_process_command_ping(uint32_t val) "0x%x"
>  postcopy_ram_listen_thread_exit(void) ""
>  postcopy_ram_listen_thread_start(void) ""

dme.
-- 
I'm catching up with myself!



Re: [PATCH v3 7/8] migration: Finer grained tracepoints for POSTCOPY_LISTEN

2021-12-24 Thread David Edmondson
On Friday, 2021-12-24 at 14:49:59 +08, Peter Xu wrote:

> The enablement of postcopy listening has a few steps, add a few tracepoints to
> be there ready for some basic measurements for them.
>
> Signed-off-by: Peter Xu 

Reviewed-by: David Edmondson 

> ---
>  migration/savevm.c | 9 -
>  migration/trace-events | 2 +-
>  2 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 7f7af6f750..592d550a2f 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1947,9 +1947,10 @@ static void *postcopy_ram_listen_thread(void *opaque)
>  static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>  {
>  PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
> -trace_loadvm_postcopy_handle_listen();
>  Error *local_err = NULL;
>
> +trace_loadvm_postcopy_handle_listen("enter");
> +
>  if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
>  error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
>  return -1;
> @@ -1964,6 +1965,8 @@ static int 
> loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>  }
>  }
>
> +trace_loadvm_postcopy_handle_listen("after discard");
> +
>  /*
>   * Sensitise RAM - can now generate requests for blocks that don't exist
>   * However, at this point the CPU shouldn't be running, and the IO
> @@ -1976,6 +1979,8 @@ static int 
> loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>  }
>  }
>
> +trace_loadvm_postcopy_handle_listen("after uffd");
> +
>  if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
>  error_report_err(local_err);
>  return -1;
> @@ -1990,6 +1995,8 @@ static int 
> loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>  qemu_sem_wait(&mis->listen_thread_sem);
>  qemu_sem_destroy(&mis->listen_thread_sem);
>
> +trace_loadvm_postcopy_handle_listen("return");
> +
>  return 0;
>  }
>
> diff --git a/migration/trace-events b/migration/trace-events
> index d63a5915f5..77d1237d89 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -14,7 +14,7 @@ loadvm_handle_cmd_packaged_main(int ret) "%d"
>  loadvm_handle_cmd_packaged_received(int ret) "%d"
>  loadvm_handle_recv_bitmap(char *s) "%s"
>  loadvm_postcopy_handle_advise(void) ""
> -loadvm_postcopy_handle_listen(void) ""
> +loadvm_postcopy_handle_listen(const char *str) "%s"
>  loadvm_postcopy_handle_run(void) ""
>  loadvm_postcopy_handle_run_cpu_sync(void) ""
>  loadvm_postcopy_handle_run_vmstart(void) ""

dme.
-- 
If I could buy my reasoning, I'd pay to lose.



Re: [PATCH v3 8/8] migration: Tracepoint change in postcopy-run bottom half

2021-12-24 Thread David Edmondson
On Friday, 2021-12-24 at 14:50:00 +08, Peter Xu wrote:

> Remove the old two tracepoints and they're even near each other:
>
> trace_loadvm_postcopy_handle_run_cpu_sync()
> trace_loadvm_postcopy_handle_run_vmstart()
>
> Add trace_loadvm_postcopy_handle_run_bh() with a finer granule trace.
>
> Signed-off-by: Peter Xu 

Reviewed-by: David Edmondson 

> ---
>  migration/savevm.c | 12 +---
>  migration/trace-events |  3 +--
>  2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 592d550a2f..3b8f565b14 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2005,13 +2005,19 @@ static void loadvm_postcopy_handle_run_bh(void 
> *opaque)
>  Error *local_err = NULL;
>  MigrationIncomingState *mis = opaque;
>
> +trace_loadvm_postcopy_handle_run_bh("enter");
> +
>  /* TODO we should move all of this lot into postcopy_ram.c or a shared 
> code
>   * in migration.c
>   */
>  cpu_synchronize_all_post_init();
>
> +trace_loadvm_postcopy_handle_run_bh("after cpu sync");
> +
>  qemu_announce_self(&mis->announce_timer, migrate_announce_params());
>
> +trace_loadvm_postcopy_handle_run_bh("after announce");
> +
>  /* Make sure all file formats flush their mutable metadata.
>   * If we get an error here, just don't restart the VM yet. */
>  bdrv_invalidate_cache_all(&local_err);
> @@ -2021,9 +2027,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
>  autostart = false;
>  }
>
> -trace_loadvm_postcopy_handle_run_cpu_sync();
> -
> -trace_loadvm_postcopy_handle_run_vmstart();
> +trace_loadvm_postcopy_handle_run_bh("after invalidate cache");
>
>  dirty_bitmap_mig_before_vm_start();
>
> @@ -2036,6 +2040,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
>  }
>
>  qemu_bh_delete(mis->bh);
> +
> +trace_loadvm_postcopy_handle_run_bh("return");
>  }
>
>  /* After all discards we can start running and asking for pages */
> diff --git a/migration/trace-events b/migration/trace-events
> index 77d1237d89..e165687af2 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -16,8 +16,7 @@ loadvm_handle_recv_bitmap(char *s) "%s"
>  loadvm_postcopy_handle_advise(void) ""
>  loadvm_postcopy_handle_listen(const char *str) "%s"
>  loadvm_postcopy_handle_run(void) ""
> -loadvm_postcopy_handle_run_cpu_sync(void) ""
> -loadvm_postcopy_handle_run_vmstart(void) ""
> +loadvm_postcopy_handle_run_bh(const char *str) "%s"
>  loadvm_postcopy_handle_resume(void) ""
>  loadvm_postcopy_ram_handle_discard(void) ""
>  loadvm_postcopy_ram_handle_discard_end(void) ""

dme.
-- 
Jane was in her turtle neck, I was much happier then.



[PATCH 0/6] migration: Add 'no-ram' and 'ram-only' cpabilities

2021-12-24 Thread Nikita Lapshin
We want to implement exteranl bg-snapshot tool for saving RAM. For this it
is important to be able manage migration stream because tool has no idea
about non-RAM part and its size.

Possible solution is to send RAM separately. This can be done with
implemented RAM capabilities. These capabilities allow to send only RAM part
or non-RAM part so tool can save non-RAM part without special handlers.
RAM will be saved with special handlers for postcopy load.

Nikita Lapshin (6):
  migration: is_ram changed to is_iterable
  migration: should_skip() implemented
  migration: Add no-ram capability
  migration: Add ram-only capability
  migration: analyze-migration script changed
  migration: Test for ram capabilities

 migration/migration.c | 29 +-
 migration/migration.h |  2 +
 migration/ram.c   |  6 ++
 migration/savevm.c| 63 ++--
 qapi/migration.json   | 11 ++-
 scripts/analyze-migration.py  | 19 ++--
 .../tests/migrate-ram-capabilities-test   | 96 +++
 .../tests/migrate-ram-capabilities-test.out   |  5 +
 8 files changed, 192 insertions(+), 39 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/migrate-ram-capabilities-test
 create mode 100644 tests/qemu-iotests/tests/migrate-ram-capabilities-test.out

-- 
2.27.0




[PATCH 2/6] migration: should_skip() implemented

2021-12-24 Thread Nikita Lapshin
For next changes it is convenient to make all decisions about
sections skipping in one function.

Signed-off-by: Nikita Lapshin 
---
 migration/savevm.c | 42 --
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index f90fdb2bdd..ba644083ab 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -943,6 +943,15 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se,
 return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 }
 
+static bool should_skip(SaveStateEntry *se)
+{
+if (se->ops->is_active && !se->ops->is_active(se->opaque)) {
+return true;
+}
+
+return false;
+}
+
 /*
  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
  */
@@ -1207,10 +1216,8 @@ void qemu_savevm_state_setup(QEMUFile *f)
 if (!se->ops || !se->ops->save_setup) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 save_section_header(f, se, QEMU_VM_SECTION_START);
 
@@ -1238,10 +1245,8 @@ int qemu_savevm_state_resume_prepare(MigrationState *s)
 if (!se->ops || !se->ops->resume_prepare) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 ret = se->ops->resume_prepare(s, se->opaque);
 if (ret < 0) {
@@ -1268,8 +1273,7 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 if (!se->ops || !se->ops->save_live_iterate) {
 continue;
 }
-if (se->ops->is_active &&
-!se->ops->is_active(se->opaque)) {
+if (should_skip(se)) {
 continue;
 }
 if (se->ops->is_active_iterate &&
@@ -1336,10 +1340,8 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
 if (!se->ops || !se->ops->save_live_complete_postcopy) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 trace_savevm_section_start(se->idstr, se->section_id);
 /* Section type */
@@ -1373,10 +1375,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile 
*f, bool in_postcopy)
 continue;
 }
 
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 trace_savevm_section_start(se->idstr, se->section_id);
 
@@ -1521,10 +1521,8 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t 
threshold_size,
 if (!se->ops || !se->ops->save_live_pending) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 se->ops->save_live_pending(f, se->opaque, threshold_size,
res_precopy_only, res_compatible,
-- 
2.27.0




[PATCH 3/6] migration: Add no-ram capability

2021-12-24 Thread Nikita Lapshin
This capability disable RAM section in migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 9 +
 migration/migration.h | 1 +
 migration/ram.c   | 6 ++
 qapi/migration.json   | 8 +---
 4 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 3de11ae921..006447d00a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2610,6 +2610,15 @@ bool migrate_background_snapshot(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
 }
 
+bool migrate_no_ram(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_NO_RAM];
+}
+
 /* migration thread support */
 /*
  * Something bad happened to the RP stream, mark an error
diff --git a/migration/migration.h b/migration/migration.h
index 8130b703eb..43f7bf8eba 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -358,6 +358,7 @@ int migrate_decompress_threads(void);
 bool migrate_use_events(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_background_snapshot(void);
+bool migrate_no_ram(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/migration/ram.c b/migration/ram.c
index 57efa67f20..aa3583d0bc 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -4339,6 +4339,11 @@ static int ram_resume_prepare(MigrationState *s, void 
*opaque)
 return 0;
 }
 
+static bool ram_is_active(void* opaque)
+{
+return !migrate_no_ram();
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
 .save_setup = ram_save_setup,
 .save_live_iterate = ram_save_iterate,
@@ -4351,6 +4356,7 @@ static SaveVMHandlers savevm_ram_handlers = {
 .load_setup = ram_load_setup,
 .load_cleanup = ram_load_cleanup,
 .resume_prepare = ram_resume_prepare,
+.is_active = ram_is_active,
 };
 
 static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
diff --git a/qapi/migration.json b/qapi/migration.json
index bbfd48cf0b..d53956852c 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -452,6 +452,8 @@
 #   procedure starts. The VM RAM is saved with running VM.
 #   (since 6.0)
 #
+# @no-ram: If enabled, migration stream won't contain any ram in it. (since 
7.0)
+#
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
 #
@@ -465,8 +467,7 @@
'block', 'return-path', 'pause-before-switchover', 'multifd',
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
-   'validate-uuid', 'background-snapshot'] }
-
+   'validate-uuid', 'background-snapshot', 'no-ram'] }
 ##
 # @MigrationCapabilityStatus:
 #
@@ -519,7 +520,8 @@
 #   {"state": false, "capability": "compress"},
 #   {"state": true, "capability": "events"},
 #   {"state": false, "capability": "postcopy-ram"},
-#   {"state": false, "capability": "x-colo"}
+#   {"state": false, "capability": "x-colo"},
+#   {"state": false, "capability": "no-ram"}
 #]}
 #
 ##
-- 
2.27.0




[PATCH 1/6] migration: is_ram changed to is_iterable

2021-12-24 Thread Nikita Lapshin
For new migration capabilities upcoming we need to use something
like is_ram for this purpose. This member of struction is true
not only for RAM so it should be renamed.

Signed-off-by: Nikita Lapshin 
---
 migration/savevm.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 0bef031acb..f90fdb2bdd 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -248,7 +248,7 @@ typedef struct SaveStateEntry {
 const VMStateDescription *vmsd;
 void *opaque;
 CompatEntry *compat;
-int is_ram;
+int is_iterable;
 } SaveStateEntry;
 
 typedef struct SaveState {
@@ -797,9 +797,9 @@ int register_savevm_live(const char *idstr,
 se->ops = ops;
 se->opaque = opaque;
 se->vmsd = NULL;
-/* if this is a live_savem then set is_ram */
+/* if this is a live_savem then set is_iterable */
 if (ops->save_setup != NULL) {
-se->is_ram = 1;
+se->is_iterable = 1;
 }
 
 pstrcat(se->idstr, sizeof(se->idstr), idstr);
@@ -1625,7 +1625,7 @@ int qemu_save_device_state(QEMUFile *f)
 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 int ret;
 
-if (se->is_ram) {
+if (se->is_iterable) {
 continue;
 }
 if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
@@ -2428,7 +2428,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, 
MigrationIncomingState *mis)
 se->load_section_id = section_id;
 
 /* Validate if it is a device's state */
-if (xen_enabled() && se->is_ram) {
+if (xen_enabled() && se->is_iterable) {
 error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
 return -EINVAL;
 }
-- 
2.27.0




Re: [RFC v2 02/12] target/ppc: powerpc_excp: Set vector earlier

2021-12-24 Thread Fabiano Rosas
Richard Henderson  writes:

> On 12/20/21 10:18 AM, Fabiano Rosas wrote:
>> None of the interrupt setup code touches 'vector', so we can move it
>> earlier in the function. This will allow us to later move the System
>> Call Vectored setup that is on the top level into the
>> POWERPC_EXCP_SYSCALL_VECTORED code block.
>> 
>> This patch also moves the verification for when 'excp' does not have
>> an address associated with it. We now bail a little earlier when that
>> is the case. This should not cause any visible effects.
>> 
>> Signed-off-by: Fabiano Rosas 
>> ---
>>   target/ppc/excp_helper.c | 16 
>>   1 file changed, 8 insertions(+), 8 deletions(-)
>> 
>> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
>> index 8b9c6bc5a8..14fd0213a0 100644
>> --- a/target/ppc/excp_helper.c
>> +++ b/target/ppc/excp_helper.c
>> @@ -352,6 +352,14 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
>> excp_model, int excp)
>>   }
>>   #endif
>>   
>> +vector = env->excp_vectors[excp];
>> +if (vector == (target_ulong)-1ULL) {
>> +cpu_abort(cs, "Raised an exception without defined vector %d\n",
>> +  excp);
>> +}
>> +
>> +vector |= env->excp_prefix;
>> +
>>   switch (excp) {
>>   case POWERPC_EXCP_NONE:
>>   /* Should never happen */
>
> You've moved the cpu_abort above the excp check above the early return for 
> NONE (which 
> possibly shouldn't exist) and above the excp default

Right, I think I had this patch initially after patch 7 which moves the
NONE check ealier in the function. I'll reorganize.

>  cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
>
> I would certainly expect invalid excp to not have a defined vector
> either.

One thing we always had that this series makes more explicit is the
distinction between what interrupts QEMU knows about and what vectors a
processor uses. I have played around with using the same data structure
to hold both, which would perhaps match the expectation more closely,
but nothing came out of it. I think there's still space after this
series for further improvement in that regard.

> I'll also note that the excp_vectors[] index is no longer bounds checked by 
> the switch.

Ack.



[PATCH 5/6] migration: analyze-migration script changed

2021-12-24 Thread Nikita Lapshin
This script is used for RAM capabilities test. But it cannot work
in case of no vm description in migration stream.
So new flag is added to allow work this script with ram-only
migration stream.

Signed-off-by: Nikita Lapshin 
---
 scripts/analyze-migration.py | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index b82a1b0c58..80077a09bc 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -495,7 +495,7 @@ def __init__(self, filename):
 self.filename = filename
 self.vmsd_desc = None
 
-def read(self, desc_only = False, dump_memory = False, write_memory = 
False):
+def read(self, ram_only, desc_only = False, dump_memory = False, 
write_memory = False):
 # Read in the whole file
 file = MigrationFile(self.filename)
 
@@ -509,7 +509,8 @@ def read(self, desc_only = False, dump_memory = False, 
write_memory = False):
 if data != self.QEMU_VM_FILE_VERSION:
 raise Exception("Invalid version number %d" % data)
 
-self.load_vmsd_json(file)
+if not ram_only:
+self.load_vmsd_json(file)
 
 # Read sections
 self.sections = collections.OrderedDict()
@@ -518,7 +519,10 @@ def read(self, desc_only = False, dump_memory = False, 
write_memory = False):
 return
 
 ramargs = {}
-ramargs['page_size'] = self.vmsd_desc['page_size']
+if ram_only:
+ramargs['page_size'] = 4096
+else:
+ramargs['page_size'] = self.vmsd_desc['page_size']
 ramargs['dump_memory'] = dump_memory
 ramargs['write_memory'] = write_memory
 self.section_classes[('ram',0)][1] = ramargs
@@ -579,6 +583,7 @@ def default(self, o):
 parser.add_argument("-m", "--memory", help='dump RAM contents as well', 
action='store_true')
 parser.add_argument("-d", "--dump", help='what to dump ("state" or "desc")', 
default='state')
 parser.add_argument("-x", "--extract", help='extract contents into individual 
files', action='store_true')
+parser.add_argument("--ram-only", help='parse migration dump containing only 
RAM', action='store_true')
 args = parser.parse_args()
 
 jsonenc = JSONEncoder(indent=4, separators=(',', ': '))
@@ -586,14 +591,14 @@ def default(self, o):
 if args.extract:
 dump = MigrationDump(args.file)
 
-dump.read(desc_only = True)
+dump.read(desc_only = True, ram_only = args.ram_only)
 print("desc.json")
 f = open("desc.json", "w")
 f.truncate()
 f.write(jsonenc.encode(dump.vmsd_desc))
 f.close()
 
-dump.read(write_memory = True)
+dump.read(write_memory = True, ram_only = args.ram_only)
 dict = dump.getDict()
 print("state.json")
 f = open("state.json", "w")
@@ -602,12 +607,12 @@ def default(self, o):
 f.close()
 elif args.dump == "state":
 dump = MigrationDump(args.file)
-dump.read(dump_memory = args.memory)
+dump.read(dump_memory = args.memory, ram_only = args.ram_only)
 dict = dump.getDict()
 print(jsonenc.encode(dict))
 elif args.dump == "desc":
 dump = MigrationDump(args.file)
-dump.read(desc_only = True)
+dump.read(desc_only = True, ram_only = args.ram_only)
 print(jsonenc.encode(dump.vmsd_desc))
 else:
 raise Exception("Please specify either -x, -d state or -d desc")
-- 
2.27.0




[PATCH 4/6] migration: Add ram-only capability

2021-12-24 Thread Nikita Lapshin
If this capability is enabled migration stream
will have RAM section only.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 20 +++-
 migration/migration.h |  1 +
 migration/savevm.c| 11 ++-
 qapi/migration.json   |  7 +--
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 006447d00a..4d7ef9d8dc 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1257,6 +1257,14 @@ static bool migrate_caps_check(bool *cap_list,
 return false;
 }
 
+if (cap_list[MIGRATION_CAPABILITY_NO_RAM] &&
+cap_list[MIGRATION_CAPABILITY_RAM_ONLY]) {
+error_setg(errp, "ram-only and no-ram aren't "
+ "compatible with each other");
+
+return false;
+}
+
 return true;
 }
 
@@ -2619,6 +2627,15 @@ bool migrate_no_ram(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_NO_RAM];
 }
 
+bool migrate_ram_only(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_RAM_ONLY];
+}
+
 /* migration thread support */
 /*
  * Something bad happened to the RP stream, mark an error
@@ -3973,7 +3990,8 @@ static void *bg_migration_thread(void *opaque)
  * save their state to channel-buffer along with devices.
  */
 cpu_synchronize_all_states();
-if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
+if (!migrate_ram_only() &&
+qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
 goto fail;
 }
 /*
diff --git a/migration/migration.h b/migration/migration.h
index 43f7bf8eba..d5a077e00d 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -359,6 +359,7 @@ bool migrate_use_events(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_background_snapshot(void);
 bool migrate_no_ram(void);
+bool migrate_ram_only(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/migration/savevm.c b/migration/savevm.c
index ba644083ab..e41ca76000 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -249,6 +249,7 @@ typedef struct SaveStateEntry {
 void *opaque;
 CompatEntry *compat;
 int is_iterable;
+bool is_ram;
 } SaveStateEntry;
 
 typedef struct SaveState {
@@ -802,6 +803,10 @@ int register_savevm_live(const char *idstr,
 se->is_iterable = 1;
 }
 
+if (!strcmp("ram", idstr)) {
+se->is_ram = true;
+}
+
 pstrcat(se->idstr, sizeof(se->idstr), idstr);
 
 if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
@@ -949,6 +954,10 @@ static bool should_skip(SaveStateEntry *se)
 return true;
 }
 
+if (migrate_ram_only() && !se->is_ram) {
+return true;
+}
+
 return false;
 }
 
@@ -1486,7 +1495,7 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool 
iterable_only,
 }
 }
 
-if (iterable_only) {
+if (iterable_only || migrate_ram_only()) {
 goto flush;
 }
 
diff --git a/qapi/migration.json b/qapi/migration.json
index d53956852c..626fc59d14 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -454,6 +454,8 @@
 #
 # @no-ram: If enabled, migration stream won't contain any ram in it. (since 
7.0)
 #
+# @ram-only: If enabled, only RAM sections will be sent. (since 7.0)
+#
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
 #
@@ -467,7 +469,7 @@
'block', 'return-path', 'pause-before-switchover', 'multifd',
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
-   'validate-uuid', 'background-snapshot', 'no-ram'] }
+   'validate-uuid', 'background-snapshot', 'no-ram', 'ram-only'] }
 ##
 # @MigrationCapabilityStatus:
 #
@@ -521,7 +523,8 @@
 #   {"state": true, "capability": "events"},
 #   {"state": false, "capability": "postcopy-ram"},
 #   {"state": false, "capability": "x-colo"},
-#   {"state": false, "capability": "no-ram"}
+#   {"state": false, "capability": "no-ram"},
+#   {"state": false, "capability": "ram-only"}
 #]}
 #
 ##
-- 
2.27.0




[PATCH 6/6] migration: Test for ram capabilities

2021-12-24 Thread Nikita Lapshin
Use scripts/analyze-migration.py to split migration stream into
sections and analyze its output.

Signed-off-by: Nikita Lapshin 
---
 .../tests/migrate-ram-capabilities-test   | 96 +++
 .../tests/migrate-ram-capabilities-test.out   |  5 +
 2 files changed, 101 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/migrate-ram-capabilities-test
 create mode 100644 tests/qemu-iotests/tests/migrate-ram-capabilities-test.out

diff --git a/tests/qemu-iotests/tests/migrate-ram-capabilities-test 
b/tests/qemu-iotests/tests/migrate-ram-capabilities-test
new file mode 100755
index 00..917f888340
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-ram-capabilities-test
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# group: rw migration
+#
+# Tests for 'no-ram' and 'ram-only' capabilities
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import json
+import subprocess
+import iotests
+
+img = os.path.join(iotests.test_dir, 'disk.img')
+
+class TestRamCapabilities(iotests.QMPTestCase):
+def setUp(self):
+iotests.qemu_img('create', '-f', iotests.imgfmt, img, '10M')
+self.vm = iotests.VM()
+self.vm.launch()
+self.vm.qmp('migrate-set-capabilities', capabilities=[
+{
+'capability': 'events',
+'state': True
+}
+])
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(img)
+
+def check_ram_only(self, output):
+str_json = output.decode()
+json_obj = json.loads(str_json)
+
+success = False
+for key in json_obj:
+self.assertTrue("ram" in key)
+success = True
+self.assertTrue(success)
+
+def run_migration(self, capability, tmp_stream):
+self.vm.qmp('migrate-set-capabilities', capabilities=[
+{
+'capability': capability,
+'state': True
+}
+])
+
+self.vm.qmp('migrate', uri='exec:cat>' + tmp_stream)
+
+while True:
+event = self.vm.event_wait('MIGRATION')
+
+if event['data']['status'] == 'completed':
+break
+
+
+def test_no_ram(self):
+with iotests.FilePath('tmp_stream') as tmp_stream:
+self.run_migration('no-ram', tmp_stream)
+output = subprocess.run(
+['../../../scripts/analyze-migration.py', '-f', tmp_stream],
+stdout=subprocess.PIPE,
+stderr=subprocess.STDOUT,
+check=False).stdout
+
+self.assertFalse('ram' in output.decode())
+
+def test_ram_only(self):
+with iotests.FilePath('tmp_stream') as tmp_stream:
+self.run_migration('ram-only', tmp_stream)
+output = subprocess.run(
+['../../../scripts/analyze-migration.py', '-f', tmp_stream,
+'--ram-only'],
+stdout=subprocess.PIPE,
+stderr=subprocess.STDOUT,
+check=False).stdout
+
+self.check_ram_only(output)
+
+if __name__ == '__main__':
+iotests.main(supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/migrate-ram-capabilities-test.out 
b/tests/qemu-iotests/tests/migrate-ram-capabilities-test.out
new file mode 100644
index 00..fbc63e62f8
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-ram-capabilities-test.out
@@ -0,0 +1,5 @@
+..
+--
+Ran 2 tests
+
+OK
-- 
2.27.0




Re: [PATCH for-7.0] hw: Add compat machines for 7.0

2021-12-24 Thread Andrew Jones
On Fri, Dec 17, 2021 at 03:29:13PM +0100, Cornelia Huck wrote:
> On Fri, Dec 17 2021, Daniel P. Berrangé  wrote:
> 
> > On Fri, Dec 17, 2021 at 09:13:55AM +0100, Cornelia Huck wrote:
> >> On Wed, Dec 08 2021, Cornelia Huck  wrote:
> >> 
> >> > Add 7.0 machine types for arm/i440fx/q35/s390x/spapr.
> >> >
> >> > Signed-off-by: Cornelia Huck 
> >> > ---
> >> >  hw/arm/virt.c  |  9 -
> >> >  hw/core/machine.c  |  3 +++
> >> >  hw/i386/pc.c   |  3 +++
> >> >  hw/i386/pc_piix.c  | 14 +-
> >> >  hw/i386/pc_q35.c   | 13 -
> >> >  hw/ppc/spapr.c | 15 +--
> >> >  hw/s390x/s390-virtio-ccw.c | 14 +-
> >> >  include/hw/boards.h|  3 +++
> >> >  include/hw/i386/pc.h   |  3 +++
> >> >  9 files changed, 71 insertions(+), 6 deletions(-)
> >> >
> >> 
> >
> >
> >> > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> >> > index 223dd3e05d15..b03026bf0648 100644
> >> > --- a/hw/i386/pc_piix.c
> >> > +++ b/hw/i386/pc_piix.c
> >> > @@ -413,7 +413,7 @@ static void pc_i440fx_machine_options(MachineClass 
> >> > *m)
> >> >  machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
> >> >  }
> >> >  
> >> > -static void pc_i440fx_6_2_machine_options(MachineClass *m)
> >> > +static void pc_i440fx_7_0_machine_options(MachineClass *m)
> >> >  {
> >> >  PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> >> >  pc_i440fx_machine_options(m);
> >> > @@ -422,6 +422,18 @@ static void 
> >> > pc_i440fx_6_2_machine_options(MachineClass *m)
> >> >  pcmc->default_cpu_version = 1;
> >> >  }
> >> >  
> >> > +DEFINE_I440FX_MACHINE(v7_0, "pc-i440fx-7.0", NULL,
> >> > +  pc_i440fx_7_0_machine_options);
> >> > +
> >> > +static void pc_i440fx_6_2_machine_options(MachineClass *m)
> >> > +{
> >> > +pc_i440fx_machine_options(m);
> >
> > Needs to be pc_i440fx_7_0_machine_options()
> >
> >> > +m->alias = NULL;
> >> > +m->is_default = false;
> >> > +compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
> >> > +compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
> >> > +}
> >> > +
> >> >  DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL,
> >> >pc_i440fx_6_2_machine_options);
> >> >  
> >> > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> >> > index e1e100316d93..6b66eb16bb64 100644
> >> > --- a/hw/i386/pc_q35.c
> >> > +++ b/hw/i386/pc_q35.c
> >> > @@ -360,7 +360,7 @@ static void pc_q35_machine_options(MachineClass *m)
> >> >  m->max_cpus = 288;
> >> >  }
> >> >  
> >> > -static void pc_q35_6_2_machine_options(MachineClass *m)
> >> > +static void pc_q35_7_0_machine_options(MachineClass *m)
> >> >  {
> >> >  PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> >> >  pc_q35_machine_options(m);
> >> > @@ -368,6 +368,17 @@ static void pc_q35_6_2_machine_options(MachineClass 
> >> > *m)
> >> >  pcmc->default_cpu_version = 1;
> >> >  }
> >> >  
> >> > +DEFINE_Q35_MACHINE(v7_0, "pc-q35-7.0", NULL,
> >> > +   pc_q35_7_0_machine_options);
> >> > +
> >> > +static void pc_q35_6_2_machine_options(MachineClass *m)
> >> > +{
> >> > +pc_q35_machine_options(m);
> >
> > Needs to be pc_q35_7_0_machine_options()
> >
> >> > +m->alias = NULL;
> >> > +compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
> >> > +compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
> >> > +}
> >> > +
> >> >  DEFINE_Q35_MACHINE(v6_2, "pc-q35-6.2", NULL,
> >> > pc_q35_6_2_machine_options);
> >> >  
> >> 
> >> So, this apparently causes some problems with one of the avocado tests:
> >> 
> >> 162-tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_1_alias
> >>  -> AssertionError: None != 'Cascadelake-Server-v1' : Cascadelake-Server 
> >> must be an alias of Cascadelake-Server-v1
> >> 
> >> (full output at https://gitlab.com/qemu-project/qemu/-/jobs/1893456217)
> >> 
> >> I have looked at the patch again and do not see what might be wrong (has
> >> something changed with the cpu model versioning recently?)
> >> 
> >> Does anyone else (especially the x86 folks) have an idea?
> >
> > AFAICT, just a typo in chaining up the methods I've pointed out inline.
> 
> Duh, indeed. Thanks for spotting this.
> 
> Will send a v2.
> 
>

/me wonders if this patch could be generated with a "simple" script in
order to avoid typos and/or forgetting lines. It seems like all
architectures have pretty consistent patterns.

Thanks,
drew




Re: [PATCH v2] hw: Add compat machines for 7.0

2021-12-24 Thread Andrew Jones
On Fri, Dec 17, 2021 at 03:39:48PM +0100, Cornelia Huck wrote:
> Add 7.0 machine types for arm/i440fx/q35/s390x/spapr.
> 
> Acked-by: Cédric Le Goater 
> Reviewed-by: Juan Quintela 
> Signed-off-by: Cornelia Huck 
> ---
> 
> v1->v2: fix typo in i386 function chaining (thanks danpb!)
> 
> ---
>  hw/arm/virt.c  |  9 -
>  hw/core/machine.c  |  3 +++
>  hw/i386/pc.c   |  3 +++
>  hw/i386/pc_piix.c  | 14 +-
>  hw/i386/pc_q35.c   | 13 -
>  hw/ppc/spapr.c | 15 +--
>  hw/s390x/s390-virtio-ccw.c | 14 +-
>  include/hw/boards.h|  3 +++
>  include/hw/i386/pc.h   |  3 +++
>  9 files changed, 71 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 6bce595aba20..4593fea1ce8a 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2856,10 +2856,17 @@ static void machvirt_machine_init(void)
>  }
>  type_init(machvirt_machine_init);
>  
> +static void virt_machine_7_0_options(MachineClass *mc)
> +{
> +}
> +DEFINE_VIRT_MACHINE_AS_LATEST(7, 0)
> +
>  static void virt_machine_6_2_options(MachineClass *mc)
>  {
> +virt_machine_7_0_options(mc);
> +compat_props_add(mc->compat_props, hw_compat_6_2, hw_compat_6_2_len);
>  }
> -DEFINE_VIRT_MACHINE_AS_LATEST(6, 2)
> +DEFINE_VIRT_MACHINE(6, 2)
>  
>  static void virt_machine_6_1_options(MachineClass *mc)
>  {

For the arm parts

Reviewed-by: Andrew Jones 




Re: [PATCH] net/filter: Optimize filter_send to coroutine

2021-12-24 Thread Rao, Lei




On 12/24/2021 6:07 PM, lizhij...@fujitsu.com wrote:



On 24/12/2021 10:37, Rao, Lei wrote:

This patch is to improve the logic of QEMU main thread sleep code in
qemu_chr_write_buffer() where it can be blocked and can't run other
coroutines during COLO IO stress test.

Our approach is to put filter_send() in a coroutine. In this way,
filter_send() will call qemu_coroutine_yield() in qemu_co_sleep_ns(),
so that it can be scheduled out and QEMU main thread has opportunity to
run other tasks.

Signed-off-by: Lei Rao 
Signed-off-by: Zhang Chen 
---
   net/filter-mirror.c | 67 -
   1 file changed, 54 insertions(+), 13 deletions(-)

diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index f20240cc9f..1e9f8b6216 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -20,6 +20,7 @@
   #include "chardev/char-fe.h"
   #include "qemu/iov.h"
   #include "qemu/sockets.h"
+#include "block/aio-wait.h"
   
   #define TYPE_FILTER_MIRROR "filter-mirror"

   typedef struct MirrorState MirrorState;
@@ -42,20 +43,21 @@ struct MirrorState {
   bool vnet_hdr;
   };
   
-static int filter_send(MirrorState *s,

-   const struct iovec *iov,
-   int iovcnt)
+typedef struct FilterSendCo {
+MirrorState *s;
+char *buf;
+ssize_t size;
+bool done;
+int ret;
+} FilterSendCo;
+
+static int _filter_send(MirrorState *s,
+   char *buf,
+   ssize_t size)
   {
   NetFilterState *nf = NETFILTER(s);
   int ret = 0;
-ssize_t size = 0;
   uint32_t len = 0;
-char *buf;
-
-size = iov_size(iov, iovcnt);
-if (!size) {
-return 0;
-}
   
   len = htonl(size);

   ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
@@ -80,10 +82,7 @@ static int filter_send(MirrorState *s,
   }
   }
   
-buf = g_malloc(size);

-iov_to_buf(iov, iovcnt, 0, buf, size);
   ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
-g_free(buf);
   if (ret != size) {
   goto err;
   }
@@ -94,6 +93,48 @@ err:
   return ret < 0 ? ret : -EIO;
   }
   
+static void coroutine_fn filter_send_co(void *opaque)

+{
+FilterSendCo *data = opaque;
+
+data->ret = _filter_send(data->s, data->buf, data->size);
+data->done = true;
+g_free(data->buf);
+aio_wait_kick();
+}
+
+static int filter_send(MirrorState *s,
+   const struct iovec *iov,
+   int iovcnt)
+{
+ssize_t size = iov_size(iov, iovcnt);
+char *buf = NULL;
+
+if (!size) {
+return 0;
+}
+
+buf = g_malloc(size);
+iov_to_buf(iov, iovcnt, 0, buf, size);
+
+FilterSendCo data = {
+.s = s,
+.size = size,
+.buf = buf,
+.ret = 0,
+};
+
+Coroutine *co = qemu_coroutine_create(filter_send_co, &data);


BTW, does qemu/old gcc complaint such coding style ?

int a;
a = foo()
int b = a;


There are a lot of codes of this style in QEMU.
It is written that we need at least GCC v7.4 to compile QEMU in the configure 
file.
So, I think it is no problem.






+qemu_coroutine_enter(co);
+
+while (!data.done) {
+aio_poll(qemu_get_aio_context(), true);
+}
+
+return data.ret;
+

redundant  newline


will be changed in V2.

Thanks,
Lei



Otherwise,
Reviewed-by: Li Zhijian 




+}
+
   static void redirector_to_filter(NetFilterState *nf,
const uint8_t *buf,
int len)




Re: [PATCH 3/8] ppc/ppc405: Activate MMU logs

2021-12-24 Thread BALATON Zoltan

On Thu, 23 Dec 2021, Richard Henderson wrote:

On 12/21/21 10:40 PM, Cédric Le Goater wrote:

There is no need to deactivate MMU logging at compile time.

Signed-off-by: Cédric Le Goater
---
  target/ppc/mmu_common.c | 4 ++--
  target/ppc/mmu_helper.c | 2 +-
  2 files changed, 3 insertions(+), 3 deletions(-)


There's also no need to use #defines.
You should just remove these and the ifdefs.


I thought the same unless these are disabled by default for performance 
reasons. MMU is already quite slow, would this make it even slower?


Regards,
BALATON Zoltan

Re: [RFC PATCH 2/2] accel/tcg: replace phys_pc with asid in TB htable key

2021-12-24 Thread Vasilev Oleg via
On 12/23/2021 7:31 PM, Richard Henderson wrote:
> On 12/22/21 8:50 AM, Oleg Vasilev wrote:
>> From: Oleg Vasilev 
>>
>> Using a physical pc requires to translate address every time next block
>> needs to be found and executed. This also contaminates TLB with code-related
>> records.
>>
>> Instead, I suggest we introduce an architecture-specific address space
>> identifier, and use it to distinguish between different AS's
>> translation blocks.
> 
> Why do you believe that asid is sufficient here?  You're not invalidating any 
> more TBs 
> that I can see.  What happens when the kernel re-uses an asid?

Hi,

Sorry, I had some comments for the patch, but forgot to put it in.

So, I think I interpret the term "asid" in some other sense, namely, an
identifier, which is constant during whole lifespan of an address space.
Same as PID in that sense. Do you think this is a viable approach?

If we assume translation table wouldn't change during process life,
after the death of the process, all it address space would be anyway
unmapped and corresponding translation blocks would be invalidated.


> 
> I believe this patch to be fundamentally flawed.

Maybe it is, I just wanted to get feedback from you guys. Do you think
maybe exists some other way, which would not require translating va->pa
every time to look up next block?

More context is in:

 Subject: Suggestions for TCG performance improvements
 Date: Thu, 2 Dec 2021 09:47:13 +
 Message-ID: 

> 
> All that said,
> 
>> +/* Returns the identifier for a current address space. */
>> +static uint64_t arm_get_asid(CPUState *cs)
>> +{
>> +ARMCPU *cpu = ARM_CPU(cs);
>> +CPUARMState *env = &cpu->env;
>> +ARMMMUIdx mmu_idx = arm_mmu_idx(env);
>> +uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
>> +
>> +#define TCR_A1 (1U << 22)
>> +return regime_ttbr(env, mmu_idx, (tcr&TCR_A1)>0);
>> +}
> 
> Why are you returning the entire ttbr, and not the asid in the top 16 bits?

Actually, for my particular case I seem to need to return the lowest 40
bits, which is actual base for TT.

Thanks,
Oleg

> 
> 
> r~
> 




Re: [PATCH] tests/qtest/virtio-net-failover: Use g_file_open_tmp() to create temporary file

2021-12-24 Thread Philippe Mathieu-Daudé
On 12/22/21 09:36, Thomas Huth wrote:
> g_test_rand_int() must not be called before g_test_init(), otherwise
> the glib will show a "g_rand_int: assertion 'rand != NULL' failed"
> message in the log. So we could change the order here, but actually,
> it's safer to use g_file_open_tmp() anyway, so let's use that function
> now instead.
> 
> Reported-by: Philippe Mathieu-Daudé 
> Suggested-by: Richard Henderson 
> Signed-off-by: Thomas Huth 
> ---
>  tests/qtest/virtio-net-failover.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 




Re: [PULL 10/28] export/fuse: Pass default_permissions for mount

2021-12-24 Thread Vladimir Sementsov-Ogievskiy

09.07.2021 15:50, Kevin Wolf wrote:

From: Max Reitz 

We do not do any permission checks in fuse_open(), so let the kernel do
them.  We already let fuse_getattr() report the proper UNIX permissions,
so this should work the way we want.

This causes a change in 308's reference output, because now opening a
non-writable export with O_RDWR fails already, instead of only actually
attempting to write to it.  (That is an improvement.)

Signed-off-by: Max Reitz 
Message-Id: <20210625142317.271673-2-mre...@redhat.com>
Signed-off-by: Kevin Wolf 
---
  block/export/fuse.c| 8 ++--
  tests/qemu-iotests/308 | 3 ++-
  tests/qemu-iotests/308.out | 2 +-
  3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/block/export/fuse.c b/block/export/fuse.c
index 38f74c94da..d0b88e8f80 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -153,8 +153,12 @@ static int setup_fuse_export(FuseExport *exp, const char 
*mountpoint,
  struct fuse_args fuse_args;
  int ret;
  
-/* Needs to match what fuse_init() sets.  Only max_read must be supplied. */

-mount_opts = g_strdup_printf("max_read=%zu", FUSE_MAX_BOUNCE_BYTES);
+/*
+ * max_read needs to match what fuse_init() sets.
+ * max_write need not be supplied.
+ */
+mount_opts = g_strdup_printf("max_read=%zu,default_permissions",
+ FUSE_MAX_BOUNCE_BYTES);
  
  fuse_argv[0] = ""; /* Dummy program name */

  fuse_argv[1] = "-o";
diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308
index f122065d0f..11c28a75f2 100755
--- a/tests/qemu-iotests/308
+++ b/tests/qemu-iotests/308
@@ -215,7 +215,8 @@ echo '=== Writable export ==='
  fuse_export_add 'export-mp' "'mountpoint': '$EXT_MP', 'writable': true"
  
  # Check that writing to the read-only export fails

-$QEMU_IO -f raw -c 'write -P 42 1M 64k' "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -f raw -c 'write -P 42 1M 64k' "$TEST_IMG" 2>&1 \
+| _filter_qemu_io | _filter_testdir | _filter_imgfmt
  
  # But here it should work

  $QEMU_IO -f raw -c 'write -P 42 1M 64k' "$EXT_MP" | _filter_qemu_io
diff --git a/tests/qemu-iotests/308.out b/tests/qemu-iotests/308.out
index 466e7e0267..0e9420645f 100644
--- a/tests/qemu-iotests/308.out
+++ b/tests/qemu-iotests/308.out
@@ -91,7 +91,7 @@ virtual size: 0 B (0 bytes)
'mountpoint': 'TEST_DIR/t.IMGFMT.fuse', 'writable': true
} }
  {"return": {}}
-write failed: Permission denied
+qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 
'TEST_DIR/t.IMGFMT': Permission denied
  wrote 65536/65536 bytes at offset 1048576
  64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
  wrote 65536/65536 bytes at offset 1048576



Hi!

308 test fails for me now:

308   fail   [16:02:49] [16:02:53]   3.5s   (last: 3.7s)  output mismatch 
(see 308.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/308.out
+++ 308.out.bad
@@ -91,7 +91,7 @@
   'mountpoint': 'TEST_DIR/t.IMGFMT.fuse', 'writable': true
   } }
 {"return": {}}
-qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 
'TEST_DIR/t.IMGFMT': Permission denied
+write failed: Permission denied
 wrote 65536/65536 bytes at offset 1048576
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 1048576
Failures: 308
Failed 1 of 1 iotests


And bisect points to exactly this commit.

Should it somehow depend on environment?


--
Best regards,
Vladimir



[PATCH v2 3/5] block/stream: add own blk

2021-12-24 Thread Vladimir Sementsov-Ogievskiy
block-stream is the only block-job, that reasonably use BlockJob.blk.
We are going to drop BlockJob.blk soon. So, let block-stream have own
blk.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/stream.c | 24 ++--
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e45113aed6..7c6b173ddd 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -33,6 +33,7 @@ enum {
 
 typedef struct StreamBlockJob {
 BlockJob common;
+BlockBackend *blk;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
 BlockDriverState *cor_filter_bs;
@@ -88,17 +89,18 @@ static int stream_prepare(Job *job)
 static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = &s->common;
 
 if (s->cor_filter_bs) {
 bdrv_cor_filter_drop(s->cor_filter_bs);
 s->cor_filter_bs = NULL;
 }
 
+blk_unref(s->blk);
+s->blk = NULL;
+
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
-blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
 bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 
@@ -108,7 +110,6 @@ static void stream_clean(Job *job)
 static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockBackend *blk = s->common.blk;
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
@@ -159,7 +160,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 }
 trace_stream_one_iteration(s, offset, n, ret);
 if (copy) {
-ret = stream_populate(blk, offset, n);
+ret = stream_populate(s->blk, offset, n);
 }
 if (ret < 0) {
 BlockErrorAction action =
@@ -294,13 +295,24 @@ void stream_start(const char *job_id, BlockDriverState 
*bs,
 }
 
 s = block_job_create(job_id, &stream_job_driver, NULL, cor_filter_bs,
- BLK_PERM_CONSISTENT_READ,
- basic_flags | BLK_PERM_WRITE,
+ 0, BLK_PERM_ALL,
  speed, creation_flags, NULL, NULL, errp);
 if (!s) {
 goto fail;
 }
 
+s->blk = blk_new_with_bs(cor_filter_bs, BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE, errp);
+if (!s->blk) {
+goto fail;
+}
+/*
+ * Disable request queuing in the BlockBackend to avoid deadlocks on drain:
+ * The job reports that it's busy until it reaches a pause point.
+ */
+blk_set_disable_request_queuing(s->blk, true);
+blk_set_allow_aio_context_change(s->blk, true);
+
 /*
  * Prevent concurrent jobs trying to modify the graph structure here, we
  * already have our own plans. Also don't allow resize as the image size is
-- 
2.31.1




[PATCH v2 2/5] test-blockjob-txn: don't abuse job->blk

2021-12-24 Thread Vladimir Sementsov-Ogievskiy
Here we use job->blk to drop our own reference in job cleanup. Let's do
simpler: drop our reference immediately after job creation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/unit/test-blockjob-txn.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/tests/unit/test-blockjob-txn.c b/tests/unit/test-blockjob-txn.c
index 8bd13b9949..c69028b450 100644
--- a/tests/unit/test-blockjob-txn.c
+++ b/tests/unit/test-blockjob-txn.c
@@ -25,14 +25,6 @@ typedef struct {
 int *result;
 } TestBlockJob;
 
-static void test_block_job_clean(Job *job)
-{
-BlockJob *bjob = container_of(job, BlockJob, job);
-BlockDriverState *bs = blk_bs(bjob->blk);
-
-bdrv_unref(bs);
-}
-
 static int coroutine_fn test_block_job_run(Job *job, Error **errp)
 {
 TestBlockJob *s = container_of(job, TestBlockJob, common.job);
@@ -73,7 +65,6 @@ static const BlockJobDriver test_block_job_driver = {
 .free  = block_job_free,
 .user_resume   = block_job_user_resume,
 .run   = test_block_job_run,
-.clean = test_block_job_clean,
 },
 };
 
@@ -105,6 +96,7 @@ static BlockJob *test_block_job_start(unsigned int 
iterations,
 s = block_job_create(job_id, &test_block_job_driver, txn, bs,
  0, BLK_PERM_ALL, 0, JOB_DEFAULT,
  test_block_job_cb, data, &error_abort);
+bdrv_unref(bs); /* referenced by job now */
 s->iterations = iterations;
 s->use_timer = use_timer;
 s->rc = rc;
-- 
2.31.1




[PATCH v2 1/5] blockjob: implement and use block_job_get_aio_context

2021-12-24 Thread Vladimir Sementsov-Ogievskiy
We are going to drop BlockJob.blk. So let's retrieve block job context
from underlying job instead of main node.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/blockjob.h | 7 +++
 blockdev.c   | 6 +++---
 blockjob.c   | 5 +
 qemu-img.c   | 2 +-
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index d200f33c10..3b84805140 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -173,4 +173,11 @@ bool block_job_is_internal(BlockJob *job);
  */
 const BlockJobDriver *block_job_driver(BlockJob *job);
 
+/*
+ * block_job_get_aio_context:
+ *
+ * Returns aio context associated with a block job.
+ */
+AioContext *block_job_get_aio_context(BlockJob *job);
+
 #endif
diff --git a/blockdev.c b/blockdev.c
index 0eb2823b1b..b5ff9b854e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3315,7 +3315,7 @@ static BlockJob *find_block_job(const char *id, 
AioContext **aio_context,
 return NULL;
 }
 
-*aio_context = blk_get_aio_context(job->blk);
+*aio_context = block_job_get_aio_context(job);
 aio_context_acquire(*aio_context);
 
 return job;
@@ -3420,7 +3420,7 @@ void qmp_block_job_finalize(const char *id, Error **errp)
  * automatically acquires the new one), so make sure we release the correct
  * one.
  */
-aio_context = blk_get_aio_context(job->blk);
+aio_context = block_job_get_aio_context(job);
 job_unref(&job->job);
 aio_context_release(aio_context);
 }
@@ -3711,7 +3711,7 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 if (block_job_is_internal(job)) {
 continue;
 }
-aio_context = blk_get_aio_context(job->blk);
+aio_context = block_job_get_aio_context(job);
 aio_context_acquire(aio_context);
 value = block_job_query(job, errp);
 aio_context_release(aio_context);
diff --git a/blockjob.c b/blockjob.c
index 4bad1408cb..70bc3105a6 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -547,3 +547,8 @@ BlockErrorAction block_job_error_action(BlockJob *job, 
BlockdevOnError on_err,
 }
 return action;
 }
+
+AioContext *block_job_get_aio_context(BlockJob *job)
+{
+return job->job.aio_context;
+}
diff --git a/qemu-img.c b/qemu-img.c
index f036a1d428..21ba1e6800 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -902,7 +902,7 @@ static void common_block_job_cb(void *opaque, int ret)
 static void run_block_job(BlockJob *job, Error **errp)
 {
 uint64_t progress_current, progress_total;
-AioContext *aio_context = blk_get_aio_context(job->blk);
+AioContext *aio_context = block_job_get_aio_context(job);
 int ret = 0;
 
 aio_context_acquire(aio_context);
-- 
2.31.1




[PATCH v2 4/5] test-bdrv-drain: don't use BlockJob.blk

2021-12-24 Thread Vladimir Sementsov-Ogievskiy
We are going to drop BlockJob.blk in further commit. For tests it's
enough to simply pass bs pointer.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/unit/test-bdrv-drain.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 2d3c17e566..36be84ae55 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -772,6 +772,7 @@ static void test_iothread_drain_subtree(void)
 
 typedef struct TestBlockJob {
 BlockJob common;
+BlockDriverState *bs;
 int run_ret;
 int prepare_ret;
 bool running;
@@ -783,7 +784,7 @@ static int test_job_prepare(Job *job)
 TestBlockJob *s = container_of(job, TestBlockJob, common.job);
 
 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
-blk_flush(s->common.blk);
+bdrv_flush(s->bs);
 return s->prepare_ret;
 }
 
@@ -792,7 +793,7 @@ static void test_job_commit(Job *job)
 TestBlockJob *s = container_of(job, TestBlockJob, common.job);
 
 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
-blk_flush(s->common.blk);
+bdrv_flush(s->bs);
 }
 
 static void test_job_abort(Job *job)
@@ -800,7 +801,7 @@ static void test_job_abort(Job *job)
 TestBlockJob *s = container_of(job, TestBlockJob, common.job);
 
 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
-blk_flush(s->common.blk);
+bdrv_flush(s->bs);
 }
 
 static int coroutine_fn test_job_run(Job *job, Error **errp)
@@ -915,6 +916,7 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type,
 tjob = block_job_create("job0", &test_job_driver, NULL, src,
 0, BLK_PERM_ALL,
 0, 0, NULL, NULL, &error_abort);
+tjob->bs = src;
 job = &tjob->common;
 block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
 
@@ -1538,6 +1540,7 @@ typedef struct TestDropBackingBlockJob {
 bool should_complete;
 bool *did_complete;
 BlockDriverState *detach_also;
+BlockDriverState *bs;
 } TestDropBackingBlockJob;
 
 static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp)
@@ -1557,7 +1560,7 @@ static void test_drop_backing_job_commit(Job *job)
 TestDropBackingBlockJob *s =
 container_of(job, TestDropBackingBlockJob, common.job);
 
-bdrv_set_backing_hd(blk_bs(s->common.blk), NULL, &error_abort);
+bdrv_set_backing_hd(s->bs, NULL, &error_abort);
 bdrv_set_backing_hd(s->detach_also, NULL, &error_abort);
 
 *s->did_complete = true;
@@ -1657,6 +1660,7 @@ static void test_blockjob_commit_by_drained_end(void)
 job = block_job_create("job", &test_drop_backing_job_driver, NULL,
bs_parents[2], 0, BLK_PERM_ALL, 0, 0, NULL, NULL,
&error_abort);
+job->bs = bs_parents[2];
 
 job->detach_also = bs_parents[0];
 job->did_complete = &job_has_completed;
-- 
2.31.1




[PATCH v2 0/5] block-job: drop BlockJob.blk

2021-12-24 Thread Vladimir Sementsov-Ogievskiy
Hi all!

v2: rebase on master, fix iostest 283

Block jobs usually operate with several block nodes, and better to
handle them symmetrically, than use one from s->common.blk and one from
s->target (or something like this). Moreover, generic blockjob layer has
no use of BlockJob.blk. And more-moreover, most of block-jobs don't
really use this blk. Actually only block-stream use it.

I've started this thing (unbinding block-job and its main node) long
ago. First step was removing bs->job pointer in b23c580c946644b. Then
block_job_drain was dropped in bb0c94099382b5273.

Now let's finally drop job->blk pointer.

Vladimir Sementsov-Ogievskiy (5):
  blockjob: implement and use block_job_get_aio_context
  test-blockjob-txn: don't abuse job->blk
  block/stream: add own blk
  test-bdrv-drain: don't use BlockJob.blk
  blockjob: drop BlockJob.blk field

 include/block/blockjob.h   | 10 +++---
 block/mirror.c |  7 ---
 block/stream.c | 24 +--
 blockdev.c |  6 +++---
 blockjob.c | 36 --
 qemu-img.c |  2 +-
 tests/unit/test-bdrv-drain.c   | 12 
 tests/unit/test-blockjob-txn.c | 10 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/283 |  3 ++-
 tests/qemu-iotests/283.out |  2 +-
 11 files changed, 59 insertions(+), 55 deletions(-)

-- 
2.31.1




[PATCH v2 5/5] blockjob: drop BlockJob.blk field

2021-12-24 Thread Vladimir Sementsov-Ogievskiy
It's unused now (except for permission handling)[*]. The only reasonable
user of it was block-stream job, recently updated to use own blk. And
other block jobs prefer to use own source node related objects.

So, the arguments of dropping the field are:

 - block jobs prefer not to use it
 - block jobs usually has more then one node to operate on, and better
   to operate symmetrically (for example has both source and target
   blk's in specific block-job state structure)

*: BlockJob.blk is used to keep some permissions. We simply move
permissions to block-job child created in block_job_create() together
with blk.

In mirror, we just should not care anymore about restoring state of
blk. Most probably this code could be dropped long ago, after dropping
bs->job pointer. Now it finally goes away together with BlockJob.blk
itself.

iotest 141 output is updated, as "bdrv_has_blk(bs)" check in
qmp_blockdev_del() doesn't fail (we don't have blk now). Still, new
error message looks even better.

In iotest 283 we need to add a job id, otherwise "Invalid job ID"
happens now earlier than permission check (as permissions moved from
blk to block-job node).

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/blockjob.h   |  3 ---
 block/mirror.c |  7 ---
 blockjob.c | 31 ---
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/283 |  3 ++-
 tests/qemu-iotests/283.out |  2 +-
 6 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 3b84805140..87fbb3985f 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -43,9 +43,6 @@ typedef struct BlockJob {
 /** Data belonging to the generic Job infrastructure */
 Job job;
 
-/** The block device on which the job is operating.  */
-BlockBackend *blk;
-
 /** Status that is published by the query-block-jobs QMP API */
 BlockDeviceIoStatus iostatus;
 
diff --git a/block/mirror.c b/block/mirror.c
index efec2c7674..959e3dfbd6 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -771,13 +771,6 @@ static int mirror_exit_common(Job *job)
 block_job_remove_all_bdrv(bjob);
 bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
 
-/* We just changed the BDS the job BB refers to (with either or both of the
- * bdrv_replace_node() calls), so switch the BB back so the cleanup does
- * the right thing. We don't need any permissions any more now. */
-blk_remove_bs(bjob->blk);
-blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
-
 bs_opaque->job = NULL;
 
 bdrv_drained_end(src);
diff --git a/blockjob.c b/blockjob.c
index 70bc3105a6..10815a89fe 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -86,7 +86,6 @@ void block_job_free(Job *job)
 BlockJob *bjob = container_of(job, BlockJob, job);
 
 block_job_remove_all_bdrv(bjob);
-blk_unref(bjob->blk);
 ratelimit_destroy(&bjob->limit);
 error_free(bjob->blocker);
 }
@@ -433,22 +432,16 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
-BlockBackend *blk;
 BlockJob *job;
+int ret;
 
 if (job_id == NULL && !(flags & JOB_INTERNAL)) {
 job_id = bdrv_get_device_name(bs);
 }
 
-blk = blk_new_with_bs(bs, perm, shared_perm, errp);
-if (!blk) {
-return NULL;
-}
-
-job = job_create(job_id, &driver->job_driver, txn, 
blk_get_aio_context(blk),
+job = job_create(job_id, &driver->job_driver, txn, 
bdrv_get_aio_context(bs),
  flags, cb, opaque, errp);
 if (job == NULL) {
-blk_unref(blk);
 return NULL;
 }
 
@@ -458,8 +451,6 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
 
 ratelimit_init(&job->limit);
 
-job->blk = blk;
-
 job->finalize_cancelled_notifier.notify = block_job_event_cancelled;
 job->finalize_completed_notifier.notify = block_job_event_completed;
 job->pending_notifier.notify = block_job_event_pending;
@@ -476,21 +467,23 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
 
 error_setg(&job->blocker, "block device is in use by block job: %s",
job_type_str(&job->job));
-block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
+
+ret = block_job_add_bdrv(job, "main node", bs, perm, shared_perm, errp);
+if (ret < 0) {
+goto fail;
+}
 
 bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
-/* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
- * The job reports that it's busy until it reaches a pause point. */
-blk_set_disable_request_queuing(blk, true);
-blk_set_allow_aio_context_chang

Re: [PATCH v2 1/6] migration: All this fields are unsigned

2021-12-24 Thread Juan Quintela
Peter Xu  wrote:
> On Tue, Dec 21, 2021 at 01:52:30PM +0100, Juan Quintela wrote:
>> So printing it as %d is wrong.  Notice that for the channel id, that
>> is an uint8_t, but I changed it anyways for consistency.
>
> Just curious: uint_8 can always correctly converted to a int, so the patch
> shouldn't have a functional change, right?

Yeap.  My understanding of C promotion rules is that everything is
promoted to int by default, if the size is smaller than positive part of
it, that is ok.

But once there, I reviewed all "%d", and if it was unsigned, I change it
to "%u".

>> Signed-off-by: Juan Quintela 
>> Reviewed-by: Philippe Mathieu-Daudé 
>
> Reviewed-by: Peter Xu 

Thanks, Juan.




Re: [RFC PATCH 2/2] accel/tcg: replace phys_pc with asid in TB htable key

2021-12-24 Thread Richard Henderson

On 12/24/21 5:02 AM, Vasilev Oleg wrote:

On 12/23/2021 7:31 PM, Richard Henderson wrote:

On 12/22/21 8:50 AM, Oleg Vasilev wrote:

From: Oleg Vasilev 

Using a physical pc requires to translate address every time next block
needs to be found and executed. This also contaminates TLB with code-related
records.

Instead, I suggest we introduce an architecture-specific address space
identifier, and use it to distinguish between different AS's
translation blocks.


Why do you believe that asid is sufficient here?  You're not invalidating any 
more TBs
that I can see.  What happens when the kernel re-uses an asid?


Hi,

Sorry, I had some comments for the patch, but forgot to put it in.

So, I think I interpret the term "asid" in some other sense, namely, an
identifier, which is constant during whole lifespan of an address space.
Same as PID in that sense. Do you think this is a viable approach?


No, I do not.


If we assume translation table wouldn't change during process life,
after the death of the process, all it address space would be anyway
unmapped and corresponding translation blocks would be invalidated.


While this assumption is often true, it certainly isn't universally true.
Consider the cases of dlclose, followed by another dlopen; or any JIT.


r~



Re: [PATCH 3/8] ppc/ppc405: Activate MMU logs

2021-12-24 Thread Richard Henderson

On 12/24/21 4:57 AM, BALATON Zoltan wrote:

On Thu, 23 Dec 2021, Richard Henderson wrote:

On 12/21/21 10:40 PM, Cédric Le Goater wrote:

There is no need to deactivate MMU logging at compile time.

Signed-off-by: Cédric Le Goater
---
  target/ppc/mmu_common.c | 4 ++--
  target/ppc/mmu_helper.c | 2 +-
  2 files changed, 3 insertions(+), 3 deletions(-)


There's also no need to use #defines.
You should just remove these and the ifdefs.


I thought the same unless these are disabled by default for performance reasons. MMU is 
already quite slow, would this make it even slower?


I don't believe the difference will be measurable.


r~



[PATCH] tests/tcg: Use $cpu in configure.sh

2021-12-24 Thread Richard Henderson
Use $cpu instead of $ARCH, which has been removed from
the top-level configure.

Fixes: 823eb013452e
Signed-off-by: Richard Henderson 
---
 configure  | 2 +-
 tests/tcg/configure.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index eb977e5b6f..030728d11e 100755
--- a/configure
+++ b/configure
@@ -3821,7 +3821,7 @@ done
 (for i in $cross_cc_vars; do
   export $i
 done
-export target_list source_path use_containers ARCH
+export target_list source_path use_containers cpu
 $source_path/tests/tcg/configure.sh)
 
 # temporary config to build submodules
diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh
index 9ef913df5b..8eb4287c84 100755
--- a/tests/tcg/configure.sh
+++ b/tests/tcg/configure.sh
@@ -326,7 +326,7 @@ for target in $target_list; do
   elif test $got_cross_cc = no && test "$container" != no && \
   test -n "$container_image"; then
   for host in $container_hosts; do
-  if test "$host" = "$ARCH"; then
+  if test "$host" = "$cpu"; then
   echo "DOCKER_IMAGE=$container_image" >> $config_target_mak
   echo "DOCKER_CROSS_CC_GUEST=$container_cross_cc" >> \
$config_target_mak
-- 
2.25.1




Re: QEMU CI failure of cross-i386-* targets (meson picks wrong glib for native target)

2021-12-24 Thread Paolo Bonzini
Is the configure script setting $cross_compile to yes? That will decide
whether meson getting a --cross-file or a --native-file option, and
consequently whether it treats the host and build machines as equal or
different.

Paolo

Il ven 24 dic 2021, 12:09 Alessandro Di Federico  ha scritto:

> Hi Paolo, I'm trying to get the QEMU CI run successfully for the
> idef-parser patchset. However I'm facing an issue I haven't been able
> to work around with meson. Maybe you can help?
>
> The failing tests are cross-i386-*
>
> https://gitlab.com/carl.cudig/qemu/-/jobs/1437392669
> https://gitlab.com/carl.cudig/qemu/-/jobs/1437392673
> https://gitlab.com/carl.cudig/qemu/-/jobs/1437392671
>
> I think the problem is that we're adding a new build-time dependency:
> glib. However glib is also a run-time dependency and, AFAIU, when cross
> compiling for x86 on a x86-64 machine, if you create a native
> executable, meson picks up the x86 version of glib (as opposed to the
> x86-64).
>
> I've been fiddling with this for a while, unsuccessfully.
>
> Relevant portion of the code:
>
>
> https://gitlab.com/carl.cudig/qemu/-/commit/c020958c37fa723f7e933a58b1bb1c3668ff4cff#8145a41027f26ff426d5a2c8b00c56f227943165_198_202
>
> Happy holidays!
>
> --
> Alessandro Di Federico
> rev.ng Labs Srl
>
>


Re: [PATCH 1/6] target/riscv: add cfg properties for zfinx, zdinx and zhinx{min}

2021-12-24 Thread Richard Henderson

On 12/23/21 7:49 PM, liweiwei wrote:

Co-authored-by: ardxwe
Signed-off-by: liweiwei
Signed-off-by: wangjunqiang
---
  roms/SLOF|  2 +-
  target/riscv/cpu.c   | 12 
  target/riscv/cpu.h   |  4 
  target/riscv/translate.c |  8 
  4 files changed, 25 insertions(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 2/6] target/riscv: add support for unique fpr read/write with support for zfinx

2021-12-24 Thread Richard Henderson

On 12/23/21 7:49 PM, liweiwei wrote:

+static TCGv_i64 get_fpr_hs(DisasContext *ctx, int reg_num)
+{
+if (ctx->ext_zfinx) {
+switch (get_ol(ctx)) {
+case MXL_RV32:
+#ifdef TARGET_RISCV32
+if (reg_num == 0) {
+tcg_gen_concat_i32_i64(t, ctx->zero, ctx->zero);


return tcg_constant_i64(0);
You could hoist this above the switch.


+tcg_gen_concat_i32_i64(t, cpu_gpr[reg_num], ctx->zero);
tcg_gen_extu_i32_i64 -- though I would think a signed extraction would be more natural, as 
compared with the signed value you'll get from the RV64 case.



+case MXL_RV64:
+if (reg_num == 0) {
+return ctx->zero;
+} else {
+return cpu_gpr[reg_num];
+}
+#endif
+default:
+g_assert_not_reached();
+}



+} else {
+return cpu_fpr[reg_num];
+}


It is tempting to reverse the sense of the zfinx if, to eliminate this case first, and 
keep the indentation level down.




+static TCGv_i64 get_fpr_d(DisasContext *ctx, int reg_num)
+{
+if (ctx->ext_zfinx) {
+switch (get_ol(ctx)) {
+case MXL_RV32:
+if (reg_num & 1) {
+gen_exception_illegal(ctx);
+return NULL;


Not keen on checking this here.  It should be done earlier.


+} else {
+#ifdef TARGET_RISCV32
+TCGv_i64 t = ftemp_new(ctx);
+if (reg_num == 0) {
+tcg_gen_concat_i32_i64(t, ctx->zero, ctx->zero);
+} else {
+tcg_gen_concat_i32_i64(t, cpu_gpr[reg_num], 
cpu_gpr[reg_num + 1]);
+}
+return t;
+}
+#else
+if (reg_num == 0) {
+return ctx->zero;
+} else {
+TCGv_i64 t = ftemp_new(ctx);
+tcg_gen_deposit_i64(t, cpu_gpr[reg_num], cpu_gpr[reg_num + 
1], 32, 32);
+return t;
+}
+}
+case MXL_RV64:
+if (reg_num == 0) {
+return ctx->zero;
+} else {
+return cpu_gpr[reg_num];
+}
+#endif
+default:
+g_assert_not_reached();
+}
+} else {
+return cpu_fpr[reg_num];
+}


Very similar comments to above.


+static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t)
+{
+if (ctx->ext_zfinx) {
+if (reg_num != 0) {
+switch (get_ol(ctx)) {
+case MXL_RV32:
+if (reg_num & 1) {
+gen_exception_illegal(ctx);


This is *far* too late to diagnose illegal insn.  You've already modified global state in 
the FPSCR, or have taken an fpu exception in fpu_helper.c.



+} else {
+#ifdef TARGET_RISCV32
+tcg_gen_extrl_i64_i32(cpu_gpr[reg_num], t);
+tcg_gen_extrh_i64_i32(cpu_gpr[reg_num + 1], t);
+}


tcg_gen_extr_i64_i32 does both at once.
Never split braces around ifdefs like this.


r~



Re: [PATCH 3/6] target/riscv: add support for zfinx

2021-12-24 Thread Richard Henderson

On 12/23/21 7:49 PM, liweiwei wrote:

  static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
  {
  REQUIRE_FPU;
-REQUIRE_EXT(ctx, RVF);
+REQUIRE_ZFINX_OR_F(ctx);
  
+TCGv_i64 dest = dest_fpr(ctx, a->rd);

  if (a->rs1 == a->rs2) { /* FMOV */
-gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+if (ctx->ext_zfinx) {
+gen_nanbox_s(dest, src1);


Sign-extend, not nanbox.  Or, since you handle sign-extension in gen_set_gpr_hs, nothing 
at all -- just tcg_gen_mov_i64.



  } else { /* FSGNJ */
-TCGv_i64 rs1 = tcg_temp_new_i64();
-TCGv_i64 rs2 = tcg_temp_new_i64();
-
-gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
-gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
-
-/* This formulation retains the nanboxing of rs2. */
-tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
-tcg_temp_free_i64(rs1);
-tcg_temp_free_i64(rs2);
+TCGv_i64 rs1, rs2;
+if (!ctx->ext_zfinx) {
+TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+rs1 = tcg_temp_new_i64();
+rs2 = tcg_temp_new_i64();
+gen_check_nanbox_s(rs1, src1);
+gen_check_nanbox_s(rs2, src2);
+} else {
+rs1 = get_fpr_hs(ctx, a->rs1);
+rs2 = get_fpr_hs(ctx, a->rs2);
+}
+
+/* This formulation retains the nanboxing of rs2 in normal 'F'. */
+tcg_gen_deposit_i64(dest, rs2, rs1, 0, 31);
+if (!ctx->ext_zfinx) {
+tcg_temp_free_i64(rs1);
+tcg_temp_free_i64(rs2);
+} else {
+gen_nanbox_s(dest, dest);
+}


This is tangled enough that I think you should check zfinx at one higher indent level, and 
not do conditional allocate followed by conditional free.



diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 065e8162a2..9f3f3319f2 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -51,8 +51,12 @@ static inline uint64_t nanbox_s(float32 f)
  return f | MAKE_64BIT_MASK(32, 32);
  }
  
-static inline float32 check_nanbox_s(uint64_t f)

+static inline float32 check_nanbox_s(CPURISCVState *env, uint64_t f)
  {
+/* Disable nanbox check when enable zfinx */
+if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx)
+return (uint32_t)f;
+


Braces.


r~



Re: [PATCH 4/6] target/riscv: add support for zdinx

2021-12-24 Thread Richard Henderson

On 12/23/21 7:49 PM, liweiwei wrote:

  static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a)
  {
+REQUIRE_FPU;
+REQUIRE_ZDINX_OR_D(ctx);
+
+TCGv_i64 dest = dest_fpr(ctx, a->rd);
+TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+
  if (a->rs1 == a->rs2) { /* FMOV */


Applies to the F version as well, but we should not assemble src2 when we don't 
need it.


-tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+tcg_gen_mov_i64(dest, src1);



I think this can just be dest = get_fpr_d(ctx, a->src1), leaving the final "move" to 
gen_set_fpr_d.



r~



Re: [PATCH 5/6] target/riscv: add support for zhinx/zhinxmin

2021-12-24 Thread Richard Henderson

On 12/23/21 7:49 PM, liweiwei wrote:

+static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f)
  {
+/* Disable nanbox check when enable zfinx */
+if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx)
+return (uint16_t)f;


Braces.


r~



Re: [PATCH 6/6] target/riscv: expose zfinx, zdinx, zhinx{min} properties

2021-12-24 Thread Richard Henderson

On 12/23/21 7:49 PM, liweiwei wrote:

Co-authored-by: ardxwe 
Signed-off-by: liweiwei 
Signed-off-by: wangjunqiang 
---
  target/riscv/cpu.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a5fa14f2ac..dbd15693be 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -657,6 +657,10 @@ static Property riscv_cpu_properties[] = {
  DEFINE_PROP_BOOL("zbb", RISCVCPU, cfg.ext_zbb, true),
  DEFINE_PROP_BOOL("zbc", RISCVCPU, cfg.ext_zbc, true),
  DEFINE_PROP_BOOL("zbs", RISCVCPU, cfg.ext_zbs, true),
+DEFINE_PROP_BOOL("Zdinx", RISCVCPU, cfg.ext_zdinx, false),
+DEFINE_PROP_BOOL("Zfinx", RISCVCPU, cfg.ext_zfinx, false),
+DEFINE_PROP_BOOL("Zhinx", RISCVCPU, cfg.ext_zhinx, false),
+DEFINE_PROP_BOOL("Zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
  DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false),
  DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
  /* ePMP 0.9.3 */



Reviewed-by: Richard Henderson 

r~



[PATCH] tests/unit/test-util-sockets: Use g_file_open_tmp() to create temp file

2021-12-24 Thread Philippe Mathieu-Daudé
Similarly to commit e63ed64c6d1 ("tests/qtest/virtio-net-failover:
Use g_file_open_tmp() to create temporary file"), avoid calling
g_test_rand_int() before g_test_init(): use g_file_open_tmp().

Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/unit/test-util-sockets.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c
index 72b92465298..896247e3ed3 100644
--- a/tests/unit/test-util-sockets.c
+++ b/tests/unit/test-util-sockets.c
@@ -305,9 +305,11 @@ static void test_socket_unix_abstract(void)
 };
 int i;
 
+i = g_file_open_tmp("unix-XX", &addr.u.q_unix.path, NULL);
+g_assert_true(i >= 0);
+close(i);
+
 addr.type = SOCKET_ADDRESS_TYPE_UNIX;
-addr.u.q_unix.path = g_strdup_printf("unix-%d-%u",
- getpid(), g_random_int());
 addr.u.q_unix.has_abstract = true;
 addr.u.q_unix.abstract = true;
 addr.u.q_unix.has_tight = false;
-- 
2.33.1




Re: [PATCH] tests/unit/test-util-sockets: Use g_file_open_tmp() to create temp file

2021-12-24 Thread Richard Henderson

On 12/24/21 3:45 PM, Philippe Mathieu-Daudé wrote:

Similarly to commit e63ed64c6d1 ("tests/qtest/virtio-net-failover:
Use g_file_open_tmp() to create temporary file"), avoid calling
g_test_rand_int() before g_test_init(): use g_file_open_tmp().

Signed-off-by: Philippe Mathieu-Daudé 
---
  tests/unit/test-util-sockets.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c
index 72b92465298..896247e3ed3 100644
--- a/tests/unit/test-util-sockets.c
+++ b/tests/unit/test-util-sockets.c
@@ -305,9 +305,11 @@ static void test_socket_unix_abstract(void)
  };
  int i;
  
+i = g_file_open_tmp("unix-XX", &addr.u.q_unix.path, NULL);

+g_assert_true(i >= 0);


Just g_assert.  Otherwise,
Reviewed-by: Richard Henderson 


r~


+close(i);
+
  addr.type = SOCKET_ADDRESS_TYPE_UNIX;
-addr.u.q_unix.path = g_strdup_printf("unix-%d-%u",
- getpid(), g_random_int());
  addr.u.q_unix.has_abstract = true;
  addr.u.q_unix.abstract = true;
  addr.u.q_unix.has_tight = false;






Re: [PATCH 3/6] linux-user: Add code for PR_GET/SET_UNALIGN

2021-12-24 Thread Richard Henderson

On 12/20/21 2:31 PM, Philippe Mathieu-Daudé wrote:

+/* Used for user-only emulation of prctl(PR_SET_UNALIGN). */
+bool prctl_unalign_sigbus;


Could we forward-declare a UserEmuCPUState structure in this file,
use it here:

struct UserEmuCPUState *user_cpu;

and declare it in include/hw/core/useremu-cpu.h (or better name)
restricted to user emulation?


Eh.  I suppose we could, but at the moment it's one byte in an existing padding 
hole.


I'd rather not mix sys/user emu specific fields in CPUState.


There are *lots* of system specific fields in CPUState.


r~



Re: [PATCH 2/6] target/riscv: add support for unique fpr read/write with support for zfinx

2021-12-24 Thread liweiwei

Thanks for your comments.

在 2021/12/25 上午6:00, Richard Henderson 写道:

On 12/23/21 7:49 PM, liweiwei wrote:

+static TCGv_i64 get_fpr_hs(DisasContext *ctx, int reg_num)
+{
+    if (ctx->ext_zfinx) {
+    switch (get_ol(ctx)) {
+    case MXL_RV32:
+#ifdef TARGET_RISCV32
+    if (reg_num == 0) {
+    tcg_gen_concat_i32_i64(t, ctx->zero, ctx->zero);


return tcg_constant_i64(0);
You could hoist this above the switch.


OK.

+    tcg_gen_concat_i32_i64(t, cpu_gpr[reg_num], ctx->zero);
tcg_gen_extu_i32_i64 -- though I would think a signed extraction would 
be more natural, as compared with the signed value you'll get from the 
RV64 case.


In RV64 case, this should be nan-boxing value( upper bits are all 
ones).  However, zfinx will not check nan-boxing of source, the upper 32 
bits have no effect on the final result. So I think both zero-extended 
or sign-extended are OK.

+    case MXL_RV64:
+    if (reg_num == 0) {
+    return ctx->zero;
+    } else {
+    return cpu_gpr[reg_num];
+    }
+#endif
+    default:
+    g_assert_not_reached();
+    }



+    } else {
+    return cpu_fpr[reg_num];
+    }


It is tempting to reverse the sense of the zfinx if, to eliminate this 
case first, and keep the indentation level down.



OK I'll update this.



+static TCGv_i64 get_fpr_d(DisasContext *ctx, int reg_num)
+{
+    if (ctx->ext_zfinx) {
+    switch (get_ol(ctx)) {
+    case MXL_RV32:
+    if (reg_num & 1) {
+    gen_exception_illegal(ctx);
+    return NULL;


Not keen on checking this here.  It should be done earlier.


OK. I'll put this check into the trans_* function

+    } else {
+#ifdef TARGET_RISCV32
+    TCGv_i64 t = ftemp_new(ctx);
+    if (reg_num == 0) {
+    tcg_gen_concat_i32_i64(t, ctx->zero, ctx->zero);
+    } else {
+    tcg_gen_concat_i32_i64(t, cpu_gpr[reg_num], 
cpu_gpr[reg_num + 1]);

+    }
+    return t;
+    }
+#else
+    if (reg_num == 0) {
+    return ctx->zero;
+    } else {
+    TCGv_i64 t = ftemp_new(ctx);
+    tcg_gen_deposit_i64(t, cpu_gpr[reg_num], 
cpu_gpr[reg_num + 1], 32, 32);

+    return t;
+    }
+    }
+    case MXL_RV64:
+    if (reg_num == 0) {
+    return ctx->zero;
+    } else {
+    return cpu_gpr[reg_num];
+    }
+#endif
+    default:
+    g_assert_not_reached();
+    }
+    } else {
+    return cpu_fpr[reg_num];
+    }


Very similar comments to above.


+static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t)
+{
+    if (ctx->ext_zfinx) {
+    if (reg_num != 0) {
+    switch (get_ol(ctx)) {
+    case MXL_RV32:
+    if (reg_num & 1) {
+    gen_exception_illegal(ctx);


This is *far* too late to diagnose illegal insn.  You've already 
modified global state in the FPSCR, or have taken an fpu exception in 
fpu_helper.c.

OK. I'll put this check into the trans_* function too.



+    } else {
+#ifdef TARGET_RISCV32
+    tcg_gen_extrl_i64_i32(cpu_gpr[reg_num], t);
+    tcg_gen_extrh_i64_i32(cpu_gpr[reg_num + 1], t);
+    }


tcg_gen_extr_i64_i32 does both at once.
Never split braces around ifdefs like this.

OK. I'll update this.



r~





Re: [PATCH 3/6] target/riscv: add support for zfinx

2021-12-24 Thread liweiwei



在 2021/12/25 上午6:26, Richard Henderson 写道:

On 12/23/21 7:49 PM, liweiwei wrote:

  static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
  {
  REQUIRE_FPU;
-    REQUIRE_EXT(ctx, RVF);
+    REQUIRE_ZFINX_OR_F(ctx);
  +    TCGv_i64 dest = dest_fpr(ctx, a->rd);
  if (a->rs1 == a->rs2) { /* FMOV */
-    gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    if (ctx->ext_zfinx) {
+    gen_nanbox_s(dest, src1);


Sign-extend, not nanbox.  Or, since you handle sign-extension in 
gen_set_gpr_hs, nothing at all -- just tcg_gen_mov_i64.


Yeah. I did gen_nanbox_s here is to make the dest nan-boxing just as 
other instructions. It's truely unnecessary here. I'll try maximize the 
reuse of code here.





  } else { /* FSGNJ */
-    TCGv_i64 rs1 = tcg_temp_new_i64();
-    TCGv_i64 rs2 = tcg_temp_new_i64();
-
-    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
-    gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
-
-    /* This formulation retains the nanboxing of rs2. */
-    tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
-    tcg_temp_free_i64(rs1);
-    tcg_temp_free_i64(rs2);
+    TCGv_i64 rs1, rs2;
+    if (!ctx->ext_zfinx) {
+    TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+    rs1 = tcg_temp_new_i64();
+    rs2 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, src1);
+    gen_check_nanbox_s(rs2, src2);
+    } else {
+    rs1 = get_fpr_hs(ctx, a->rs1);
+    rs2 = get_fpr_hs(ctx, a->rs2);
+    }
+
+    /* This formulation retains the nanboxing of rs2 in normal 
'F'. */

+    tcg_gen_deposit_i64(dest, rs2, rs1, 0, 31);
+    if (!ctx->ext_zfinx) {
+    tcg_temp_free_i64(rs1);
+    tcg_temp_free_i64(rs2);
+    } else {
+    gen_nanbox_s(dest, dest);
+    }


This is tangled enough that I think you should check zfinx at one 
higher indent level, and not do conditional allocate followed by 
conditional free.
OK, I'll divide them. By the way I did in this way to maximize the reuse 
of code.



diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 065e8162a2..9f3f3319f2 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -51,8 +51,12 @@ static inline uint64_t nanbox_s(float32 f)
  return f | MAKE_64BIT_MASK(32, 32);
  }
  -static inline float32 check_nanbox_s(uint64_t f)
+static inline float32 check_nanbox_s(CPURISCVState *env, uint64_t f)
  {
+    /* Disable nanbox check when enable zfinx */
+    if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx)
+    return (uint32_t)f;
+


Braces.


r~





Re: [PATCH 4/6] target/riscv: add support for zdinx

2021-12-24 Thread liweiwei



在 2021/12/25 上午6:30, Richard Henderson 写道:

On 12/23/21 7:49 PM, liweiwei wrote:

  static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a)
  {
+    REQUIRE_FPU;
+    REQUIRE_ZDINX_OR_D(ctx);
+
+    TCGv_i64 dest = dest_fpr(ctx, a->rd);
+    TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+    TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+
  if (a->rs1 == a->rs2) { /* FMOV */


Applies to the F version as well, but we should not assemble src2 when 
we don't need it.

OK, I'll fix this.



- tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+    tcg_gen_mov_i64(dest, src1);



I think this can just be dest = get_fpr_d(ctx, a->src1), leaving the 
final "move" to gen_set_fpr_d.

OK. I'll update this.



r~





Re: [PATCH 5/6] target/riscv: add support for zhinx/zhinxmin

2021-12-24 Thread liweiwei



在 2021/12/25 上午6:32, Richard Henderson 写道:

On 12/23/21 7:49 PM, liweiwei wrote:

+static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f)
  {
+    /* Disable nanbox check when enable zfinx */
+    if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx)
+    return (uint16_t)f;


Braces.


OK. I'll add this later.

r~





Re: [RFC v2 04/12] target/ppc: powerpc_excp: Stop passing excp_model around

2021-12-24 Thread David Gibson
On Mon, Dec 20, 2021 at 03:18:55PM -0300, Fabiano Rosas wrote:
> We can just access it directly in powerpc_excp.
> 
> Signed-off-by: Fabiano Rosas 

Reviewed-by: David Gibson 

> ---
>  target/ppc/excp_helper.c | 43 
>  1 file changed, 21 insertions(+), 22 deletions(-)
> 
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 7bdc1e8410..45641f6d1d 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -293,10 +293,11 @@ static inline void powerpc_set_excp_state(PowerPCCPU 
> *cpu,
>   * Note that this function should be greatly optimized when called
>   * with a constant excp, from ppc_hw_interrupt
>   */
> -static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
> +static inline void powerpc_excp(PowerPCCPU *cpu, int excp)
>  {
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
> +int excp_model = env->excp_model;
>  target_ulong msr, new_msr, vector;
>  int srr0, srr1, lev = -1;
>  
> @@ -878,9 +879,8 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
> excp_model, int excp)
>  void ppc_cpu_do_interrupt(CPUState *cs)
>  {
>  PowerPCCPU *cpu = POWERPC_CPU(cs);
> -CPUPPCState *env = &cpu->env;
>  
> -powerpc_excp(cpu, env->excp_model, cs->exception_index);
> +powerpc_excp(cpu, cs->exception_index);
>  }
>  
>  static void ppc_hw_interrupt(CPUPPCState *env)
> @@ -891,20 +891,20 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>  /* External reset */
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_RESET);
> +powerpc_excp(cpu, POWERPC_EXCP_RESET);
>  return;
>  }
>  /* Machine check exception */
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_MCK)) {
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_MCK);
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_MCHECK);
> +powerpc_excp(cpu, POWERPC_EXCP_MCHECK);
>  return;
>  }
>  #if 0 /* TODO */
>  /* External debug exception */
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_DEBUG)) {
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DEBUG);
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DEBUG);
> +powerpc_excp(cpu, POWERPC_EXCP_DEBUG);
>  return;
>  }
>  #endif
> @@ -924,7 +924,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>  if ((async_deliver || msr_hv == 0) && hdice) {
>  /* HDEC clears on delivery */
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR);
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_HDECR);
> +powerpc_excp(cpu, POWERPC_EXCP_HDECR);
>  return;
>  }
>  }
> @@ -934,7 +934,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>  /* LPCR will be clear when not supported so this will work */
>  bool hvice = !!(env->spr[SPR_LPCR] & LPCR_HVICE);
>  if ((async_deliver || msr_hv == 0) && hvice) {
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_HVIRT);
> +powerpc_excp(cpu, POWERPC_EXCP_HVIRT);
>  return;
>  }
>  }
> @@ -946,14 +946,14 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>  /* HEIC blocks delivery to the hypervisor */
>  if ((async_deliver && !(heic && msr_hv && !msr_pr)) ||
>  (env->has_hv_mode && msr_hv == 0 && !lpes0)) {
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_EXTERNAL);
> +powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL);
>  return;
>  }
>  }
>  if (msr_ce != 0) {
>  /* External critical interrupt */
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_CEXT)) {
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_CRITICAL);
> +powerpc_excp(cpu, POWERPC_EXCP_CRITICAL);
>  return;
>  }
>  }
> @@ -961,24 +961,24 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>  /* Watchdog timer on embedded PowerPC */
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_WDT)) {
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_WDT);
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_WDT);
> +powerpc_excp(cpu, POWERPC_EXCP_WDT);
>  return;
>  }
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_CDOORBELL)) {
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_CDOORBELL);
> -powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORCI);
> +powerpc_excp(cpu, POWERPC_EXCP_DOORCI);
>  return;
>  }
>  /* Fixed interval timer on embedded PowerPC */
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_FIT)) {
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_FIT);
> -  

Re: [RFC v2 05/12] target/ppc: powerpc_excp: Standardize arguments to interrupt code

2021-12-24 Thread David Gibson
On Mon, Dec 20, 2021 at 03:18:56PM -0300, Fabiano Rosas wrote:
> In preparation to moving the interrupt code into separate functions,
> create a PPCIntrArgs structure to serve as a consistent API.

The patch doesn't seem to match this description - I see no new
structure here.

> 
> No functional change intended.
> 
> Signed-off-by: Fabiano Rosas 
> ---
>  target/ppc/excp_helper.c | 213 +--
>  1 file changed, 113 insertions(+), 100 deletions(-)
> 
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 45641f6d1d..f478ff8a87 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -164,7 +164,7 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState 
> *env, int excp,
>  static inline void ppc_excp_apply_ail(PowerPCCPU *cpu, int excp_model, int 
> excp,
>target_ulong msr,
>target_ulong *new_msr,
> -  target_ulong *vector)
> +  target_ulong *new_nip)
>  {
>  #if defined(TARGET_PPC64)
>  CPUPPCState *env = &cpu->env;
> @@ -241,9 +241,9 @@ static inline void ppc_excp_apply_ail(PowerPCCPU *cpu, 
> int excp_model, int excp,
>  
>  if (excp != POWERPC_EXCP_SYSCALL_VECTORED) {
>  if (ail == 2) {
> -*vector |= 0x00018000ull;
> +*new_nip |= 0x00018000ull;
>  } else if (ail == 3) {
> -*vector |= 0xc0004000ull;
> +*new_nip |= 0xc0004000ull;
>  }
>  } else {
>  /*
> @@ -251,15 +251,15 @@ static inline void ppc_excp_apply_ail(PowerPCCPU *cpu, 
> int excp_model, int excp,
>   * only the MSR. AIL=3 replaces the 0x17000 base with 0xc...3000.
>   */
>  if (ail == 3) {
> -*vector &= ~0x00017000ull; /* Un-apply the base offset */
> -*vector |= 0xc0003000ull; /* Apply scv's AIL=3 offset */
> +*new_nip &= ~0x00017000ull; /* Un-apply the base offset 
> */
> +*new_nip |= 0xc0003000ull; /* Apply scv's AIL=3 offset */
>  }
>  }
>  #endif
>  }
>  
> -static inline void powerpc_set_excp_state(PowerPCCPU *cpu,
> -  target_ulong vector, target_ulong 
> msr)
> +static inline void powerpc_set_excp_state(PowerPCCPU *cpu, target_ulong 
> new_nip,
> +  target_ulong new_msr)
>  {
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
> @@ -272,9 +272,9 @@ static inline void powerpc_set_excp_state(PowerPCCPU *cpu,
>   * will prevent setting of the HV bit which some exceptions might need
>   * to do.
>   */
> -env->msr = msr & env->msr_mask;
> +env->msr = new_msr & env->msr_mask;
>  hreg_compute_hflags(env);
> -env->nip = vector;
> +env->nip = new_nip;
>  /* Reset exception state */
>  cs->exception_index = POWERPC_EXCP_NONE;
>  env->error_code = 0;
> @@ -289,6 +289,15 @@ static inline void powerpc_set_excp_state(PowerPCCPU 
> *cpu,
>  check_tlb_flush(env, false);
>  }
>  
> +typedef struct PPCIntrArgs {
> +target_ulong nip;
> +target_ulong msr;
> +target_ulong new_nip;
> +target_ulong new_msr;
> +int sprn_srr0;
> +int sprn_srr1;
> +} PPCIntrArgs;
> +
>  /*
>   * Note that this function should be greatly optimized when called
>   * with a constant excp, from ppc_hw_interrupt
> @@ -298,35 +307,35 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
> excp)
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
>  int excp_model = env->excp_model;
> -target_ulong msr, new_msr, vector;
> -int srr0, srr1, lev = -1;
> +PPCIntrArgs regs;
> +int lev = -1;
>  
>  qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
>" => %08x (%02x)\n", env->nip, excp, env->error_code);
>  
>  /* new srr1 value excluding must-be-zero bits */
>  if (excp_model == POWERPC_EXCP_BOOKE) {
> -msr = env->msr;
> +regs.msr = env->msr;
>  } else {
> -msr = env->msr & ~0x783fULL;
> +regs.msr = env->msr & ~0x783fULL;
>  }
> +regs.nip = env->nip;
>  
>  /*
>   * new interrupt handler msr preserves existing HV and ME unless
>   * explicitly overriden
>   */
> -new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);
> +regs.new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);
>  
> -/* target registers */
> -srr0 = SPR_SRR0;
> -srr1 = SPR_SRR1;
> +regs.sprn_srr0 = SPR_SRR0;
> +regs.sprn_srr1 = SPR_SRR1;
>  
>  /*
>   * check for special resume at 0x100 from doze/nap/sleep/winkle on
>   * P7/P8/P9
>   */
>  if (env->resume_as_sreset) {
> -excp = powerpc_reset_wakeup(cs, env, excp, &msr);
> +excp = powerpc_reset_wakeup(cs, env, excp,