date:20240111

Re: [PATCH 10/40] vdpa: assign svq descriptors a separate ASID when possible

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> When backend supports the VHOST_BACKEND_F_DESC_ASID feature
> and all the data vqs can support one or more descriptor group
> to host SVQ vrings and descriptors, we assign them a different
> ASID than where its buffers reside in guest memory address
> space. With this dedicated ASID for SVQs, the IOVA for what
> vdpa device may care effectively becomes the GPA, thus there's
> no need to translate IOVA address. For this reason, shadow_data
> can be turned off accordingly. It doesn't mean the SVQ is not
> enabled, but just that the translation is not needed from iova
> tree perspective.
>
> We can reuse CVQ's address space ID to host SVQ descriptors
> because both CVQ and SVQ are emulated in the same QEMU
> process, which will share the same VA address space.
>
> Signed-off-by: Si-Wei Liu 
> ---
>  hw/virtio/vhost-vdpa.c |  5 -
>  net/vhost-vdpa.c   | 57 
> ++
>  2 files changed, 57 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 24844b5..30dff95 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -627,6 +627,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void 
> *opaque, Error **errp)
>  uint64_t qemu_backend_features = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
>   0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
>   0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
> + 0x1ULL << VHOST_BACKEND_F_DESC_ASID |
>   0x1ULL << VHOST_BACKEND_F_SUSPEND;
>  int ret;
>
> @@ -1249,7 +1250,9 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
>  goto err;
>  }
>
> -vhost_svq_start(svq, dev->vdev, vq, v->shared->iova_tree);
> +vhost_svq_start(svq, dev->vdev, vq,
> +v->desc_group >= 0 && v->address_space_id ?
> +NULL : v->shared->iova_tree);

Nit: it might be a little clearer if we use a helper for this check,
like vhost_svq_needs_iova_tree()

>  ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
>  if (unlikely(!ok)) {
>  goto err_map;
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 2555897..aebaa53 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -366,20 +366,50 @@ static int vhost_vdpa_set_address_space_id(struct 
> vhost_vdpa *v,
>  static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
>  {
>  struct vhost_vdpa *v = &s->vhost_vdpa;
> +int r;
>
>  migration_add_notifier(&s->migration_state,
> vdpa_net_migration_state_notifier);
>
> +if (!v->shadow_vqs_enabled) {
> +if (v->desc_group >= 0 &&
> +v->address_space_id != VHOST_VDPA_GUEST_PA_ASID) {
> +vhost_vdpa_set_address_space_id(v, v->desc_group,
> +VHOST_VDPA_GUEST_PA_ASID);
> +s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
> +}
> +return;
> +}
> +
>  /* iova_tree may be initialized by vhost_vdpa_net_load_setup */
> -if (v->shadow_vqs_enabled && !v->shared->iova_tree) {
> +if (!v->shared->iova_tree) {
>  v->shared->iova_tree = 
> vhost_iova_tree_new(v->shared->iova_range.first,
> 
> v->shared->iova_range.last);
>  }
> +
> +if (s->always_svq || v->desc_group < 0) {

I think the always_svq mode deserves a TODO there since it can utilize
the desc_group actually?

> +return;
> +}
> +
> +r = vhost_vdpa_set_address_space_id(v, v->desc_group,
> +VHOST_VDPA_NET_CVQ_ASID);

Any reason why we only set the descriptor group for the first nc?

(This seems to imply that the device has one descriptor group for all
virtqueues, which might not be true)

> +if (unlikely(r < 0)) {
> +/* The other data vqs should also fall back to using the same ASID */
> +s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
> +return;
> +}
> +
> +/* No translation needed on data SVQ when descriptor group is used */
> +s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
> +s->vhost_vdpa.shared->shadow_data = false;
> +return;
>  }
>
>  static int vhost_vdpa_net_data_start(NetClientState *nc)
>  {
>  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
> +
>  struct vhost_vdpa *v = &s->vhost_vdpa;
>
>  assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> @@ -397,6 +427,18 @@ static int vhost_vdpa_net_data_start(NetClientState *nc)
>  return 0;
>  }
>
> +if (v->desc_group >= 0 && v->desc_group != s0->vhost_vdpa.desc_group) {
> +unsigned asid;
> +asid = v->shadow_vqs_enabled ?
>

Re: [PATCH 11/40] vdpa: factor out vhost_vdpa_last_dev

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> Generalize duplicated condition check for the last vq of vdpa
> device to a common function.
>
> Signed-off-by: Si-Wei Liu 

Acked-by: Jason Wang 

Thanks

> ---
>  hw/virtio/vhost-vdpa.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 30dff95..2b1cc14 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -593,6 +593,11 @@ static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
>  return v->index == 0;
>  }
>
> +static bool vhost_vdpa_last_dev(struct vhost_dev *dev)
> +{
> +return dev->vq_index + dev->nvqs == dev->vq_index_end;
> +}
> +
>  static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
> uint64_t *features)
>  {
> @@ -1432,7 +1437,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
> bool started)
>  goto out_stop;
>  }
>
> -if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
> +if (!vhost_vdpa_last_dev(dev)) {
>  return 0;
>  }
>
> @@ -1467,7 +1472,7 @@ static void vhost_vdpa_reset_status(struct vhost_dev 
> *dev)
>  {
>  struct vhost_vdpa *v = dev->opaque;
>
> -if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
> +if (!vhost_vdpa_last_dev(dev)) {
>  return;
>  }
>
> --
> 1.8.3.1
>

Re: [PATCH 13/40] vdpa: ref counting VhostVDPAShared

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> Subsequent patches attempt to release VhostVDPAShared resources,
> for example iova tree to free and memory listener to unregister,
> in vdpa_dev_cleanup(). Instead of checking against the vq index,
> which is not always available in all of the callers, counting
> the usage by reference. Then it'll be easy to free resource
> upon the last deref.
>
> Signed-off-by: Si-Wei Liu 
> ---
>  include/hw/virtio/vhost-vdpa.h |  2 ++
>  net/vhost-vdpa.c   | 14 ++
>  2 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index 63493ff..7b8d3bf 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -70,6 +70,8 @@ typedef struct vhost_vdpa_shared {
>
>  /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA 
> */
>  bool shadow_data;
> +
> +unsigned refcnt;
>  } VhostVDPAShared;
>
>  typedef struct vhost_vdpa {
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index aebaa53..a126e5c 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -236,11 +236,11 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
>  g_free(s->vhost_net);
>  s->vhost_net = NULL;
>  }
> -if (s->vhost_vdpa.index != 0) {
> -return;
> +if (--s->vhost_vdpa.shared->refcnt == 0) {
> +qemu_close(s->vhost_vdpa.shared->device_fd);
> +g_free(s->vhost_vdpa.shared);
>  }

I'd suggest having a get and put helper, then we can check and do
cleanup in the put when refcnt is zero.

Thanks

> -qemu_close(s->vhost_vdpa.shared->device_fd);
> -g_free(s->vhost_vdpa.shared);
> +s->vhost_vdpa.shared = NULL;
>  }
>
>  /** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend  */
> @@ -1896,6 +1896,7 @@ static NetClientState 
> *net_vhost_vdpa_init(NetClientState *peer,
>  s->vhost_vdpa.shared->device_fd = vdpa_device_fd;
>  s->vhost_vdpa.shared->iova_range = iova_range;
>  s->vhost_vdpa.shared->shadow_data = svq;
> +s->vhost_vdpa.shared->refcnt++;
>  } else if (!is_datapath) {
>  s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
>   PROT_READ | PROT_WRITE,
> @@ -1910,6 +1911,7 @@ static NetClientState 
> *net_vhost_vdpa_init(NetClientState *peer,
>  }
>  if (queue_pair_index != 0) {
>  s->vhost_vdpa.shared = shared;
> +s->vhost_vdpa.shared->refcnt++;
>  }
>
>  ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
> @@ -1928,6 +1930,10 @@ static NetClientState 
> *net_vhost_vdpa_init(NetClientState *peer,
>  return nc;
>
>  err:
> +if (--s->vhost_vdpa.shared->refcnt == 0) {
> +g_free(s->vhost_vdpa.shared);
> +}
> +s->vhost_vdpa.shared = NULL;
>  qemu_del_net_client(nc);
>  return NULL;
>  }
> --
> 1.8.3.1
>

Re: [PATCH 14/40] vdpa: convert iova_tree to ref count based

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> So that it can be freed from vhost_vdpa_cleanup on
> the last deref. The next few patches will try to
> make iova tree life cycle not depend on memory
> listener, and there's possibility to keep iova tree
> around when memory mapping is not changed across
> device reset.
>
> Signed-off-by: Si-Wei Liu 
> ---
>  net/vhost-vdpa.c | 9 ++---
>  1 file changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index a126e5c..7b8f047 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -238,6 +238,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
>  }
>  if (--s->vhost_vdpa.shared->refcnt == 0) {
>  qemu_close(s->vhost_vdpa.shared->device_fd);
> +g_clear_pointer(&s->vhost_vdpa.shared->iova_tree,
> +vhost_iova_tree_delete);

Could be part of the put() as well.

Thanks

>  g_free(s->vhost_vdpa.shared);
>  }
>  s->vhost_vdpa.shared = NULL;
> @@ -461,19 +463,12 @@ static int vhost_vdpa_net_data_load(NetClientState *nc)
>  static void vhost_vdpa_net_client_stop(NetClientState *nc)
>  {
>  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> -struct vhost_dev *dev;
>
>  assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>
>  if (s->vhost_vdpa.index == 0) {
>  migration_remove_notifier(&s->migration_state);
>  }
> -
> -dev = s->vhost_vdpa.dev;
> -if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> -g_clear_pointer(&s->vhost_vdpa.shared->iova_tree,
> -vhost_iova_tree_delete);
> -}
>  }
>
>  static int vhost_vdpa_net_load_setup(NetClientState *nc, NICState *nic)
> --
> 1.8.3.1
>

Re: [PATCH 15/40] vdpa: add svq_switching and flush_map to header

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> Will be used in next patches.
>
> Signed-off-by: Si-Wei Liu 
> ---
>  include/hw/virtio/vhost-vdpa.h | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index 7b8d3bf..0fe0f60 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -72,6 +72,12 @@ typedef struct vhost_vdpa_shared {
>  bool shadow_data;
>
>  unsigned refcnt;
> +
> +/* SVQ switching is in progress? 1: turn on SVQ, -1: turn off SVQ */
> +int svq_switching;

Nit: just curious about any reason why 0, 1 or true false is not used?

Thanks

> +
> +/* Flush mappings on reset due to shared address space */
> +bool flush_map;
>  } VhostVDPAShared;
>
>  typedef struct vhost_vdpa {
> --
> 1.8.3.1
>

Re: [PATCH 16/40] vdpa: indicate SVQ switching via flag

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> svq_switching indicates the case where SVQ mode change
> is on going. Positive (1) means switching from the
> normal passthrough mode to SVQ mode, and negative (-1)
> meaning switch SVQ back to the passthrough; zero (0)
> indicates that there's no SVQ mode switch taking place.

Ok, so the previous patch forgot to describe the zero(0).

And it looks to me we'd better use enum instead of the magic number here.

Thanks

>
> Signed-off-by: Si-Wei Liu 
> ---
>  net/vhost-vdpa.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 7b8f047..04718b2 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -320,6 +320,7 @@ static void 
> vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
>  data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
>  cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
>n->max_ncs - n->max_queue_pairs : 0;
> +v->shared->svq_switching = enable ? 1 : -1;
>  /*
>   * TODO: vhost_net_stop does suspend, get_base and reset. We can be 
> smarter
>   * in the future and resume the device if read-only operations between
> @@ -332,6 +333,7 @@ static void 
> vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
>  if (unlikely(r < 0)) {
>  error_report("unable to start vhost net: %s(%d)", g_strerror(-r), 
> -r);
>  }
> +v->shared->svq_switching = 0;
>  }
>
>  static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> --
> 1.8.3.1
>

Re: [NOTFORMERGE PATCH 2/2] gitlab: Add Loongarch64 KVM-only build

2024-01-11 Thread Thomas Huth


On 11/01/2024 08.37, gaosong wrote:

Hi,

在 2024/1/11 下午3:10, Thomas Huth 写道:

On 02/01/2024 18.22, Philippe Mathieu-Daudé wrote:

Signed-off-by: Philippe Mathieu-Daudé 
---
Used to test 
https://lore.kernel.org/qemu-devel/20231228084051.3235354-1-zhaotian...@loongson.cn/


So why is it NOTFORMERGE ? Don't we want to test KVM-only builds for 
loongarch in the long run?


 Thomas


I think we can drop this title.

I tested this job with the latest loongarch kvm patches, but I found a 
test-hmp check error.
Can you recreate the error manually? i.e. compile with configure 
--disable-tcg and then run:


 V=2 QTEST_QEMU_BINARY=./qemu-system-loongarch64 tests/qtest/test-hmp

That should likely provide you with a hint where it is crashing

 Thomas

Re: [PATCH] hw/core: Handle cpu_model_from_type() returning NULL value

2024-01-11 Thread Philippe Mathieu-Daudé


Hi Gavin,

On 11/1/24 08:30, Gavin Shan wrote:

Hi Phil,

On 1/11/24 16:47, Philippe Mathieu-Daudé wrote:

Per cpu_model_from_type() docstring (added in commit 445946f4dd):

   * Returns: CPU model name or NULL if the CPU class doesn't exist

We must check the return value in order to avoid surprises, i.e.:

  $ qemu-system-arm -machine virt -cpu cortex-a9
   qemu-system-arm: Invalid CPU model: cortex-a9
   The valid models are: cortex-a7, cortex-a15, (null), (null), 
(null), (null), (null), (null), (null), (null), (null), (null), 
(null), max


Add assertions when the call can not fail (because the CPU type
must be registered).

Fixes: 5422d2a8fa ("machine: Print CPU model name instead of CPU type")
Reported-by: Peter Maydell 
Signed-off-by: Philippe Mathieu-Daudé 
---
  cpu-target.c  | 1 +
  hw/core/machine.c | 5 +
  target/ppc/cpu_init.c | 1 +
  3 files changed, 7 insertions(+)

diff --git a/cpu-target.c b/cpu-target.c
index 5eecd7ea2d..b0f6deb13b 100644
--- a/cpu-target.c
+++ b/cpu-target.c
@@ -291,6 +291,7 @@ static void cpu_list_entry(gpointer data, gpointer 
user_data)

  const char *typename = object_class_get_name(OBJECT_CLASS(data));
  g_autofree char *model = cpu_model_from_type(typename);
+    assert(model);
  if (cc->deprecation_note) {
  qemu_printf("  %s (deprecated)\n", model);
  } else {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index fc239101f9..730ec10328 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1422,16 +1422,21 @@ static bool is_cpu_type_supported(const 
MachineState *machine, Error **errp)

  /* The user specified CPU type isn't valid */
  if (!mc->valid_cpu_types[i]) {
  g_autofree char *requested = 
cpu_model_from_type(machine->cpu_type);

+    assert(requested);
  error_setg(errp, "Invalid CPU model: %s", requested);
  if (!mc->valid_cpu_types[1]) {
  g_autofree char *model = cpu_model_from_type(
   
mc->valid_cpu_types[0]);

+    assert(model);
  error_append_hint(errp, "The only valid type is: 
%s\n", model);

  } else {
  error_append_hint(errp, "The valid models are: ");
  for (i = 0; mc->valid_cpu_types[i]; i++) {
  g_autofree char *model = cpu_model_from_type(
   
mc->valid_cpu_types[i]);

+    if (!model) {
+    continue;
+    }


Shall we assert(model) for this case, to be consistent with other cases? :)


No, this is the "(null)" cases displayed in the example.

IOW, mc->valid_cpu_types[] contains a CPU type which isn't registered,
so we just skip it.




  error_append_hint(errp, "%s%s",
    model,
    mc->valid_cpu_types[i + 1] ? 
", " : "");


Otherwise, the separator here needs to be adjusted because it's uncertain 
that

mc->valid_cpu_types[i+1] ... mc->valid_cpu_types[END] are valid.


Here we know mc->valid_cpu_types[i] is *not* NULL, but
mc->valid_cpu_types[i + 1] might be (signaling the end
of the array).

This seems correct to me, but I might be missing something.




diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 344196a8ce..58f0c1e30e 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7037,6 +7037,7 @@ static void ppc_cpu_list_entry(gpointer data, 
gpointer user_data)

  }
  name = cpu_model_from_type(typename);
+    assert(name);
  qemu_printf("PowerPC %-16s PVR %08x\n", name, pcc->pvr);
  for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
  PowerPCCPUAlias *alias = &ppc_cpu_aliases[i];


Thanks,
Gavin

Re: [PATCH 00/40] vdpa-net: improve migration downtime through descriptor ASID and persistent IOTLB

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> This patch series contain several enhancements to SVQ live migration downtime
> for vDPA-net hardware device, specifically on mlx5_vdpa. Currently it is based
> off of Eugenio's RFC v2 .load_setup series [1] to utilize the shared facility
> and reduce frictions in merging or duplicating code if at all possible.
>
> It's stacked up in particular order as below, as the optimization for one on
> the top has to depend on others on the bottom. Here's a breakdown for what
> each part does respectively:
>
> Patch #  |  Feature / optimization
> -V---
> 35 - 40  | trace events
> 34   | migrate_cancel bug fix
> 21 - 33  | (Un)map batching at stop-n-copy to further optimize LM down time
> 11 - 20  | persistent IOTLB [3] to improve LM down time
> 02 - 10  | SVQ descriptor ASID [2] to optimize SVQ switching
> 01   | dependent linux headers
>  V
>
> Let's first define 2 sources of downtime that this work is concerned with:
>
> * SVQ switching downtime (Downtime #1): downtime at the start of migration.
>   Time spent on teardown and setup for SVQ mode switching, and this downtime
>   is regarded as the maximum time for an individual vdpa-net device.
>   No memory transfer is involved during SVQ switching, hence no .
>
> * LM downtime (Downtime #2): aggregated downtime for all vdpa-net devices on
>   resource teardown and setup in the last stop-n-copy phase on source host.
>
> With each part of the optimizations applied bottom up, the effective outcome
> in terms of down time (in seconds) performance can be observed in this table:
>
>
> |Downtime #1|Downtime #2
> +---+---
> Baseline QEMU   | 20s ~ 30s |20s
> |   |
> Iterative map   |   |
> at destination[1]   |5s |20s
> |   |
> SVQ descriptor  |   |
> ASID [2]|2s | 5s
> |   |
> |   |
> persistent IOTLB|2s | 2s
>   [3]   |   |
> |   |
> (Un)map batching|   |
> at stop-n-copy  |  1.7s |   1.5s
> before switchover   |   |
>
> (VM config: 128GB mem, 2 mlx5_vdpa devices, each w/ 4 data vqs)

This looks promising!

But the series looks a little bit huge, can we split them into 2 or 3 series?

It helps to speed up the reviewing and merging.

Thanks

>
> Please find the details regarding each enhancement on the commit log.
>
> Thanks,
> -Siwei
>
>
> [1] [RFC PATCH v2 00/10] Map memory at destination .load_setup in vDPA-net 
> migration
> https://lists.nongnu.org/archive/html/qemu-devel/2023-11/msg05711.html
> [2] VHOST_BACKEND_F_DESC_ASID
> https://lore.kernel.org/virtualization/20231018171456.1624030-2-dtatu...@nvidia.com/
> [3] VHOST_BACKEND_F_IOTLB_PERSIST
> https://lore.kernel.org/virtualization/1698304480-18463-1-git-send-email-si-wei@oracle.com/
>
> ---
>
> Si-Wei Liu (40):
>   linux-headers: add vhost_types.h and vhost.h
>   vdpa: add vhost_vdpa_get_vring_desc_group
>   vdpa: probe descriptor group index for data vqs
>   vdpa: piggyback desc_group index when probing isolated cvq
>   vdpa: populate desc_group from net_vhost_vdpa_init
>   vhost: make svq work with gpa without iova translation
>   vdpa: move around vhost_vdpa_set_address_space_id
>   vdpa: add back vhost_vdpa_net_first_nc_vdpa
>   vdpa: no repeat setting shadow_data
>   vdpa: assign svq descriptors a separate ASID when possible
>   vdpa: factor out vhost_vdpa_last_dev
>   vdpa: check map_thread_enabled before join maps thread
>   vdpa: ref counting VhostVDPAShared
>   vdpa: convert iova_tree to ref count based
>   vdpa: add svq_switching and flush_map to header
>   vdpa: indicate SVQ switching via flag
>   vdpa: judge if map can be kept across reset
>   vdpa: unregister listener on last dev cleanup
>   vdpa: should avoid map flushing with persistent iotlb
>   vdpa: avoid mapping flush across reset
>   vdpa: vhost_vdpa_dma_batch_end_once rename
>   vdpa: factor out vhost_vdpa_map_batch_begin
>   vdpa: vhost_vdpa_dma_batch_begin_once rename
>   vdpa: factor out vhost_vdpa_dma_batch_end
>   vdpa: add asid to dma_batch_once API
>   vdpa: return int for dma_batch_once API
>   vdpa: add asid to all dma_batch call sites
>   vdpa: support iotlb_batch_asid
>   vdpa: expose API vhost_vdpa_dma_batch_once
>   vdpa: batch map/unmap op per svq pair basis
>   vdpa: batch map and unmap around cvq svq start/stop
>   vdpa: factor out vhost_vdpa_net_get_nc_vdpa
>   vdpa: batch multiple dma_unmap to a single call for vm stop
>   vdpa: fix network breakage after cancelling migration
>   vdpa:

Re: [PATCH v4 8/9b] target/loongarch: Implement set vcpu intr for kvm

2024-01-11 Thread Philippe Mathieu-Daudé


On 11/1/24 03:29, gaosong wrote:

Hi,

在 2024/1/10 下午5:41, Philippe Mathieu-Daudé 写道:

From: Tianrui Zhao 

Implement loongarch kvm set vcpu interrupt interface,
when a irq is set in vcpu, we use the KVM_INTERRUPT
ioctl to set intr into kvm.

Signed-off-by: Tianrui Zhao 
Signed-off-by: xianglai li 
Reviewed-by: Song Gao 
Message-ID: <20240105075804.1228596-9-zhaotian...@loongson.cn>
[PMD: Split from bigger patch, part 2]
Signed-off-by: Philippe Mathieu-Daudé 
---
  target/loongarch/kvm/kvm_loongarch.h | 16 
  target/loongarch/cpu.c   |  9 -
  target/loongarch/kvm/kvm.c   | 15 +++
  target/loongarch/trace-events    |  1 +
  4 files changed, 40 insertions(+), 1 deletion(-)
  create mode 100644 target/loongarch/kvm/kvm_loongarch.h




diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index d9f8661cfd..d3a8a2f521 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -12,6 +12,7 @@
  #include "qemu/module.h"
  #include "sysemu/qtest.h"
  #include "sysemu/tcg.h"
+#include "sysemu/kvm.h"
  #include "exec/exec-all.h"
  #include "cpu.h"
  #include "internals.h"
@@ -21,6 +22,10 @@
  #include "sysemu/reset.h"
  #endif
  #include "vec.h"
+#ifdef CONFIG_KVM
+#include "kvm/kvm_loongarch.h"


This breaks the tcg 'loongarch64-softmmu' build on an x86 host :-[

../target/loongarch/cpu.c: In function ‘loongarch_cpu_set_irq’:
../target/loongarch/cpu.c:122:9: error: implicit declaration of function 
‘kvm_loongarch_set_interrupt’ [-Werror=implicit-function-declaration]

   122 | kvm_loongarch_set_interrupt(cpu, irq, level);
   | ^~~
../target/loongarch/cpu.c:122:9: error: nested extern declaration of 
‘kvm_loongarch_set_interrupt’ [-Werror=nested-externs]

../target/loongarch/cpu.c: In function ‘loongarch_cpu_reset_hold’:
../target/loongarch/cpu.c:557:9: error: implicit declaration of function 
‘kvm_arch_reset_vcpu’; did you mean ‘kvm_arch_init_vcpu’? 
[-Werror=implicit-function-declaration]

   557 | kvm_arch_reset_vcpu(env);
   | ^~~
   | kvm_arch_init_vcpu
../target/loongarch/cpu.c:557:9: error: nested extern declaration of 
‘kvm_arch_reset_vcpu’ [-Werror=nested-externs]

cc1: all warnings being treated as errors

I will move it out of  '#ifdef CONFIG_KVM'


Oh, right.

(Sorry I only tested on loongarch64 host).

Thanks!

Phil.

Re: [PATCH 17/40] vdpa: judge if map can be kept across reset

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> The descriptor group for SVQ ASID allows the guest memory mapping
> to retain across SVQ switching, same as how isolated CVQ can do
> with a different ASID than the guest GPA space. Introduce an
> evaluation function to judge whether to flush or keep iotlb maps
> based on virtqueue's descriptor group and cvq isolation capability.

I may miss something, but is there any reason we can't judge during
initialization?

We know the device capability so it should not depend on any runtime
configuration.

Thanks

>
> Have to hook the evaluation function to NetClient's .poll op as
> .vhost_reset_status runs ahead of .stop, and .vhost_dev_start
> don't have access to the vhost-vdpa net's information.
>
> Signed-off-by: Si-Wei Liu 
> ---
>  net/vhost-vdpa.c | 40 
>  1 file changed, 40 insertions(+)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 04718b2..e9b96ed 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -504,12 +504,36 @@ static int vhost_vdpa_net_load_cleanup(NetClientState 
> *nc, NICState *nic)
>   n->parent_obj.status & 
> VIRTIO_CONFIG_S_DRIVER_OK);
>  }
>
> +static void vhost_vdpa_net_data_eval_flush(NetClientState *nc, bool stop)
> +{
> +VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +struct vhost_vdpa *v = &s->vhost_vdpa;
> +
> +if (!stop) {
> +return;
> +}
> +
> +if (s->vhost_vdpa.index == 0) {
> +if (s->always_svq) {
> +v->shared->flush_map = true;
> +} else if (!v->shared->svq_switching || v->desc_group >= 0) {
> +v->shared->flush_map = false;
> +} else {
> +v->shared->flush_map = true;
> +}
> +} else if (!s->always_svq && v->shared->svq_switching &&
> +   v->desc_group < 0) {
> +v->shared->flush_map = true;
> +}
> +}
> +
>  static NetClientInfo net_vhost_vdpa_info = {
>  .type = NET_CLIENT_DRIVER_VHOST_VDPA,
>  .size = sizeof(VhostVDPAState),
>  .receive = vhost_vdpa_receive,
>  .start = vhost_vdpa_net_data_start,
>  .load = vhost_vdpa_net_data_load,
> +.poll = vhost_vdpa_net_data_eval_flush,
>  .stop = vhost_vdpa_net_client_stop,
>  .cleanup = vhost_vdpa_cleanup,
>  .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
> @@ -1368,12 +1392,28 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc)
>  return 0;
>  }
>
> +static void vhost_vdpa_net_cvq_eval_flush(NetClientState *nc, bool stop)
> +{
> +VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +struct vhost_vdpa *v = &s->vhost_vdpa;
> +
> +if (!stop) {
> +return;
> +}
> +
> +if (!v->shared->flush_map && !v->shared->svq_switching &&
> +!s->cvq_isolated && v->desc_group < 0) {
> +v->shared->flush_map = true;
> +}
> +}
> +
>  static NetClientInfo net_vhost_vdpa_cvq_info = {
>  .type = NET_CLIENT_DRIVER_VHOST_VDPA,
>  .size = sizeof(VhostVDPAState),
>  .receive = vhost_vdpa_receive,
>  .start = vhost_vdpa_net_cvq_start,
>  .load = vhost_vdpa_net_cvq_load,
> +.poll = vhost_vdpa_net_cvq_eval_flush,
>  .stop = vhost_vdpa_net_cvq_stop,
>  .cleanup = vhost_vdpa_cleanup,
>  .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
> --
> 1.8.3.1
>

Re: [PATCH 18/40] vdpa: unregister listener on last dev cleanup

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> So that the free of iova tree struct can be safely deferred to
> until the last vq referencing it goes away.
>
> Signed-off-by: Si-Wei Liu 
> ---
>  hw/virtio/vhost-vdpa.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 4f026db..ea2dfc8 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -815,7 +815,10 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
>  }
>
>  vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
> -memory_listener_unregister(&v->shared->listener);
> +if (vhost_vdpa_last_dev(dev) && v->shared->listener_registered) {
> +memory_listener_unregister(&v->shared->listener);
> +v->shared->listener_registered = false;
> +}

Can we move this to the put() (refcnt decreasing helper) of shared?

Thanks

>  vhost_vdpa_svq_cleanup(dev);
>
>  dev->opaque = NULL;
> --
> 1.8.3.1
>

Re: [PATCH 1/2] gitlab: Introduce Loongarch64 runner

2024-01-11 Thread Thomas Huth


On 11/01/2024 08.25, gaosong wrote:

Hi,

在 2024/1/11 下午3:08, Thomas Huth 写道:

On 02/01/2024 18.22, Philippe Mathieu-Daudé wrote:

Full build config to run CI tests on a Loongarch64 host.

Forks might enable this by setting LOONGARCH64_RUNNER_AVAILABLE
in their CI namespace settings, see:
https://www.qemu.org/docs/master/devel/ci.html#maintainer-controlled-job-variables

Signed-off-by: Philippe Mathieu-Daudé 
---
  docs/devel/ci-jobs.rst.inc    |  6 ++
  .gitlab-ci.d/custom-runners.yml   |  1 +
  .../openeuler-22.03-loongarch64.yml   | 21 +++
  3 files changed, 28 insertions(+)
  create mode 100644 
.gitlab-ci.d/custom-runners/openeuler-22.03-loongarch64.yml



...
diff --git a/.gitlab-ci.d/custom-runners.yml 
b/.gitlab-ci.d/custom-runners.yml

index 8e5b9500f4..152ace4492 100644
--- a/.gitlab-ci.d/custom-runners.yml
+++ b/.gitlab-ci.d/custom-runners.yml
@@ -32,3 +32,4 @@ include:
    - local: '/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml'
    - local: '/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml'
    - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml'
+  - local: '/.gitlab-ci.d/custom-runners/openeuler-22.03-loongarch64.yml'
diff --git a/.gitlab-ci.d/custom-runners/openeuler-22.03-loongarch64.yml 
b/.gitlab-ci.d/custom-runners/openeuler-22.03-loongarch64.yml

new file mode 100644
index 00..86d18f820e
--- /dev/null
+++ b/.gitlab-ci.d/custom-runners/openeuler-22.03-loongarch64.yml
@@ -0,0 +1,21 @@
+openeuler-22.03-loongarch64-all:
+ extends: .custom_runner_template :-)
+ needs: []
+ stage: build
+ tags:
+ - oe2203
+ - loongarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ 
/^staging/'

+   when: manual
+   allow_failure: true
+ - if: "$LOONGARCH64_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure
+   || { cat config.log meson-logs/meson-log.txt; exit 1; }
+ - make --output-sync -j`nproc --ignore=40`
+ - make --output-sync -j`nproc --ignore=40` check


Does this system really have more than 40 CPU threads? Or is this a 
copy-n-paste from one of the other scripts? In the latter case, I'd suggest 
to adjust the --ignore=40 to a more reasonable value.


 Thomas

No,  only 32.   I think it should be --ignore=32 or 16.


--ignore=32 then also does not make much sense, that would still be the same 
as simply omitting the -j parameter. I guess --ignore=16 should be fine.



I created the same runner on this machine, and I found some check errors,
but I am not sure how to fix them. :-)

See:

https://gitlab.com/gaosong/qemu/-/jobs/5906269934


Seems to be related to RAM backing... for example, the erst-test is failing, 
which is doing something like:


setup_vm_cmd(&state,
"-object memory-backend-file,"
"mem-path=acpi-erst.XX,"
"size=64K,"
"share=on,"
"id=nvram "
"-device acpi-erst,"
"memdev=nvram");

So it seems like "-object memory-backend-file" is not working correctly in 
your gitlab runner? Is there some setup missing?


 Thomas

Re: [PATCH 19/40] vdpa: should avoid map flushing with persistent iotlb

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu  wrote:
>
> Today memory listener is unregistered in vhost_vdpa_reset_status
> unconditionally, due to which all the maps will be flushed away
> from the iotlb. However, map flush is not always needed, and
> doing it from performance hot path may have non-negligible latency
> impact that affects VM reboot time or brown out period during
> live migration.
>
> Leverage the IOTLB_PERSIST backend feature, which ensures durable
> iotlb maps and not disappearing even across reset. When it is
> supported, we may conditionally keep the maps for cases where the
> guest memory mapping doesn't change. Prepare a function so that
> the next patch will be able to use it to keep the maps.
>
> Signed-off-by: Si-Wei Liu 
> ---
>  hw/virtio/trace-events |  1 +
>  hw/virtio/vhost-vdpa.c | 20 
>  2 files changed, 21 insertions(+)
>
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 77905d1..9725d44 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -66,6 +66,7 @@ vhost_vdpa_set_owner(void *dev) "dev: %p"
>  vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, 
> uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p 
> desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 
> 0x%"PRIx64
>  vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p 
> first: 0x%"PRIx64" last: 0x%"PRIx64
>  vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d"
> +vhost_vdpa_maybe_flush_map(void *dev, bool reg, bool flush, bool persist) 
> "dev: %p registered: %d flush_map: %d iotlb_persistent: %d"
>
>  # virtio.c
>  virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned 
> out_num) "elem %p size %zd in_num %u out_num %u"
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index ea2dfc8..31e0a55 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -1471,6 +1471,26 @@ out_stop:
>  return ok ? 0 : -1;
>  }
>
> +static void vhost_vdpa_maybe_flush_map(struct vhost_dev *dev)

Nit: Not a native speaker, but it looks like
vhost_vdpa_may_flush_map() is better.

> +{
> +struct vhost_vdpa *v = dev->opaque;
> +
> +trace_vhost_vdpa_maybe_flush_map(dev, v->shared->listener_registered,
> + v->shared->flush_map,
> + !!(dev->backend_cap &
> + 
> BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)));
> +
> +if (!v->shared->listener_registered) {
> +return;
> +}
> +
> +if (!(dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) ||
> +v->shared->flush_map) {
> +memory_listener_unregister(&v->shared->listener);
> +v->shared->listener_registered = false;
> +}

Others look good.

Thanks

> +}
> +
>  static void vhost_vdpa_reset_status(struct vhost_dev *dev)
>  {
>  struct vhost_vdpa *v = dev->opaque;
> --
> 1.8.3.1
>

Re: [PATCH v7 02/16] i386/cpu: Use APIC ID offset to encode cache topo in CPUID[4]

2024-01-11 Thread Zhao Liu

Hi Xiaoyao,

On Wed, Jan 10, 2024 at 05:31:28PM +0800, Xiaoyao Li wrote:
> Date: Wed, 10 Jan 2024 17:31:28 +0800
> From: Xiaoyao Li 
> Subject: Re: [PATCH v7 02/16] i386/cpu: Use APIC ID offset to encode cache
>  topo in CPUID[4]
> 
> On 1/8/2024 4:27 PM, Zhao Liu wrote:
> > From: Zhao Liu 
> > 
> > Refer to the fixes of cache_info_passthrough ([1], [2]) and SDM, the
> > CPUID.04H:EAX[bits 25:14] and CPUID.04H:EAX[bits 31:26] should use the
> > nearest power-of-2 integer.
> > 
> > The nearest power-of-2 integer can be calculated by pow2ceil() or by
> > using APIC ID offset (like L3 topology using 1 << die_offset [3]).
> > 
> > But in fact, CPUID.04H:EAX[bits 25:14] and CPUID.04H:EAX[bits 31:26]
> > are associated with APIC ID. For example, in linux kernel, the field
> > "num_threads_sharing" (Bits 25 - 14) is parsed with APIC ID.
> 
> And for
> > another example, on Alder Lake P, the CPUID.04H:EAX[bits 31:26] is not
> > matched with actual core numbers and it's calculated by:
> > "(1 << (pkg_offset - core_offset)) - 1".
> 
> could you elaborate it more? what is the value of actual core numbers on
> Alder lake P? and what is the pkg_offset and core_offset?

For example, the following's the CPUID dump of an ADL-S machine:

CPUID.04H:

0x0004 0x00: eax=0xfc004121 ebx=0x01c0003f ecx=0x003f edx=0x
0x0004 0x01: eax=0xfc004122 ebx=0x01c0003f ecx=0x007f edx=0x
0x0004 0x02: eax=0xfc01c143 ebx=0x03c0003f ecx=0x07ff edx=0x
0x0004 0x03: eax=0xfc1fc163 ebx=0x0240003f ecx=0x9fff edx=0x0004
0x0004 0x04: eax=0x ebx=0x ecx=0x edx=0x


CPUID.1FH:

0x001f 0x00: eax=0x0001 ebx=0x0001 ecx=0x0100 edx=0x004c
0x001f 0x01: eax=0x0007 ebx=0x0014 ecx=0x0201 edx=0x004c
0x001f 0x02: eax=0x ebx=0x ecx=0x0002 edx=0x004c

The CPUID.04H:EAX[bits 31:26] is 63.
From CPUID.1FH.00H:EAX[bits 04:00], the core_offset is 1, and from
CPUID.1FH.01H:EAX[bits 04:00], the pkg_offset is 7.

Thus we can verify the above equation as:

(1 << (0x7 - 0x1)) - 1 = 63.

"Maximum number of addressable IDs" refers to the maximum number of IDs
that can be enumerated in the APIC ID's topology layout, which does not
necessarily correspond to the actual number of topology domains.

> 
> > Therefore the offset of APIC ID should be preferred to calculate nearest
> > power-of-2 integer for CPUID.04H:EAX[bits 25:14] and CPUID.04H:EAX[bits
> > 31:26]:
> > 1. d/i cache is shared in a core, 1 << core_offset should be used
> > instand of "cs->nr_threads" in encode_cache_cpuid4() for
> 
> /s/instand/instead

Thanks!

> 
> > CPUID.04H.00H:EAX[bits 25:14] and CPUID.04H.01H:EAX[bits 25:14].
> > 2. L2 cache is supposed to be shared in a core as for now, thereby
> > 1 << core_offset should also be used instand of "cs->nr_threads" in
> 
> ditto

Okay.

> 
> > encode_cache_cpuid4() for CPUID.04H.02H:EAX[bits 25:14].
> > 3. Similarly, the value for CPUID.04H:EAX[bits 31:26] should also be
> > calculated with the bit width between the Package and SMT levels in
> > the APIC ID ((1 << (pkg_offset - core_offset)) - 1).
> > 
> > In addition, use APIC ID offset to replace "pow2ceil()" for
> > cache_info_passthrough case.
> > 
> > [1]: efb3934adf9e ("x86: cpu: make sure number of addressable IDs for 
> > processor cores meets the spec")
> > [2]: d7caf13b5fcf ("x86: cpu: fixup number of addressable IDs for logical 
> > processors sharing cache")
> > [3]: d65af288a84d ("i386: Update new x86_apicid parsing rules with 
> > die_offset support")
> > 
> > Fixes: 7e3482f82480 ("i386: Helpers to encode cache information 
> > consistently")
> > Suggested-by: Robert Hoo 
> > Signed-off-by: Zhao Liu 
> > Tested-by: Babu Moger 
> > Tested-by: Yongwei Ma 
> > Acked-by: Michael S. Tsirkin 
> > ---
> > Changes since v3:
> >   * Fix compile warnings. (Babu)
> >   * Fix spelling typo.
> > 
> > Changes since v1:
> >   * Use APIC ID offset to replace "pow2ceil()" for cache_info_passthrough
> > case. (Yanan)
> >   * Split the L1 cache fix into a separate patch.
> >   * Rename the title of this patch (the original is "i386/cpu: Fix number
> > of addressable IDs in CPUID.04H").
> > ---
> >   target/i386/cpu.c | 30 +++---
> >   1 file changed, 23 insertions(+), 7 deletions(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 5a3678a789cf..c8d2a585723a 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -6014,7 +6014,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
> > uint32_t count,
> >   {
> >   X86CPU *cpu = env_archcpu(env);
> >   CPUState *cs = env_cpu(env);
> > -uint32_t die_offset;
> >   uint32_t limit;
> >   uint32_t signature[3];
> >   X86CPUTopoInfo topo_info;
> > @@ -6098,39 +6097,56 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
> > index, uint32_t count,
> >   int host_vcpus_per_cache = 1 + ((

Re: [PATCH 20/40] vdpa: avoid mapping flush across reset

2024-01-11 Thread Jason Wang

On Fri, Dec 8, 2023 at 2:52 AM Si-Wei Liu  wrote:
>
> Leverage the IOTLB_PERSIST and DESC_ASID features to achieve
> a slightly light weight reset path, without resorting to
> suspend and resume. Not as best but it offers significant
> time saving too, which should somehow play its role in live
> migration down time reduction by large.
>
> It benefits two cases:
>   - normal virtio reset in the VM, e.g. guest reboot, don't
> have to tear down all iotlb mapping and set up again.
>   - SVQ switching, in which data vq's descriptor table and
> vrings are moved to a different ASID than where its
> buffers reside. Along with the use of persistent iotlb,
> it would save substantial time from pinning and mapping
> unnecessarily when moving descriptors on to or out of
> shadow mode.
>
> Signed-off-by: Si-Wei Liu 

Looks good to me.

Thanks

> ---
>  hw/virtio/vhost-vdpa.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 31e0a55..47c764b 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -633,6 +633,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void 
> *opaque, Error **errp)
>   0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
>   0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
>   0x1ULL << VHOST_BACKEND_F_DESC_ASID |
> + 0x1ULL << VHOST_BACKEND_F_IOTLB_PERSIST 
> |
>   0x1ULL << VHOST_BACKEND_F_SUSPEND;
>  int ret;
>
> @@ -1493,8 +1494,6 @@ static void vhost_vdpa_maybe_flush_map(struct vhost_dev 
> *dev)
>
>  static void vhost_vdpa_reset_status(struct vhost_dev *dev)
>  {
> -struct vhost_vdpa *v = dev->opaque;
> -
>  if (!vhost_vdpa_last_dev(dev)) {
>  return;
>  }
> @@ -1502,9 +1501,7 @@ static void vhost_vdpa_reset_status(struct vhost_dev 
> *dev)
>  vhost_vdpa_reset_device(dev);
>  vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> VIRTIO_CONFIG_S_DRIVER);
> -memory_listener_unregister(&v->shared->listener);
> -v->shared->listener_registered = false;
> -
> +vhost_vdpa_maybe_flush_map(dev);
>  }
>
>  static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> --
> 1.8.3.1
>

Re: [PATCH v7 03/16] i386/cpu: Consolidate the use of topo_info in cpu_x86_cpuid()

2024-01-11 Thread Zhao Liu

Hi Xiaoyao,

On Wed, Jan 10, 2024 at 07:52:38PM +0800, Xiaoyao Li wrote:
> Date: Wed, 10 Jan 2024 19:52:38 +0800
> From: Xiaoyao Li 
> Subject: Re: [PATCH v7 03/16] i386/cpu: Consolidate the use of topo_info in
>  cpu_x86_cpuid()
> 
> On 1/8/2024 4:27 PM, Zhao Liu wrote:
> > From: Zhao Liu 
> > 
> > In cpu_x86_cpuid(), there are many variables in representing the cpu
> > topology, e.g., topo_info, cs->nr_cores/cs->nr_threads.
> 
> Please use comma instead of slash. cs->nr_cores/cs->nr_threads looks like
> one variable.

Okay.

> 
> > Since the names of cs->nr_cores/cs->nr_threads does not accurately
> > represent its meaning, the use of cs->nr_cores/cs->nr_threads is prone
> > to confusion and mistakes.
> > 
> > And the structure X86CPUTopoInfo names its members clearly, thus the
> > variable "topo_info" should be preferred.
> > 
> > In addition, in cpu_x86_cpuid(), to uniformly use the topology variable,
> > replace env->dies with topo_info.dies_per_pkg as well.
> > 
> > Suggested-by: Robert Hoo 
> > Signed-off-by: Zhao Liu 
> > Tested-by: Babu Moger 
> > Tested-by: Yongwei Ma 
> > Acked-by: Michael S. Tsirkin 
> > ---
> > Changes since v3:
> >   * Fix typo. (Babu)
> > 
> > Changes since v1:
> >   * Extract cores_per_socket from the code block and use it as a local
> > variable for cpu_x86_cpuid(). (Yanan)
> >   * Remove vcpus_per_socket variable and use cpus_per_pkg directly.
> > (Yanan)
> >   * Replace env->dies with topo_info.dies_per_pkg in cpu_x86_cpuid().
> > ---
> >   target/i386/cpu.c | 31 ++-
> >   1 file changed, 18 insertions(+), 13 deletions(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index c8d2a585723a..6f8fa772ecf8 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -6017,11 +6017,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
> > index, uint32_t count,
> >   uint32_t limit;
> >   uint32_t signature[3];
> >   X86CPUTopoInfo topo_info;
> > +uint32_t cores_per_pkg;
> > +uint32_t cpus_per_pkg;
> 
> I prefer to lps_per_pkg or threads_per_pkg.

Okay, lp is not common in QEMU code, so I would change this to
threads_per_pkg.

> 
> Other than it,
> 
> Reviewed-by: Xiaoyao Li 

Thanks!

-Zhao

Re: [PATCH 2/2] tests/tcg/s390x: Test LOAD ADDRESS EXTENDED

2024-01-11 Thread Thomas Huth


On 10/01/2024 00.22, Ilya Leoshkevich wrote:

Add a small test to prevent regressions. Userspace runs in primary
mode, so LAE should always set the access register to 0.

Signed-off-by: Ilya Leoshkevich 
---
  tests/tcg/s390x/Makefile.target |  1 +
  tests/tcg/s390x/lae.c   | 25 +
  2 files changed, 26 insertions(+)
  create mode 100644 tests/tcg/s390x/lae.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 0e670f3f8b9..30994dcf9c2 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -44,6 +44,7 @@ TESTS+=clgebr
  TESTS+=clc
  TESTS+=laalg
  TESTS+=add-logical-with-carry
+TESTS+=lae
  
  cdsg: CFLAGS+=-pthread

  cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/lae.c b/tests/tcg/s390x/lae.c
new file mode 100644
index 000..661e95f9978
--- /dev/null
+++ b/tests/tcg/s390x/lae.c
@@ -0,0 +1,25 @@
+/*
+ * Test the LOAD ADDRESS EXTENDED instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+int main(void)
+{
+unsigned long long ar = -1, b2 = 10, r, x2 = 500;
+int tmp;
+
+asm("ear %[tmp],%[r]\n"
+"lae %[r],42(%[x2],%[b2])\n"
+"ear %[ar],%[r]\n"
+"sar %[r],%[tmp]"
+: [tmp] "=&r" (tmp), [r] "=&r" (r), [ar] "+r" (ar)
+: [b2] "r" (b2), [x2] "r" (x2)
+: "memory");
+assert(ar == 0xffffffff00000000ULL);
+assert(r == 100542);
+
+return EXIT_SUCCESS;
+}


I'm sorry, but it fails when building with Clang (version 17):

.../qemu/tests/tcg/s390x/lae.c:14:9: error: invalid operand for instruction
   14 | asm("ear %[tmp],%[r]\n"
  | ^
:1:10: note: instantiated into assembly here
1 | ear %r2,%r1
  | ^
.../qemu/tests/tcg/s390x/lae.c:16:10: error: invalid operand for instruction
   16 | "ear %[ar],%[r]\n"
  |  ^
:3:9: note: instantiated into assembly here
3 | ear %r0,%r1
  | ^
.../qemu/tests/tcg/s390x/lae.c:17:10: error: invalid operand for instruction
   17 | "sar %[r],%[tmp]"
  |  ^
:4:5: note: instantiated into assembly here
4 | sar %r1,%r2
  | ^
3 errors generated.

Any suggestions how to fix it best?

 Thomas

Re: [RFC PATCH v3 04/30] io: fsync before closing a file channel

2024-01-11 Thread Peter Xu

On Mon, Nov 27, 2023 at 05:25:46PM -0300, Fabiano Rosas wrote:
> Make sure the data is flushed to disk before closing file
> channels. This will ensure data is on disk at the end of a migration
> to file.

Looks reasonable, but just two (possibly naive) questions:

(1) Does this apply to all io channel users, or only migration?

(2) Why metadata doesn't matter (v.s. fsync(), when CONFIG_FDATASYNC=y)?

Thanks,

-- 
Peter Xu

Re: [NOTFORMERGE PATCH 2/2] gitlab: Add Loongarch64 KVM-only build

2024-01-11 Thread gaosong


在 2024/1/11 下午4:20, Thomas Huth 写道:

On 11/01/2024 08.37, gaosong wrote:

Hi,

在 2024/1/11 下午3:10, Thomas Huth 写道:

On 02/01/2024 18.22, Philippe Mathieu-Daudé wrote:

Signed-off-by: Philippe Mathieu-Daudé 
---
Used to test 
https://lore.kernel.org/qemu-devel/20231228084051.3235354-1-zhaotian...@loongson.cn/


So why is it NOTFORMERGE ? Don't we want to test KVM-only builds for 
loongarch in the long run?


 Thomas


I think we can drop this title.

I tested this job with the latest loongarch kvm patches, but I found a 
test-hmp check error.
Can you recreate the error manually? i.e. compile with configure 
--disable-tcg and then run:


 V=2 QTEST_QEMU_BINARY=./qemu-system-loongarch64 tests/qtest/test-hmp

That should likely provide you with a hint where it is crashing

 Thomas

Thank you,

LoongArch does not support these cmds, or they have some problems.
-    "gva2gpa 0",
-    "memsave 0 4096 \"/dev/null\"",
-    "x /8i 0x100",
-    "xp /16x 0",

Could we disable these 4 cmds or the test_temp check?
After we fix the cmds problems, we can enable them.

Thanks.
Song gao

Re: [PATCH v3] scripts/checkpatch: Support codespell checking

2024-01-11 Thread Thomas Huth


On 05/01/2024 09.38, Zhao Liu wrote:

From: Zhao Liu 

Add two spelling check options (--codespell and --codespellfile) to
enhance spelling check through dictionary, which copied the Linux
kernel's implementation in checkpatch.pl.

This check uses the dictionary at "/usr/share/codespell/dictionary.txt"
by default, if there is no dictionary specified under this path, it
will look for the dictionary of python3's codespell (This requires user
to add python3's path in environment variable $PATH, and to install
codespell by "pip install codespell").

Tested-by: Yongwei Ma 
Tested-by: Samuel Tardieu 
Signed-off-by: Zhao Liu 


Tested-by: Thomas Huth 

Thanks, I'll add it to my next pull request.

Re: [PATCH v7 05/16] i386: Decouple CPUID[0x1F] subleaf with specific topology level

2024-01-11 Thread Zhao Liu

Hi Xiaoyao,

On Thu, Jan 11, 2024 at 11:19:34AM +0800, Xiaoyao Li wrote:
> Date: Thu, 11 Jan 2024 11:19:34 +0800
> From: Xiaoyao Li 
> Subject: Re: [PATCH v7 05/16] i386: Decouple CPUID[0x1F] subleaf with
>  specific topology level
> 
> On 1/8/2024 4:27 PM, Zhao Liu wrote:
> > From: Zhao Liu 
> > 
> > At present, the subleaf 0x02 of CPUID[0x1F] is bound to the "die" level.
> > 
> > In fact, the specific topology level exposed in 0x1F depends on the
> > platform's support for extension levels (module, tile and die).
> > 
> > To help expose "module" level in 0x1F, decouple CPUID[0x1F] subleaf
> > with specific topology level.
> > 
> > Signed-off-by: Zhao Liu 
> > Tested-by: Babu Moger 
> > Tested-by: Yongwei Ma 
> > Acked-by: Michael S. Tsirkin 
> > ---
> > Changes since v3:
> >   * New patch to prepare to expose module level in 0x1F.
> >   * Move the CPUTopoLevel enumeration definition from "i386: Add cache
> > topology info in CPUCacheInfo" to this patch. Note, to align with
> > topology types in SDM, revert the name of CPU_TOPO_LEVEL_UNKNOW to
> > CPU_TOPO_LEVEL_INVALID.
> > ---
> >   target/i386/cpu.c | 136 +-
> >   target/i386/cpu.h |  15 +
> >   2 files changed, 126 insertions(+), 25 deletions(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index bc440477d13d..5c295c9a9e2d 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -269,6 +269,116 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache,
> >  (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
> >   }
> > +static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
> > +   enum CPUTopoLevel topo_level)
> > +{
> > +switch (topo_level) {
> > +case CPU_TOPO_LEVEL_SMT:
> > +return 1;
> > +case CPU_TOPO_LEVEL_CORE:
> > +return topo_info->threads_per_core;
> > +case CPU_TOPO_LEVEL_DIE:
> > +return topo_info->threads_per_core * topo_info->cores_per_die;
> > +case CPU_TOPO_LEVEL_PACKAGE:
> > +return topo_info->threads_per_core * topo_info->cores_per_die *
> > +   topo_info->dies_per_pkg;
> > +default:
> > +g_assert_not_reached();
> > +}
> > +return 0;
> > +}
> > +
> > +static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
> > +enum CPUTopoLevel topo_level)
> > +{
> > +switch (topo_level) {
> > +case CPU_TOPO_LEVEL_SMT:
> > +return 0;
> > +case CPU_TOPO_LEVEL_CORE:
> > +return apicid_core_offset(topo_info);
> > +case CPU_TOPO_LEVEL_DIE:
> > +return apicid_die_offset(topo_info);
> > +case CPU_TOPO_LEVEL_PACKAGE:
> > +return apicid_pkg_offset(topo_info);
> > +default:
> > +g_assert_not_reached();
> > +}
> > +return 0;
> > +}
> > +
> > +static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
> > +{
> > +switch (topo_level) {
> > +case CPU_TOPO_LEVEL_INVALID:
> > +return CPUID_1F_ECX_TOPO_LEVEL_INVALID;
> > +case CPU_TOPO_LEVEL_SMT:
> > +return CPUID_1F_ECX_TOPO_LEVEL_SMT;
> > +case CPU_TOPO_LEVEL_CORE:
> > +return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> > +case CPU_TOPO_LEVEL_DIE:
> > +return CPUID_1F_ECX_TOPO_LEVEL_DIE;
> > +default:
> > +/* Other types are not supported in QEMU. */
> > +g_assert_not_reached();
> > +}
> > +return 0;
> > +}
> > +
> > +static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
> > +X86CPUTopoInfo *topo_info,
> > +uint32_t *eax, uint32_t *ebx,
> > +uint32_t *ecx, uint32_t *edx)
> > +{
> > +static DECLARE_BITMAP(topo_bitmap, CPU_TOPO_LEVEL_MAX);
> > +X86CPU *cpu = env_archcpu(env);
> > +unsigned long level, next_level;
> > +uint32_t num_cpus_next_level, offset_next_level;
> 
> again, I dislike the name of cpus to represent the logical processor or
> thread. we can call it, num_lps_next_level, or num_threads_next_level;

Okay, will use num_threads_next_level ;-)

> 
> > +
> > +/*
> > + * Initialize the bitmap to decide which levels should be
> > + * encoded in 0x1f.
> > + */
> > +if (!count) {
> 
> using static bitmap and initialize the bitmap on (count == 0), looks bad to
> me. It highly relies on the order of how encode_topo_cpuid1f() is called,
> and fragile.
> 
> Instead, we can maintain an array in CPUX86State, e.g.,
> 
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1904,6 +1904,8 @@ typedef struct CPUArchState {
> 
>  /* Number of dies within this CPU package. */
>  unsigned nr_dies;
> +
> +uint8_t valid_cpu_topo[CPU_TOPO_LEVEL_MAX];
>  } CPUX86State;
> 
> 
> and initialize it as below, when initializing the env
> 
> env->valid_cpu_topo[0] = CPU_TOPO_LEVEL_SMT;
> env->valid_cpu_topo[1] = CPU_TOPO_LEVEL_CORE;
> if (env->nr_die

Re: [NOTFORMERGE PATCH 2/2] gitlab: Add Loongarch64 KVM-only build

2024-01-11 Thread Thomas Huth


On 11/01/2024 09.50, gaosong wrote:

在 2024/1/11 下午4:20, Thomas Huth 写道:

On 11/01/2024 08.37, gaosong wrote:

Hi,

在 2024/1/11 下午3:10, Thomas Huth 写道:

On 02/01/2024 18.22, Philippe Mathieu-Daudé wrote:

Signed-off-by: Philippe Mathieu-Daudé 
---
Used to test 
https://lore.kernel.org/qemu-devel/20231228084051.3235354-1-zhaotian...@loongson.cn/


So why is it NOTFORMERGE ? Don't we want to test KVM-only builds for 
loongarch in the long run?


 Thomas


I think we can drop this title.

I tested this job with the latest loongarch kvm patches, but I found a 
test-hmp check error.
Can you recreate the error manually? i.e. compile with configure 
--disable-tcg and then run:


 V=2 QTEST_QEMU_BINARY=./qemu-system-loongarch64 tests/qtest/test-hmp

That should likely provide you with a hint where it is crashing

 Thomas

Thank you,

LoongArch does not support these cmds, or they have some problems.
-    "gva2gpa 0",
-    "memsave 0 4096 \"/dev/null\"",
-    "x /8i 0x100",
-    "xp /16x 0",

Could we disable these 4 cmds or the test_temp check?
After we fix the cmds problems, we can enable them.


Even if loongarch does not support one of these commands, it should not 
crash QEMU. So please fix the crashes first before considering to enable the 
KVM-only test in the CI.


 Thanks
  Thomas

Re: [PATCH 2/2] tests/tcg/s390x: Test LOAD ADDRESS EXTENDED

2024-01-11 Thread Ilya Leoshkevich

On Thu, 2024-01-11 at 09:37 +0100, Thomas Huth wrote:
> On 10/01/2024 00.22, Ilya Leoshkevich wrote:
> > Add a small test to prevent regressions. Userspace runs in primary
> > mode, so LAE should always set the access register to 0.
> > 
> > Signed-off-by: Ilya Leoshkevich 
> > ---
> >   tests/tcg/s390x/Makefile.target |  1 +
> >   tests/tcg/s390x/lae.c   | 25 +
> >   2 files changed, 26 insertions(+)
> >   create mode 100644 tests/tcg/s390x/lae.c
> > 
> > diff --git a/tests/tcg/s390x/Makefile.target
> > b/tests/tcg/s390x/Makefile.target
> > index 0e670f3f8b9..30994dcf9c2 100644
> > --- a/tests/tcg/s390x/Makefile.target
> > +++ b/tests/tcg/s390x/Makefile.target
> > @@ -44,6 +44,7 @@ TESTS+=clgebr
> >   TESTS+=clc
> >   TESTS+=laalg
> >   TESTS+=add-logical-with-carry
> > +TESTS+=lae
> >   
> >   cdsg: CFLAGS+=-pthread
> >   cdsg: LDFLAGS+=-pthread
> > diff --git a/tests/tcg/s390x/lae.c b/tests/tcg/s390x/lae.c
> > new file mode 100644
> > index 000..661e95f9978
> > --- /dev/null
> > +++ b/tests/tcg/s390x/lae.c
> > @@ -0,0 +1,25 @@
> > +/*
> > + * Test the LOAD ADDRESS EXTENDED instruction.
> > + *
> > + * SPDX-License-Identifier: GPL-2.0-or-later
> > + */
> > +#include 
> > +#include 
> > +
> > +int main(void)
> > +{
> > +    unsigned long long ar = -1, b2 = 10, r, x2 = 500;
> > +    int tmp;
> > +
> > +    asm("ear %[tmp],%[r]\n"
> > +    "lae %[r],42(%[x2],%[b2])\n"
> > +    "ear %[ar],%[r]\n"
> > +    "sar %[r],%[tmp]"
> > +    : [tmp] "=&r" (tmp), [r] "=&r" (r), [ar] "+r" (ar)
> > +    : [b2] "r" (b2), [x2] "r" (x2)
> > +    : "memory");
> > +    assert(ar == 0xULL);
> > +    assert(r == 100542);
> > +
> > +    return EXIT_SUCCESS;
> > +}
> 
> I'm sorry, but it fails when building with Clang (version 17):
> 
> .../qemu/tests/tcg/s390x/lae.c:14:9: error: invalid operand for
> instruction
>     14 | asm("ear %[tmp],%[r]\n"
>    | ^
> :1:10: note: instantiated into assembly here
>  1 | ear %r2,%r1
>    | ^
> .../qemu/tests/tcg/s390x/lae.c:16:10: error: invalid operand for
> instruction
>     16 | "ear %[ar],%[r]\n"
>    |  ^
> :3:9: note: instantiated into assembly here
>  3 | ear %r0,%r1
>    | ^
> .../qemu/tests/tcg/s390x/lae.c:17:10: error: invalid operand for
> instruction
>     17 | "sar %[r],%[tmp]"
>    |  ^
> :4:5: note: instantiated into assembly here
>  4 | sar %r1,%r2
>    | ^
> 3 errors generated.
> 
> Any suggestions how to fix it best?
> 
>   Thomas
> 

clang wants %aN there, and I don't see a way to convert %rN to %aN.
Seems like I'll have to hardcode the register number. I'll send a v2.

Re: [PATCH v7 07/16] i386: Support modules_per_die in X86CPUTopoInfo

2024-01-11 Thread Zhao Liu

Hi Xiaoyao,

On Thu, Jan 11, 2024 at 01:53:53PM +0800, Xiaoyao Li wrote:

> > -cores_per_pkg = topo_info.cores_per_die * topo_info.dies_per_pkg;
> > +cores_per_pkg = topo_info.cores_per_module * topo_info.modules_per_die 
> > *
> > +topo_info.dies_per_pkg;
> 
> Nit. maybe we can introduce some helper function like
> 
> static inline uint32_t topo_info_cores_per_pkg(X86CPUTopoInfo *topo_info) {
>   return topo_info.cores_per_module * topo_info.modules_per_die *
>topo_info.dies_per_pkg;
> }
> 
> so we don't need to care how it calculates.

Yeah, will add this helper, maybe in another patch.

> 
> Besides,
> 
> Reviewed-by: Xiaoyao Li 

Thanks!

-Zhao

Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]

2024-01-11 Thread Zhao Liu

Hi Xiaoyao,

On Thu, Jan 11, 2024 at 02:04:53PM +0800, Xiaoyao Li wrote:
> Date: Thu, 11 Jan 2024 14:04:53 +0800
> From: Xiaoyao Li 
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> On 1/8/2024 4:27 PM, Zhao Liu wrote:
> > From: Zhao Liu 
> > 
> > Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
> > erroneous smp_num_siblings on Intel Hybrid platforms")) is able to
> > handle platforms with Module level enumerated via CPUID.1F.
> > 
> > Expose the module level in CPUID[0x1F] if the machine has more than 1
> > module.
> > 
> > (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
> > configurations in "-smp".)
> > 
> > Signed-off-by: Zhao Liu 
> > Tested-by: Babu Moger 
> > Tested-by: Yongwei Ma 
> > Acked-by: Michael S. Tsirkin 
> > ---
> > Changes since v3:
> >   * New patch to expose module level in 0x1F.
> >   * Add Tested-by tag from Yongwei.
> > ---
> >   target/i386/cpu.c | 12 +++-
> >   target/i386/cpu.h |  2 ++
> >   target/i386/kvm/kvm.c |  2 +-
> >   3 files changed, 14 insertions(+), 2 deletions(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 294ca6b8947a..a2d39d2198b6 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo 
> > *topo_info,
> >   return 1;
> >   case CPU_TOPO_LEVEL_CORE:
> >   return topo_info->threads_per_core;
> > +case CPU_TOPO_LEVEL_MODULE:
> > +return topo_info->threads_per_core * topo_info->cores_per_module;
> >   case CPU_TOPO_LEVEL_DIE:
> >   return topo_info->threads_per_core * topo_info->cores_per_module *
> >  topo_info->modules_per_die;
> > @@ -297,6 +299,8 @@ static uint32_t 
> > apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
> >   return 0;
> >   case CPU_TOPO_LEVEL_CORE:
> >   return apicid_core_offset(topo_info);
> > +case CPU_TOPO_LEVEL_MODULE:
> > +return apicid_module_offset(topo_info);
> >   case CPU_TOPO_LEVEL_DIE:
> >   return apicid_die_offset(topo_info);
> >   case CPU_TOPO_LEVEL_PACKAGE:
> > @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel 
> > topo_level)
> >   return CPUID_1F_ECX_TOPO_LEVEL_SMT;
> >   case CPU_TOPO_LEVEL_CORE:
> >   return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> > +case CPU_TOPO_LEVEL_MODULE:
> > +return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
> >   case CPU_TOPO_LEVEL_DIE:
> >   return CPUID_1F_ECX_TOPO_LEVEL_DIE;
> >   default:
> > @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, 
> > uint32_t count,
> >   if (env->nr_dies > 1) {
> >   set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
> >   }
> > +
> > +if (env->nr_modules > 1) {
> > +set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
> > +}
> >   }
> >   *ecx = count & 0xff;
> > @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
> > uint32_t count,
> >   break;
> >   case 0x1F:
> >   /* V2 Extended Topology Enumeration Leaf */
> > -if (topo_info.dies_per_pkg < 2) {
> > +if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {
> 
> maybe we can come up with below function if we have env->valid_cpu_topo[] as
> I suggested in patch 5.
> 
> bool cpu_x86_has_valid_cpuid1f(CPUX86State *env) {
>   return env->valid_cpu_topo[2] ? true : false;
> }
> 
> ...

This makes sense.

> 
> >   *eax = *ebx = *ecx = *edx = 0;
> >   break;
> >   }
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index eecd30bde92b..97b290e10576 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
> >   CPU_TOPO_LEVEL_INVALID,
> >   CPU_TOPO_LEVEL_SMT,
> >   CPU_TOPO_LEVEL_CORE,
> > +CPU_TOPO_LEVEL_MODULE,
> >   CPU_TOPO_LEVEL_DIE,
> >   CPU_TOPO_LEVEL_PACKAGE,
> >   CPU_TOPO_LEVEL_MAX,
> > @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
> >   #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
> >   #define CPUID_1F_ECX_TOPO_LEVEL_SMT  CPUID_B_ECX_TOPO_LEVEL_SMT
> >   #define CPUID_1F_ECX_TOPO_LEVEL_CORE CPUID_B_ECX_TOPO_LEVEL_CORE
> > +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
> >   #define CPUID_1F_ECX_TOPO_LEVEL_DIE  5
> >   /* MSR Feature Bits */
> > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > index 4ce80555b45c..e5ddb214cb36 100644
> > --- a/target/i386/kvm/kvm.c
> > +++ b/target/i386/kvm/kvm.c
> > @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >   break;
> >   }
> >   case 0x1f:
> > -if (env->nr_dies < 2) {
> > +if (env->nr_modules < 2 && env->nr_dies < 2) {
> 
> then cpu_x86_has_valid_cpuid1f() can be used here.
>

Good idea, I will also try this.

Thanks,
Zhao

[PATCH v2 0/2] target/s390x: Fix LAE setting a wrong access register

2024-01-11 Thread Ilya Leoshkevich

v1: https://lists.gnu.org/archive/html/qemu-devel/2024-01/msg01596.html
v1 -> v2: Fix building the test with clang (Thomas).



Hi,

Ido has noticed that LAE sets a wrong access register and proposed a
fix. This series fixes the issue and adds a test.

Best regards,
Ilya

Ilya Leoshkevich (2):
  target/s390x: Fix LAE setting a wrong access register
  tests/tcg/s390x: Test LOAD ADDRESS EXTENDED

 target/s390x/tcg/translate.c|  3 ++-
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/lae.c   | 31 +++
 3 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 tests/tcg/s390x/lae.c

-- 
2.43.0

[PATCH v2 2/2] tests/tcg/s390x: Test LOAD ADDRESS EXTENDED

2024-01-11 Thread Ilya Leoshkevich

Add a small test to prevent regressions. Userspace runs in primary
mode, so LAE should always set the access register to 0.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/lae.c   | 31 +++
 2 files changed, 32 insertions(+)
 create mode 100644 tests/tcg/s390x/lae.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 0e670f3f8b9..30994dcf9c2 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -44,6 +44,7 @@ TESTS+=clgebr
 TESTS+=clc
 TESTS+=laalg
 TESTS+=add-logical-with-carry
+TESTS+=lae
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/lae.c b/tests/tcg/s390x/lae.c
new file mode 100644
index 00000000000..59712b5e371
--- /dev/null
+++ b/tests/tcg/s390x/lae.c
@@ -0,0 +1,31 @@
+/*
+ * Test the LOAD ADDRESS EXTENDED instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+int main(void)
+{
+unsigned long long ar = -1, b2 = 100000, r, x2 = 500;
+/*
+ * Hardcode the register number, since clang does not allow using %rN in
+ * place of %aN.
+ */
+register unsigned long long r2 __asm__("2");
+int tmp;
+
+asm("ear %[tmp],%%a2\n"
+"lae %%r2,42(%[x2],%[b2])\n"
+"ear %[ar],%%a2\n"
+"sar %%a2,%[tmp]"
+: [tmp] "=&r" (tmp), "=&r" (r2), [ar] "+r" (ar)
+: [b2] "r" (b2), [x2] "r" (x2)
+: "memory");
+r = r2;
+assert(ar == 0xffffffff00000000ULL);
+assert(r == 100542);
+
+return EXIT_SUCCESS;
+}
-- 
2.43.0

Re: [PATCH 12/19] qapi/schema: split "checked" field into "checking" and "checked"

2024-01-11 Thread Markus Armbruster

John Snow  writes:

> On Wed, Nov 22, 2023, 9:02 AM Markus Armbruster  wrote:
>
>> John Snow  writes:
>>
>> > Differentiate between "actively in the process of checking" and
>> > "checking has completed". This allows us to clean up the types of some
>> > internal fields such as QAPISchemaObjectType's members field which
>> > currently uses "None" as a canary for determining if check has
>> > completed.
>>
>> Certain members become valid only after .check().  Two ways to code
>> that:
>>
>> 1. Assign to such members only in .check().  If you try to use them
>> before .check(), AttributeError.  Nice.  Drawback: pylint is unhappy,
>> W0201 attribute-defined-outside-init.
>>
>
> Can be overcome by declaring the field in __init__, which satisfies both
> the linter and my developer usability sense (Classes should be easy to have
> their properties enumerated by looking in one well known place.)
>
>
>> 2. Assign None in .__init__(), and the real value in .check().  If you
>> try to use them before .check(), you get None, which hopefully leads to
>> an error.  Meh, but pylint is happy.
>>
>> I picked 2. because pylint's warning made me go "when in Rome..."
>>
>
> Yep, this is perfectly cromulent dynamically typed Python. It's not the
> Roman's fault I'm packing us up to go to another empire.
>
>
>> With type hints, we can declare in .__init__(), and assign in .check().
>> Gives me the AttributeError I like, and pylint remains happy.  What do
>> you think?
>>
>
> Sounds good to me in general, I already changed this for 2/3 of my other
> uses of @property.
>
> (I'm only reluctant because I don't really like that it's a "lie", but in
> this case, without potentially significant rewrites, it's a reasonable type
> band-aid. Once we're type checked, we can refactor more confidently if we
> so desire, to make certain patterns less prominent or landmine-y.)

The general problem is "attribute value is valid only after a state
transition" (here: .member is valid only after .check()).

We want to catch uses of the attribute before it becomes valid.

We want to keep pylint and mypy happy.

Solutions:

1. Initialize in .__init__() to some invalid value.  Set it to the valid
   value in .check().

1.a. Pick the "natural" invalid value: None

   How to catch: assert attribute value is valid (here: .members is not
   None).  Easy to forget.  Better: when the use will safely choke on
   the invalid value (here: elide for uses like for m in .members),
   catch is automatic.

   Pylint: fine.

   Mypy: adding None to the set of values changes the type from T to
   Optional[T].  Because of this, mypy commonly can't prove valid uses
   are valid.  Keeping it happy requires cluttering the code with
   assertions and such.  Meh.

   Note: catching invalid uses is a run time check.  Mypy won't.

1.b. Pick an invalid value of type T (here: [])

   How to catch: same as 1.a., except automatic catch is rare.  Meh.

   Pylint: fine.

   Mypy: fine.

2. Declare in .__init__() without initializing.  Initialize to valid
   value in .check()

   How to catch: always automatic.  Good, me want!

   Pylint: fine.

   Mypy: fine.

   Note: catching invalid uses is a run time check.  Mypy won't.

3. Express the state transition in the type system

   To catch invalid uses statically with mypy, we need to use different
   types before and after the state transition.  Feels possible.  Also
   feels ludicrously overengineered.

May I have 2., please?

>> Splitting .checked feels like a separate change, though.  I can't quite
>> see why we need it.  Help me out: what problem does it solve?
>>
>
> Mechanically, I wanted to eliminate the Optional type from the members
> field, but you have conditionals in the code that use the presence or
> absence of this field as a query to determine if we had run check or not
> yet.
>
> So I did the stupidest possible thing and added a "checked" variable to
> explicitly represent it.

If 2. complicates the existing "have we .check()ed?" code too much, then
adding such a variable may indeed be useful.

>> > This simplifies the typing from a cumbersome Optional[List[T]] to merely
>> > a List[T], which is more pythonic: it is safe to iterate over an empty
>> > list with "for x in []" whereas with an Optional[List[T]] you have to
>> > rely on the more cumbersome "if L: for x in L: ..."
>>
>> Yes, but when L is None, it's *invalid*, and for i in L *should* fail
>> when L is invalid.
>>
>
> Sure, but it's so invalid that it causes static typing errors.
>
> You can't write "for x in None" in a way that makes mypy happy, None is not
> iterable.

A variable that is declared, but not initialized (2. above) is also not
iterable, and it does make mypy happy, doesn't it?

> If you want to preserve the behavior of "iterating an empty collection is
> an Assertion", you need a custom iterator that throws an exception when the
> collection is empty. I can do that, if you'd like, but I think it's
> actually fine to just allow the collection

[PATCH v2 1/2] target/s390x: Fix LAE setting a wrong access register

2024-01-11 Thread Ilya Leoshkevich

LAE should set the access register corresponding to the first operand,
instead, it always modifies access register 1.

Co-developed-by: Ido Plat 
Cc: qemu-sta...@nongnu.org
Fixes: a1c7610a6879 ("target-s390x: implement LAY and LAEY instructions")
Reviewed-by: David Hildenbrand 
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/translate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 62ab2be8b12..8df00b7df9f 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -3221,6 +3221,7 @@ static DisasJumpType op_mov2e(DisasContext *s, DisasOps 
*o)
 {
 int b2 = get_field(s, b2);
 TCGv ar1 = tcg_temp_new_i64();
+int r1 = get_field(s, r1);
 
 o->out = o->in2;
 o->in2 = NULL;
@@ -3244,7 +3245,7 @@ static DisasJumpType op_mov2e(DisasContext *s, DisasOps 
*o)
 break;
 }
 
-tcg_gen_st32_i64(ar1, tcg_env, offsetof(CPUS390XState, aregs[1]));
+tcg_gen_st32_i64(ar1, tcg_env, offsetof(CPUS390XState, aregs[r1]));
 return DISAS_NEXT;
 }
 
-- 
2.43.0

Re: [NOTFORMERGE PATCH 2/2] gitlab: Add Loongarch64 KVM-only build

2024-01-11 Thread Philippe Mathieu-Daudé


On 11/1/24 08:10, Thomas Huth wrote:

On 02/01/2024 18.22, Philippe Mathieu-Daudé wrote:

Signed-off-by: Philippe Mathieu-Daudé 
---
Used to test 
https://lore.kernel.org/qemu-devel/20231228084051.3235354-1-zhaotian...@loongson.cn/


So why is it NOTFORMERGE ? Don't we want to test KVM-only builds for 
loongarch in the long run?


We do. I was just waiting for the loongarch64/KVM support to land first.

Re: [PATCH 11/19] qapi/schema: fix QAPISchemaArrayType.check's call to resolve_type

2024-01-11 Thread Markus Armbruster

John Snow  writes:

> On Thu, Nov 23, 2023, 8:03 AM Markus Armbruster  wrote:
>
>> John Snow  writes:
>>
>> > On Wed, Nov 22, 2023 at 7:59 AM Markus Armbruster  
>> > wrote:
>> >>
>> >> John Snow  writes:
>> >>
>> >> > There's more conditionals in here than we can reasonably pack into a
>> >> > terse little statement, so break it apart into something more explicit.
>> >> >
>> >> > (When would a built-in array ever cause a QAPISemError? I don't know,
>> >> > maybe never - but the type system wasn't happy all the same.)
>> >> >
>> >> > Signed-off-by: John Snow 
>> >> > ---
>> >> >  scripts/qapi/schema.py | 11 +--
>> >> >  1 file changed, 9 insertions(+), 2 deletions(-)
>> >> >
>> >> > diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
>> >> > index 462acb2bb61..164d86c4064 100644
>> >> > --- a/scripts/qapi/schema.py
>> >> > +++ b/scripts/qapi/schema.py
>> >> > @@ -384,9 +384,16 @@ def need_has_if_optional(self):
>> >> >
>> >> >  def check(self, schema):
>> >> >  super().check(schema)
>> >> > +
>> >> > +if self.info:
>> >> > +assert self.info.defn_meta  # guaranteed to be set by expr.py
>> >> > +what = self.info.defn_meta
>> >> > +else:
>> >> > +what = 'built-in array'
>> >> > +
>> >> >  self._element_type = schema.resolve_type(
>> >> > -self._element_type_name, self.info,
>> >> > -self.info and self.info.defn_meta)
>> >> > +self._element_type_name, self.info, what
>> >> > +)
>> >> >  assert not isinstance(self.element_type, QAPISchemaArrayType)
>> >> >
>> >> >  def set_module(self, schema):
>> >>
>> >> What problem are you solving here?
>> >>
>> >
>> > 1. "self.info and self.info.defn_meta" is the wrong type ifn't self.info
>>
>> self.info is Optional[QAPISourceInfo].
>>
>> When self.info, then self.info.defn_meta is Optional[str].
>>
>> Naive me expects self.info and self.info.defn_meta to be Optional[str].
>> Playing with mypy...  it seems to be Union[QAPISourceInfo, None, str].
>> Type inference too weak.
>>
>
> I think my expectations match yours: "x and y" should return either x or y,
> so the resulting type would naively be Union[X | Y], which would indeed be
> Union[QAPISourceInfo | None | str], but:
>
> If QAPISourceInfo is *false-y*, but not None, it'd be possible for the
> expression to yield a QAPISourceInfo. mypy does not understand that
> QAPISourceInfo can never be false-y.
>
> (That I know of. Maybe there's a trick to annotate it. I like your solution
> below better anyway, just curious about the exact nature of this
> limitation.)
>
>
>> > 2. self.info.defn_meta is *also* not guaranteed by static types
>>
>> Yes.  We know it's not None ("guaranteed to be set by expr.py"), but the
>> type system doesn't.
>>
>
> Mmhmm.
>
>
>> > ultimately: we need to assert self.info and self.info.defn_meta both;
>> > but it's possible (?) that we don't have self.info in the case that
>> > we're a built-in array, so I handle that.
>>
>> This bring us back to the question in your commit message: "When would a
>> built-in array ever cause a QAPISemError?"  Short answer: never.
>
> Right, okay. I just couldn't guarantee it statically. I knew this patch was
> a little bananas, sorry for tossing you the stinkbomb.

No need to be sorry!  Feels like an efficient way to collaborate with
me.

>> Long answer.  We're dealing with a *specific* QAPISemError here, namely
>> .resolve_type()'s "uses unknown type".  If this happens for a built-in
>> array, it's a programming error.
>>
>> Let's commit such an error to see what happens: stick
>>
>> self._make_array_type('xxx', None)
>>
>> Dies like this:
>>
>> Traceback (most recent call last):
>>   File "/work/armbru/qemu/scripts/qapi/main.py", line 94, in main
>> generate(args.schema,
>>   File "/work/armbru/qemu/scripts/qapi/main.py", line 50, in generate
>> schema = QAPISchema(schema_file)
>>  ^^^
>>   File "/work/armbru/qemu/scripts/qapi/schema.py", line 938, in
>> __init__
>> self.check()
>>   File "/work/armbru/qemu/scripts/qapi/schema.py", line 1225, in check
>> ent.check(self)
>>   File "/work/armbru/qemu/scripts/qapi/schema.py", line 373, in check
>> self.element_type = schema.resolve_type(
>> 
>>   File "/work/armbru/qemu/scripts/qapi/schema.py", line 973, in
>> resolve_type
>> raise QAPISemError(
>> qapi.error.QAPISemError: 
>>
>> During handling of the above exception, another exception occurred:
>>
>> Traceback (most recent call last):
>>   File "/work/armbru/qemu/scripts/qapi-gen.py", line 19, in 
>> sys.exit(main.main())
>>  ^^^
>>   File "/work/armbru/qemu/scripts/qapi/main.py", line 101, in main
>> print(err, file=sys.stderr)
>>   File "/work/armbru/qemu/scripts/qapi/error.py"

Re: [PATCH] hw/virtio: remove meaningless NULL-check

2024-01-11 Thread Дмитрий Фролов


ping

Re: [PATCH v3 13/14] hw/arm: Prefer arm_feature(AARCH64) over object_property_find(aarch64)

2024-01-11 Thread Philippe Mathieu-Daudé


On 10/1/24 20:53, Philippe Mathieu-Daudé wrote:

The "aarch64" property is added to ARMCPU when the
ARM_FEATURE_AARCH64 feature is available. Rather than
checking whether the QOM property is present, directly
check the feature.

Suggested-by: Markus Armbruster 
Signed-off-by: Philippe Mathieu-Daudé 
---
  hw/arm/virt.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 49ed5309ff..a43e87874c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2140,7 +2140,7 @@ static void machvirt_init(MachineState *machine)
  numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
&error_fatal);
  
-aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);

+aarch64 &= arm_feature(cpu_env(cs), ARM_FEATURE_AARCH64);


So after this patch there are no more use of the ARMCPU "aarch64"
property from code. Still it is exposed via the qom-tree. Thus it
can be set (see aarch64_cpu_set_aarch64). I could understand one
flip this feature to create a custom CPU (as a big-LITTLE setup
as Marc mentioned on IRC), but I don't understand what is the
expected behavior when this is flipped at runtime. Can that
happen in real hardware (how could the guest react to that...)?

Thanks,

Phil.

Re: [PULL 2/7] s390x: do a subsystem reset before the unprotect on reboot

2024-01-11 Thread Cédric Le Goater


On 1/10/24 21:28, Matthew Rosato wrote:

On 1/10/24 1:30 PM, Cédric Le Goater wrote:

On 9/12/23 13:41, Thomas Huth wrote:

From: Janosch Frank 

Bound APQNs have to be reset before tearing down the secure config via
s390_machine_unprotect(). Otherwise the Ultravisor will return a error
code.

So let's do a subsystem_reset() which includes a AP reset before the
unprotect call. We'll do a full device_reset() afterwards which will
reset some devices twice. That's ok since we can't move the
device_reset() before the unprotect as it includes a CPU clear reset
which the Ultravisor does not expect at that point in time.

Signed-off-by: Janosch Frank 
Message-ID: <20230901114851.154357-1-fran...@linux.ibm.com>
Tested-by: Viktor Mihajlovski 
Acked-by: Christian Borntraeger 
Signed-off-by: Thomas Huth 
---
   hw/s390x/s390-virtio-ccw.c | 10 ++
   1 file changed, 10 insertions(+)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 3dd0b2372d..2d75f2131f 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -438,10 +438,20 @@ static void s390_machine_reset(MachineState *machine, 
ShutdownCause reason)
   switch (reset_type) {
   case S390_RESET_EXTERNAL:
   case S390_RESET_REIPL:
+    /*
+ * Reset the subsystem which includes a AP reset. If a PV
+ * guest had APQNs attached the AP reset is a prerequisite to
+ * unprotecting since the UV checks if all APQNs are reset.
+ */
+    subsystem_reset();



This commit introduced a regression with pass-through ISM devices.

After startup, a reboot will generate extra device resets (vfio-pci in
this case) which break the pass-through ISM device in a subtle way,


Hi Cedric, thanks for reporting this...  I was able to reproduce just now, and 
it looks like ISM firmware is unhappy specifically with this subsystem_reset 
call added by ef1535901a0, not necessarily the multiple attempts at reset -- I 
verified that reverting ef1535901a0 resolves the ISM issue, but if I instead 
try reverting the older 03451953c79e while leaving ef1535901a0 in place then 
ISM devices still break on guest reboot.



probably related to IOMMU mapping according to 03451953c79e
("s390x/pci: reset ISM passthrough devices on shutdown and system
reset"). After poweroff, the device is left in a sort-of-a-use state
on the host and the LPAR has to be rebooted to clear the invalid state
of the device. To be noted, that standard PCI devices are immune to
this change.


As a bit of background, ISM firmware is very sensitive re: the contents of the 
(host) IOMMU and attempts at manipulation that it deems to be out-of-order; the 
point of 03451953c79e was to ensure that the device gets a reset before we 
attempt at unmapping anything that wasn't cleaned up in an orderly fashion by 
the (guest) ism driver at the time of shutdown/reset (e.g. underlying firmware 
may view guest SBAs in the IOMMU as still registered for use and will throw an 
error condition at attempts to remove their entries in the IOMMU without first 
going through an unregistration process).

The unmap that would make ISM upset would generally be coming out of 
vfio_listener_region_del where we just do one big vfio_dma_unmap -- a quick 
trace shows that the subsystem_reset call added by ef1535901a0 is causing the 
vfio_listener_region_del to once again trigger before the pci reset of the ISM 
device, effectively re-introducing the condition that 03451953c79e was trying 
to resolve.


Yes. I saw the vfio_listener_region_del trace coming first and came to
the conclusion it was related to IOMMU mappings.


The extra resets should be avoided in some ways, (a shutdown notifier and
a reset callback are already registered for ISM devices by 03451953c79e)


So as mentioned above, it's not the extra resets that are the issue, it's the 
order of operations.  Basically, we need to drive pci_device_reset for any ISM 
device associated with the guest before we destroy the vfio memory listener 
(now triggered in this case via subsystem_reset).  So if we must drive this 
subsystem_reset before we trigger the device reset callbacks then it might 
require a s390 pci bus routine that is called before or during subsystem_reset 
just to reset the ISM devices associated with this guest first; I'm not sure 
yet.

As an aside:  I wonder why we are always doing the subsystem_reset here 
unconditionally rather than only when s390_is_pv() since that seems to be the 
only case that requires it.


That would be a start to workaround the issue.
 

and, most important, once the VM terminates, the device resources
should be cleared in the host kernel. So there seem to be two issues
to address in mainline QEMU and in Linux AFAICT.


Because of the condition detected by ISM firmware as described above, the host device was placed in an error state and remains in that state. 


OK. this condition is considered serious enough to be reported to a
management level. This seems a bit exc

Re: [PATCH v3 13/14] hw/arm: Prefer arm_feature(AARCH64) over object_property_find(aarch64)

2024-01-11 Thread Marc Zyngier

On Thu, 11 Jan 2024 09:39:18 +0000,
Philippe Mathieu-Daudé  wrote:
> 
> On 10/1/24 20:53, Philippe Mathieu-Daudé wrote:
> > The "aarch64" property is added to ARMCPU when the
> > ARM_FEATURE_AARCH64 feature is available. Rather than
> > checking whether the QOM property is present, directly
> > check the feature.
> > 
> > Suggested-by: Markus Armbruster 
> > Signed-off-by: Philippe Mathieu-Daudé 
> > ---
> >   hw/arm/virt.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index 49ed5309ff..a43e87874c 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -2140,7 +2140,7 @@ static void machvirt_init(MachineState *machine)
> >   numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], 
> > DEVICE(cpuobj),
> > &error_fatal);
> >   -aarch64 &= object_property_get_bool(cpuobj, "aarch64",
> > NULL);
> > +aarch64 &= arm_feature(cpu_env(cs), ARM_FEATURE_AARCH64);
> 
> So after this patch there are no more use of the ARMCPU "aarch64"
> property from code. Still it is exposed via the qom-tree. Thus it
> can be set (see aarch64_cpu_set_aarch64). I could understand one
> flip this feature to create a custom CPU (as a big-LITTLE setup
> as Marc mentioned on IRC), but I don't understand what is the
> expected behavior when this is flipped at runtime. Can that
> happen in real hardware (how could the guest react to that...)?

I don't think it makes any sense to do that while a guest is running
(and no HW I'm aware of would do this). However, it all depends what
you consider "run time". You could imagine creating a skeletal VM with
all features, and then apply a bunch of changes before the guest
actually runs.

I don't know enough about the qom-tree and dynamic manipulation of
these properties though, and I'm likely to be wrong about the expected
usage model.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.

Re: [NOTFORMERGE PATCH 2/2] gitlab: Add Loongarch64 KVM-only build

2024-01-11 Thread gaosong


在 2024/1/11 下午5:04, Thomas Huth 写道:

On 11/01/2024 09.50, gaosong wrote:

在 2024/1/11 下午4:20, Thomas Huth 写道:

On 11/01/2024 08.37, gaosong wrote:

Hi,

在 2024/1/11 下午3:10, Thomas Huth 写道:

On 02/01/2024 18.22, Philippe Mathieu-Daudé wrote:

Signed-off-by: Philippe Mathieu-Daudé 
---
Used to test 
https://lore.kernel.org/qemu-devel/20231228084051.3235354-1-zhaotian...@loongson.cn/


So why is it NOTFORMERGE ? Don't we want to test KVM-only builds 
for loongarch in the long run?


 Thomas


I think we can drop this title.

I tested this job with the latest loongarch kvm patches, but I found 
a test-hmp check error.
Can you recreate the error manually? i.e. compile with configure 
--disable-tcg and then run:


 V=2 QTEST_QEMU_BINARY=./qemu-system-loongarch64 tests/qtest/test-hmp

That should likely provide you with a hint where it is crashing

 Thomas

Thank you,

LoongArch does not support these cmds, or they have some problems:
-    "gva2gpa 0",
-    "memsave 0 4096 \"/dev/null\"",
-    "x /8i 0x100",
-    "xp /16x 0",

Could we disable these 4 cmds or the test_temp check?
After we fix the cmds problems, we can enable them.


Even if loongarch does not support one of these commands, it should 
not crash QEMU. So please fix the crashes first before considering to 
enable the KVM-only test in the CI.




Sure,  we will fix the cmds problems first.

Thanks.
Song Gao

Re: [RFC PATCH v3 05/30] migration/qemu-file: add utility methods for working with seekable channels

2024-01-11 Thread Peter Xu

On Mon, Nov 27, 2023 at 05:25:47PM -0300, Fabiano Rosas wrote:
> From: Nikolay Borisov 
> 
> Add utility methods that will be needed when implementing 'fixed-ram'
> migration capability.
> 
> qemu_file_is_seekable
> qemu_put_buffer_at
> qemu_get_buffer_at
> qemu_set_offset
> qemu_get_offset
> 
> Signed-off-by: Nikolay Borisov 
> Signed-off-by: Fabiano Rosas 
> Reviewed-by: Daniel P. Berrangé 
> ---
>  include/migration/qemu-file-types.h |  2 +
>  migration/qemu-file.c   | 82 +
>  migration/qemu-file.h   |  6 +++
>  3 files changed, 90 insertions(+)
> 
> diff --git a/include/migration/qemu-file-types.h 
> b/include/migration/qemu-file-types.h
> index 9ba163f333..adec5abc07 100644
> --- a/include/migration/qemu-file-types.h
> +++ b/include/migration/qemu-file-types.h
> @@ -50,6 +50,8 @@ unsigned int qemu_get_be16(QEMUFile *f);
>  unsigned int qemu_get_be32(QEMUFile *f);
>  uint64_t qemu_get_be64(QEMUFile *f);
>  
> +bool qemu_file_is_seekable(QEMUFile *f);
> +
>  static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
>  {
>  qemu_put_be64(f, *pv);
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 94231ff295..faf6427b91 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -33,6 +33,7 @@
>  #include "options.h"
>  #include "qapi/error.h"
>  #include "rdma.h"
> +#include "io/channel-file.h"
>  
>  #define IO_BUF_SIZE 32768
>  #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
> @@ -255,6 +256,10 @@ static void qemu_iovec_release_ram(QEMUFile *f)
>  memset(f->may_free, 0, sizeof(f->may_free));
>  }
>  
> +bool qemu_file_is_seekable(QEMUFile *f)
> +{
> +return qio_channel_has_feature(f->ioc, QIO_CHANNEL_FEATURE_SEEKABLE);
> +}
>  
>  /**
>   * Flushes QEMUFile buffer
> @@ -447,6 +452,83 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, 
> size_t size)
>  }
>  }
>  
> +void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
> +off_t pos)
> +{
> +Error *err = NULL;
> +
> +if (f->last_error) {
> +return;
> +}
> +
> +qemu_fflush(f);
> +qio_channel_pwrite(f->ioc, (char *)buf, buflen, pos, &err);

Partial writes won't set err.  Do we want to check the retval here too and
fail properly if detected partial writes?

> +
> +if (err) {
> +qemu_file_set_error_obj(f, -EIO, err);
> +} else {
> +stat64_add(&mig_stats.qemu_file_transferred, buflen);

buflen is only accurate if the above check for partial writes is done, iiuc.

> +}
> +
> +return;
> +}
> +
> +
> +size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
> +  off_t pos)
> +{
> +Error *err = NULL;
> +ssize_t ret;
> +
> +if (f->last_error) {
> +return 0;
> +}
> +
> +ret = qio_channel_pread(f->ioc, (char *)buf, buflen, pos, &err);

Same question here.

> +if (ret == -1 || err) {
> +goto error;
> +}
> +
> +return (size_t)ret;
> +
> + error:
> +qemu_file_set_error_obj(f, -EIO, err);
> +return 0;
> +}
> +
> +void qemu_set_offset(QEMUFile *f, off_t off, int whence)
> +{
> +Error *err = NULL;
> +off_t ret;
> +
> +qemu_fflush(f);
> +
> +if (!qemu_file_is_writable(f)) {
> +f->buf_index = 0;
> +f->buf_size = 0;
> +}

There's the qemu_file_is_writable() check after all, then put qemu_fflush()
into condition too?

  if (qemu_file_is_writable(f)) {
  qemu_fflush(f);
  } else {
 /* Drop all the cached buffers if existed; will trigger a re-fill later */
 f->buf_index = 0;
 f->buf_size = 0;
  }

> +
> +ret = qio_channel_io_seek(f->ioc, off, whence, &err);
> +if (ret == (off_t)-1) {
> +qemu_file_set_error_obj(f, -EIO, err);
> +}
> +}
> +
> +off_t qemu_get_offset(QEMUFile *f)
> +{
> +Error *err = NULL;
> +off_t ret;
> +
> +qemu_fflush(f);
> +
> +ret = qio_channel_io_seek(f->ioc, 0, SEEK_CUR, &err);
> +if (ret == (off_t)-1) {
> +qemu_file_set_error_obj(f, -EIO, err);
> +}
> +return ret;
> +}
> +
> +
>  void qemu_put_byte(QEMUFile *f, int v)
>  {
>  if (f->last_error) {
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 8aec9fabf7..32fd4a34fd 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -75,6 +75,12 @@ QEMUFile *qemu_file_get_return_path(QEMUFile *f);
>  int qemu_fflush(QEMUFile *f);
>  void qemu_file_set_blocking(QEMUFile *f, bool block);
>  int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
> +void qemu_set_offset(QEMUFile *f, off_t off, int whence);
> +off_t qemu_get_offset(QEMUFile *f);
> +void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
> +off_t pos);
> +size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
> +  off_t pos);
>  
>  QIOChannel *qemu_file_get_ioc(QEMUFile *file);
>  
> -- 
> 2.35.3
> 

-- 
Peter Xu

Re: [PATCH v2 1/2] nubus-device: round Declaration ROM memory region address to qemu_target_page_size()

2024-01-11 Thread Mark Cave-Ayland


On 11/01/2024 06:22, Philippe Mathieu-Daudé wrote:


On 9/1/24 22:53, Mark Cave-Ayland wrote:

On 08/01/2024 23:06, Philippe Mathieu-Daudé wrote:


On 8/1/24 20:20, Mark Cave-Ayland wrote:

Declaration ROM binary images can be any arbitrary size, however if a host ROM
memory region is not aligned to qemu_target_page_size() then we fail the
"assert(!(iotlb & ~TARGET_PAGE_MASK))" check in tlb_set_page_full().

Ensure that the host ROM memory region is aligned to qemu_target_page_size()
and adjust the offset at which the Declaration ROM image is loaded, since Nubus
ROM images are unusual in that they are aligned to the end of the slot address
space.

Signed-off-by: Mark Cave-Ayland 
---
  hw/nubus/nubus-device.c | 16 
  1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/hw/nubus/nubus-device.c b/hw/nubus/nubus-device.c
index 49008e4938..e4f824d58b 100644
--- a/hw/nubus/nubus-device.c
+++ b/hw/nubus/nubus-device.c
@@ -10,6 +10,7 @@
  #include "qemu/osdep.h"
  #include "qemu/datadir.h"
+#include "exec/target_page.h"
  #include "hw/irq.h"
  #include "hw/loader.h"
  #include "hw/nubus/nubus.h"
@@ -30,7 +31,7 @@ static void nubus_device_realize(DeviceState *dev, Error 
**errp)
  NubusDevice *nd = NUBUS_DEVICE(dev);
  char *name, *path;
  hwaddr slot_offset;
-    int64_t size;
+    int64_t size, align_size;


Both are 'size_t'.


I had a look at include/hw/loader.h, and the function signature for 
get_image_size() returns int64_t. Does it not make sense to keep int64_t here and 
use uintptr_t for the pointer arithmetic as below so that everything matches?


Oh you are right:

$ git grep -E '(get_image_size|qemu_target_page_size|load_image_size)\(' include
include/exec/target_page.h:17:size_t qemu_target_page_size(void);
include/hw/loader.h:13:int64_t get_image_size(const char *filename);
include/hw/loader.h:30:ssize_t load_image_size(const char *filename, void *addr, 
size_t size);


So I guess int64_t is safer.


Okay.


  int ret;
  /* Super */
@@ -76,16 +77,23 @@ static void nubus_device_realize(DeviceState *dev, Error 
**errp)
  }
  name = g_strdup_printf("nubus-slot-%x-declaration-rom", nd->slot);
-    memory_region_init_rom(&nd->decl_rom, OBJECT(dev), name, size,
+
+    /*
+ * Ensure ROM memory region is aligned to target page size regardless
+ * of the size of the Declaration ROM image
+ */
+    align_size = ROUND_UP(size, qemu_target_page_size());
+    memory_region_init_rom(&nd->decl_rom, OBJECT(dev), name, align_size,
 &error_abort);
-    ret = load_image_mr(path, &nd->decl_rom);
+    ret = load_image_size(path, memory_region_get_ram_ptr(&nd->decl_rom) +
+    (uintptr_t)align_size - size, size);


memory_region_get_ram_ptr() returns a 'void *' so this looks dubious.
Maybe use a local variable to ease offset calculation?

   char *rombase = memory_region_get_ram_ptr(&nd->decl_rom);
   ret = load_image_size(path, rombase + align_size - size, size);

Otherwise KISS but ugly:

   ret = load_image_size(path,
 (void *)((uintptr_t)memory_region_get_ram_ptr(&nd->decl_rom)
  + align_size - size), size);


I prefer the first approach, but with uint8_t instead of char since it clarifies 
that it is a pointer to an arbitrary set of bytes as opposed to a string. Does that 
seem reasonable?


Sure! Then with that:

Reviewed-by: Philippe Mathieu-Daudé 


Thanks! I've also had an off-list request from Elliot to increase the maximum 
Declaration ROM size as enabling full debug can hit the existing 128k limit. I'll add 
this simple change into the series and repost as v3.







  g_free(path);
  g_free(name);
  if (ret < 0) {
  error_setg(errp, "could not load romfile \"%s\"", nd->romfile);
  return;
  }
-    memory_region_add_subregion(&nd->slot_mem, NUBUS_SLOT_SIZE - size,
+    memory_region_add_subregion(&nd->slot_mem, NUBUS_SLOT_SIZE - 
align_size,
  &nd->decl_rom);
  }
  }



ATB,

Mark.

Re: [PATCH v3 13/14] hw/arm: Prefer arm_feature(AARCH64) over object_property_find(aarch64)

2024-01-11 Thread Philippe Mathieu-Daudé


On 11/1/24 10:47, Marc Zyngier wrote:

On Thu, 11 Jan 2024 09:39:18 +0000,
Philippe Mathieu-Daudé  wrote:


On 10/1/24 20:53, Philippe Mathieu-Daudé wrote:

The "aarch64" property is added to ARMCPU when the
ARM_FEATURE_AARCH64 feature is available. Rather than
checking whether the QOM property is present, directly
check the feature.

Suggested-by: Markus Armbruster 
Signed-off-by: Philippe Mathieu-Daudé 
---
   hw/arm/virt.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 49ed5309ff..a43e87874c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2140,7 +2140,7 @@ static void machvirt_init(MachineState *machine)
   numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], 
DEVICE(cpuobj),
 &error_fatal);
   -aarch64 &= object_property_get_bool(cpuobj, "aarch64",
NULL);
+aarch64 &= arm_feature(cpu_env(cs), ARM_FEATURE_AARCH64);


So after this patch there are no more use of the ARMCPU "aarch64"
property from code. Still it is exposed via the qom-tree. Thus it
can be set (see aarch64_cpu_set_aarch64). I could understand someone
flipping this feature to create a custom CPU (as a big-LITTLE setup
as Marc mentioned on IRC), but I don't understand what is the
expected behavior when this is flipped at runtime. Can that
happen in real hardware (how could the guest react to that...)?


I don't think it makes any sense to do that while a guest is running
(and no HW I'm aware of would do this). However, it all depends what
you consider "run time". You could imagine creating a skeletal VM with
all features, and then apply a bunch of changes before the guest
actually runs.


Thanks, this makes sense and confirms my guess.


I don't know enough about the qom-tree and dynamic manipulation of
these properties though, and I'm likely to be wrong about the expected
usage model.


Kevin, Markus, this seems a good example of QOM "config" property that
is RW *before* Realize and should become RO *after* it.

QDev properties have PropertyInfo::realized_set_allowed set to false by
default, but here this property is added at the QOM (lower) layer, so
there is no such check IIUC.

Should "aarch64" become a static QDev property instead (registered via
device_class_set_props -> qdev_class_add_property)?

This is just one analyzed example; unfortunately there are many more...

Thanks,

Phil.

Re: [PULL 2/7] s390x: do a subsystem reset before the unprotect on reboot

2024-01-11 Thread Christian Borntraeger





Am 11.01.24 um 10:43 schrieb Cédric Le Goater:
[...]



On a side note, I am also seeing :


Michael?



[   73.989688] [ cut here ]
[   73.989696] unexpected non zero alert.mask 0x20
[   73.989748] WARNING: CPU: 9 PID: 4503 at arch/s390/kvm/interrupt.c:3214 
kvm_s390_gisa_destroy+0xd4/0xe8 [kvm]
[   73.989791] Modules linked in: vfio_pci vfio_pci_core irqbypass vhost_net 
vhost vhost_iotlb tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT 
nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 
nf_defrag_ipv4 nf_tables nfnetlink 8021q garp mrp rfkill sunrpc ext4 mbcache 
jbd2 vfio_ap zcrypt_cex4 vfio_ccw mdev vfio_iommu_type1 vfio drm fuse i2c_core 
drm_panel_orientation_quirks xfs libcrc32c dm_service_time mlx5_core sd_mod 
t10_pi ghash_s390 sg prng des_s390 libdes sha3_512_s390 sha3_256_s390 mlxfw tls 
scm_block psample eadm_sch qeth_l2 bridge stp llc dasd_eckd_mod zfcp qeth 
dasd_mod scsi_transport_fc ccwgroup qdio dm_multipath dm_mirror dm_region_hash 
dm_log dm_mod pkey zcrypt kvm aes_s390
[   73.989825] CPU: 9 PID: 4503 Comm: worker Kdump: loaded Not tainted 
6.7.0-clg-dirty #52
[   73.989827] Hardware name: IBM 3931 LA1 400 (LPAR)
[   73.989829] Krnl PSW : 0704c0018000 03ff7fcd2198 
(kvm_s390_gisa_destroy+0xd8/0xe8 [kvm])
[   73.989845]    R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 
RI:0 EA:3
[   73.989847] Krnl GPRS: c000fffe 00070027 0023 
0007df4249c8
[   73.989849]    03800649b858 03800649b850 0007fcb9db00 

[   73.989851]    8ebae8c8 83a8c4f0 00b69900 
8ebac000
[   73.989853]    03ff903aef68 03800649bd98 03ff7fcd2194 
03800649b9f8
[   73.989859] Krnl Code: 03ff7fcd2188: c0224f88    larl    
%r2,03ff7fd1c098
   03ff7fcd218e: c0e5fffea360    brasl    
%r14,03ff7fca684e
  #03ff7fcd2194: af00    mc    0,0
  >03ff7fcd2198: e310b7680204    lg    
%r1,10088(%r11)
   03ff7fcd219e: a7f4ffae    brc    
15,03ff7fcd20fa
   03ff7fcd21a2: 0707    bcr    0,%r7
   03ff7fcd21a4: 0707    bcr    0,%r7
   03ff7fcd21a6: 0707    bcr    0,%r7
[   73.989929] Call Trace:
[   73.989931]  [<03ff7fcd2198>] kvm_s390_gisa_destroy+0xd8/0xe8 [kvm]
[   73.989946] ([<03ff7fcd2194>] kvm_s390_gisa_destroy+0xd4/0xe8 [kvm])
[   73.989960]  [<03ff7fcc1578>] kvm_arch_destroy_vm+0x50/0x118 [kvm]
[   73.989974]  [<03ff7fcb00a2>] kvm_destroy_vm+0x15a/0x260 [kvm]
[   73.989985]  [<03ff7fcb021e>] kvm_vm_release+0x36/0x48 [kvm]
[   73.989996]  [<0007de4f830c>] __fput+0x94/0x2d0
[   73.990009]  [<0007de20d838>] task_work_run+0x88/0xe8
[   73.990013]  [<0007de1e75e0>] do_exit+0x2e0/0x4e0
[   73.990016]  [<0007de1e79c0>] do_group_exit+0x40/0xb8
[   73.990017]  [<0007de1f96e8>] send_sig_info+0x0/0xa8
[   73.990021]  [<0007de194b26>] arch_do_signal_or_restart+0x56/0x318
[   73.990025]  [<0007de28bf12>] exit_to_user_mode_prepare+0x10a/0x1a0
[   73.990028]  [<0007deb607d2>] __do_syscall+0x152/0x1f8
[   73.990032]  [<0007deb70ac8>] system_call+0x70/0x98
[   73.990036] Last Breaking-Event-Address:
[   73.990037]  [<0007de1e0c58>] __warn_printk+0x78/0xe8

[PATCH v3 2/3] nubus.h: increase maximum Declaration ROM size from 128k to 1Mb

2024-01-11 Thread Mark Cave-Ayland

Whilst 128k is more than enough for a typical Declaration ROM, a C compiler
configured to produce an unstripped debug binary can generate a ROM image that
exceeds this limit. Increase the maximum size to 1Mb to help make life easier
for developers.

Signed-off-by: Mark Cave-Ayland 
---
 include/hw/nubus/nubus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/nubus/nubus.h b/include/hw/nubus/nubus.h
index b3b4d2eadb..fee79b71d1 100644
--- a/include/hw/nubus/nubus.h
+++ b/include/hw/nubus/nubus.h
@@ -51,7 +51,7 @@ struct NubusBus {
 qemu_irq irqs[NUBUS_IRQS];
 };
 
-#define NUBUS_DECL_ROM_MAX_SIZE    (128 * KiB)
+#define NUBUS_DECL_ROM_MAX_SIZE    (1 * MiB)
 
 struct NubusDevice {
 DeviceState qdev;
-- 
2.39.2

[PATCH v3 1/3] nubus-device: round Declaration ROM memory region address to qemu_target_page_size()

2024-01-11 Thread Mark Cave-Ayland

Declaration ROM binary images can be any arbitrary size, however if a host ROM
memory region is not aligned to qemu_target_page_size() then we fail the
"assert(!(iotlb & ~TARGET_PAGE_MASK))" check in tlb_set_page_full().

Ensure that the host ROM memory region is aligned to qemu_target_page_size()
and adjust the offset at which the Declaration ROM image is loaded, since Nubus
ROM images are unusual in that they are aligned to the end of the slot address
space.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/nubus/nubus-device.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/hw/nubus/nubus-device.c b/hw/nubus/nubus-device.c
index 49008e4938..be4cb24696 100644
--- a/hw/nubus/nubus-device.c
+++ b/hw/nubus/nubus-device.c
@@ -10,6 +10,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/datadir.h"
+#include "exec/target_page.h"
 #include "hw/irq.h"
 #include "hw/loader.h"
 #include "hw/nubus/nubus.h"
@@ -30,7 +31,8 @@ static void nubus_device_realize(DeviceState *dev, Error 
**errp)
 NubusDevice *nd = NUBUS_DEVICE(dev);
 char *name, *path;
 hwaddr slot_offset;
-int64_t size;
+int64_t size, align_size;
+uint8_t *rom_ptr;
 int ret;
 
 /* Super */
@@ -76,16 +78,24 @@ static void nubus_device_realize(DeviceState *dev, Error 
**errp)
 }
 
 name = g_strdup_printf("nubus-slot-%x-declaration-rom", nd->slot);
-memory_region_init_rom(&nd->decl_rom, OBJECT(dev), name, size,
+
+/*
+ * Ensure ROM memory region is aligned to target page size regardless
+ * of the size of the Declaration ROM image
+ */
+align_size = ROUND_UP(size, qemu_target_page_size());
+memory_region_init_rom(&nd->decl_rom, OBJECT(dev), name, align_size,
&error_abort);
-ret = load_image_mr(path, &nd->decl_rom);
+rom_ptr = memory_region_get_ram_ptr(&nd->decl_rom);
+ret = load_image_size(path, rom_ptr + (uintptr_t)(align_size - size),
+  size);
 g_free(path);
 g_free(name);
 if (ret < 0) {
 error_setg(errp, "could not load romfile \"%s\"", nd->romfile);
 return;
 }
-memory_region_add_subregion(&nd->slot_mem, NUBUS_SLOT_SIZE - size,
+memory_region_add_subregion(&nd->slot_mem, NUBUS_SLOT_SIZE - 
align_size,
 &nd->decl_rom);
 }
 }
-- 
2.39.2

[PATCH v3 0/3] nubus: add nubus-virtio-mmio device

2024-01-11 Thread Mark Cave-Ayland

This series introduces a new nubus-virtio-mmio device which can be plugged into
the q800 machine to enable a 68k Classic MacOS guest to access virtio devices
such as virtio-9p-device (host filesharing), virtio-gpu (extended framebuffer
support) and virtio-tablet-device (absolute positioning).

Once the nubus-virtio-mmio device has been plugged into the q800 machine, virtio
devices can be accessed by a Classic MacOS guest using the drivers from the
classicvirtio project at https://github.com/elliotnunn/classicvirtio.

The nubus-virtio-mmio device is purposefully designed to be similar to the
virtio-mmio interface used by the existing 68k virt machine, making use of a
similar memory layout and the goldfish PIC for simple interrupt management. The
main difference is that only a single goldfish PIC is used, however that still
allows up to 32 virtio devices to be connected using a single nubus card.

Patch 1 fixes an alignment bug in the existing nubus-device Declaration ROM code
whereby some ROM images could trigger an assert() in QEMU, patch 2 increases the
maximum Declaration ROM size (to aid development), whilst patch 3 adds the
nubus-virtio-mmio device itself.

Signed-off-by: Mark Cave-Ayland 

[Patches still needing review: 2, 3]

v3:
- Rebase onto master
- Update patch 1 alignment calculation to use intermediate uint8_t rom_ptr
  variable, add Phil's R-B tag
- Add patch 2 to increase maximum Declaration ROM size to 1MB

v2:
- Rebase onto master
- Adjust comment in patch 1 as suggested by Phil


Mark Cave-Ayland (3):
  nubus-device: round Declaration ROM memory region address to
qemu_target_page_size()
  nubus.h: increase maximum Declaration ROM size from 128k to 1Mb
  nubus: add nubus-virtio-mmio device

 hw/nubus/meson.build |   1 +
 hw/nubus/nubus-device.c  |  18 +++--
 hw/nubus/nubus-virtio-mmio.c | 102 +++
 include/hw/nubus/nubus-virtio-mmio.h |  36 ++
 include/hw/nubus/nubus.h |   2 +-
 5 files changed, 154 insertions(+), 5 deletions(-)
 create mode 100644 hw/nubus/nubus-virtio-mmio.c
 create mode 100644 include/hw/nubus/nubus-virtio-mmio.h

-- 
2.39.2

[PATCH v3 3/3] nubus: add nubus-virtio-mmio device

2024-01-11 Thread Mark Cave-Ayland

The nubus-virtio-mmio device is a Nubus card that contains a set of 32 
virtio-mmio
devices and a goldfish PIC similar to the m68k virt machine that can be plugged
into the m68k q800 machine.

There are currently a number of drivers under development that can be used in
conjunction with this device to provide accelerated and/or additional hypervisor
services to 68k Classic MacOS.

Signed-off-by: Mark Cave-Ayland 
---
 hw/nubus/meson.build |   1 +
 hw/nubus/nubus-virtio-mmio.c | 102 +++
 include/hw/nubus/nubus-virtio-mmio.h |  36 ++
 3 files changed, 139 insertions(+)
 create mode 100644 hw/nubus/nubus-virtio-mmio.c
 create mode 100644 include/hw/nubus/nubus-virtio-mmio.h

diff --git a/hw/nubus/meson.build b/hw/nubus/meson.build
index e7ebda8993..9a7a12ea68 100644
--- a/hw/nubus/meson.build
+++ b/hw/nubus/meson.build
@@ -2,6 +2,7 @@ nubus_ss = ss.source_set()
 nubus_ss.add(files('nubus-device.c'))
 nubus_ss.add(files('nubus-bus.c'))
 nubus_ss.add(files('nubus-bridge.c'))
+nubus_ss.add(files('nubus-virtio-mmio.c'))
 nubus_ss.add(when: 'CONFIG_Q800', if_true: files('mac-nubus-bridge.c'))
 
 system_ss.add_all(when: 'CONFIG_NUBUS', if_true: nubus_ss)
diff --git a/hw/nubus/nubus-virtio-mmio.c b/hw/nubus/nubus-virtio-mmio.c
new file mode 100644
index 00..58a63c84d0
--- /dev/null
+++ b/hw/nubus/nubus-virtio-mmio.c
@@ -0,0 +1,102 @@
+/*
+ * QEMU Macintosh Nubus Virtio MMIO card
+ *
+ * Copyright (c) 2024 Mark Cave-Ayland 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/nubus/nubus-virtio-mmio.h"
+
+
+#define NUBUS_VIRTIO_MMIO_PIC_OFFSET   0
+#define NUBUS_VIRTIO_MMIO_DEV_OFFSET   0x200
+
+
+static void nubus_virtio_mmio_set_input_irq(void *opaque, int n, int level)
+{
+NubusDevice *nd = NUBUS_DEVICE(opaque);
+
+nubus_set_irq(nd, level);
+}
+
+static void nubus_virtio_mmio_realize(DeviceState *dev, Error **errp)
+{
+NubusVirtioMMIODeviceClass *nvmdc = NUBUS_VIRTIO_MMIO_GET_CLASS(dev);
+NubusVirtioMMIO *s = NUBUS_VIRTIO_MMIO(dev);
+NubusDevice *nd = NUBUS_DEVICE(dev);
+SysBusDevice *sbd;
+int i, offset;
+
+nvmdc->parent_realize(dev, errp);
+if (*errp) {
+return;
+}
+
+/* Goldfish PIC */
+sbd = SYS_BUS_DEVICE(&s->pic);
+if (!sysbus_realize(sbd, errp)) {
+return;
+}
+memory_region_add_subregion(&nd->slot_mem, NUBUS_VIRTIO_MMIO_PIC_OFFSET,
+sysbus_mmio_get_region(sbd, 0));
+sysbus_connect_irq(sbd, 0,
+   qdev_get_gpio_in_named(dev, "pic-input-irq", 0));
+
+/* virtio-mmio devices */
+offset = NUBUS_VIRTIO_MMIO_DEV_OFFSET;
+for (i = 0; i < NUBUS_VIRTIO_MMIO_NUM_DEVICES; i++) {
+sbd = SYS_BUS_DEVICE(&s->virtio_mmio[i]);
+qdev_prop_set_bit(DEVICE(sbd), "force-legacy", false);
+if (!sysbus_realize_and_unref(sbd, errp)) {
+return;
+}
+
+memory_region_add_subregion(&nd->slot_mem, offset,
+sysbus_mmio_get_region(sbd, 0));
+offset += 0x200;
+
+sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(DEVICE(&s->pic), i));
+}
+}
+
+static void nubus_virtio_mmio_init(Object *obj)
+{
+NubusVirtioMMIO *s = NUBUS_VIRTIO_MMIO(obj);
+int i;
+
+object_initialize_child(obj, "pic", &s->pic, TYPE_GOLDFISH_PIC);
+for (i = 0; i < NUBUS_VIRTIO_MMIO_NUM_DEVICES; i++) {
+char *name = g_strdup_printf("virtio-mmio[%d]", i);
+object_initialize_child(obj, name, &s->virtio_mmio[i],
+TYPE_VIRTIO_MMIO);
+g_free(name);
+}
+
+/* Input from goldfish PIC */
+qdev_init_gpio_in_named(DEVICE(obj), nubus_virtio_mmio_set_input_irq,
+"pic-input-irq", 1);
+}
+
+static void nubus_virtio_mmio_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+NubusVirtioMMIODeviceClass *nvmdc = NUBUS_VIRTIO_MMIO_CLASS(oc);
+
+device_class_set_parent_realize(dc, nubus_virtio_mmio_realize,
+&nvmdc->parent_realize);
+}
+
+static const TypeInfo nubus_virtio_mmio_types[] = {
+{
+.name = TYPE_NUBUS_VIRTIO_MMIO,
+.parent = TYPE_NUBUS_DEVICE,
+.instance_init = nubus_virtio_mmio_init,
+.instance_size = sizeof(NubusVirtioMMIO),
+.class_init = nubus_virtio_mmio_class_init,
+.class_size = sizeof(NubusVirtioMMIODeviceClass),
+},
+};
+
+DEFINE_TYPES(nubus_virtio_mmio_types)
diff --git a/include/hw/nubus/nubus-virtio-mmio.h 
b/include/hw/nubus/nubus-virtio-mmio.h
new file mode 100644
index 00..de497b7f76
--- /dev/null
+++ b/include/hw/nubus/nubus-virtio-mmio.h
@@ -0,0 +1,36 @@
+/*
+ * QEMU Macintosh Nubus Virtio MMIO card
+ *
+ * Copyright (c) 2023 Mark Cave-Ayland 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_NUBUS_VIRTIO_MMIO_H
+#define HW_NUBUS_VIRTIO_MMIO_H
+
+#include "hw/nu

Re: [RFC PATCH v3 06/30] migration/ram: Introduce 'fixed-ram' migration capability

2024-01-11 Thread Peter Xu

On Mon, Nov 27, 2023 at 05:25:48PM -0300, Fabiano Rosas wrote:
> Add a new migration capability 'fixed-ram'.
> 
> The core of the feature is to ensure that each RAM page has a specific
> offset in the resulting migration stream. The reasons why we'd want
> such behavior are:
> 
>  - The resulting file will have a bounded size, since pages which are
>dirtied multiple times will always go to a fixed location in the
>file, rather than constantly being added to a sequential
>stream. This eliminates cases where a VM with, say, 1G of RAM can
>result in a migration file that's 10s of GBs, provided that the
>workload constantly redirties memory.
> 
>  - It paves the way to implement O_DIRECT-enabled save/restore of the
>migration stream as the pages are ensured to be written at aligned
>offsets.
> 
>  - It allows the usage of multifd so we can write RAM pages to the
>migration file in parallel.
> 
> For now, enabling the capability has no effect. The next couple of
> patches implement the core functionality.
> 
> Signed-off-by: Fabiano Rosas 
> ---
> - mentioned seeking on docs
> ---
>  docs/devel/migration.rst | 21 +
>  migration/options.c  | 34 ++
>  migration/options.h  |  1 +
>  migration/savevm.c   |  1 +
>  qapi/migration.json  |  6 +-
>  5 files changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
> index ec55089b25..eeb4fec31f 100644
> --- a/docs/devel/migration.rst
> +++ b/docs/devel/migration.rst
> @@ -572,6 +572,27 @@ Others (especially either older devices or system 
> devices which for
>  some reason don't have a bus concept) make use of the ``instance id``
>  for otherwise identically named devices.
>  
> +Fixed-ram format
> +
> +
> +When the ``fixed-ram`` capability is enabled, a slightly different
> +stream format is used for the RAM section. Instead of having a
> +sequential stream of pages that follow the RAMBlock headers, the dirty
> +pages for a RAMBlock follow its header. This ensures that each RAM
> +page has a fixed offset in the resulting migration file.
> +
> +The ``fixed-ram`` capability must be enabled in both source and
> +destination with:
> +
> +``migrate_set_capability fixed-ram on``
> +
> +Since pages are written to their relative offsets and out of order
> +(due to the memory dirtying patterns), streaming channels such as
> +sockets are not supported. A seekable channel such as a file is
> +required. This can be verified in the QIOChannel by the presence of
> +the QIO_CHANNEL_FEATURE_SEEKABLE. In more practical terms, this
> +migration format requires the ``file:`` URI when migrating.

After the doc cleanup that I just posted, fixed-ram can have its own file
now.

Could you move the nice ascii art from patch 8 commit message to here?
More doc is always good.  The commit message can get lost very soon, doc
will be more persistent.

Also, can we provide more information on this feature in the doc then users
can know when they should be used, and how?

For example, IIUC it only applies to the case where the user wants to stop
the VM right after snapshot-ing it into a file, right?  We'd better be
clear on this, as this is quite a special use of migration anyway. While at
it, should we also mention the fact that it's always suggested to stop
the VM first before doing such a migration?

> +
>  Return path
>  ---
>  
> diff --git a/migration/options.c b/migration/options.c
> index 8d8ec73ad9..775428a8a5 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -204,6 +204,7 @@ Property migration_properties[] = {
>  DEFINE_PROP_MIG_CAP("x-switchover-ack",
>  MIGRATION_CAPABILITY_SWITCHOVER_ACK),
>  DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
> +DEFINE_PROP_MIG_CAP("x-fixed-ram", MIGRATION_CAPABILITY_FIXED_RAM),

Let's drop "x-"?  I am thinking we should drop all x-, it can break some
scripts but iiuc shouldn't be more than that.  Definitely another story..

>  DEFINE_PROP_END_OF_LIST(),
>  };

-- 
Peter Xu

Re: [RFC PATCH v3 00/30] migration: File based migration with multifd and fixed-ram

2024-01-11 Thread Peter Xu

On Mon, Nov 27, 2023 at 05:25:42PM -0300, Fabiano Rosas wrote:
> Hi,
> 
> In this v3:
> 
> Added support for the "file:/dev/fdset/" syntax to receive multiple
> file descriptors. This allows the management layer to open the
> migration file beforehand and pass the file descriptors to QEMU. We
> need more than one fd to be able to use O_DIRECT concurrently with
> unaligned writes.
> 
> Dropped the auto-pause capability. That discussion was kind of
> stuck. We can revisit optimizations for non-live scenarios once the
> series is more mature/merged.
> 
> Changed the multifd incoming side to use a more generic data structure
> instead of MultiFDPages_t. This allows multifd to restore the ram
> using larger chunks.
> 
> The rest are minor changes, I have noted them in the patches
> themselves.

Fabiano,

Could you always keep a section around in the cover letter (and also in the
upcoming doc file fixed-ram.rst) on the benefits of this feature?

Please bear with me - I can start to ask silly questions.

I thought it was about "keeping the snapshot file small".  But then when I
was thinking about the use case, iiuc fixed-ram migration should always suggest
the user to stop the VM first before migration starts, then if the VM is
stopped the ultimate image shouldn't be large either.

Or is it about performance only?  Where did I miss?

Thanks,

-- 
Peter Xu

[PULL 04/41] hw/intc/armv7m_nvic: add "num-prio-bits" property

2024-01-11 Thread Peter Maydell

From: Samuel Tardieu 

Cortex-M NVIC can have a different number of priority bits.
Cortex-M0/M0+/M1 devices must use 2 or more bits, while devices based
on ARMv7m and up must use 3 or more bits.

This adds a "num-prio-bits" property which will get sensible default
values if unset (2 or 8 depending on the device). Unless a SOC
specifies the number of bits to use, the previous behavior is
maintained for backward compatibility.

Signed-off-by: Samuel Tardieu 
Reviewed-by: Peter Maydell 
Message-id: 20240106181503.1746200-2-...@rfc1149.net
Suggested-by: Anton Kochkov 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1122
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 hw/intc/armv7m_nvic.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 50f9a973a2e..404a445138a 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -2572,6 +2572,11 @@ static const VMStateDescription vmstate_nvic = {
 static Property props_nvic[] = {
 /* Number of external IRQ lines (so excluding the 16 internal exceptions) 
*/
 DEFINE_PROP_UINT32("num-irq", NVICState, num_irq, 64),
+/*
+ * Number of the maximum priority bits that can be used. 0 means
+ * to use a reasonable default.
+ */
+DEFINE_PROP_UINT8("num-prio-bits", NVICState, num_prio_bits, 0),
 DEFINE_PROP_END_OF_LIST()
 };
 
@@ -2685,7 +2690,23 @@ static void armv7m_nvic_realize(DeviceState *dev, Error 
**errp)
 /* include space for internal exception vectors */
 s->num_irq += NVIC_FIRST_IRQ;
 
-s->num_prio_bits = arm_feature(&s->cpu->env, ARM_FEATURE_V7) ? 8 : 2;
+if (s->num_prio_bits == 0) {
+/*
+ * If left unspecified, use 2 bits by default on Cortex-M0/M0+/M1
+ * and 8 bits otherwise.
+ */
+s->num_prio_bits = arm_feature(&s->cpu->env, ARM_FEATURE_V7) ? 8 : 2;
+} else {
+uint8_t min_prio_bits =
+arm_feature(&s->cpu->env, ARM_FEATURE_V7) ? 3 : 2;
+if (s->num_prio_bits < min_prio_bits || s->num_prio_bits > 8) {
+error_setg(errp,
+   "num-prio-bits %d is outside "
+   "NVIC acceptable range [%d-8]",
+   s->num_prio_bits, min_prio_bits);
+return;
+}
+}
 
 /*
  * This device provides a single memory region which covers the
-- 
2.34.1

[PULL 39/41] target/arm: Report HCR_EL2.{NV,NV1,NV2} in cpu dumps

2024-01-11 Thread Peter Maydell

When interpreting CPU dumps where FEAT_NV and FEAT_NV2 are in use,
it's helpful to include the values of HCR_EL2.{NV,NV1,NV2} in the CPU
dump format, as a way of distinguishing when we are in EL1 as part of
executing guest-EL2 and when we are just in normal EL1.

Add the bits to the end of the log line that shows PSTATE and similar
information:

PSTATE=03c9  EL2h  BTYPE=0 NV NV2

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index c15ad52ab3d..7d763786d88 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1059,6 +1059,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, 
int flags)
 uint32_t psr = pstate_read(env);
 int i, j;
 int el = arm_current_el(env);
+uint64_t hcr = arm_hcr_el2_eff(env);
 const char *ns_status;
 bool sve;
 
@@ -1096,6 +1097,10 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE 
*f, int flags)
 if (cpu_isar_feature(aa64_bti, cpu)) {
 qemu_fprintf(f, "  BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
 }
+qemu_fprintf(f, "%s%s%s",
+ (hcr & HCR_NV) ? " NV" : "",
+ (hcr & HCR_NV1) ? " NV1" : "",
+ (hcr & HCR_NV2) ? " NV2" : "");
 if (!(flags & CPU_DUMP_FPU)) {
 qemu_fprintf(f, "\n");
 return;
-- 
2.34.1

[PULL 00/41] target-arm queue

2024-01-11 Thread Peter Maydell

Mostly my FEAT_NV/NV2 stuff, but some other smaller series too.

-- PMM

The following changes since commit 9468484fe904ab4691de6d9c34616667f377ceac:

  Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into 
staging (2024-01-09 10:32:23 +)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git 
tags/pull-target-arm-20240111

for you to fetch changes up to e2862554c257e908a3833265e38365e794abd362:

  target/arm: Add FEAT_NV2 to max, neoverse-n2, neoverse-v1 CPUs (2024-01-09 
14:44:45 +)


target-arm queue:
 * Emulate FEAT_NV, FEAT_NV2
 * add cache controller for Freescale i.MX6
 * Add minimal support for the B-L475E-IOT01A board
 * Allow SoC models to configure M-profile CPUs with correct number
   of NVIC priority bits
 * Add missing QOM parent for v7-M SoCs
 * Set CTR_EL0.{IDC,DIC} for the 'max' CPU
 * hw/intc/arm_gicv3_cpuif: handle LPIs in in the list registers


Inès Varhol (2):
  hw/arm: Add minimal support for the STM32L4x5 SoC
  hw/arm: Add minimal support for the B-L475E-IOT01A board

Nikita Ostrenkov (1):
  hw/arm: add cache controller for Freescale i.MX6

Peter Maydell (34):
  target/arm: Set CTR_EL0.{IDC,DIC} for the 'max' CPU
  hw/intc/arm_gicv3_cpuif: handle LPIs in in the list registers
  target/arm: Handle HCR_EL2 accesses for bits introduced with FEAT_NV
  target/arm: Implement HCR_EL2.AT handling
  target/arm: Enable trapping of ERET for FEAT_NV
  target/arm: Always honour HCR_EL2.TSC when HCR_EL2.NV is set
  target/arm: Allow use of upper 32 bits of TBFLAG_A64
  target/arm: Record correct opcode fields in cpreg for E2H aliases
  target/arm: *_EL12 registers should UNDEF when HCR_EL2.E2H is 0
  target/arm: Make EL2 cpreg accessfns safe for FEAT_NV EL1 accesses
  target/arm: Move FPU/SVE/SME access checks up above ARM_CP_SPECIAL_MASK 
check
  target/arm: Trap sysreg accesses for FEAT_NV
  target/arm: Make NV reads of CurrentEL return EL2
  target/arm: Set SPSR_EL1.M correctly when nested virt is enabled
  target/arm: Trap registers when HCR_EL2.{NV, NV1} == {1, 1}
  target/arm: Always use arm_pan_enabled() when checking if PAN is enabled
  target/arm: Don't honour PSTATE.PAN when HCR_EL2.{NV, NV1} == {1, 1}
  target/arm: Treat LDTR* and STTR* as LDR/STR when NV, NV1 is 1, 1
  target/arm: Handle FEAT_NV page table attribute changes
  target/arm: Add FEAT_NV to max, neoverse-n2, neoverse-v1 CPUs
  target/arm: Handle HCR_EL2 accesses for FEAT_NV2 bits
  target/arm: Implement VNCR_EL2 register
  target/arm: Handle FEAT_NV2 changes to when SPSR_EL1.M reports EL2
  target/arm: Handle FEAT_NV2 redirection of SPSR_EL2, ELR_EL2, ESR_EL2, 
FAR_EL2
  target/arm: Implement FEAT_NV2 redirection of sysregs to RAM
  target/arm: Report VNCR_EL2 based faults correctly
  target/arm: Mark up VNCR offsets (offsets 0x0..0xff)
  target/arm: Mark up VNCR offsets (offsets 0x100..0x160)
  target/arm: Mark up VNCR offsets (offsets 0x168..0x1f8)
  target/arm: Mark up VNCR offsets (offsets >= 0x200, except GIC)
  hw/intc/arm_gicv3_cpuif: Mark up VNCR offsets for GIC CPU registers
  target/arm: Report HCR_EL2.{NV,NV1,NV2} in cpu dumps
  target/arm: Enhance CPU_LOG_INT to show SPSR on AArch64 exception-entry
  target/arm: Add FEAT_NV2 to max, neoverse-n2, neoverse-v1 CPUs

Philippe Mathieu-Daudé (1):
  hw/arm: Add missing QOM parent for v7-M SoCs

Samuel Tardieu (3):
  hw/intc/armv7m_nvic: add "num-prio-bits" property
  hw/arm/armv7m: alias the NVIC "num-prio-bits" property
  hw/arm/socs: configure priority bits for existing SOCs

 MAINTAINERS |  15 ++
 docs/system/arm/b-l475e-iot01a.rst  |  46 +
 docs/system/arm/emulation.rst   |   2 +
 docs/system/arm/stm32.rst   |   6 +-
 docs/system/target-arm.rst  |   1 +
 configs/devices/arm-softmmu/default.mak |   1 +
 include/hw/arm/armv7m.h |   1 +
 include/hw/arm/stm32l4x5_soc.h  |  57 ++
 target/arm/cpregs.h |  54 +-
 target/arm/cpu-features.h   |  10 +
 target/arm/cpu.h|  24 ++-
 target/arm/syndrome.h   |  20 +-
 target/arm/tcg/translate.h  |  16 +-
 hw/arm/armv7m.c |   2 +
 hw/arm/b-l475e-iot01a.c |  72 +++
 hw/arm/fsl-imx6.c   |   3 +
 hw/arm/msf2-som.c   |   1 +
 hw/arm/netduino2.c  |   1 +
 hw/arm/netduinoplus2.c  |   1 +
 hw/arm/olimex-stm32-h405.c  |   1 +
 hw/arm/stellaris.c  |   2 +
 hw/arm/stm32f100_soc.c  |

[PULL 41/41] target/arm: Add FEAT_NV2 to max, neoverse-n2, neoverse-v1 CPUs

2024-01-11 Thread Peter Maydell

Enable FEAT_NV2 on the 'max' CPU, and stop filtering it out for
the Neoverse N2 and Neoverse V1 CPUs.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 docs/system/arm/emulation.rst | 1 +
 target/arm/cpu.c  | 5 -
 target/arm/tcg/cpu64.c| 2 +-
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index d827b42de79..f67aea2d836 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -64,6 +64,7 @@ the following architecture extensions:
 - FEAT_MTE2 (Memory Tagging Extension)
 - FEAT_MTE3 (MTE Asymmetric Fault Handling)
 - FEAT_NV (Nested Virtualization)
+- FEAT_NV2 (Enhanced nested virtualization support)
 - FEAT_PACIMP (Pointer authentication - IMPLEMENTATION DEFINED algorithm)
 - FEAT_PACQARMA3 (Pointer authentication - QARMA3 algorithm)
 - FEAT_PACQARMA5 (Pointer authentication - QARMA5 algorithm)
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 7d763786d88..826ce842c09 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2243,11 +2243,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */
 cpu->isar.id_aa64pfr0 =
 FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0);
-/* FEAT_NV2 (Enhanced Nested Virtualization support) */
-if (FIELD_EX64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV) > 1) {
-cpu->isar.id_aa64mmfr2 =
-FIELD_DP64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV, 1);
-}
 }
 
 /* MPU can be configured out of a PMSA CPU either by setting has-mpu
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 93f040e6e96..5fba2c0f040 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1204,7 +1204,7 @@ void aarch64_max_tcg_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1);  /* FEAT_UAO */
 t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */
 t = FIELD_DP64(t, ID_AA64MMFR2, VARANGE, 1);  /* FEAT_LVA */
-t = FIELD_DP64(t, ID_AA64MMFR2, NV, 1);   /* FEAT_NV */
+t = FIELD_DP64(t, ID_AA64MMFR2, NV, 2);   /* FEAT_NV2 */
 t = FIELD_DP64(t, ID_AA64MMFR2, ST, 1);   /* FEAT_TTST */
 t = FIELD_DP64(t, ID_AA64MMFR2, AT, 1);   /* FEAT_LSE2 */
 t = FIELD_DP64(t, ID_AA64MMFR2, IDS, 1);  /* FEAT_IDST */
-- 
2.34.1

[PULL 03/41] hw/arm: Add minimal support for the B-L475E-IOT01A board

2024-01-11 Thread Peter Maydell

From: Inès Varhol 

This commit adds a new B-L475E-IOT01A board using the STM32L475VG SoC
as well as a dedicated documentation file.
The implementation is derived from the Netduino Plus 2 machine.
There are no peripherals implemented yet, only memory regions.

Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Alistair Francis 
Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
Message-id: 20240108135849.351719-3-ines.var...@telecom-paris.fr
Signed-off-by: Peter Maydell 
---
 MAINTAINERS |  7 +++
 docs/system/arm/b-l475e-iot01a.rst  | 46 
 docs/system/arm/stm32.rst   |  6 ++-
 docs/system/target-arm.rst  |  1 +
 configs/devices/arm-softmmu/default.mak |  1 +
 hw/arm/b-l475e-iot01a.c | 72 +
 hw/arm/Kconfig  |  6 +++
 hw/arm/meson.build  |  1 +
 8 files changed, 138 insertions(+), 2 deletions(-)
 create mode 100644 docs/system/arm/b-l475e-iot01a.rst
 create mode 100644 hw/arm/b-l475e-iot01a.c

diff --git a/MAINTAINERS b/MAINTAINERS
index da29dcc16ec..b406fb20c05 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1130,6 +1130,13 @@ S: Maintained
 F: hw/arm/stm32l4x5_soc.c
 F: include/hw/arm/stm32l4x5_soc.h
 
+B-L475E-IOT01A IoT Node
+M: Arnaud Minier 
+M: Inès Varhol 
+L: qemu-...@nongnu.org
+S: Maintained
+F: hw/arm/b-l475e-iot01a.c
+
 SmartFusion2
 M: Subbaraya Sundeep 
 M: Peter Maydell 
diff --git a/docs/system/arm/b-l475e-iot01a.rst 
b/docs/system/arm/b-l475e-iot01a.rst
new file mode 100644
index 00000000000..2b128e6b847
--- /dev/null
+++ b/docs/system/arm/b-l475e-iot01a.rst
@@ -0,0 +1,46 @@
+B-L475E-IOT01A IoT Node (``b-l475e-iot01a``)
+============================================
+
+The B-L475E-IOT01A IoT Node uses the STM32L475VG SoC which is based on
+ARM Cortex-M4F core. It is part of STMicroelectronics
+:doc:`STM32 boards </system/arm/stm32>` and more specifically the STM32L4
+ultra-low power series. The STM32L4x5 chip runs at up to 80 MHz and
+integrates 128 KiB of SRAM and up to 1MiB of Flash. The B-L475E-IOT01A board
+namely features 64 Mibit QSPI Flash, BT, WiFi and RF connectivity,
+USART, I2C, SPI, CAN and USB OTG, as well as a variety of sensors.
+
+Supported devices
+"""""""""""""""""
+
+Currently, B-L475E-IOT01A machine's implementation is minimal,
+it only supports the following device:
+
+- Cortex-M4F based STM32L4x5 SoC
+
+Missing devices
+"""""""""""""""
+
+The B-L475E-IOT01A does *not* support the following devices:
+
+- Extended interrupts and events controller (EXTI)
+- Reset and clock control (RCC)
+- Serial ports (UART)
+- System configuration controller (SYSCFG)
+- General-purpose I/Os (GPIO)
+- Analog to Digital Converter (ADC)
+- SPI controller
+- Timer controller (TIMER)
+
+See the complete list of unimplemented peripheral devices
+in the STM32L4x5 module : ``./hw/arm/stm32l4x5_soc.c``
+
+Boot options
+""""""""""""
+
+The B-L475E-IOT01A machine can be started using the ``-kernel``
+option to load a firmware. Example:
+
+.. code-block:: bash
+
+  $ qemu-system-arm -M b-l475e-iot01a -kernel firmware.bin
+
diff --git a/docs/system/arm/stm32.rst b/docs/system/arm/stm32.rst
index d7265b763d4..3b640f3ee07 100644
--- a/docs/system/arm/stm32.rst
+++ b/docs/system/arm/stm32.rst
@@ -16,11 +16,13 @@ based on this chip :
 
 - ``netduino2`` Netduino 2 board with STM32F205RFT6 microcontroller
 
-The STM32F4 series is based on ARM Cortex-M4F core. This series is pin-to-pin
-compatible with STM32F2 series. The following machines are based on this chip :
+The STM32F4 series is based on ARM Cortex-M4F core, as well as the STM32L4
+ultra-low-power series. The STM32F4 series is pin-to-pin compatible with 
STM32F2 series.
+The following machines are based on this ARM Cortex-M4F chip :
 
 - ``netduinoplus2`` Netduino Plus 2 board with STM32F405RGT6 
microcontroller
 - ``olimex-stm32-h405`` Olimex STM32 H405 board with STM32F405RGT6 
microcontroller
+- ``b-l475e-iot01a`` :doc:`B-L475E-IOT01A IoT Node 
</system/arm/b-l475e-iot01a>` board with STM32L475VG microcontroller
 
 There are many other STM32 series that are currently not supported by QEMU.
 
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
index 790ac1b8a2b..c9d7c0dda7e 100644
--- a/docs/system/target-arm.rst
+++ b/docs/system/target-arm.rst
@@ -84,6 +84,7 @@ undocumented; you can get a complete list by running
arm/vexpress
arm/aspeed
arm/bananapi_m2u.rst
+   arm/b-l475e-iot01a.rst
arm/sabrelite
arm/digic
arm/cubieboard
diff --git a/configs/devices/arm-softmmu/default.mak 
b/configs/devices/arm-softmmu/default.mak
index 980c48a7d99..023faa2f750 100644
--- a/configs/devices/arm-softmmu/default.mak
+++ b/configs/devices/arm-softmmu/default.mak
@@ -19,6 +19,7 @@ CONFIG_ARM_VIRT=y
 # CONFIG_NSERIES=n
 # CONFIG_STELLARIS=n
 # CONFIG_STM32VLDISCOVERY=n
+# CONFIG_B_L475E_IOT01A=n
 # CONFIG_REALVIEW=n
 # CONFIG_VERSATILE=n
 # CONFIG_VEXPRESS=n
diff --

[PULL 38/41] hw/intc/arm_gicv3_cpuif: Mark up VNCR offsets for GIC CPU registers

2024-01-11 Thread Peter Maydell

Mark up the cpreginfo structs for the GIC CPU registers to indicate
the offsets from VNCR_EL2, as defined in table D8-66 in rule R_CSRPQ
in the Arm ARM.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 hw/intc/arm_gicv3_cpuif.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 6ac90536402..e1a60d8c15b 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -2684,6 +2684,7 @@ static const ARMCPRegInfo gicv3_cpuif_hcr_reginfo[] = {
 { .name = "ICH_AP0R0_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 8, .opc2 = 0,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x480,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2691,6 +2692,7 @@ static const ARMCPRegInfo gicv3_cpuif_hcr_reginfo[] = {
 { .name = "ICH_AP1R0_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 9, .opc2 = 0,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x4a0,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2698,6 +2700,7 @@ static const ARMCPRegInfo gicv3_cpuif_hcr_reginfo[] = {
 { .name = "ICH_HCR_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 11, .opc2 = 0,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x4c0,
   .access = PL2_RW,
   .readfn = ich_hcr_read,
   .writefn = ich_hcr_write,
@@ -2729,6 +2732,7 @@ static const ARMCPRegInfo gicv3_cpuif_hcr_reginfo[] = {
 { .name = "ICH_VMCR_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 11, .opc2 = 7,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x4c8,
   .access = PL2_RW,
   .readfn = ich_vmcr_read,
   .writefn = ich_vmcr_write,
@@ -2739,6 +2743,7 @@ static const ARMCPRegInfo gicv3_cpuif_ich_apxr1_reginfo[] 
= {
 { .name = "ICH_AP0R1_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 8, .opc2 = 1,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x488,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2746,6 +2751,7 @@ static const ARMCPRegInfo gicv3_cpuif_ich_apxr1_reginfo[] 
= {
 { .name = "ICH_AP1R1_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 9, .opc2 = 1,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x4a8,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2756,6 +2762,7 @@ static const ARMCPRegInfo 
gicv3_cpuif_ich_apxr23_reginfo[] = {
 { .name = "ICH_AP0R2_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 8, .opc2 = 2,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x490,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2763,6 +2770,7 @@ static const ARMCPRegInfo 
gicv3_cpuif_ich_apxr23_reginfo[] = {
 { .name = "ICH_AP0R3_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 8, .opc2 = 3,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x498,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2770,6 +2778,7 @@ static const ARMCPRegInfo 
gicv3_cpuif_ich_apxr23_reginfo[] = {
 { .name = "ICH_AP1R2_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 9, .opc2 = 2,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x4b0,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2777,6 +2786,7 @@ static const ARMCPRegInfo 
gicv3_cpuif_ich_apxr23_reginfo[] = {
 { .name = "ICH_AP1R3_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 9, .opc2 = 3,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x4b8,
   .access = PL2_RW,
   .readfn = ich_ap_read,
   .writefn = ich_ap_write,
@@ -2898,6 +2908,7 @@ void gicv3_init_cpuif(GICv3State *s)
   .opc0 = 3, .opc1 = 4, .crn = 12,
   .crm = 12 + (j >> 3), .opc2 = j & 7,
   .type = ARM_CP_IO | ARM_CP_NO_RAW,
+  .nv2_redirect_offset = 0x400 + 8 * j,
   .access = PL2_RW,
   .readfn = ich_lr_read,
   .writefn = ich_lr_write,
-- 
2.34.1

[PULL 07/41] hw/arm: Add missing QOM parent for v7-M SoCs

2024-01-11 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

QDev objects created with qdev_new() need to manually add
their parent relationship with object_property_add_child().

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Message-id: 20240104141159.53883-1-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/msf2-som.c  | 1 +
 hw/arm/netduino2.c | 1 +
 hw/arm/netduinoplus2.c | 1 +
 hw/arm/olimex-stm32-h405.c | 1 +
 hw/arm/stm32vldiscovery.c  | 1 +
 5 files changed, 5 insertions(+)

diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c
index eb74b23797c..a269cf044b9 100644
--- a/hw/arm/msf2-som.c
+++ b/hw/arm/msf2-som.c
@@ -60,6 +60,7 @@ static void emcraft_sf2_s2s010_init(MachineState *machine)
 memory_region_add_subregion(sysmem, DDR_BASE_ADDRESS, ddr);
 
 dev = qdev_new(TYPE_MSF2_SOC);
+object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
 qdev_prop_set_string(dev, "part-name", "M2S010");
 qdev_prop_set_string(dev, "cpu-type", mc->default_cpu_type);
 
diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c
index 501f63a77f9..8b1a9a24379 100644
--- a/hw/arm/netduino2.c
+++ b/hw/arm/netduino2.c
@@ -44,6 +44,7 @@ static void netduino2_init(MachineState *machine)
 clock_set_hz(sysclk, SYSCLK_FRQ);
 
 dev = qdev_new(TYPE_STM32F205_SOC);
+object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
 qdev_connect_clock_in(dev, "sysclk", sysclk);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 
diff --git a/hw/arm/netduinoplus2.c b/hw/arm/netduinoplus2.c
index 2e589849478..bccd1003549 100644
--- a/hw/arm/netduinoplus2.c
+++ b/hw/arm/netduinoplus2.c
@@ -44,6 +44,7 @@ static void netduinoplus2_init(MachineState *machine)
 clock_set_hz(sysclk, SYSCLK_FRQ);
 
 dev = qdev_new(TYPE_STM32F405_SOC);
+object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
 qdev_connect_clock_in(dev, "sysclk", sysclk);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 
diff --git a/hw/arm/olimex-stm32-h405.c b/hw/arm/olimex-stm32-h405.c
index d793de7c97f..4ad7b043be0 100644
--- a/hw/arm/olimex-stm32-h405.c
+++ b/hw/arm/olimex-stm32-h405.c
@@ -47,6 +47,7 @@ static void olimex_stm32_h405_init(MachineState *machine)
 clock_set_hz(sysclk, SYSCLK_FRQ);
 
 dev = qdev_new(TYPE_STM32F405_SOC);
+object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
 qdev_connect_clock_in(dev, "sysclk", sysclk);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 
diff --git a/hw/arm/stm32vldiscovery.c b/hw/arm/stm32vldiscovery.c
index 190db6118b9..cc419351605 100644
--- a/hw/arm/stm32vldiscovery.c
+++ b/hw/arm/stm32vldiscovery.c
@@ -47,6 +47,7 @@ static void stm32vldiscovery_init(MachineState *machine)
 clock_set_hz(sysclk, SYSCLK_FRQ);
 
 dev = qdev_new(TYPE_STM32F100_SOC);
+object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
 qdev_connect_clock_in(dev, "sysclk", sysclk);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 
-- 
2.34.1

[PULL 34/41] target/arm: Mark up VNCR offsets (offsets 0x0..0xff)

2024-01-11 Thread Peter Maydell

Mark up the cpreginfo structs to indicate offsets for system
registers from VNCR_EL2, as defined in table D8-66 in rule R_CSRPQ in
the Arm ARM. This commit covers offsets below 0x100; all of these
registers are redirected to memory regardless of the value of
HCR_EL2.NV1.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 93991c07b78..bc5a0810421 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6059,6 +6059,7 @@ static const ARMCPRegInfo hcrx_el2_reginfo = {
 .name = "HCRX_EL2", .state = ARM_CP_STATE_AA64,
 .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 2,
 .access = PL2_RW, .writefn = hcrx_write, .accessfn = access_hxen,
+.nv2_redirect_offset = 0xa0,
 .fieldoffset = offsetof(CPUARMState, cp15.hcrx_el2),
 };
 
@@ -6125,6 +6126,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
   .type = ARM_CP_IO,
   .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
   .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
+  .nv2_redirect_offset = 0x78,
   .writefn = hcr_write, .raw_writefn = raw_write },
 { .name = "HCR", .state = ARM_CP_STATE_AA32,
   .type = ARM_CP_ALIAS | ARM_CP_IO,
@@ -6209,6 +6211,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
 { .name = "VTCR_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
   .access = PL2_RW,
+  .nv2_redirect_offset = 0x40,
   /* no .writefn needed as this can't cause an ASID change */
   .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
 { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
@@ -6220,6 +6223,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
 { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
   .access = PL2_RW, .writefn = vttbr_write, .raw_writefn = raw_write,
+  .nv2_redirect_offset = 0x20,
   .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) },
 { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
@@ -6228,6 +6232,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
 { .name = "TPIDR_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 2,
   .access = PL2_RW, .resetvalue = 0,
+  .nv2_redirect_offset = 0x90,
   .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[2]) },
 { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0,
@@ -6323,6 +6328,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 3,
   .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 0,
   .writefn = gt_cntvoff_write,
+  .nv2_redirect_offset = 0x60,
   .fieldoffset = offsetof(CPUARMState, cp15.cntvoff_el2) },
 { .name = "CNTVOFF", .cp = 15, .opc1 = 4, .crm = 14,
   .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS | ARM_CP_IO,
@@ -6361,6 +6367,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
 { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH,
   .cp = 15, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3,
   .access = PL2_RW,
+  .nv2_redirect_offset = 0x80,
   .fieldoffset = offsetof(CPUARMState, cp15.hstr_el2) },
 };
 
@@ -6386,10 +6393,12 @@ static const ARMCPRegInfo el2_sec_cp_reginfo[] = {
 { .name = "VSTTBR_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 6, .opc2 = 0,
   .access = PL2_RW, .accessfn = sel2_access,
+  .nv2_redirect_offset = 0x30,
   .fieldoffset = offsetof(CPUARMState, cp15.vsttbr_el2) },
 { .name = "VSTCR_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 6, .opc2 = 2,
   .access = PL2_RW, .accessfn = sel2_access,
+  .nv2_redirect_offset = 0x48,
   .fieldoffset = offsetof(CPUARMState, cp15.vstcr_el2) },
 };
 
@@ -8155,6 +8164,7 @@ static const ARMCPRegInfo nv2_reginfo[] = {
   .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 2, .opc2 = 0,
   .access = PL2_RW,
   .writefn = vncr_write,
+  .nv2_redirect_offset = 0xb0,
   .fieldoffset = offsetof(CPUARMState, cp15.vncr_el2) },
 };
 
@@ -8986,6 +8996,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
   .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
   .access = PL2_RW, .resetvalue = cpu->midr,
   .type = ARM_CP_EL3_NO_EL2_C_NZ,
+  .nv2_redirect_offset = 0x88,
   .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
 { .name = "VMPIDR", .state = ARM_CP_STATE_AA32,
   .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
@@ -8997,6 +9008,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
   .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,

[PULL 17/41] target/arm: Make EL2 cpreg accessfns safe for FEAT_NV EL1 accesses

2024-01-11 Thread Peter Maydell

FEAT_NV and FEAT_NV2 will allow EL1 to attempt to access cpregs that
only exist at EL2. This means we're going to want to run their
accessfns when the CPU is at EL1. In almost all cases, the behaviour
we want is "the accessfn returns OK if at EL1".

Mostly the accessfn already does the right thing; in a few cases we
need to explicitly check that the EL is not 1 before applying various
trap controls, or split out an accessfn used both for an _EL1 and an
_EL2 register into two so we can handle the FEAT_NV case correctly
for the _EL2 register.

There are two registers where we want the accessfn to trap for
a FEAT_NV EL1 access: VSTTBR_EL2 and VSTCR_EL2 should UNDEF
an access from NonSecure EL1, not trap to EL2 under FEAT_NV.
The way we have written sel2_access() already results in this
behaviour.

We can identify the registers we care about here because they
all have opc1 == 4 or 5.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/debug_helper.c | 12 +++-
 target/arm/helper.c   | 65 ++-
 2 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 83d2619080f..b39144d5b93 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -844,6 +844,16 @@ static CPAccessResult access_tda(CPUARMState *env, const 
ARMCPRegInfo *ri,
 return CP_ACCESS_OK;
 }
 
+static CPAccessResult access_dbgvcr32(CPUARMState *env, const ARMCPRegInfo *ri,
+  bool isread)
+{
+/* MDCR_EL3.TDA doesn't apply for FEAT_NV traps */
+if (arm_current_el(env) == 2 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
+return CP_ACCESS_TRAP_EL3;
+}
+return CP_ACCESS_OK;
+}
+
 /*
  * Check for traps to Debug Comms Channel registers. If FEAT_FGT
  * is implemented then these are controlled by MDCR_EL2.TDCC for
@@ -1062,7 +1072,7 @@ static const ARMCPRegInfo debug_aa32_el1_reginfo[] = {
  */
 { .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0,
-  .access = PL2_RW, .accessfn = access_tda,
+  .access = PL2_RW, .accessfn = access_dbgvcr32,
   .type = ARM_CP_NOP | ARM_CP_EL3_NO_EL2_KEEP },
 };
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 797b7518f61..7c7f92c16de 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3324,6 +3324,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
 static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
  bool isread)
 {
+if (arm_current_el(env) == 1) {
+/* This must be a FEAT_NV access */
+/* TODO: FEAT_ECV will need to check CNTHCTL_EL2 here */
+return CP_ACCESS_OK;
+}
 if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
 return CP_ACCESS_TRAP;
 }
@@ -6014,7 +6019,7 @@ static void hcrx_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 static CPAccessResult access_hxen(CPUARMState *env, const ARMCPRegInfo *ri,
   bool isread)
 {
-if (arm_current_el(env) < 3
+if (arm_current_el(env) == 2
 && arm_feature(env, ARM_FEATURE_EL3)
 && !(env->cp15.scr_el3 & SCR_HXEN)) {
 return CP_ACCESS_TRAP_EL3;
@@ -6539,6 +6544,15 @@ static CPAccessResult el2_e2h_e12_access(CPUARMState 
*env,
  const ARMCPRegInfo *ri,
  bool isread)
 {
+if (arm_current_el(env) == 1) {
+/*
+ * This must be a FEAT_NV access (will either trap or redirect
+ * to memory). None of the registers with _EL12 aliases want to
+ * apply their trap controls for this kind of access, so don't
+ * call the orig_accessfn or do the "UNDEF when E2H is 0" check.
+ */
+return CP_ACCESS_OK;
+}
 /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */
 if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
 return CP_ACCESS_TRAP_UNCATEGORIZED;
@@ -7015,10 +7029,21 @@ static CPAccessResult access_tpidr2(CPUARMState *env, 
const ARMCPRegInfo *ri,
 return CP_ACCESS_OK;
 }
 
-static CPAccessResult access_esm(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
+static CPAccessResult access_smprimap(CPUARMState *env, const ARMCPRegInfo *ri,
+  bool isread)
+{
+/* If EL1 this is a FEAT_NV access and CPTR_EL3.ESM doesn't apply */
+if (arm_current_el(env) == 2
+&& arm_feature(env, ARM_FEATURE_EL3)
+&& !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, ESM)) {
+return CP_ACCESS_TRAP_EL3;
+}
+return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_smpri(CPUARMState *env, const ARMCPRegInfo *ri,
+   bool isread)
 {
-/* TODO: FEAT_FGT for SMPRI_EL1 but not SMPRIMAP_EL2 */
 if (arm_current_el(env) < 3

[PULL 25/41] target/arm: Treat LDTR* and STTR* as LDR/STR when NV, NV1 is 1, 1

2024-01-11 Thread Peter Maydell

FEAT_NV requires (per I_JKLJK) that when HCR_EL2.{NV,NV1} is {1,1} the
unprivileged-access instructions LDTR, STTR etc behave as normal
loads and stores. Implement the check that handles this.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/tcg/hflags.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index f33c0a12741..8f254bf9ccb 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -261,8 +261,10 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, 
int el, int fp_el,
 switch (mmu_idx) {
 case ARMMMUIdx_E10_1:
 case ARMMMUIdx_E10_1_PAN:
-/* TODO: ARMv8.3-NV */
-DP_TBFLAG_A64(flags, UNPRIV, 1);
+/* FEAT_NV: NV,NV1 == 1,1 means we don't do UNPRIV accesses */
+if ((hcr & (HCR_NV | HCR_NV1)) != (HCR_NV | HCR_NV1)) {
+DP_TBFLAG_A64(flags, UNPRIV, 1);
+}
 break;
 case ARMMMUIdx_E20_2:
 case ARMMMUIdx_E20_2_PAN:
-- 
2.34.1

[PULL 27/41] target/arm: Add FEAT_NV to max, neoverse-n2, neoverse-v1 CPUs

2024-01-11 Thread Peter Maydell

Enable FEAT_NV on the 'max' CPU, and stop filtering it out for the
Neoverse N2 and Neoverse V1 CPUs.  We continue to downgrade FEAT_NV2
support to FEAT_NV for the latter two CPU types.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 docs/system/arm/emulation.rst | 1 +
 target/arm/cpu.c  | 8 +---
 target/arm/tcg/cpu64.c| 1 +
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index 0b604f90059..d827b42de79 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -63,6 +63,7 @@ the following architecture extensions:
 - FEAT_MTE (Memory Tagging Extension)
 - FEAT_MTE2 (Memory Tagging Extension)
 - FEAT_MTE3 (MTE Asymmetric Fault Handling)
+- FEAT_NV (Nested Virtualization)
 - FEAT_PACIMP (Pointer authentication - IMPLEMENTATION DEFINED algorithm)
 - FEAT_PACQARMA3 (Pointer authentication - QARMA3 algorithm)
 - FEAT_PACQARMA5 (Pointer authentication - QARMA5 algorithm)
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 1c8b7874823..c15ad52ab3d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2238,9 +2238,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */
 cpu->isar.id_aa64pfr0 =
 FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0);
-/* FEAT_NV (Nested Virtualization) */
-cpu->isar.id_aa64mmfr2 =
-FIELD_DP64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV, 0);
+/* FEAT_NV2 (Enhanced Nested Virtualization support) */
+if (FIELD_EX64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV) > 1) {
+cpu->isar.id_aa64mmfr2 =
+FIELD_DP64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV, 1);
+}
 }
 
 /* MPU can be configured out of a PMSA CPU either by setting has-mpu
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 40e7a45166f..93f040e6e96 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1204,6 +1204,7 @@ void aarch64_max_tcg_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1);  /* FEAT_UAO */
 t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */
 t = FIELD_DP64(t, ID_AA64MMFR2, VARANGE, 1);  /* FEAT_LVA */
+t = FIELD_DP64(t, ID_AA64MMFR2, NV, 1);   /* FEAT_NV */
 t = FIELD_DP64(t, ID_AA64MMFR2, ST, 1);   /* FEAT_TTST */
 t = FIELD_DP64(t, ID_AA64MMFR2, AT, 1);   /* FEAT_LSE2 */
 t = FIELD_DP64(t, ID_AA64MMFR2, IDS, 1);  /* FEAT_IDST */
-- 
2.34.1

[PULL 22/41] target/arm: Trap registers when HCR_EL2.{NV, NV1} == {1, 1}

2024-01-11 Thread Peter Maydell

When HCR_EL2.{NV,NV1} is {1,1} we must trap five extra registers to
EL2: VBAR_EL1, ELR_EL1, SPSR_EL1, SCXTNUM_EL1 and TFSR_EL1.
Implement these traps.

This trap does not apply when FEAT_NV2 is implemented and enabled;
include the check that HCR_EL2.NV2 is 0 here, to save us having
to come back and add it later.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 45 +
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index e48b03ba1d0..c8296a9c191 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5355,6 +5355,19 @@ static void mdcr_el2_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 }
 }
 
+static CPAccessResult access_nv1(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+if (arm_current_el(env) == 1) {
+uint64_t hcr_nv = arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1 | HCR_NV2);
+
+if (hcr_nv == (HCR_NV | HCR_NV1)) {
+return CP_ACCESS_TRAP_EL2;
+}
+}
+return CP_ACCESS_OK;
+}
+
 #ifdef CONFIG_USER_ONLY
 /*
  * `IC IVAU` is handled to improve compatibility with JITs that dual-map their
@@ -5703,12 +5716,12 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
 { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64,
   .type = ARM_CP_ALIAS,
   .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
-  .access = PL1_RW,
+  .access = PL1_RW, .accessfn = access_nv1,
   .fieldoffset = offsetof(CPUARMState, elr_el[1]) },
 { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64,
   .type = ARM_CP_ALIAS,
   .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0,
-  .access = PL1_RW,
+  .access = PL1_RW, .accessfn = access_nv1,
   .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) },
 /*
  * We rely on the access checks not allowing the guest to write to the
@@ -7831,6 +7844,17 @@ static CPAccessResult access_mte(CPUARMState *env, const 
ARMCPRegInfo *ri,
 return CP_ACCESS_OK;
 }
 
+static CPAccessResult access_tfsr_el1(CPUARMState *env, const ARMCPRegInfo *ri,
+  bool isread)
+{
+CPAccessResult nv1 = access_nv1(env, ri, isread);
+
+if (nv1 != CP_ACCESS_OK) {
+return nv1;
+}
+return access_mte(env, ri, isread);
+}
+
 static CPAccessResult access_tfsr_el2(CPUARMState *env, const ARMCPRegInfo *ri,
   bool isread)
 {
@@ -7875,7 +7899,7 @@ static const ARMCPRegInfo mte_reginfo[] = {
   .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) },
 { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0,
-  .access = PL1_RW, .accessfn = access_mte,
+  .access = PL1_RW, .accessfn = access_tfsr_el1,
   .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },
 { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0,
@@ -8027,6 +8051,18 @@ static CPAccessResult access_scxtnum(CPUARMState *env, 
const ARMCPRegInfo *ri,
 return CP_ACCESS_OK;
 }
 
+static CPAccessResult access_scxtnum_el1(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
+{
+CPAccessResult nv1 = access_nv1(env, ri, isread);
+
+if (nv1 != CP_ACCESS_OK) {
+return nv1;
+}
+return access_scxtnum(env, ri, isread);
+}
+
 static const ARMCPRegInfo scxtnum_reginfo[] = {
 { .name = "SCXTNUM_EL0", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 3, .crn = 13, .crm = 0, .opc2 = 7,
@@ -8035,7 +8071,7 @@ static const ARMCPRegInfo scxtnum_reginfo[] = {
   .fieldoffset = offsetof(CPUARMState, scxtnum_el[0]) },
 { .name = "SCXTNUM_EL1", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 7,
-  .access = PL1_RW, .accessfn = access_scxtnum,
+  .access = PL1_RW, .accessfn = access_scxtnum_el1,
   .fgt = FGT_SCXTNUM_EL1,
   .fieldoffset = offsetof(CPUARMState, scxtnum_el[1]) },
 { .name = "SCXTNUM_EL2", .state = ARM_CP_STATE_AA64,
@@ -9417,6 +9453,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
   .access = PL1_RW, .writefn = vbar_write,
+  .accessfn = access_nv1,
   .fgt = FGT_VBAR_EL1,
   .bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s),
  offsetof(CPUARMState, cp15.vbar_ns) },
-- 
2.34.1

[PULL 40/41] target/arm: Enhance CPU_LOG_INT to show SPSR on AArch64 exception-entry

2024-01-11 Thread Peter Maydell

We already print various lines of information when we take an
exception, including the ELR and (if relevant) the FAR. Now
that FEAT_NV means that we might report something other than
the old PSTATE to the guest as the SPSR, it's worth logging
this as well.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 4550ff7ffde..dc8f14f4331 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11416,6 +11416,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
 }
 env->banked_spsr[aarch64_banked_spsr_index(new_el)] = old_mode;
 
+qemu_log_mask(CPU_LOG_INT, "...with SPSR 0x%x\n", old_mode);
 qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n",
   env->elr_el[new_el]);
 
-- 
2.34.1

[PULL 26/41] target/arm: Handle FEAT_NV page table attribute changes

2024-01-11 Thread Peter Maydell

FEAT_NV requires that when HCR_EL2.{NV,NV1} == {1,1} the handling
of some of the page table attribute bits changes for the EL1&0
translation regime:

 * for block and page descriptors:
  - bit [54] holds PXN, not UXN
  - bit [53] is RES0, and the effective value of UXN is 0
  - bit [6], AP[1], is treated as 0
 * for table descriptors, when hierarchical permissions are enabled:
  - bit [60] holds PXNTable, not UXNTable
  - bit [59] is RES0
  - bit [61], APTable[0] is treated as 0

Implement these changes to the page table attribute handling.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/ptw.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 0ecd3a36dad..2d4fa8dbcaf 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -1581,6 +1581,12 @@ static bool lpae_block_desc_valid(ARMCPU *cpu, bool ds,
 }
 }
 
+static bool nv_nv1_enabled(CPUARMState *env, S1Translate *ptw)
+{
+uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space);
+return (hcr & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1);
+}
+
 /**
  * get_phys_addr_lpae: perform one stage of page table walk, LPAE format
  *
@@ -1989,6 +1995,21 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
S1Translate *ptw,
 xn = extract64(attrs, 54, 1);
 pxn = extract64(attrs, 53, 1);
 
+if (el == 1 && nv_nv1_enabled(env, ptw)) {
+/*
+ * With FEAT_NV, when HCR_EL2.{NV,NV1} == {1,1}, the block/page
+ * descriptor bit 54 holds PXN, 53 is RES0, and the effective value
+ * of UXN is 0. Similarly for bits 59 and 60 in table descriptors
+ * (which we have already folded into bits 53 and 54 of attrs).
+ * AP[1] (descriptor bit 6, our ap bit 0) is treated as 0.
+ * Similarly, APTable[0] from the table descriptor is treated as 0;
+ * we already folded this into AP[1] and squashing that to 0 does
+ * the right thing.
+ */
+pxn = xn;
+xn = 0;
+ap &= ~1;
+}
 /*
  * Note that we modified ptw->in_space earlier for NSTable, but
  * result->f.attrs retains a copy of the original security space.
-- 
2.34.1

[PULL 33/41] target/arm: Report VNCR_EL2 based faults correctly

2024-01-11 Thread Peter Maydell

If FEAT_NV2 redirects a system register access to a memory offset
from VNCR_EL2, that access might fault.  In this case we need to
report the correct syndrome information:
 * Data Abort, from same-EL
 * no ISS information
 * the VNCR bit (bit 13) is set

and the exception must be taken to EL2.

Save an appropriate syndrome template when generating code; we can
then use that to:
 * select the right target EL
 * reconstitute a correct final syndrome for the data abort
 * report the right syndrome if we take a FEAT_RME granule protection
   fault on the VNCR-based write

Note that because VNCR is bit 13, we must start keeping bit 13 in
template syndromes, by adjusting ARM_INSN_START_WORD2_SHIFT.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpu.h   |  4 ++--
 target/arm/syndrome.h  | 20 
 target/arm/tcg/tlb_helper.c| 27 +--
 target/arm/tcg/translate-a64.c |  4 
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 9281d74aa9d..ec276fcd57c 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -120,12 +120,12 @@ enum {
 #define TARGET_INSN_START_EXTRA_WORDS 2
 
 /* The 2nd extra word holding syndrome info for data aborts does not use
- * the upper 6 bits nor the lower 14 bits. We mask and shift it down to
+ * the upper 6 bits nor the lower 13 bits. We mask and shift it down to
  * help the sleb128 encoder do a better job.
  * When restoring the CPU state, we shift it back up.
  */
 #define ARM_INSN_START_WORD2_MASK ((1 << 26) - 1)
-#define ARM_INSN_START_WORD2_SHIFT 14
+#define ARM_INSN_START_WORD2_SHIFT 13
 
 /* We currently assume float and double are IEEE single and double
precision respectively.
diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h
index 95454b5b3bb..1a49767479f 100644
--- a/target/arm/syndrome.h
+++ b/target/arm/syndrome.h
@@ -86,6 +86,9 @@ typedef enum {
 #define ARM_EL_IL (1 << ARM_EL_IL_SHIFT)
 #define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT)
 
+/* In the Data Abort syndrome */
+#define ARM_EL_VNCR (1 << 13)
+
 static inline uint32_t syn_get_ec(uint32_t syn)
 {
 return syn >> ARM_EL_EC_SHIFT;
@@ -256,13 +259,12 @@ static inline uint32_t syn_bxjtrap(int cv, int cond, int 
rm)
 (cv << 24) | (cond << 20) | rm;
 }
 
-static inline uint32_t syn_gpc(int s2ptw, int ind, int gpcsc,
+static inline uint32_t syn_gpc(int s2ptw, int ind, int gpcsc, int vncr,
int cm, int s1ptw, int wnr, int fsc)
 {
-/* TODO: FEAT_NV2 adds VNCR */
 return (EC_GPC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (s2ptw << 21)
-| (ind << 20) | (gpcsc << 14) | (cm << 8) | (s1ptw << 7)
-| (wnr << 6) | fsc;
+| (ind << 20) | (gpcsc << 14) | (vncr << 13) | (cm << 8)
+| (s1ptw << 7) | (wnr << 6) | fsc;
 }
 
 static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
@@ -295,6 +297,16 @@ static inline uint32_t syn_data_abort_with_iss(int same_el,
| (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
 }
 
+/*
+ * Faults due to FEAT_NV2 VNCR_EL2-based accesses report as same-EL
+ * Data Aborts with the VNCR bit set.
+ */
+static inline uint32_t syn_data_abort_vncr(int ea, int wnr, int fsc)
+{
+return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (1 << ARM_EL_EC_SHIFT)
+| ARM_EL_IL | ARM_EL_VNCR | (wnr << 6) | fsc;
+}
+
 static inline uint32_t syn_swstep(int same_el, int isv, int ex)
 {
 return (EC_SOFTWARESTEP << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c
index 4fdd85359e1..dd5de74ffb7 100644
--- a/target/arm/tcg/tlb_helper.c
+++ b/target/arm/tcg/tlb_helper.c
@@ -50,7 +50,15 @@ static inline uint32_t merge_syn_data_abort(uint32_t 
template_syn,
  * ST64BV, or ST64BV0 insns report syndrome info even for stage-1
  * faults and regardless of the target EL.
  */
-if (!(template_syn & ARM_EL_ISV) || target_el != 2
+if (template_syn & ARM_EL_VNCR) {
+/*
+ * FEAT_NV2 faults on accesses via VNCR_EL2 are a special case:
+ * they are always reported as "same EL", even though we are going
+ * from EL1 to EL2.
+ */
+assert(!fi->stage2);
+syn = syn_data_abort_vncr(fi->ea, is_write, fsc);
+} else if (!(template_syn & ARM_EL_ISV) || target_el != 2
 || fi->s1ptw || !fi->stage2) {
 syn = syn_data_abort_no_iss(same_el, 0,
 fi->ea, 0, fi->s1ptw, is_write, fsc);
@@ -169,6 +177,20 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr,
 int current_el = arm_current_el(env);
 bool same_el;
 uint32_t syn, exc, fsr, fsc;
+/*
+ * We know this must be a data or insn abort, and that
+ * env->exception.syndrome contains the template syndrome set
+ * up at translate time. So we can check only the VNCR bit
+ * (and indeed synd

[PULL 12/41] target/arm: Enable trapping of ERET for FEAT_NV

2024-01-11 Thread Peter Maydell

When FEAT_NV is turned on via the HCR_EL2.NV bit, ERET instructions
are trapped, with the same syndrome information as for the existing
FEAT_FGT fine-grained trap (in the pseudocode this is handled in
AArch64.CheckForEretTrap()).

Rename the DisasContext and tbflag bits to reflect that they are
no longer exclusively for FGT traps, and set the tbflag bit when
FEAT_NV is enabled as well as when the FGT is enabled.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpu.h   |  2 +-
 target/arm/tcg/translate.h |  4 ++--
 target/arm/tcg/hflags.c| 11 ++-
 target/arm/tcg/translate-a64.c |  6 +++---
 4 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8c3ca2e2319..8da6bfda228 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3232,7 +3232,7 @@ FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
 FIELD(TBFLAG_A64, SVL, 24, 4)
 /* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
 FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
-FIELD(TBFLAG_A64, FGT_ERET, 29, 1)
+FIELD(TBFLAG_A64, TRAP_ERET, 29, 1)
 FIELD(TBFLAG_A64, NAA, 30, 1)
 FIELD(TBFLAG_A64, ATA0, 31, 1)
 
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 3c3bb3431ad..8c84377003c 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -138,10 +138,10 @@ typedef struct DisasContext {
 bool mve_no_pred;
 /* True if fine-grained traps are active */
 bool fgt_active;
-/* True if fine-grained trap on ERET is enabled */
-bool fgt_eret;
 /* True if fine-grained trap on SVC is enabled */
 bool fgt_svc;
+/* True if a trap on ERET is enabled (FGT or NV) */
+bool trap_eret;
 /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */
 bool naa;
 /*
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index a6ebd7571a3..560fb7964ab 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -169,6 +169,7 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, 
int el, int fp_el,
 CPUARMTBFlags flags = {};
 ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
 uint64_t tcr = regime_tcr(env, mmu_idx);
+uint64_t hcr = arm_hcr_el2_eff(env);
 uint64_t sctlr;
 int tbii, tbid;
 
@@ -285,13 +286,21 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, 
int el, int fp_el,
 if (arm_fgt_active(env, el)) {
 DP_TBFLAG_ANY(flags, FGT_ACTIVE, 1);
 if (FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, ERET)) {
-DP_TBFLAG_A64(flags, FGT_ERET, 1);
+DP_TBFLAG_A64(flags, TRAP_ERET, 1);
 }
 if (fgt_svc(env, el)) {
 DP_TBFLAG_ANY(flags, FGT_SVC, 1);
 }
 }
 
+/*
+ * ERET can also be trapped for FEAT_NV. arm_hcr_el2_eff() takes care
+ * of "is EL2 enabled" and the NV bit can only be set if FEAT_NV is 
present.
+ */
+if (el == 1 && (hcr & HCR_NV)) {
+DP_TBFLAG_A64(flags, TRAP_ERET, 1);
+}
+
 if (cpu_isar_feature(aa64_mte, env_archcpu(env))) {
 /*
  * Set MTE_ACTIVE if any access may be Checked, and leave clear
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index f3b5b9124d0..0f30e71f9bd 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -1606,7 +1606,7 @@ static bool trans_ERET(DisasContext *s, arg_ERET *a)
 if (s->current_el == 0) {
 return false;
 }
-if (s->fgt_eret) {
+if (s->trap_eret) {
 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
 return true;
 }
@@ -1633,7 +1633,7 @@ static bool trans_ERETA(DisasContext *s, arg_reta *a)
 return false;
 }
 /* The FGT trap takes precedence over an auth trap. */
-if (s->fgt_eret) {
+if (s->trap_eret) {
 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
 return true;
 }
@@ -13980,7 +13980,7 @@ static void 
aarch64_tr_init_disas_context(DisasContextBase *dcbase,
 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
-dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
+dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
-- 
2.34.1

[PULL 29/41] target/arm: Implement VNCR_EL2 register

2024-01-11 Thread Peter Maydell

For FEAT_NV2, a new system register VNCR_EL2 holds the base
address of the memory which nested-guest system register
accesses are redirected to. Implement this register.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpu.h|  3 +++
 target/arm/helper.c | 26 ++
 2 files changed, 29 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d7a10fb4b61..0e48a1366bd 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -547,6 +547,9 @@ typedef struct CPUArchState {
 uint64_t gpccr_el3;
 uint64_t gptbr_el3;
 uint64_t mfar_el3;
+
+/* NV2 register */
+uint64_t vncr_el2;
 } cp15;
 
 struct {
diff --git a/target/arm/helper.c b/target/arm/helper.c
index e3e56539594..53bd6c85990 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8131,6 +8131,28 @@ static const ARMCPRegInfo fgt_reginfo[] = {
   .access = PL2_RW, .accessfn = access_fgt,
   .fieldoffset = offsetof(CPUARMState, cp15.fgt_exec[FGTREG_HFGITR]) },
 };
+
+static void vncr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+   uint64_t value)
+{
+/*
+ * Clear the RES0 bottom 12 bits; this means at runtime we can guarantee
+ * that VNCR_EL2 + offset is 64-bit aligned. We don't need to do anything
+ * about the RESS bits at the top -- we choose the "generate an EL2
+ * translation abort on use" CONSTRAINED UNPREDICTABLE option (i.e. let
+ * the ptw.c code detect the resulting invalid address).
+ */
+env->cp15.vncr_el2 = value & ~0xfffULL;
+}
+
+static const ARMCPRegInfo nv2_reginfo[] = {
+{ .name = "VNCR_EL2", .state = ARM_CP_STATE_AA64,
+  .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 2, .opc2 = 0,
+  .access = PL2_RW,
+  .writefn = vncr_write,
+  .fieldoffset = offsetof(CPUARMState, cp15.vncr_el2) },
+};
+
 #endif /* TARGET_AARCH64 */
 
 static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -9614,6 +9636,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 define_arm_cp_regs(cpu, rme_mte_reginfo);
 }
 }
+
+if (cpu_isar_feature(aa64_nv2, cpu)) {
+define_arm_cp_regs(cpu, nv2_reginfo);
+}
 #endif
 
 if (cpu_isar_feature(any_predinv, cpu)) {
-- 
2.34.1

[PULL 01/41] hw/arm: add cache controller for Freescale i.MX6

2024-01-11 Thread Peter Maydell

From: Nikita Ostrenkov 

Signed-off-by: Nikita Ostrenkov 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20231219105510.4907-1-n.ostren...@gmail.com
[PMM: fixed stray whitespace]
Signed-off-by: Peter Maydell 
---
 hw/arm/fsl-imx6.c | 3 +++
 hw/arm/Kconfig| 1 +
 2 files changed, 4 insertions(+)

diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index b2153022c04..af2e982b052 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -154,6 +154,9 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
qdev_get_gpio_in(DEVICE(&s->cpu[i]), ARM_CPU_FIQ));
 }
 
+/* L2 cache controller */
+sysbus_create_simple("l2x0", FSL_IMX6_PL310_ADDR, NULL);
+
 if (!sysbus_realize(SYS_BUS_DEVICE(&s->ccm), errp)) {
 return;
 }
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 660f49db498..b853577e725 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -537,6 +537,7 @@ config FSL_IMX6
 select IMX_I2C
 select IMX_USBPHY
 select WDT_IMX2
+select PL310  # cache controller
 select SDHCI
 
 config ASPEED_SOC
-- 
2.34.1

[PULL 21/41] target/arm: Set SPSR_EL1.M correctly when nested virt is enabled

2024-01-11 Thread Peter Maydell

FEAT_NV requires that when HCR_EL2.{NV,NV1} == {1,0} and an exception
is taken from EL1 to EL1 then the reported EL in SPSR_EL1.M should be
EL2, not EL1.  Implement this behaviour.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7c7f92c16de..e48b03ba1d0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11249,6 +11249,12 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
 old_mode = pstate_read(env);
 aarch64_save_sp(env, arm_current_el(env));
 env->elr_el[new_el] = env->pc;
+
+if (cur_el == 1 && new_el == 1 &&
+((arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1)) == HCR_NV)) {
+/* I_ZJRNN: report EL2 in the SPSR by setting M[3:2] to 0b10 */
+old_mode = deposit32(old_mode, 2, 2, 2);
+}
 } else {
 old_mode = cpsr_read_for_spsr_elx(env);
 env->elr_el[new_el] = env->regs[15];
-- 
2.34.1

[PULL 14/41] target/arm: Allow use of upper 32 bits of TBFLAG_A64

2024-01-11 Thread Peter Maydell

The TBFLAG_A64 TB flag bits go in flags2, which for AArch64 guests
we know is 64 bits. However at the moment we use FIELD_EX32() and
FIELD_DP32() to read and write these bits, which only works for
bits 0 to 31. Since we're about to add a flag that uses bit 32,
switch to FIELD_EX64() and FIELD_DP64() so that this will work.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpu.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8da6bfda228..6dd0f642581 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3237,12 +3237,14 @@ FIELD(TBFLAG_A64, NAA, 30, 1)
 FIELD(TBFLAG_A64, ATA0, 31, 1)
 
 /*
- * Helpers for using the above.
+ * Helpers for using the above. Note that only the A64 accessors use
+ * FIELD_DP64() and FIELD_EX64(), because in the other cases the flags
+ * word either is or might be 32 bits only.
  */
 #define DP_TBFLAG_ANY(DST, WHICH, VAL) \
 (DST.flags = FIELD_DP32(DST.flags, TBFLAG_ANY, WHICH, VAL))
 #define DP_TBFLAG_A64(DST, WHICH, VAL) \
-(DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A64, WHICH, VAL))
+(DST.flags2 = FIELD_DP64(DST.flags2, TBFLAG_A64, WHICH, VAL))
 #define DP_TBFLAG_A32(DST, WHICH, VAL) \
 (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A32, WHICH, VAL))
 #define DP_TBFLAG_M32(DST, WHICH, VAL) \
@@ -3251,7 +3253,7 @@ FIELD(TBFLAG_A64, ATA0, 31, 1)
 (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_AM32, WHICH, VAL))
 
 #define EX_TBFLAG_ANY(IN, WHICH)   FIELD_EX32(IN.flags, TBFLAG_ANY, WHICH)
-#define EX_TBFLAG_A64(IN, WHICH)   FIELD_EX32(IN.flags2, TBFLAG_A64, WHICH)
+#define EX_TBFLAG_A64(IN, WHICH)   FIELD_EX64(IN.flags2, TBFLAG_A64, WHICH)
 #define EX_TBFLAG_A32(IN, WHICH)   FIELD_EX32(IN.flags2, TBFLAG_A32, WHICH)
 #define EX_TBFLAG_M32(IN, WHICH)   FIELD_EX32(IN.flags2, TBFLAG_M32, WHICH)
 #define EX_TBFLAG_AM32(IN, WHICH)  FIELD_EX32(IN.flags2, TBFLAG_AM32, WHICH)
-- 
2.34.1

[PULL 23/41] target/arm: Always use arm_pan_enabled() when checking if PAN is enabled

2024-01-11 Thread Peter Maydell

Currently the code in target/arm/helper.c mostly checks the PAN bits
in env->pstate or env->uncached_cpsr directly when it wants to know
if PAN is enabled, because in most callsites we know whether we are
in AArch64 or AArch32. We do have an arm_pan_enabled() function, but
we only use it in a few places where the code might run in either an
AArch32 or AArch64 context.

For FEAT_NV, when HCR_EL2.{NV,NV1} is {1,1} PAN is always disabled
even when the PSTATE.PAN bit is set, so the "is PAN enabled" test
becomes more complicated. Make all places that check for PAN use
arm_pan_enabled(), so we have a place to put the FEAT_NV test.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index c8296a9c191..1db2effb1c0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -263,6 +263,15 @@ void init_cpreg_list(ARMCPU *cpu)
 g_list_free(keys);
 }
 
+static bool arm_pan_enabled(CPUARMState *env)
+{
+if (is_a64(env)) {
+return env->pstate & PSTATE_PAN;
+} else {
+return env->uncached_cpsr & CPSR_PAN;
+}
+}
+
 /*
  * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0.
  */
@@ -3614,7 +3623,7 @@ static void ats_write(CPUARMState *env, const 
ARMCPRegInfo *ri, uint64_t value)
 g_assert(ss != ARMSS_Secure);  /* ARMv8.4-SecEL2 is 64-bit only */
 /* fall through */
 case 1:
-if (ri->crm == 9 && (env->uncached_cpsr & CPSR_PAN)) {
+if (ri->crm == 9 && arm_pan_enabled(env)) {
 mmu_idx = ARMMMUIdx_Stage1_E1_PAN;
 } else {
 mmu_idx = ARMMMUIdx_Stage1_E1;
@@ -3730,7 +3739,7 @@ static void ats_write64(CPUARMState *env, const 
ARMCPRegInfo *ri,
 case 0:
 switch (ri->opc1) {
 case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */
-if (ri->crm == 9 && (env->pstate & PSTATE_PAN)) {
+if (ri->crm == 9 && arm_pan_enabled(env)) {
 mmu_idx = regime_e20 ?
   ARMMMUIdx_E20_2_PAN : ARMMMUIdx_Stage1_E1_PAN;
 } else {
@@ -12145,15 +12154,6 @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState 
*env, bool secstate)
 }
 #endif
 
-static bool arm_pan_enabled(CPUARMState *env)
-{
-if (is_a64(env)) {
-return env->pstate & PSTATE_PAN;
-} else {
-return env->uncached_cpsr & CPSR_PAN;
-}
-}
-
 ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
 {
 ARMMMUIdx idx;
-- 
2.34.1

[PULL 13/41] target/arm: Always honour HCR_EL2.TSC when HCR_EL2.NV is set

2024-01-11 Thread Peter Maydell

The HCR_EL2.TSC trap for trapping EL1 execution of SMC instructions
has a behaviour change for FEAT_NV when EL3 is not implemented:

 * in older architecture versions TSC was required to have no
   effect (i.e. the SMC insn UNDEFs)
 * with FEAT_NV, when HCR_EL2.NV == 1 the trap must apply
   (i.e. SMC traps to EL2, as it already does in all cases when
   EL3 is implemented)
 * in newer architecture versions, the behaviour either without
   FEAT_NV or with FEAT_NV and HCR_EL2.NV == 0 is relaxed to
   an IMPDEF choice between UNDEF and trap-to-EL2 (i.e. it is
   permitted to always honour HCR_EL2.TSC) for AArch64 only

Add the condition to honour the trap bit when HCR_EL2.NV == 1.  We
leave the HCR_EL2.NV == 0 case with the existing (UNDEF) behaviour,
as our IMPDEF choice (both because it avoids a behaviour change
for older CPU models and because we'd have to distinguish AArch32
from AArch64 if we opted to trap to EL2).

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/tcg/op_helper.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
index 105ab63ed75..b5ac26061c7 100644
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -985,7 +985,14 @@ void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome)
  *
  *  Conduit SMC, valid call  Trap to EL2 PSCI Call
  *  Conduit SMC, inval call  Trap to EL2 Undef insn
- *  Conduit not SMC  Undef insn  Undef insn
+ *  Conduit not SMC  Undef or trap[1]Undef insn
+ *
+ * [1] In this case:
+ *  - if HCR_EL2.NV == 1 we must trap to EL2
+ *  - if HCR_EL2.NV == 0 then newer architecture revisions permit
+ *AArch64 (but not AArch32) to trap to EL2 as an IMPDEF choice
+ *  - otherwise we must UNDEF
+ * We take the IMPDEF choice to always UNDEF if HCR_EL2.NV == 0.
  */
 
 /* On ARMv8 with EL3 AArch64, SMD applies to both S and NS state.
@@ -999,9 +1006,12 @@ void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome)
  : smd_flag && !secure;
 
 if (!arm_feature(env, ARM_FEATURE_EL3) &&
+!(arm_hcr_el2_eff(env) & HCR_NV) &&
 cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) {
-/* If we have no EL3 then SMC always UNDEFs and can't be
- * trapped to EL2. PSCI-via-SMC is a sort of ersatz EL3
+/*
+ * If we have no EL3 then traditionally SMC always UNDEFs and can't be
+ * trapped to EL2. For nested virtualization, SMC can be trapped to
+ * the outer hypervisor. PSCI-via-SMC is a sort of ersatz EL3
  * firmware within QEMU, and we want an EL2 guest to be able
  * to forbid its EL1 from making PSCI calls into QEMU's
  * "firmware" via HCR.TSC, so for these purposes treat
-- 
2.34.1

[PULL 09/41] hw/intc/arm_gicv3_cpuif: handle LPIs in the list registers

2024-01-11 Thread Peter Maydell

The hypervisor can deliver (virtual) LPIs to a guest by setting up a
list register to have an intid which is an LPI.  The GIC has to treat
these a little differently to standard interrupt IDs, because LPIs
have no Active state, and so the guest will only EOI them; it will
not also deactivate them.  So icv_eoir_write() must do two things:

 * if the LPI ID is not in any list register, we drop the
   priority but do not increment the EOI count
 * if the LPI ID is in a list register, we immediately deactivate
   it, regardless of the split-drop-and-deactivate control

This can be seen in the VirtualWriteEOIR0() and VirtualWriteEOIR1()
pseudocode in the GICv3 architecture specification.

Without this fix, a hypervisor guest might stall because
LPIs get stuck in a bogus Active+Pending state.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 hw/intc/arm_gicv3_cpuif.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 77c2a6dd3b6..6ac90536402 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -1434,16 +1434,25 @@ static void icv_eoir_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 idx = icv_find_active(cs, irq);
 
 if (idx < 0) {
-/* No valid list register corresponding to EOI ID */
-icv_increment_eoicount(cs);
+/*
+ * No valid list register corresponding to EOI ID; if this is a vLPI
+ * not in the list regs then do nothing; otherwise increment EOI count
+ */
+if (irq < GICV3_LPI_INTID_START) {
+icv_increment_eoicount(cs);
+}
 } else {
 uint64_t lr = cs->ich_lr_el2[idx];
 int thisgrp = (lr & ICH_LR_EL2_GROUP) ? GICV3_G1NS : GICV3_G0;
 int lr_gprio = ich_lr_prio(lr) & icv_gprio_mask(cs, grp);
 
 if (thisgrp == grp && lr_gprio == dropprio) {
-if (!icv_eoi_split(env, cs)) {
-/* Priority drop and deactivate not split: deactivate irq now 
*/
+if (!icv_eoi_split(env, cs) || irq >= GICV3_LPI_INTID_START) {
+/*
+ * Priority drop and deactivate not split: deactivate irq now.
+ * LPIs always get their active state cleared immediately
+ * because no separate deactivate is expected.
+ */
 icv_deactivate_irq(cs, idx);
 }
 }
-- 
2.34.1

[PULL 24/41] target/arm: Don't honour PSTATE.PAN when HCR_EL2.{NV, NV1} == {1, 1}

2024-01-11 Thread Peter Maydell

For FEAT_NV, when HCR_EL2.{NV,NV1} is {1,1} PAN is always disabled
even when the PSTATE.PAN bit is set. Implement this by having
arm_pan_enabled() return false in this situation.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 1db2effb1c0..24751e05b24 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -266,6 +266,9 @@ void init_cpreg_list(ARMCPU *cpu)
 static bool arm_pan_enabled(CPUARMState *env)
 {
 if (is_a64(env)) {
+if ((arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1)) 
{
+return false;
+}
 return env->pstate & PSTATE_PAN;
 } else {
 return env->uncached_cpsr & CPSR_PAN;
-- 
2.34.1

[PULL 30/41] target/arm: Handle FEAT_NV2 changes to when SPSR_EL1.M reports EL2

2024-01-11 Thread Peter Maydell

With FEAT_NV2, the condition for when SPSR_EL1.M should report that
an exception was taken from EL2 changes.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 53bd6c85990..b9b3aaf4db7 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11328,10 +11328,18 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
 aarch64_save_sp(env, arm_current_el(env));
 env->elr_el[new_el] = env->pc;
 
-if (cur_el == 1 && new_el == 1 &&
-((arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1)) == HCR_NV)) {
-/* I_ZJRNN: report EL2 in the SPSR by setting M[3:2] to 0b10 */
-old_mode = deposit32(old_mode, 2, 2, 2);
+if (cur_el == 1 && new_el == 1) {
+uint64_t hcr = arm_hcr_el2_eff(env);
+if ((hcr & (HCR_NV | HCR_NV1 | HCR_NV2)) == HCR_NV ||
+(hcr & (HCR_NV | HCR_NV2)) == (HCR_NV | HCR_NV2)) {
+/*
+ * FEAT_NV, FEAT_NV2 may need to report EL2 in the SPSR
+ * by setting M[3:2] to 0b10.
+ * If NV2 is disabled, change SPSR when NV,NV1 == 1,0 (I_ZJRNN)
+ * If NV2 is enabled, change SPSR when NV is 1 (I_DBTLM)
+ */
+old_mode = deposit32(old_mode, 2, 2, 2);
+}
 }
 } else {
 old_mode = cpsr_read_for_spsr_elx(env);
-- 
2.34.1

[PULL 32/41] target/arm: Implement FEAT_NV2 redirection of sysregs to RAM

2024-01-11 Thread Peter Maydell

FEAT_NV2 requires that when HCR_EL2.{NV,NV2} == 0b11 then accesses by
EL1 to certain system registers are redirected to RAM.  The full list
of affected registers is in the table in rule R_CSRPQ in the Arm ARM.
The registers may be normally accessible at EL1 (like ACTLR_EL1), or
normally UNDEF at EL1 (like HCR_EL2).  Some registers redirect to RAM
only when HCR_EL2.NV1 is 0, and some only when HCR_EL2.NV1 is 1;
others redirect in both cases.

Add the infrastructure for identifying which registers should be
redirected and turning them into memory accesses.

This code does not set the correct syndrome or arrange for the
exception to be taken to the correct target EL if the access via
VNCR_EL2 faults; we will do that in the next commit.

Subsequent commits will mark up the relevant regdefs to set their
nv2_redirect_offset, and if relevant one of the two flags which
indicates that the redirect happens only for a particular value of
HCR_EL2.NV1.

Signed-off-by: Peter Maydell 
Tested-by: Miguel Luis 
Reviewed-by: Richard Henderson 
---
 target/arm/cpregs.h| 12 
 target/arm/cpu.h   |  4 +++
 target/arm/tcg/translate.h |  6 
 target/arm/tcg/hflags.c|  6 
 target/arm/tcg/translate-a64.c | 56 ++
 5 files changed, 84 insertions(+)

diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index cb795bed75b..b6fdd0f3eb4 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -826,6 +826,11 @@ typedef void CPResetFn(CPUARMState *env, const 
ARMCPRegInfo *opaque);
 
 #define CP_ANY 0xff
 
+/* Flags in the high bits of nv2_redirect_offset */
+#define NV2_REDIR_NV1 0x4000 /* Only redirect when HCR_EL2.NV1 == 1 */
+#define NV2_REDIR_NO_NV1 0x8000 /* Only redirect when HCR_EL2.NV1 == 0 */
+#define NV2_REDIR_FLAG_MASK 0xc000
+
 /* Definition of an ARM coprocessor register */
 struct ARMCPRegInfo {
 /* Name of register (useful mainly for debugging, need not be unique) */
@@ -867,6 +872,13 @@ struct ARMCPRegInfo {
  * value encodes both the trap register and bit within it.
  */
 FGTBit fgt;
+
+/*
+ * Offset from VNCR_EL2 when FEAT_NV2 redirects access to memory;
+ * may include an NV2_REDIR_* flag.
+ */
+uint32_t nv2_redirect_offset;
+
 /*
  * The opaque pointer passed to define_arm_cp_regs_with_opaque() when
  * this register was defined: can be used to hand data through to the
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f521219ea95..9281d74aa9d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3241,6 +3241,10 @@ FIELD(TBFLAG_A64, ATA0, 31, 1)
 FIELD(TBFLAG_A64, NV, 32, 1)
 FIELD(TBFLAG_A64, NV1, 33, 1)
 FIELD(TBFLAG_A64, NV2, 34, 1)
+/* Set if FEAT_NV2 RAM accesses use the EL2&0 translation regime */
+FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
+/* Set if FEAT_NV2 RAM accesses are big-endian */
+FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
 
 /*
  * Helpers for using the above. Note that only the A64 accessors use
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 9e13c4ef7b6..93be745cf33 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -150,6 +150,10 @@ typedef struct DisasContext {
 bool nv1;
 /* True if NV enabled and HCR_EL2.NV2 is set */
 bool nv2;
+/* True if NV2 enabled and NV2 RAM accesses use EL2&0 translation regime */
+bool nv2_mem_e20;
+/* True if NV2 enabled and NV2 RAM accesses are big-endian */
+bool nv2_mem_be;
 /*
  * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
  *  < 0, set by the current instruction.
@@ -165,6 +169,8 @@ typedef struct DisasContext {
 int c15_cpar;
 /* TCG op of the current insn_start.  */
 TCGOp *insn_start;
+/* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */
+uint32_t nv2_redirect_offset;
 } DisasContext;
 
 typedef struct DisasCompare {
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index d2b352663e8..8e5d35d9227 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -307,6 +307,12 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, 
int el, int fp_el,
 }
 if (hcr & HCR_NV2) {
 DP_TBFLAG_A64(flags, NV2, 1);
+if (hcr & HCR_E2H) {
+DP_TBFLAG_A64(flags, NV2_MEM_E20, 1);
+}
+if (env->cp15.sctlr_el[2] & SCTLR_EE) {
+DP_TBFLAG_A64(flags, NV2_MEM_BE, 1);
+}
 }
 }
 
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 2ada5b7e3f6..2938397d52c 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -2135,6 +2135,7 @@ static void handle_sys(DisasContext *s, bool isread,
 bool nv_trap_to_el2 = false;
 bool nv_redirect_reg = false;
 bool skip_fp_access_checks = false;
+bool nv2_mem_redirect = false;
 TCGv_ptr tcg_ri = NULL;
 TCGv_i64 tcg_rt;
 uint32_t syndrome = syn_aa64_sysregtr

[PULL 37/41] target/arm: Mark up VNCR offsets (offsets >= 0x200, except GIC)

2024-01-11 Thread Peter Maydell

Mark up the cpreginfo structs to indicate offsets for system
registers from VNCR_EL2, as defined in table D8-66 in rule R_CSRPQ in
the Arm ARM.  This covers all the remaining offsets at 0x200 and
above, except for the GIC ICH_* registers.

(Note that because we don't implement FEAT_SPE, FEAT_TRF,
FEAT_MPAM, FEAT_BRBE or FEAT_AMUv1p1 we don't implement any
of the registers that use offsets at 0x800 and above.)

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index aa66f5169ab..4550ff7ffde 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4287,6 +4287,7 @@ static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = {
   .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_FAR_EL1,
+  .nv2_redirect_offset = 0x220 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]),
   .resetvalue = 0, },
 };
@@ -4302,6 +4303,7 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_TTBR0_EL1,
+  .nv2_redirect_offset = 0x200 | NV2_REDIR_NV1,
   .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
   .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
  offsetof(CPUARMState, cp15.ttbr0_ns) } },
@@ -4309,6 +4311,7 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_TTBR1_EL1,
+  .nv2_redirect_offset = 0x210 | NV2_REDIR_NV1,
   .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
   .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
  offsetof(CPUARMState, cp15.ttbr1_ns) } },
@@ -5741,6 +5744,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
   .type = ARM_CP_ALIAS,
   .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
   .access = PL1_RW, .accessfn = access_nv1,
+  .nv2_redirect_offset = 0x230 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, elr_el[1]) },
 { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64,
   .type = ARM_CP_ALIAS,
@@ -5760,6 +5764,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
   .fieldoffset = offsetof(CPUARMState, sp_el[0]) },
 { .name = "SP_EL1", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 1, .opc2 = 0,
+  .nv2_redirect_offset = 0x240,
   .access = PL2_RW, .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_KEEP,
   .fieldoffset = offsetof(CPUARMState, sp_el[1]) },
 { .name = "SPSel", .state = ARM_CP_STATE_AA64,
@@ -6882,9 +6887,11 @@ static const ARMCPRegInfo minimal_ras_reginfo[] = {
   .type = ARM_CP_CONST, .resetvalue = 0 },
 { .name = "VDISR_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 1, .opc2 = 1,
+  .nv2_redirect_offset = 0x500,
   .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vdisr_el2) },
 { .name = "VSESR_EL2", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 3,
+  .nv2_redirect_offset = 0x508,
   .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vsesr_el2) },
 };
 
@@ -9548,6 +9555,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
   .access = PL1_RW, .writefn = vbar_write,
   .accessfn = access_nv1,
   .fgt = FGT_VBAR_EL1,
+  .nv2_redirect_offset = 0x250 | NV2_REDIR_NV1,
   .bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s),
  offsetof(CPUARMState, cp15.vbar_ns) },
   .resetvalue = 0 },
-- 
2.34.1

[PULL 06/41] hw/arm/socs: configure priority bits for existing SOCs

2024-01-11 Thread Peter Maydell

From: Samuel Tardieu 

Update the number of priority bits for a number of existing
SoCs according to their technical documentation:

- STM32F100/F205/F405/L4x5: 4 bits
- Stellaris (Sandstorm/Fury): 3 bits

Signed-off-by: Samuel Tardieu 
Reviewed-by: Peter Maydell 
Message-id: 20240106181503.1746200-4-...@rfc1149.net
Signed-off-by: Peter Maydell 
---
 hw/arm/stellaris.c | 2 ++
 hw/arm/stm32f100_soc.c | 1 +
 hw/arm/stm32f205_soc.c | 1 +
 hw/arm/stm32f405_soc.c | 1 +
 hw/arm/stm32l4x5_soc.c | 1 +
 5 files changed, 6 insertions(+)

diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index 729a8bf5695..d18b1144af5 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -47,6 +47,7 @@
 #define BP_GAMEPAD   0x04
 
 #define NUM_IRQ_LINES 64
+#define NUM_PRIO_BITS 3
 
 typedef const struct {
 const char *name;
@@ -1067,6 +1068,7 @@ static void stellaris_init(MachineState *ms, 
stellaris_board_info *board)
 
 nvic = qdev_new(TYPE_ARMV7M);
 qdev_prop_set_uint32(nvic, "num-irq", NUM_IRQ_LINES);
+qdev_prop_set_uint8(nvic, "num-prio-bits", NUM_PRIO_BITS);
 qdev_prop_set_string(nvic, "cpu-type", ms->cpu_type);
 qdev_prop_set_bit(nvic, "enable-bitband", true);
 qdev_connect_clock_in(nvic, "cpuclk",
diff --git a/hw/arm/stm32f100_soc.c b/hw/arm/stm32f100_soc.c
index b90d440d7aa..808b783515d 100644
--- a/hw/arm/stm32f100_soc.c
+++ b/hw/arm/stm32f100_soc.c
@@ -115,6 +115,7 @@ static void stm32f100_soc_realize(DeviceState *dev_soc, 
Error **errp)
 /* Init ARMv7m */
 armv7m = DEVICE(&s->armv7m);
 qdev_prop_set_uint32(armv7m, "num-irq", 61);
+qdev_prop_set_uint8(armv7m, "num-prio-bits", 4);
 qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m3"));
 qdev_prop_set_bit(armv7m, "enable-bitband", true);
 qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
index 1a548646f6e..a451e21f59c 100644
--- a/hw/arm/stm32f205_soc.c
+++ b/hw/arm/stm32f205_soc.c
@@ -127,6 +127,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, 
Error **errp)
 
 armv7m = DEVICE(&s->armv7m);
 qdev_prop_set_uint32(armv7m, "num-irq", 96);
+qdev_prop_set_uint8(armv7m, "num-prio-bits", 4);
 qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m3"));
 qdev_prop_set_bit(armv7m, "enable-bitband", true);
 qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
diff --git a/hw/arm/stm32f405_soc.c b/hw/arm/stm32f405_soc.c
index a65bbe298d2..2ad5b79a069 100644
--- a/hw/arm/stm32f405_soc.c
+++ b/hw/arm/stm32f405_soc.c
@@ -149,6 +149,7 @@ static void stm32f405_soc_realize(DeviceState *dev_soc, 
Error **errp)
 
 armv7m = DEVICE(&s->armv7m);
 qdev_prop_set_uint32(armv7m, "num-irq", 96);
+qdev_prop_set_uint8(armv7m, "num-prio-bits", 4);
 qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m4"));
 qdev_prop_set_bit(armv7m, "enable-bitband", true);
 qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
diff --git a/hw/arm/stm32l4x5_soc.c b/hw/arm/stm32l4x5_soc.c
index 70609a6dac4..159d5315c99 100644
--- a/hw/arm/stm32l4x5_soc.c
+++ b/hw/arm/stm32l4x5_soc.c
@@ -102,6 +102,7 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
Error **errp)
 object_initialize_child(OBJECT(dev_soc), "armv7m", &s->armv7m, 
TYPE_ARMV7M);
 armv7m = DEVICE(&s->armv7m);
 qdev_prop_set_uint32(armv7m, "num-irq", 96);
+qdev_prop_set_uint32(armv7m, "num-prio-bits", 4);
 qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m4"));
 qdev_prop_set_bit(armv7m, "enable-bitband", true);
 qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
-- 
2.34.1

[PULL 20/41] target/arm: Make NV reads of CurrentEL return EL2

2024-01-11 Thread Peter Maydell

FEAT_NV requires that when HCR_EL2.NV is set reads of the CurrentEL
register from EL1 always report EL2 rather than the real EL.
Implement this.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/tcg/translate-a64.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index f5377dbaf2d..ed1cc019a4c 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -2241,12 +2241,17 @@ static void handle_sys(DisasContext *s, bool isread,
 }
 return;
 case ARM_CP_CURRENTEL:
-/* Reads as current EL value from pstate, which is
+{
+/*
+ * Reads as current EL value from pstate, which is
  * guaranteed to be constant by the tb flags.
+ * For nested virt we should report EL2.
  */
+int el = s->nv ? 2 : s->current_el;
 tcg_rt = cpu_reg(s, rt);
-tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
+tcg_gen_movi_i64(tcg_rt, el << 2);
 return;
+}
 case ARM_CP_DC_ZVA:
 /* Writes clear the aligned block of memory which rt points into. */
 if (s->mte_active[0]) {
-- 
2.34.1

[PULL 11/41] target/arm: Implement HCR_EL2.AT handling

2024-01-11 Thread Peter Maydell

The FEAT_NV HCR_EL2.AT bit enables trapping of some address
translation instructions from EL1 to EL2.  Implement this behaviour.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 4e5fd25199c..dc4b4123e00 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3703,6 +3703,15 @@ static CPAccessResult at_s1e2_access(CPUARMState *env, 
const ARMCPRegInfo *ri,
 return at_e012_access(env, ri, isread);
 }
 
+static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri,
+  bool isread)
+{
+if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_AT)) {
+return CP_ACCESS_TRAP_EL2;
+}
+return at_e012_access(env, ri, isread);
+}
+
 static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri,
 uint64_t value)
 {
@@ -5568,22 +5577,22 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0,
   .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
   .fgt = FGT_ATS1E1R,
-  .accessfn = at_e012_access, .writefn = ats_write64 },
+  .accessfn = at_s1e01_access, .writefn = ats_write64 },
 { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1,
   .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
   .fgt = FGT_ATS1E1W,
-  .accessfn = at_e012_access, .writefn = ats_write64 },
+  .accessfn = at_s1e01_access, .writefn = ats_write64 },
 { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2,
   .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
   .fgt = FGT_ATS1E0R,
-  .accessfn = at_e012_access, .writefn = ats_write64 },
+  .accessfn = at_s1e01_access, .writefn = ats_write64 },
 { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3,
   .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
   .fgt = FGT_ATS1E0W,
-  .accessfn = at_e012_access, .writefn = ats_write64 },
+  .accessfn = at_s1e01_access, .writefn = ats_write64 },
 { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4,
   .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
@@ -8168,12 +8177,12 @@ static const ARMCPRegInfo ats1e1_reginfo[] = {
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0,
   .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
   .fgt = FGT_ATS1E1RP,
-  .accessfn = at_e012_access, .writefn = ats_write64 },
+  .accessfn = at_s1e01_access, .writefn = ats_write64 },
 { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1,
   .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
   .fgt = FGT_ATS1E1WP,
-  .accessfn = at_e012_access, .writefn = ats_write64 },
+  .accessfn = at_s1e01_access, .writefn = ats_write64 },
 };
 
 static const ARMCPRegInfo ats1cp_reginfo[] = {
-- 
2.34.1

[PULL 16/41] target/arm: *_EL12 registers should UNDEF when HCR_EL2.E2H is 0

2024-01-11 Thread Peter Maydell

The alias registers like SCTLR_EL12 only exist when HCR_EL2.E2H
is 1; they should UNDEF otherwise. We weren't implementing this.
Add an intercept of the accessfn for these aliases, and implement
the UNDEF check.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpregs.h |  3 ++-
 target/arm/helper.c | 16 
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index f1293d16c07..e748d184cb6 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -937,7 +937,7 @@ struct ARMCPRegInfo {
 CPResetFn *resetfn;
 
 /*
- * "Original" writefn and readfn.
+ * "Original" readfn, writefn, accessfn.
  * For ARMv8.1-VHE register aliases, we overwrite the read/write
  * accessor functions of various EL1/EL0 to perform the runtime
  * check for which sysreg should actually be modified, and then
@@ -948,6 +948,7 @@ struct ARMCPRegInfo {
  */
 CPReadFn *orig_readfn;
 CPWriteFn *orig_writefn;
+CPAccessFn *orig_accessfn;
 };
 
 /*
diff --git a/target/arm/helper.c b/target/arm/helper.c
index dc2471eda7e..797b7518f61 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6535,6 +6535,20 @@ static void el2_e2h_e12_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 return ri->orig_writefn(env, ri->opaque, value);
 }
 
+static CPAccessResult el2_e2h_e12_access(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
+{
+/* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */
+if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
+return CP_ACCESS_TRAP_UNCATEGORIZED;
+}
+if (ri->orig_accessfn) {
+return ri->orig_accessfn(env, ri->opaque, isread);
+}
+return CP_ACCESS_OK;
+}
+
 static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu)
 {
 struct E2HAlias {
@@ -6648,6 +6662,7 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU 
*cpu)
 new_reg->opaque = src_reg;
 new_reg->orig_readfn = src_reg->readfn ?: raw_read;
 new_reg->orig_writefn = src_reg->writefn ?: raw_write;
+new_reg->orig_accessfn = src_reg->accessfn;
 if (!new_reg->raw_readfn) {
 new_reg->raw_readfn = raw_read;
 }
@@ -6656,6 +6671,7 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU 
*cpu)
 }
 new_reg->readfn = el2_e2h_e12_read;
 new_reg->writefn = el2_e2h_e12_write;
+new_reg->accessfn = el2_e2h_e12_access;
 
 ok = g_hash_table_insert(cpu->cp_regs,
  (gpointer)(uintptr_t)a->new_key, new_reg);
-- 
2.34.1

[PULL 19/41] target/arm: Trap sysreg accesses for FEAT_NV

2024-01-11 Thread Peter Maydell

For FEAT_NV, accesses to system registers and instructions from EL1
which would normally UNDEF there but which work in EL2 need to
instead be trapped to EL2. Detect this both for "we know this will
UNDEF at translate time" and "we found this UNDEFs at runtime", and
make the affected registers trap to EL2 instead.

The Arm ARM defines the set of registers that should trap in terms
of their names; for our implementation this would be both awkward
and inefficient as a test, so we instead trap based on the opc1
field of the sysreg. The regularity of the architectural choice
of encodings for sysregs means that in practice this captures
exactly the correct set of registers.

Regardless of how we try to define the registers this trapping
applies to, there's going to be a certain possibility of breakage
if new architectural features introduce new registers that don't
follow the current rules (FEAT_MEC is one example already visible
in the released sysreg XML, though not yet in the Arm ARM). This
approach seems to me to be straightforward and likely to require
a minimum of manual overrides.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpregs.h| 34 +++
 target/arm/cpu.h   |  1 +
 target/arm/tcg/translate.h |  2 ++
 target/arm/tcg/hflags.c|  1 +
 target/arm/tcg/translate-a64.c | 49 +++---
 5 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index e748d184cb6..3c5f1b48879 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -1080,4 +1080,38 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
 
 CPAccessResult access_tvm_trvm(CPUARMState *, const ARMCPRegInfo *, bool);
 
+/**
+ * arm_cpreg_traps_in_nv: Return true if cpreg traps in nested virtualization
+ *
+ * Return true if this cpreg is one which should be trapped to EL2 if
+ * it is executed at EL1 when nested virtualization is enabled via HCR_EL2.NV.
+ */
+static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
+{
+/*
+ * The Arm ARM defines the registers to be trapped in terms of
+ * their names (I_TZTZL). However the underlying principle is "if
+ * it would UNDEF at EL1 but work at EL2 then it should trap", and
+ * the way the encoding of sysregs and system instructions is done
+ * means that the right set of registers is exactly those where
+ * the opc1 field is 4 or 5. (You can see this also in the assert
+ * we do that the opc1 field and the permissions mask line up in
+ * define_one_arm_cp_reg_with_opaque().)
+ * Checking the opc1 field is easier for us and avoids the problem
+ * that we do not consistently use the right architectural names
+ * for all sysregs, since we treat the name field as largely for debug.
+ *
+ * However we do this check, it is going to be at least potentially
+ * fragile to future new sysregs, but this seems the least likely
+ * to break.
+ *
+ * In particular, note that the released sysreg XML defines that
+ * the FEAT_MEC sysregs and instructions do not follow this FEAT_NV
+ * trapping rule, so we will need to add an ARM_CP_* flag to indicate
+ * "register does not trap on NV" to handle those if/when we implement
+ * FEAT_MEC.
+ */
+return ri->opc1 == 4 || ri->opc1 == 5;
+}
+
 #endif /* TARGET_ARM_CPREGS_H */
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 6dd0f642581..d7a10fb4b61 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3235,6 +3235,7 @@ FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
 FIELD(TBFLAG_A64, TRAP_ERET, 29, 1)
 FIELD(TBFLAG_A64, NAA, 30, 1)
 FIELD(TBFLAG_A64, ATA0, 31, 1)
+FIELD(TBFLAG_A64, NV, 32, 1)
 
 /*
  * Helpers for using the above. Note that only the A64 accessors use
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 8c84377003c..63e075bce3a 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -144,6 +144,8 @@ typedef struct DisasContext {
 bool trap_eret;
 /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */
 bool naa;
+/* True if FEAT_NV HCR_EL2.NV is enabled */
+bool nv;
 /*
  * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
  *  < 0, set by the current instruction.
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 560fb7964ab..f33c0a12741 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -299,6 +299,7 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, 
int el, int fp_el,
  */
 if (el == 1 && (hcr & HCR_NV)) {
 DP_TBFLAG_A64(flags, TRAP_ERET, 1);
+DP_TBFLAG_A64(flags, NV, 1);
 }
 
 if (cpu_isar_feature(aa64_mte, env_archcpu(env))) {
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 5975fc47930..f5377dbaf2d 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/transla

[PULL 10/41] target/arm: Handle HCR_EL2 accesses for bits introduced with FEAT_NV

2024-01-11 Thread Peter Maydell

FEAT_NV defines three new bits in HCR_EL2: NV, NV1 and AT.  When the
feature is enabled, allow these bits to be written, and flush the
TLBs for the bits which affect page table interpretation.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpu-features.h | 5 +
 target/arm/helper.c   | 6 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 954d3582685..3a43c328d9e 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -839,6 +839,11 @@ static inline bool isar_feature_aa64_e0pd(const 
ARMISARegisters *id)
 return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0;
 }
 
+static inline bool isar_feature_aa64_nv(const ARMISARegisters *id)
+{
+return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0;
+}
+
 static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id)
 {
 return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7889fd45d67..4e5fd25199c 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5815,6 +5815,9 @@ static void do_hcr_write(CPUARMState *env, uint64_t 
value, uint64_t valid_mask)
 if (cpu_isar_feature(aa64_rme, cpu)) {
 valid_mask |= HCR_GPF;
 }
+if (cpu_isar_feature(aa64_nv, cpu)) {
+valid_mask |= HCR_NV | HCR_NV1 | HCR_AT;
+}
 }
 
 if (cpu_isar_feature(any_evt, cpu)) {
@@ -5833,9 +5836,10 @@ static void do_hcr_write(CPUARMState *env, uint64_t 
value, uint64_t valid_mask)
  * HCR_DC disables stage1 and enables stage2 translation
  * HCR_DCT enables tagging on (disabled) stage1 translation
  * HCR_FWB changes the interpretation of stage2 descriptor bits
+ * HCR_NV and HCR_NV1 affect interpretation of descriptor bits
  */
 if ((env->cp15.hcr_el2 ^ value) &
-(HCR_VM | HCR_PTW | HCR_DC | HCR_DCT | HCR_FWB)) {
+(HCR_VM | HCR_PTW | HCR_DC | HCR_DCT | HCR_FWB | HCR_NV | HCR_NV1)) {
 tlb_flush(CPU(cpu));
 }
 env->cp15.hcr_el2 = value;
-- 
2.34.1

[PULL 02/41] hw/arm: Add minimal support for the STM32L4x5 SoC

2024-01-11 Thread Peter Maydell

From: Inès Varhol 

This patch adds a new STM32L4x5 SoC, which is necessary to add support for
the B-L475E-IOT01A board.
The implementation is derived from the STM32F405 SoC.
The implementation contains no peripherals, only memory regions are
implemented.

Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Alistair Francis 
Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
Message-id: 20240108135849.351719-2-ines.var...@telecom-paris.fr
Signed-off-by: Peter Maydell 
---
 MAINTAINERS|   8 +
 include/hw/arm/stm32l4x5_soc.h |  57 +++
 hw/arm/stm32l4x5_soc.c | 265 +
 hw/arm/Kconfig |   5 +
 hw/arm/meson.build |   1 +
 5 files changed, 336 insertions(+)
 create mode 100644 include/hw/arm/stm32l4x5_soc.h
 create mode 100644 hw/arm/stm32l4x5_soc.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 00ec1f7ecaf..da29dcc16ec 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1122,6 +1122,14 @@ L: qemu-...@nongnu.org
 S: Maintained
 F: hw/arm/olimex-stm32-h405.c
 
+STM32L4x5 SoC Family
+M: Arnaud Minier 
+M: Inès Varhol 
+L: qemu-...@nongnu.org
+S: Maintained
+F: hw/arm/stm32l4x5_soc.c
+F: include/hw/arm/stm32l4x5_soc.h
+
 SmartFusion2
 M: Subbaraya Sundeep 
 M: Peter Maydell 
diff --git a/include/hw/arm/stm32l4x5_soc.h b/include/hw/arm/stm32l4x5_soc.h
new file mode 100644
index 000..2fd44a36a9d
--- /dev/null
+++ b/include/hw/arm/stm32l4x5_soc.h
@@ -0,0 +1,57 @@
+/*
+ * STM32L4x5 SoC family
+ *
+ * Copyright (c) 2023 Arnaud Minier 
+ * Copyright (c) 2023 Inès Varhol 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * This work is heavily inspired by the stm32f405_soc by Alistair Francis.
+ * Original code is licensed under the MIT License:
+ *
+ * Copyright (c) 2014 Alistair Francis 
+ */
+
+/*
+ * The reference used is the STMicroElectronics RM0351 Reference manual
+ * for STM32L4x5 and STM32L4x6 advanced Arm ® -based 32-bit MCUs.
+ * 
https://www.st.com/en/microcontrollers-microprocessors/stm32l4x5/documentation.html
+ */
+
+#ifndef HW_ARM_STM32L4x5_SOC_H
+#define HW_ARM_STM32L4x5_SOC_H
+
+#include "exec/memory.h"
+#include "hw/arm/armv7m.h"
+#include "qom/object.h"
+
+#define TYPE_STM32L4X5_SOC "stm32l4x5-soc"
+#define TYPE_STM32L4X5XC_SOC "stm32l4x5xc-soc"
+#define TYPE_STM32L4X5XE_SOC "stm32l4x5xe-soc"
+#define TYPE_STM32L4X5XG_SOC "stm32l4x5xg-soc"
+OBJECT_DECLARE_TYPE(Stm32l4x5SocState, Stm32l4x5SocClass, STM32L4X5_SOC)
+
+struct Stm32l4x5SocState {
+SysBusDevice parent_obj;
+
+ARMv7MState armv7m;
+
+MemoryRegion sram1;
+MemoryRegion sram2;
+MemoryRegion flash;
+MemoryRegion flash_alias;
+
+Clock *sysclk;
+Clock *refclk;
+};
+
+struct Stm32l4x5SocClass {
+SysBusDeviceClass parent_class;
+
+size_t flash_size;
+};
+
+#endif
diff --git a/hw/arm/stm32l4x5_soc.c b/hw/arm/stm32l4x5_soc.c
new file mode 100644
index 000..70609a6dac4
--- /dev/null
+++ b/hw/arm/stm32l4x5_soc.c
@@ -0,0 +1,265 @@
+/*
+ * STM32L4x5 SoC family
+ *
+ * Copyright (c) 2023 Arnaud Minier 
+ * Copyright (c) 2023 Inès Varhol 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * This work is heavily inspired by the stm32f405_soc by Alistair Francis.
+ * Original code is licensed under the MIT License:
+ *
+ * Copyright (c) 2014 Alistair Francis 
+ */
+
+/*
+ * The reference used is the STMicroElectronics RM0351 Reference manual
+ * for STM32L4x5 and STM32L4x6 advanced Arm ® -based 32-bit MCUs.
+ * 
https://www.st.com/en/microcontrollers-microprocessors/stm32l4x5/documentation.html
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "exec/address-spaces.h"
+#include "sysemu/sysemu.h"
+#include "hw/arm/stm32l4x5_soc.h"
+#include "hw/qdev-clock.h"
+#include "hw/misc/unimp.h"
+
+#define FLASH_BASE_ADDRESS 0x08000000
+#define SRAM1_BASE_ADDRESS 0x20000000
+#define SRAM1_SIZE (96 * KiB)
+#define SRAM2_BASE_ADDRESS 0x10000000
+#define SRAM2_SIZE (32 * KiB)
+
+static void stm32l4x5_soc_initfn(Object *obj)
+{
+Stm32l4x5SocState *s = STM32L4X5_SOC(obj);
+
+s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0);
+s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0);
+}
+
+static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
+{
+ERRP_GUARD();
+Stm32l4x5SocState *s = STM32L4X5_SOC(dev_soc);
+const Stm32l4x5SocClass *sc = STM32L4X5_SOC_GET_CLASS(dev_soc);
+MemoryRegion *system_memory = get_system_memory();
+DeviceState *armv7m;
+
+/*
+ * We use s->refclk internally and only define it with qdev_init_clock_in()
+ * so it is correctly parented and not leaked on an init/deinit; it is not
+

[PULL 35/41] target/arm: Mark up VNCR offsets (offsets 0x100..0x160)

2024-01-11 Thread Peter Maydell

Mark up the cpreginfo structs to indicate offsets for system
registers from VNCR_EL2, as defined in table D8-66 in rule R_CSRPQ in
the Arm ARM.  This commit covers offsets 0x100 to 0x160.

Many (but not all) of the registers in this range have _EL12 aliases,
and the slot in memory is shared between the _EL12 version of the
register and the _EL1 version.  Where we programmatically generate
the regdef for the _EL12 register, arrange that its
nv2_redirect_offset is set up correctly to do this.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/debug_helper.c |  1 +
 target/arm/helper.c   | 22 ++
 2 files changed, 23 insertions(+)

diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index b39144d5b93..7d856acddf2 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -960,6 +960,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
   .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
   .access = PL1_RW, .accessfn = access_tda,
   .fgt = FGT_MDSCR_EL1,
+  .nv2_redirect_offset = 0x158,
   .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1),
   .resetvalue = 0 },
 /*
diff --git a/target/arm/helper.c b/target/arm/helper.c
index bc5a0810421..1d62d243cdc 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -647,6 +647,7 @@ static const ARMCPRegInfo cp_reginfo[] = {
   .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_CONTEXTIDR_EL1,
+  .nv2_redirect_offset = 0x108 | NV2_REDIR_NV1,
   .secure = ARM_CP_SECSTATE_NS,
   .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]),
   .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, 
},
@@ -883,6 +884,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
 { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
   .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access,
   .fgt = FGT_CPACR_EL1,
+  .nv2_redirect_offset = 0x100 | NV2_REDIR_NV1,
   .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
   .resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read },
 };
@@ -2250,11 +2252,13 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_AFSR0_EL1,
+  .nv2_redirect_offset = 0x128 | NV2_REDIR_NV1,
   .type = ARM_CP_CONST, .resetvalue = 0 },
 { .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_AFSR1_EL1,
+  .nv2_redirect_offset = 0x130 | NV2_REDIR_NV1,
   .type = ARM_CP_CONST, .resetvalue = 0 },
 /*
  * MAIR can just read-as-written because we don't implement caches
@@ -2264,6 +2268,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_MAIR_EL1,
+  .nv2_redirect_offset = 0x140 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]),
   .resetvalue = 0 },
 { .name = "MAIR_EL3", .state = ARM_CP_STATE_AA64,
@@ -4287,6 +4292,7 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
   .opc0 = 3, .crn = 5, .crm = 2, .opc1 = 0, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_ESR_EL1,
+  .nv2_redirect_offset = 0x138 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, },
 { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
   .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0,
@@ -4306,6 +4312,7 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
   .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_TCR_EL1,
+  .nv2_redirect_offset = 0x120 | NV2_REDIR_NV1,
   .writefn = vmsa_tcr_el12_write,
   .raw_writefn = raw_write,
   .resetvalue = 0,
@@ -4545,6 +4552,7 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
   .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_tvm_trvm,
   .fgt = FGT_AMAIR_EL1,
+  .nv2_redirect_offset = 0x148 | NV2_REDIR_NV1,
   .type = ARM_CP_CONST, .resetvalue = 0 },
 /* AMAIR1 is mapped to AMAIR_EL1[63:32] */
 { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
@@ -5734,6 +5742,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
   .type = ARM_CP_ALIAS,
   .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_nv1,
+  .nv2_redirect_offset = 0x160 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) },
 /*
  * We rely on the

[PULL 05/41] hw/arm/armv7m: alias the NVIC "num-prio-bits" property

2024-01-11 Thread Peter Maydell

From: Samuel Tardieu 

A SoC will not have a direct access to the NVIC embedded in its ARM
core. By aliasing the "num-prio-bits" property similarly to what is
done for the "num-irq" one, a SoC can easily configure it on its
armv7m instance.

Signed-off-by: Samuel Tardieu 
Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240106181503.1746200-3-...@rfc1149.net
Signed-off-by: Peter Maydell 
---
 include/hw/arm/armv7m.h | 1 +
 hw/arm/armv7m.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
index e2cebbd15c0..5c057ab2ec9 100644
--- a/include/hw/arm/armv7m.h
+++ b/include/hw/arm/armv7m.h
@@ -43,6 +43,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MState, ARMV7M)
  *   a qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET).
  * + Property "cpu-type": CPU type to instantiate
  * + Property "num-irq": number of external IRQ lines
+ * + Property "num-prio-bits": number of priority bits in the NVIC
  * + Property "memory": MemoryRegion defining the physical address space
  *   that CPU accesses see. (The NVIC, bitbanding and other CPU-internal
  *   devices will be automatically layered on top of this view.)
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index e39b61bc1af..1f218277734 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -256,6 +256,8 @@ static void armv7m_instance_init(Object *obj)
 object_initialize_child(obj, "nvic", &s->nvic, TYPE_NVIC);
 object_property_add_alias(obj, "num-irq",
   OBJECT(&s->nvic), "num-irq");
+object_property_add_alias(obj, "num-prio-bits",
+  OBJECT(&s->nvic), "num-prio-bits");
 
 object_initialize_child(obj, "systick-reg-ns", &s->systick[M_REG_NS],
 TYPE_SYSTICK);
-- 
2.34.1

[PULL 18/41] target/arm: Move FPU/SVE/SME access checks up above ARM_CP_SPECIAL_MASK check

2024-01-11 Thread Peter Maydell

In handle_sys() we don't do the check for whether the register is
marked as needing an FPU/SVE/SME access check until after we've
handled the special cases covered by ARM_CP_SPECIAL_MASK.  This is
conceptually the wrong way around, because if for example we happen
to implement an FPU-access-checked register as ARM_CP_NOP, we should
do the access check first.

Move the access checks up so they are with all the other access
checks, not sandwiched between the special-case read/write handling
and the normal-case read/write handling. This doesn't change
behaviour at the moment, because we happen not to define any
cpregs with both ARM_CP_{FPU,SVE,SME} and one of the cases
dealt with by ARM_CP_SPECIAL_MASK.

Moving this code also means we have the correct place to put the
FEAT_NV/FEAT_NV2 access handling, which should come after the access
checks and before we try to do any read/write action.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/tcg/translate-a64.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 0f30e71f9bd..5975fc47930 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -2190,6 +2190,14 @@ static void handle_sys(DisasContext *s, bool isread,
 gen_a64_update_pc(s, 0);
 }
 
+if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
+return;
+} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
+return;
+} else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
+return;
+}
+
 /* Handle special cases first */
 switch (ri->type & ARM_CP_SPECIAL_MASK) {
 case 0:
@@ -2268,13 +2276,6 @@ static void handle_sys(DisasContext *s, bool isread,
 default:
 g_assert_not_reached();
 }
-if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
-return;
-} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
-return;
-} else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
-return;
-}
 
 if (ri->type & ARM_CP_IO) {
 /* I/O operations must end the TB here (whether read or write) */
-- 
2.34.1

[PULL 36/41] target/arm: Mark up VNCR offsets (offsets 0x168..0x1f8)

2024-01-11 Thread Peter Maydell

Mark up the cpreginfo structs to indicate offsets for system
registers from VNCR_EL2, as defined in table D8-66 in rule R_CSRPQ in
the Arm ARM.  This commit covers offsets 0x168 to 0x1f8.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 1d62d243cdc..aa66f5169ab 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3191,6 +3191,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1,
   .type = ARM_CP_IO, .access = PL0_RW,
   .accessfn = gt_ptimer_access,
+  .nv2_redirect_offset = 0x180 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
   .resetvalue = 0,
   .readfn = gt_phys_redir_ctl_read, .raw_readfn = raw_read,
@@ -3208,6 +3209,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1,
   .type = ARM_CP_IO, .access = PL0_RW,
   .accessfn = gt_vtimer_access,
+  .nv2_redirect_offset = 0x170 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
   .resetvalue = 0,
   .readfn = gt_virt_redir_ctl_read, .raw_readfn = raw_read,
@@ -3287,6 +3289,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2,
   .access = PL0_RW,
   .type = ARM_CP_IO,
+  .nv2_redirect_offset = 0x178 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
   .resetvalue = 0, .accessfn = gt_ptimer_access,
   .readfn = gt_phys_redir_cval_read, .raw_readfn = raw_read,
@@ -3304,6 +3307,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2,
   .access = PL0_RW,
   .type = ARM_CP_IO,
+  .nv2_redirect_offset = 0x168 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
   .resetvalue = 0, .accessfn = gt_vtimer_access,
   .readfn = gt_virt_redir_cval_read, .raw_readfn = raw_read,
@@ -7052,6 +7056,7 @@ static void zcr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 static const ARMCPRegInfo zcr_reginfo[] = {
 { .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0,
+  .nv2_redirect_offset = 0x1e0 | NV2_REDIR_NV1,
   .access = PL1_RW, .type = ARM_CP_SVE,
   .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]),
   .writefn = zcr_write, .raw_writefn = raw_write },
@@ -7193,6 +7198,7 @@ static const ARMCPRegInfo sme_reginfo[] = {
   .writefn = svcr_write, .raw_writefn = raw_write },
 { .name = "SMCR_EL1", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 6,
+  .nv2_redirect_offset = 0x1f0 | NV2_REDIR_NV1,
   .access = PL1_RW, .type = ARM_CP_SME,
   .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[1]),
   .writefn = smcr_write, .raw_writefn = raw_write },
@@ -7226,6 +7232,7 @@ static const ARMCPRegInfo sme_reginfo[] = {
   .type = ARM_CP_CONST, .resetvalue = 0 },
 { .name = "SMPRIMAP_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 5,
+  .nv2_redirect_offset = 0x1f8,
   .access = PL2_RW, .accessfn = access_smprimap,
   .type = ARM_CP_CONST, .resetvalue = 0 },
 };
@@ -7948,6 +7955,7 @@ static const ARMCPRegInfo mte_reginfo[] = {
 { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0,
   .access = PL1_RW, .accessfn = access_tfsr_el1,
+  .nv2_redirect_offset = 0x190 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },
 { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,
   .type = ARM_CP_NV2_REDIRECT,
@@ -8122,6 +8130,7 @@ static const ARMCPRegInfo scxtnum_reginfo[] = {
   .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 7,
   .access = PL1_RW, .accessfn = access_scxtnum_el1,
   .fgt = FGT_SCXTNUM_EL1,
+  .nv2_redirect_offset = 0x188 | NV2_REDIR_NV1,
   .fieldoffset = offsetof(CPUARMState, scxtnum_el[1]) },
 { .name = "SCXTNUM_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 7,
@@ -8146,22 +8155,27 @@ static CPAccessResult access_fgt(CPUARMState *env, 
const ARMCPRegInfo *ri,
 static const ARMCPRegInfo fgt_reginfo[] = {
 { .name = "HFGRTR_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
+  .nv2_redirect_offset = 0x1b8,
   .access = PL2_RW, .accessfn = access_fgt,
   .fieldoffset = offsetof(CPUARMState, cp15.fgt_read[FGTREG_HFGRTR]) },
 { .name = "HFGWTR_EL2", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 =

[PULL 31/41] target/arm: Handle FEAT_NV2 redirection of SPSR_EL2, ELR_EL2, ESR_EL2, FAR_EL2

2024-01-11 Thread Peter Maydell

Under FEAT_NV2, when HCR_EL2.{NV,NV2} == 0b11 at EL1, accesses to the
registers SPSR_EL2, ELR_EL2, ESR_EL2, FAR_EL2 and TFSR_EL2 (which
would UNDEF without FEAT_NV or FEAT_NV2) should instead access the
equivalent EL1 registers SPSR_EL1, ELR_EL1, ESR_EL1, FAR_EL1 and
TFSR_EL1.

Because there are only five registers involved and the encoding for
the EL1 register is identical to that of the EL2 register except
that opc1 is 0, we handle this by finding the EL1 register in the
hash table and using it instead.

Note that traps that apply to direct accesses to the EL1 register,
such as active fine-grained traps or other trap bits, do not trigger
when it is accessed via the EL2 encoding in this way.  However, some
traps that are defined by the EL2 register may apply.  We therefore
call the EL2 register's accessfn first.  The only one of the five
which has such traps is TFSR_EL2: make sure its accessfn correctly
handles both FEAT_NV (where we trap to EL2 without checking ATA bits)
and FEAT_NV2 (where we check ATA bits and then redirect to TFSR_EL1).

(We don't need the NV1 tbflag bit until the next patch, but we
introduce it here to avoid putting the NV, NV1, NV2 bits in an
odd order.)

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpregs.h|  5 +
 target/arm/cpu.h   |  2 ++
 target/arm/tcg/translate.h |  4 
 target/arm/helper.c| 13 +
 target/arm/tcg/hflags.c|  6 ++
 target/arm/tcg/translate-a64.c | 33 -
 6 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index 3c5f1b48879..cb795bed75b 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -118,6 +118,11 @@ enum {
  * ARM pseudocode function CheckSMEAccess().
  */
 ARM_CP_SME   = 1 << 19,
+/*
+ * Flag: one of the five EL2 registers which redirect to the
+ * equivalent EL1 register when FEAT_NV2 is enabled.
+ */
+ARM_CP_NV2_REDIRECT  = 1 << 20,
 };
 
 /*
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 0e48a1366bd..f521219ea95 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3239,6 +3239,8 @@ FIELD(TBFLAG_A64, TRAP_ERET, 29, 1)
 FIELD(TBFLAG_A64, NAA, 30, 1)
 FIELD(TBFLAG_A64, ATA0, 31, 1)
 FIELD(TBFLAG_A64, NV, 32, 1)
+FIELD(TBFLAG_A64, NV1, 33, 1)
+FIELD(TBFLAG_A64, NV2, 34, 1)
 
 /*
  * Helpers for using the above. Note that only the A64 accessors use
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 63e075bce3a..9e13c4ef7b6 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -146,6 +146,10 @@ typedef struct DisasContext {
 bool naa;
 /* True if FEAT_NV HCR_EL2.NV is enabled */
 bool nv;
+/* True if NV enabled and HCR_EL2.NV1 is set */
+bool nv1;
+/* True if NV enabled and HCR_EL2.NV2 is set */
+bool nv2;
 /*
  * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
  *  < 0, set by the current instruction.
diff --git a/target/arm/helper.c b/target/arm/helper.c
index b9b3aaf4db7..93991c07b78 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6135,14 +6135,16 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
   .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
 { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
-  .type = ARM_CP_ALIAS,
+  .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT,
   .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
   .access = PL2_RW,
   .fieldoffset = offsetof(CPUARMState, elr_el[2]) },
 { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
+  .type = ARM_CP_NV2_REDIRECT,
   .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
   .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) },
 { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH,
+  .type = ARM_CP_NV2_REDIRECT,
   .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
   .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) },
 { .name = "HIFAR", .state = ARM_CP_STATE_AA32,
@@ -6151,7 +6153,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
   .access = PL2_RW,
   .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el[2]) },
 { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64,
-  .type = ARM_CP_ALIAS,
+  .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT,
   .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0,
   .access = PL2_RW,
   .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) },
@@ -7876,11 +7878,13 @@ static CPAccessResult access_tfsr_el2(CPUARMState *env, 
const ARMCPRegInfo *ri,
 /*
  * TFSR_EL2: similar to generic access_mte(), but we need to
  * account for FEAT_NV. At EL1 this must be a FEAT_NV access;
- * we will trap to EL2 and the HCR/SCR traps do not apply.
+ * if NV

[PULL 15/41] target/arm: Record correct opcode fields in cpreg for E2H aliases

2024-01-11 Thread Peter Maydell

For FEAT_VHE, we define a set of register aliases, so that for instance:
 * the SCTLR_EL1 either accesses the real SCTLR_EL1, or (if E2H is 1)
   SCTLR_EL2
 * a new SCTLR_EL12 register accesses SCTLR_EL1 if E2H is 1

However when we create the 'new_reg' cpreg struct for the SCTLR_EL12
register, we duplicate the information in the SCTLR_EL1 cpreg, which
means the opcode fields are those of SCTLR_EL1, not SCTLR_EL12.  This
is a problem for code which looks at the cpreg opcode fields to
determine behaviour (e.g.  in access_check_cp_reg()). In practice
the current checks we do there don't intersect with the *_EL12
registers, but for FEAT_NV this will become a problem.

Write the correct values from the encoding into the new_reg struct.
This restores the invariant that the cpreg that you get back
from the hashtable has opcode fields that match the key you used
to retrieve it.

When we call the readfn or writefn for the target register, we
pass it the cpreg struct for that target register, not the one
for the alias, in case the readfn/writefn want to look at the
opcode fields to determine behaviour. This means we need to
interpose custom read/writefns for the e12 aliases.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/helper.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index dc4b4123e00..dc2471eda7e 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6522,6 +6522,19 @@ static void el2_e2h_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 writefn(env, ri, value);
 }
 
+static uint64_t el2_e2h_e12_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+/* Pass the EL1 register accessor its ri, not the EL12 alias ri */
+return ri->orig_readfn(env, ri->opaque);
+}
+
+static void el2_e2h_e12_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+/* Pass the EL1 register accessor its ri, not the EL12 alias ri */
+return ri->orig_writefn(env, ri->opaque, value);
+}
+
 static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu)
 {
 struct E2HAlias {
@@ -6621,6 +6634,28 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU 
*cpu)
 new_reg->type |= ARM_CP_ALIAS;
 /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place.  */
 new_reg->access &= PL2_RW | PL3_RW;
+/* The new_reg op fields are as per new_key, not the target reg */
+new_reg->crn = (a->new_key & CP_REG_ARM64_SYSREG_CRN_MASK)
+>> CP_REG_ARM64_SYSREG_CRN_SHIFT;
+new_reg->crm = (a->new_key & CP_REG_ARM64_SYSREG_CRM_MASK)
+>> CP_REG_ARM64_SYSREG_CRM_SHIFT;
+new_reg->opc0 = (a->new_key & CP_REG_ARM64_SYSREG_OP0_MASK)
+>> CP_REG_ARM64_SYSREG_OP0_SHIFT;
+new_reg->opc1 = (a->new_key & CP_REG_ARM64_SYSREG_OP1_MASK)
+>> CP_REG_ARM64_SYSREG_OP1_SHIFT;
+new_reg->opc2 = (a->new_key & CP_REG_ARM64_SYSREG_OP2_MASK)
+>> CP_REG_ARM64_SYSREG_OP2_SHIFT;
+new_reg->opaque = src_reg;
+new_reg->orig_readfn = src_reg->readfn ?: raw_read;
+new_reg->orig_writefn = src_reg->writefn ?: raw_write;
+if (!new_reg->raw_readfn) {
+new_reg->raw_readfn = raw_read;
+}
+if (!new_reg->raw_writefn) {
+new_reg->raw_writefn = raw_write;
+}
+new_reg->readfn = el2_e2h_e12_read;
+new_reg->writefn = el2_e2h_e12_write;
 
 ok = g_hash_table_insert(cpu->cp_regs,
  (gpointer)(uintptr_t)a->new_key, new_reg);
-- 
2.34.1

[PULL 08/41] target/arm: Set CTR_EL0.{IDC,DIC} for the 'max' CPU

2024-01-11 Thread Peter Maydell

The CTR_EL0 register has some bits which allow the implementation to
tell the guest that it does not need to do cache maintenance for
data-to-instruction coherence and instruction-to-data coherence.
QEMU doesn't emulate caches and so our cache maintenance insns are
all NOPs.

We already have some models of specific CPUs where we set these bits
(e.g.  the Neoverse V1), but the 'max' CPU still uses the settings it
inherits from Cortex-A57.  Set the bits for 'max' as well, so the
guest doesn't need to do unnecessary work.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/tcg/cpu64.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index fcda99e1583..40e7a45166f 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1105,6 +1105,16 @@ void aarch64_max_tcg_initfn(Object *obj)
 u = FIELD_DP32(u, CLIDR_EL1, LOUU, 0);
 cpu->clidr = u;
 
+/*
+ * Set CTR_EL0.DIC and IDC to tell the guest it doesn't need to
+ * do any cache maintenance for data-to-instruction or
+ * instruction-to-data coherence. (Our cache ops are nops.)
+ */
+t = cpu->ctr;
+t = FIELD_DP64(t, CTR_EL0, IDC, 1);
+t = FIELD_DP64(t, CTR_EL0, DIC, 1);
+cpu->ctr = t;
+
 t = cpu->isar.id_aa64isar0;
 t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2);  /* FEAT_PMULL */
 t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); /* FEAT_SHA1 */
-- 
2.34.1

[PULL 28/41] target/arm: Handle HCR_EL2 accesses for FEAT_NV2 bits

2024-01-11 Thread Peter Maydell

FEAT_NV2 defines another new bit in HCR_EL2: NV2. When the
feature is enabled, allow this bit to be written in HCR_EL2.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Tested-by: Miguel Luis 
---
 target/arm/cpu-features.h | 5 +
 target/arm/helper.c   | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 3a43c328d9e..7a590c824cf 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -844,6 +844,11 @@ static inline bool isar_feature_aa64_nv(const 
ARMISARegisters *id)
 return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0;
 }
 
+static inline bool isar_feature_aa64_nv2(const ARMISARegisters *id)
+{
+return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) >= 2;
+}
+
 static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id)
 {
 return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 24751e05b24..e3e56539594 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5857,6 +5857,9 @@ static void do_hcr_write(CPUARMState *env, uint64_t 
value, uint64_t valid_mask)
 if (cpu_isar_feature(aa64_nv, cpu)) {
 valid_mask |= HCR_NV | HCR_NV1 | HCR_AT;
 }
+if (cpu_isar_feature(aa64_nv2, cpu)) {
+valid_mask |= HCR_NV2;
+}
 }
 
 if (cpu_isar_feature(any_evt, cpu)) {
-- 
2.34.1

[PULL 00/14] loongarch-to-apply queue

2024-01-11 Thread Song Gao

The following changes since commit 34eac35f893664eb8545b98142e23d9954722766:

  Merge tag 'pull-riscv-to-apply-20240110' of 
https://github.com/alistair23/qemu into staging (2024-01-10 11:41:56 +)

are available in the Git repository at:

  https://gitlab.com/gaosong/qemu.git tags/pull-loongarch-20240111

for you to fetch changes up to 428a6ef4396aa910c86e16c1e4409e3927a3698e:

  hw/intc/loongarch_extioi: Add vmstate post_load support (2024-01-11 19:22:47 
+0800)


pull-loongarch-20240111


Bibo Mao (4):
  hw/intc/loongarch_ipi: Use MemTxAttrs interface for ipi ops
  hw/loongarch/virt: Set iocsr address space per-board rather than percpu
  hw/intc/loongarch_extioi: Add dynamic cpu number support
  hw/intc/loongarch_extioi: Add vmstate post_load support

Tianrui Zhao (10):
  linux-headers: Synchronize linux headers from linux v6.7.0-rc8
  target/loongarch: Define some kvm_arch interfaces
  target/loongarch: Supplement vcpu env initial when vcpu reset
  target/loongarch: Implement kvm get/set registers
  target/loongarch: Implement kvm_arch_init function
  target/loongarch: Implement kvm_arch_init_vcpu
  target/loongarch: Implement kvm_arch_handle_exit
  target/loongarch: Restrict TCG-specific code
  target/loongarch: Implement set vcpu intr for kvm
  target/loongarch: Add loongarch kvm into meson build

 hw/intc/loongarch_extioi.c| 230 ++
 hw/intc/loongarch_ipi.c   | 191 +
 hw/loongarch/virt.c   |  94 +++--
 include/hw/intc/loongarch_extioi.h|  12 +-
 include/hw/intc/loongarch_ipi.h   |   3 +-
 include/hw/loongarch/virt.h   |   3 +
 include/standard-headers/linux/fuse.h |  10 +-
 meson.build   |   3 +
 target/loongarch/cpu.c|  90 ++--
 target/loongarch/cpu.h|   9 +-
 target/loongarch/internals.h  |   5 +-
 target/loongarch/kvm/kvm.c| 768 ++
 target/loongarch/kvm/kvm_loongarch.h  |  16 +
 target/loongarch/kvm/meson.build  |   1 +
 target/loongarch/meson.build  |   1 +
 target/loongarch/tcg/iocsr_helper.c   |  16 +-
 target/loongarch/trace-events |  15 +
 target/loongarch/trace.h  |   1 +
 18 files changed, 1210 insertions(+), 258 deletions(-)
 create mode 100644 target/loongarch/kvm/kvm.c
 create mode 100644 target/loongarch/kvm/kvm_loongarch.h
 create mode 100644 target/loongarch/kvm/meson.build
 create mode 100644 target/loongarch/trace-events
 create mode 100644 target/loongarch/trace.h

Re: [NOTFORMERGE PATCH 2/2] gitlab: Add Loongarch64 KVM-only build

2024-01-11 Thread Philippe Mathieu-Daudé


On 11/1/24 10:51, gaosong wrote:

在 2024/1/11 下午5:04, Thomas Huth 写道:

On 11/01/2024 09.50, gaosong wrote:

在 2024/1/11 下午4:20, Thomas Huth 写道:

On 11/01/2024 08.37, gaosong wrote:



LoongArch does not support these cmds, or they have some problems.
-    "gva2gpa 0",
-    "memsave 0 4096 \"/dev/null\"",
-    "x /8i 0x100",
-    "xp /16x 0",

Could we disable these 4 cmds or the test_temp check?
After we fix the cmds problems, we can enable them.


Even if loongarch does not support one of these commands, it should 
not crash QEMU. So please fix the crashes first before considering to 
enable the KVM-only test in the CI.




Sure,  we will fix the cmds problems first.


The issue might be missing get_phys_page_attrs_debug() implementation.

[PULL 04/14] target/loongarch: Implement kvm get/set registers

2024-01-11 Thread Song Gao

From: Tianrui Zhao 

Implement the kvm_arch_get/set_registers interfaces. Many regs
can be read and written by these functions, such as core regs,
csr regs, fpu regs, mp state, etc.

Signed-off-by: Tianrui Zhao 
Signed-off-by: xianglai li 
Reviewed-by: Song Gao 
Change-Id: Ia8fc48fe08b1768853f7729e77d37cdf270031e4
Message-Id: <20240105075804.1228596-5-zhaotian...@loongson.cn>
Signed-off-by: Song Gao 
---
 meson.build   |   1 +
 target/loongarch/cpu.c|   3 +
 target/loongarch/cpu.h|   1 +
 target/loongarch/internals.h  |   5 +-
 target/loongarch/kvm/kvm.c| 580 +-
 target/loongarch/trace-events |  11 +
 target/loongarch/trace.h  |   1 +
 7 files changed, 599 insertions(+), 3 deletions(-)
 create mode 100644 target/loongarch/trace-events
 create mode 100644 target/loongarch/trace.h

diff --git a/meson.build b/meson.build
index 371edafae6..b0dd87b8f8 100644
--- a/meson.build
+++ b/meson.build
@@ -3329,6 +3329,7 @@ if have_system or have_user
 'target/hppa',
 'target/i386',
 'target/i386/kvm',
+'target/loongarch',
 'target/mips/tcg',
 'target/nios2',
 'target/ppc',
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 9b768c9431..0c157bbd51 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -540,6 +540,9 @@ static void loongarch_cpu_reset_hold(Object *obj)
 #ifndef CONFIG_USER_ONLY
 env->pc = 0x1c00;
 memset(env->tlb, 0, sizeof(env->tlb));
+if (kvm_enabled()) {
+kvm_arch_reset_vcpu(env);
+}
 #endif
 
 restore_fp_status(env);
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 8022f44b44..a61c0405a2 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -360,6 +360,7 @@ typedef struct CPUArchState {
 MemoryRegion iocsr_mem;
 bool load_elf;
 uint64_t elf_address;
+uint32_t mp_state;
 /* Store ipistate to access from this struct */
 DeviceState *ipistate;
 #endif
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index c492863cc5..0beb034748 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -31,8 +31,10 @@ void G_NORETURN do_raise_exception(CPULoongArchState *env,
 
 const char *loongarch_exception_name(int32_t exception);
 
+#ifdef CONFIG_TCG
 int ieee_ex_to_loongarch(int xcpt);
 void restore_fp_status(CPULoongArchState *env);
+#endif
 
 #ifndef CONFIG_USER_ONLY
 extern const VMStateDescription vmstate_loongarch_cpu;
@@ -44,12 +46,13 @@ uint64_t 
cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu);
 uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu);
 void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu,
uint64_t value);
-
+#ifdef CONFIG_TCG
 bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
 MMUAccessType access_type, int mmu_idx,
 bool probe, uintptr_t retaddr);
 
 hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
+#endif
 #endif /* !CONFIG_USER_ONLY */
 
 uint64_t read_fcc(CPULoongArchState *env);
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index 0d67322fd9..e7c9ef830c 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -26,19 +26,595 @@
 #include "sysemu/runstate.h"
 #include "cpu-csr.h"
 #include "kvm_loongarch.h"
+#include "trace.h"
 
 static bool cap_has_mp_state;
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 KVM_CAP_LAST_INFO
 };
 
+static int kvm_loongarch_get_regs_core(CPUState *cs)
+{
+int ret = 0;
+int i;
+struct kvm_regs regs;
+LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+CPULoongArchState *env = &cpu->env;
+
+/* Get the current register set as KVM sees it */
+ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
+if (ret < 0) {
+trace_kvm_failed_get_regs_core(strerror(errno));
+return ret;
+}
+/* gpr[0] value is always 0 */
+env->gpr[0] = 0;
+for (i = 1; i < 32; i++) {
+env->gpr[i] = regs.gpr[i];
+}
+
+env->pc = regs.pc;
+return ret;
+}
+
+static int kvm_loongarch_put_regs_core(CPUState *cs)
+{
+int ret = 0;
+int i;
+struct kvm_regs regs;
+LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+CPULoongArchState *env = &cpu->env;
+
+/* Set the registers based on QEMU's view of things */
+for (i = 0; i < 32; i++) {
+regs.gpr[i] = env->gpr[i];
+}
+
+regs.pc = env->pc;
+ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
+if (ret < 0) {
+trace_kvm_failed_put_regs_core(strerror(errno));
+}
+
+return ret;
+}
+
+static int kvm_loongarch_get_csr(CPUState *cs)
+{
+int ret = 0;
+LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+CPULoongArchState *env = &cpu->env;
+
+ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CRMD),
+   &env->CSR_CRMD);
+
+ret |= kvm_get_one_reg(cs,

[PULL 00/14] loongarch-to-apply queue

2024-01-11 Thread Song Gao

The following changes since commit 34eac35f893664eb8545b98142e23d9954722766:

  Merge tag 'pull-riscv-to-apply-20240110' of 
https://github.com/alistair23/qemu into staging (2024-01-10 11:41:56 +)

are available in the Git repository at:

  https://gitlab.com/gaosong/qemu.git tags/pull-loongarch-20240111

for you to fetch changes up to 428a6ef4396aa910c86e16c1e4409e3927a3698e:

  hw/intc/loongarch_extioi: Add vmstate post_load support (2024-01-11 19:22:47 
+0800)


pull-loongarch-20240111


Bibo Mao (4):
  hw/intc/loongarch_ipi: Use MemTxAttrs interface for ipi ops
  hw/loongarch/virt: Set iocsr address space per-board rather than percpu
  hw/intc/loongarch_extioi: Add dynamic cpu number support
  hw/intc/loongarch_extioi: Add vmstate post_load support

Tianrui Zhao (10):
  linux-headers: Synchronize linux headers from linux v6.7.0-rc8
  target/loongarch: Define some kvm_arch interfaces
  target/loongarch: Supplement vcpu env initial when vcpu reset
  target/loongarch: Implement kvm get/set registers
  target/loongarch: Implement kvm_arch_init function
  target/loongarch: Implement kvm_arch_init_vcpu
  target/loongarch: Implement kvm_arch_handle_exit
  target/loongarch: Restrict TCG-specific code
  target/loongarch: Implement set vcpu intr for kvm
  target/loongarch: Add loongarch kvm into meson build

 hw/intc/loongarch_extioi.c| 230 ++
 hw/intc/loongarch_ipi.c   | 191 +
 hw/loongarch/virt.c   |  94 +++--
 include/hw/intc/loongarch_extioi.h|  12 +-
 include/hw/intc/loongarch_ipi.h   |   3 +-
 include/hw/loongarch/virt.h   |   3 +
 include/standard-headers/linux/fuse.h |  10 +-
 meson.build   |   3 +
 target/loongarch/cpu.c|  90 ++--
 target/loongarch/cpu.h|   9 +-
 target/loongarch/internals.h  |   5 +-
 target/loongarch/kvm/kvm.c| 768 ++
 target/loongarch/kvm/kvm_loongarch.h  |  16 +
 target/loongarch/kvm/meson.build  |   1 +
 target/loongarch/meson.build  |   1 +
 target/loongarch/tcg/iocsr_helper.c   |  16 +-
 target/loongarch/trace-events |  15 +
 target/loongarch/trace.h  |   1 +
 18 files changed, 1210 insertions(+), 258 deletions(-)
 create mode 100644 target/loongarch/kvm/kvm.c
 create mode 100644 target/loongarch/kvm/kvm_loongarch.h
 create mode 100644 target/loongarch/kvm/meson.build
 create mode 100644 target/loongarch/trace-events
 create mode 100644 target/loongarch/trace.h

[PULL 07/14] target/loongarch: Implement kvm_arch_handle_exit

2024-01-11 Thread Song Gao

From: Tianrui Zhao 

Implement kvm_arch_handle_exit for loongarch. This function
handles the KVM_EXIT_LOONGARCH_IOCSR exit: we read or write the
iocsr address space using the phys_addr, len and is_write fields
of iocsr_io in kvm_run.

Signed-off-by: Tianrui Zhao 
Signed-off-by: xianglai li 
Reviewed-by: Richard Henderson 
Reviewed-by: Song Gao 
Message-Id: <20240105075804.1228596-8-zhaotian...@loongson.cn>
Signed-off-by: Song Gao 
---
 target/loongarch/kvm/kvm.c| 24 +++-
 target/loongarch/trace-events |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index 85e7aeb083..d2dab3fef4 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -723,7 +723,29 @@ bool kvm_arch_cpu_check_are_resettable(void)
 
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
-return 0;
+int ret = 0;
+LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+CPULoongArchState *env = &cpu->env;
+MemTxAttrs attrs = {};
+
+attrs.requester_id = env_cpu(env)->cpu_index;
+
+trace_kvm_arch_handle_exit(run->exit_reason);
+switch (run->exit_reason) {
+case KVM_EXIT_LOONGARCH_IOCSR:
+address_space_rw(&env->address_space_iocsr,
+ run->iocsr_io.phys_addr,
+ attrs,
+ run->iocsr_io.data,
+ run->iocsr_io.len,
+ run->iocsr_io.is_write);
+break;
+default:
+ret = -1;
+warn_report("KVM: unknown exit reason %d", run->exit_reason);
+break;
+}
+return ret;
 }
 
 void kvm_arch_accel_class_init(ObjectClass *oc)
diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events
index 937c3c7c0c..021839880e 100644
--- a/target/loongarch/trace-events
+++ b/target/loongarch/trace-events
@@ -11,3 +11,4 @@ kvm_failed_get_counter(const char *msg) "Failed to get 
counter from KVM: %s"
 kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s"
 kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s"
 kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s"
+kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d"
-- 
2.25.1

[PULL 01/14] linux-headers: Synchronize linux headers from linux v6.7.0-rc8

2024-01-11 Thread Song Gao

From: Tianrui Zhao 

Use the scripts/update-linux-headers.sh to synchronize linux
headers from linux v6.7.0-rc8. We mainly want to add the
loongarch linux headers and then add the loongarch kvm support
based on it.

Signed-off-by: Tianrui Zhao 
Acked-by: Song Gao 
Message-Id: <20240105075804.1228596-2-zhaotian...@loongson.cn>
Signed-off-by: Song Gao 
---
 include/standard-headers/linux/fuse.h | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/standard-headers/linux/fuse.h 
b/include/standard-headers/linux/fuse.h
index 6b9793842c..fc0dcd10ae 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -209,7 +209,7 @@
  *  - add FUSE_HAS_EXPIRE_ONLY
  *
  *  7.39
- *  - add FUSE_DIRECT_IO_RELAX
+ *  - add FUSE_DIRECT_IO_ALLOW_MMAP
  *  - add FUSE_STATX and related structures
  */
 
@@ -405,8 +405,7 @@ struct fuse_file_lock {
  * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
  * symlink and mknod (single group that matches parent)
  * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
- * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now
- *   allow shared mmap
+ * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode.
  */
 #define FUSE_ASYNC_READ(1 << 0)
 #define FUSE_POSIX_LOCKS   (1 << 1)
@@ -445,7 +444,10 @@ struct fuse_file_lock {
 #define FUSE_HAS_INODE_DAX (1ULL << 33)
 #define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
 #define FUSE_HAS_EXPIRE_ONLY   (1ULL << 35)
-#define FUSE_DIRECT_IO_RELAX   (1ULL << 36)
+#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36)
+
+/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
+#define FUSE_DIRECT_IO_RELAX   FUSE_DIRECT_IO_ALLOW_MMAP
 
 /**
  * CUSE INIT request/reply flags
-- 
2.25.1

[PULL 11/14] hw/intc/loongarch_ipi: Use MemTxAttrs interface for ipi ops

2024-01-11 Thread Song Gao

From: Bibo Mao 

There are two interface pairs for MemoryRegionOps, read/write and
read_with_attrs/write_with_attrs. The latter is better for ipi device
emulation since the initiating cpu can be parsed from attrs.requester_id.

And requester_id can be overridden for the IOCSR_IPI_SEND and mail_send
functions when a message is to be forwarded to another vcpu.

Signed-off-by: Bibo Mao 
Reviewed-by: Song Gao 
Message-Id: <20231215100333.3933632-2-maob...@loongson.cn>
Signed-off-by: Song Gao 
---
 hw/intc/loongarch_ipi.c | 136 +++-
 1 file changed, 77 insertions(+), 59 deletions(-)

diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
index a155c16509..4e104df71b 100644
--- a/hw/intc/loongarch_ipi.c
+++ b/hw/intc/loongarch_ipi.c
@@ -17,14 +17,16 @@
 #include "target/loongarch/internals.h"
 #include "trace.h"
 
-static void loongarch_ipi_writel(void *, hwaddr, uint64_t, unsigned);
-
-static uint64_t loongarch_ipi_readl(void *opaque, hwaddr addr, unsigned size)
+static MemTxResult loongarch_ipi_readl(void *opaque, hwaddr addr,
+   uint64_t *data,
+   unsigned size, MemTxAttrs attrs)
 {
-IPICore *s = opaque;
+IPICore *s;
+LoongArchIPI *ipi = opaque;
 uint64_t ret = 0;
 int index = 0;
 
+s = &ipi->ipi_core;
 addr &= 0xff;
 switch (addr) {
 case CORE_STATUS_OFF:
@@ -49,10 +51,12 @@ static uint64_t loongarch_ipi_readl(void *opaque, hwaddr 
addr, unsigned size)
 }
 
 trace_loongarch_ipi_read(size, (uint64_t)addr, ret);
-return ret;
+*data = ret;
+return MEMTX_OK;
 }
 
-static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr)
+static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr,
+  MemTxAttrs attrs)
 {
 int i, mask = 0, data = 0;
 
@@ -62,7 +66,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t 
val, hwaddr addr)
  */
 if ((val >> 27) & 0xf) {
 data = address_space_ldl(&env->address_space_iocsr, addr,
- MEMTXATTRS_UNSPECIFIED, NULL);
+ attrs, NULL);
 for (i = 0; i < 4; i++) {
 /* get mask for byte writing */
 if (val & (0x1 << (27 + i))) {
@@ -74,7 +78,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t 
val, hwaddr addr)
 data &= mask;
 data |= (val >> 32) & ~mask;
 address_space_stl(&env->address_space_iocsr, addr,
-  data, MEMTXATTRS_UNSPECIFIED, NULL);
+  data, attrs, NULL);
 }
 
 static int archid_cmp(const void *a, const void *b)
@@ -103,80 +107,72 @@ static CPUState *ipi_getcpu(int arch_id)
 CPUArchId *archid;
 
 archid = find_cpu_by_archid(machine, arch_id);
-return CPU(archid->cpu);
-}
-
-static void ipi_send(uint64_t val)
-{
-uint32_t cpuid;
-uint8_t vector;
-CPUState *cs;
-LoongArchCPU *cpu;
-LoongArchIPI *s;
-
-cpuid = extract32(val, 16, 10);
-if (cpuid >= LOONGARCH_MAX_CPUS) {
-trace_loongarch_ipi_unsupported_cpuid("IOCSR_IPI_SEND", cpuid);
-return;
+if (archid) {
+return CPU(archid->cpu);
 }
 
-/* IPI status vector */
-vector = extract8(val, 0, 5);
-
-cs = ipi_getcpu(cpuid);
-cpu = LOONGARCH_CPU(cs);
-s = LOONGARCH_IPI(cpu->env.ipistate);
-loongarch_ipi_writel(&s->ipi_core, CORE_SET_OFF, BIT(vector), 4);
+return NULL;
 }
 
-static void mail_send(uint64_t val)
+static MemTxResult mail_send(uint64_t val, MemTxAttrs attrs)
 {
 uint32_t cpuid;
 hwaddr addr;
-CPULoongArchState *env;
 CPUState *cs;
-LoongArchCPU *cpu;
 
 cpuid = extract32(val, 16, 10);
 if (cpuid >= LOONGARCH_MAX_CPUS) {
 trace_loongarch_ipi_unsupported_cpuid("IOCSR_MAIL_SEND", cpuid);
-return;
+return MEMTX_DECODE_ERROR;
 }
 
-addr = 0x1020 + (val & 0x1c);
 cs = ipi_getcpu(cpuid);
-cpu = LOONGARCH_CPU(cs);
-env = &cpu->env;
-send_ipi_data(env, val, addr);
+if (cs == NULL) {
+return MEMTX_DECODE_ERROR;
+}
+
+/* override requester_id */
+addr = SMP_IPI_MAILBOX + CORE_BUF_20 + (val & 0x1c);
+attrs.requester_id = cs->cpu_index;
+send_ipi_data(&LOONGARCH_CPU(cs)->env, val, addr, attrs);
+return MEMTX_OK;
 }
 
-static void any_send(uint64_t val)
+static MemTxResult any_send(uint64_t val, MemTxAttrs attrs)
 {
 uint32_t cpuid;
 hwaddr addr;
-CPULoongArchState *env;
 CPUState *cs;
-LoongArchCPU *cpu;
 
 cpuid = extract32(val, 16, 10);
 if (cpuid >= LOONGARCH_MAX_CPUS) {
 trace_loongarch_ipi_unsupported_cpuid("IOCSR_ANY_SEND", cpuid);
-return;
+return MEMTX_DECODE_ERROR;
 }
 
-addr = val & 0xffff;
 cs = ipi_getcpu(cpuid);
-cpu = LOONGARCH_CPU(cs);
-env = &cpu->env;
-send_ipi_data(env, val, addr);
+if (cs == NULL) {
+return MEMTX_DECODE_ERROR;
+}

[PULL 03/14] target/loongarch: Supplement vcpu env initial when vcpu reset

2024-01-11 Thread Song Gao

From: Tianrui Zhao 

Supplement the vcpu env initialization on vcpu reset, including
initializing the vcpu CSR_CPUID and CSR_TID to cpu->cpu_index. The two
regs will be used in kvm_get/set_csr_ioctl.

Signed-off-by: Tianrui Zhao 
Signed-off-by: xianglai li 
Reviewed-by: Song Gao 
Message-Id: <20240105075804.1228596-4-zhaotian...@loongson.cn>
Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c | 2 ++
 target/loongarch/cpu.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 87dfcdb0a5..9b768c9431 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -518,10 +518,12 @@ static void loongarch_cpu_reset_hold(Object *obj)
 
 env->CSR_ESTAT = env->CSR_ESTAT & (~MAKE_64BIT_MASK(0, 2));
 env->CSR_RVACFG = FIELD_DP64(env->CSR_RVACFG, CSR_RVACFG, RBITS, 0);
+env->CSR_CPUID = cs->cpu_index;
 env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0);
 env->CSR_LLBCTL = FIELD_DP64(env->CSR_LLBCTL, CSR_LLBCTL, KLO, 0);
 env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0);
 env->CSR_MERRCTL = FIELD_DP64(env->CSR_MERRCTL, CSR_MERRCTL, ISMERR, 0);
+env->CSR_TID = cs->cpu_index;
 
 env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, TLB_TYPE, 2);
 env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, MTLB_ENTRY, 63);
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 0c15a174e4..8022f44b44 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -319,6 +319,7 @@ typedef struct CPUArchState {
 uint64_t CSR_PWCH;
 uint64_t CSR_STLBPS;
 uint64_t CSR_RVACFG;
+uint64_t CSR_CPUID;
 uint64_t CSR_PRCFG1;
 uint64_t CSR_PRCFG2;
 uint64_t CSR_PRCFG3;
@@ -350,7 +351,6 @@ typedef struct CPUArchState {
 uint64_t CSR_DBG;
 uint64_t CSR_DERA;
 uint64_t CSR_DSAVE;
-uint64_t CSR_CPUID;
 
 #ifndef CONFIG_USER_ONLY
 LoongArchTLB  tlb[LOONGARCH_TLB_MAX];
-- 
2.25.1

[PULL 12/14] hw/loongarch/virt: Set iocsr address space per-board rather than percpu

2024-01-11 Thread Song Gao

From: Bibo Mao 

LoongArch system has iocsr address space, most iocsr registers are
per-board, however some iocsr register spaces are banked per-cpu, such
as ipi mailbox and extioi interrupt status. For banked iocsr space,
each cpu has the same iocsr space, but separate data.

This patch changes the iocsr address space to be per-board rather than
per-cpu; for iocsr registers specific to a cpu, MemTxAttrs.requester_id
can be parsed to find the cpu. With this patch, the total set of address
spaces on the board becomes simple: only one iocsr address space and system
memory, rather than one iocsr address space per cpu plus system memory.

Signed-off-by: Bibo Mao 
Reviewed-by: Song Gao 
Message-Id: <20231215100333.3933632-3-maob...@loongson.cn>
Signed-off-by: Song Gao 
---
 hw/intc/loongarch_extioi.c  |  3 -
 hw/intc/loongarch_ipi.c | 61 ++-
 hw/loongarch/virt.c | 91 +
 include/hw/intc/loongarch_extioi.h  |  1 -
 include/hw/intc/loongarch_ipi.h |  3 +-
 include/hw/loongarch/virt.h |  3 +
 target/loongarch/cpu.c  | 48 ---
 target/loongarch/cpu.h  |  4 +-
 target/loongarch/kvm/kvm.c  |  2 +-
 target/loongarch/tcg/iocsr_helper.c | 16 ++---
 10 files changed, 128 insertions(+), 104 deletions(-)

diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
index 4fa97f05bd..b37b4abf9d 100644
--- a/hw/intc/loongarch_extioi.c
+++ b/hw/intc/loongarch_extioi.c
@@ -282,9 +282,6 @@ static void loongarch_extioi_instance_init(Object *obj)
 qdev_init_gpio_in(DEVICE(obj), extioi_setirq, EXTIOI_IRQS);
 
 for (cpu = 0; cpu < EXTIOI_CPUS; cpu++) {
-memory_region_init_io(&s->extioi_iocsr_mem[cpu], OBJECT(s), 
&extioi_ops,
-  s, "extioi_iocsr", 0x900);
-sysbus_init_mmio(dev, &s->extioi_iocsr_mem[cpu]);
 for (pin = 0; pin < LS3A_INTC_IP; pin++) {
 qdev_init_gpio_out(DEVICE(obj), &s->parent_irq[cpu][pin], 1);
 }
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
index 4e104df71b..a184112b09 100644
--- a/hw/intc/loongarch_ipi.c
+++ b/hw/intc/loongarch_ipi.c
@@ -9,6 +9,7 @@
 #include "hw/sysbus.h"
 #include "hw/intc/loongarch_ipi.h"
 #include "hw/irq.h"
+#include "hw/qdev-properties.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
 #include "exec/address-spaces.h"
@@ -26,7 +27,7 @@ static MemTxResult loongarch_ipi_readl(void *opaque, hwaddr 
addr,
 uint64_t ret = 0;
 int index = 0;
 
-s = &ipi->ipi_core;
+s = &ipi->cpu[attrs.requester_id];
 addr &= 0xff;
 switch (addr) {
 case CORE_STATUS_OFF:
@@ -65,7 +66,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t 
val, hwaddr addr,
  * if the mask is 0, we need not to do anything.
  */
 if ((val >> 27) & 0xf) {
-data = address_space_ldl(&env->address_space_iocsr, addr,
+data = address_space_ldl(env->address_space_iocsr, addr,
  attrs, NULL);
 for (i = 0; i < 4; i++) {
 /* get mask for byte writing */
@@ -77,7 +78,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t 
val, hwaddr addr,
 
 data &= mask;
 data |= (val >> 32) & ~mask;
-address_space_stl(&env->address_space_iocsr, addr,
+address_space_stl(env->address_space_iocsr, addr,
   data, attrs, NULL);
 }
 
@@ -172,7 +173,7 @@ static MemTxResult loongarch_ipi_writel(void *opaque, 
hwaddr addr, uint64_t val,
 uint8_t vector;
 CPUState *cs;
 
-s = &ipi->ipi_core;
+s = &ipi->cpu[attrs.requester_id];
 addr &= 0xff;
 trace_loongarch_ipi_write(size, (uint64_t)addr, val);
 switch (addr) {
@@ -214,7 +215,6 @@ static MemTxResult loongarch_ipi_writel(void *opaque, 
hwaddr addr, uint64_t val,
 
 /* override requester_id */
 attrs.requester_id = cs->cpu_index;
-ipi = LOONGARCH_IPI(LOONGARCH_CPU(cs)->env.ipistate);
 loongarch_ipi_writel(ipi, CORE_SET_OFF, BIT(vector), 4, attrs);
 break;
 default:
@@ -265,12 +265,18 @@ static const MemoryRegionOps loongarch_ipi64_ops = {
 .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static void loongarch_ipi_init(Object *obj)
+static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
 {
-LoongArchIPI *s = LOONGARCH_IPI(obj);
-SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+LoongArchIPI *s = LOONGARCH_IPI(dev);
+SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+int i;
+
+if (s->num_cpu == 0) {
+error_setg(errp, "num-cpu must be at least 1");
+return;
+}
 
-memory_region_init_io(&s->ipi_iocsr_mem, obj, &loongarch_ipi_ops,
+memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev), &loongarch_ipi_ops,
   s, "loongarch_ipi_iocsr", 0x48);
 
 /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */
@@ -278,10 +284,20 @@ static void loongarch_ipi_init(Object *obj)
 
 sysbus_init_mmio(sbd, &s->ipi_iocsr_mem);
 
-memory_regi

[PULL 09/14] target/loongarch: Implement set vcpu intr for kvm

2024-01-11 Thread Song Gao

From: Tianrui Zhao 

Implement the loongarch kvm set vcpu interrupt interface:
when an irq is set in a vcpu, we use the KVM_INTERRUPT
ioctl to set the intr into kvm.

Signed-off-by: Tianrui Zhao 
Signed-off-by: xianglai li 
Reviewed-by: Song Gao 
Message-ID: <20240105075804.1228596-9-zhaotian...@loongson.cn>
[PMD: Split from bigger patch, part 2]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20240110094152.52138-2-phi...@linaro.org>
Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c   |  9 -
 target/loongarch/kvm/kvm.c   | 15 +++
 target/loongarch/kvm/kvm_loongarch.h | 16 
 target/loongarch/trace-events|  1 +
 4 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 target/loongarch/kvm/kvm_loongarch.h

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 70dd4622aa..7b94bab540 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -12,6 +12,8 @@
 #include "qemu/module.h"
 #include "sysemu/qtest.h"
 #include "sysemu/tcg.h"
+#include "sysemu/kvm.h"
+#include "kvm/kvm_loongarch.h"
 #include "exec/exec-all.h"
 #include "cpu.h"
 #include "internals.h"
@@ -21,6 +23,9 @@
 #include "sysemu/reset.h"
 #endif
 #include "vec.h"
+#ifdef CONFIG_KVM
+#include <linux/kvm.h>
+#endif
 #ifdef CONFIG_TCG
 #include "exec/cpu_ldst.h"
 #include "tcg/tcg.h"
@@ -113,7 +118,9 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level)
 return;
 }
 
-if (tcg_enabled()) {
+if (kvm_enabled()) {
+kvm_loongarch_set_interrupt(cpu, irq, level);
+} else if (tcg_enabled()) {
 env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0);
 if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) {
 cpu_interrupt(cs, CPU_INTERRUPT_HARD);
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index d2dab3fef4..bd33ec2114 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -748,6 +748,21 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 return ret;
 }
 
+int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level)
+{
+struct kvm_interrupt intr;
+CPUState *cs = CPU(cpu);
+
+if (level) {
+intr.irq = irq;
+} else {
+intr.irq = -irq;
+}
+
+trace_kvm_set_intr(irq, level);
+return kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &intr);
+}
+
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
 }
diff --git a/target/loongarch/kvm/kvm_loongarch.h 
b/target/loongarch/kvm/kvm_loongarch.h
new file mode 100644
index 0000000000..d945b6bb82
--- /dev/null
+++ b/target/loongarch/kvm/kvm_loongarch.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch kvm interface
+ *
+ * Copyright (c) 2023 Loongson Technology Corporation Limited
+ */
+
+#include "cpu.h"
+
+#ifndef QEMU_KVM_LOONGARCH_H
+#define QEMU_KVM_LOONGARCH_H
+
+int  kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level);
+void kvm_arch_reset_vcpu(CPULoongArchState *env);
+
+#endif
diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events
index 021839880e..dea11edc0f 100644
--- a/target/loongarch/trace-events
+++ b/target/loongarch/trace-events
@@ -12,3 +12,4 @@ kvm_failed_put_counter(const char *msg) "Failed to put 
counter into KVM: %s"
 kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s"
 kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s"
 kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d"
+kvm_set_intr(int irq, int level) "kvm set interrupt, irq num: %d, level: %d"
-- 
2.25.1

[PULL 08/14] target/loongarch: Restrict TCG-specific code

2024-01-11 Thread Song Gao

From: Tianrui Zhao 

In preparation for supporting KVM in the next commit.

Signed-off-by: Tianrui Zhao 
Signed-off-by: xianglai li 
Reviewed-by: Song Gao 
Message-ID: <20240105075804.1228596-9-zhaotian...@loongson.cn>
[PMD: Split from bigger patch, part 1]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20240110094152.52138-1-phi...@linaro.org>
Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c | 30 +-
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 0c157bbd51..70dd4622aa 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -11,7 +11,7 @@
 #include "qapi/error.h"
 #include "qemu/module.h"
 #include "sysemu/qtest.h"
-#include "exec/cpu_ldst.h"
+#include "sysemu/tcg.h"
 #include "exec/exec-all.h"
 #include "cpu.h"
 #include "internals.h"
@@ -20,8 +20,11 @@
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/reset.h"
 #endif
-#include "tcg/tcg.h"
 #include "vec.h"
+#ifdef CONFIG_TCG
+#include "exec/cpu_ldst.h"
+#include "tcg/tcg.h"
+#endif
 
 const char * const regnames[32] = {
 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
@@ -110,12 +113,13 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int 
level)
 return;
 }
 
-env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0);
-
-if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) {
-cpu_interrupt(cs, CPU_INTERRUPT_HARD);
-} else {
-cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
+if (tcg_enabled()) {
+env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0);
+if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) {
+cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+} else {
+cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
+}
 }
 }
 
@@ -140,7 +144,10 @@ static inline bool 
cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env)
 
 return (pending & status) != 0;
 }
+#endif
 
+#ifdef CONFIG_TCG
+#ifndef CONFIG_USER_ONLY
 static void loongarch_cpu_do_interrupt(CPUState *cs)
 {
 LoongArchCPU *cpu = LOONGARCH_CPU(cs);
@@ -322,7 +329,6 @@ static bool loongarch_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 }
 #endif
 
-#ifdef CONFIG_TCG
 static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
   const TranslationBlock *tb)
 {
@@ -545,7 +551,9 @@ static void loongarch_cpu_reset_hold(Object *obj)
 }
 #endif
 
+#ifdef CONFIG_TCG
 restore_fp_status(env);
+#endif
 cs->exception_index = -1;
 }
 
@@ -688,8 +696,10 @@ static void loongarch_cpu_init(Object *obj)
 CPULoongArchState *env = &cpu->env;
 
 qdev_init_gpio_in(DEVICE(cpu), loongarch_cpu_set_irq, N_IRQS);
+#ifdef CONFIG_TCG
 timer_init_ns(&cpu->timer, QEMU_CLOCK_VIRTUAL,
   &loongarch_constant_timer_cb, cpu);
+#endif
 memory_region_init_io(&env->system_iocsr, OBJECT(cpu), NULL,
   env, "iocsr", UINT64_MAX);
 address_space_init(&env->address_space_iocsr, &env->system_iocsr, "IOCSR");
@@ -783,7 +793,9 @@ static struct TCGCPUOps loongarch_tcg_ops = {
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps loongarch_sysemu_ops = {
+#ifdef CONFIG_TCG
 .get_phys_page_debug = loongarch_cpu_get_phys_page_debug,
+#endif
 };
 
 static int64_t loongarch_cpu_get_arch_id(CPUState *cs)
-- 
2.25.1

1 2 3 4 >

1 - 100 of 309 matches

Mail list logo