date:20241023

Re: [PATCH v2 01/10] xen/arm: ffa: Rework firmware discovery

2024-10-23 Thread Bertrand Marquis

Hi Jens,

> On 21 Oct 2024, at 19:03, Jens Wiklander  wrote:
> 
> Hi Bertrand,
> 
> On Wed, Oct 16, 2024 at 10:32 AM Bertrand Marquis
>  wrote:
>> 
>> Rework firmware discovery during probe:
>> - move prints into the probe
>> - rename ffa_version to ffa_fw_version as the variable identifies the
>>  version of the firmware and not the one we support
>> - add error prints when allocation fail during probe
>> 
>> No functional changes.
>> 
>> Signed-off-by: Bertrand Marquis 
>> ---
>> Changes in v2:
>> - Fix error message when we fail to retrieve ffa_version
>> - Move back printing the firmware version before checking supported
>>  features
>> - Use Warning instead of Info to inform user that FF-A is not supported
>>  in firmware.
>> ---
>> xen/arch/arm/tee/ffa.c | 44 ++
>> 1 file changed, 27 insertions(+), 17 deletions(-)
>> 
>> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c
>> index 022089278e1c..1cc4023135d5 100644
>> --- a/xen/arch/arm/tee/ffa.c
>> +++ b/xen/arch/arm/tee/ffa.c
>> @@ -71,8 +71,8 @@
>> 
>> #include "ffa_private.h"
>> 
>> -/* Negotiated FF-A version to use with the SPMC */
>> -static uint32_t __ro_after_init ffa_version;
>> +/* Negotiated FF-A version to use with the SPMC, 0 if not there or 
>> supported */
>> +static uint32_t __ro_after_init ffa_fw_version;
>> 
>> 
>> /*
>> @@ -105,10 +105,7 @@ static bool ffa_get_version(uint32_t *vers)
>> 
>> arm_smccc_1_2_smc(&arg, &resp);
>> if ( resp.a0 == FFA_RET_NOT_SUPPORTED )
>> -{
>> -gprintk(XENLOG_ERR, "ffa: FFA_VERSION returned not supported\n");
>> return false;
>> -}
>> 
>> *vers = resp.a0;
>> 
>> @@ -372,7 +369,7 @@ static int ffa_domain_init(struct domain *d)
>> struct ffa_ctx *ctx;
>> int ret;
>> 
>> -if ( !ffa_version )
>> +if ( !ffa_fw_version )
>> return -ENODEV;
>>  /*
>>   * We can't use that last possible domain ID or ffa_get_vm_id() would
>> @@ -505,6 +502,9 @@ static bool ffa_probe(void)
>>  */
>> BUILD_BUG_ON(PAGE_SIZE != FFA_PAGE_SIZE);
>> 
>> +printk(XENLOG_INFO "ARM FF-A Mediator version %u.%u\n",
>> +   FFA_MY_VERSION_MAJOR, FFA_MY_VERSION_MINOR);
>> +
>> /*
>>  * psci_init_smccc() updates this value with what's reported by EL-3
>>  * or secure world.
>> @@ -514,22 +514,24 @@ static bool ffa_probe(void)
>> printk(XENLOG_ERR
>>"ffa: unsupported SMCCC version %#x (need at least %#x)\n",
>>smccc_ver, ARM_SMCCC_VERSION_1_2);
>> -return false;
>> +goto err_no_fw;
>> }
>> 
>> if ( !ffa_get_version(&vers) )
>> -return false;
>> +{
>> +gprintk(XENLOG_ERR, "Cannot retrieve the FFA version\n");
>> +goto err_no_fw;
>> +}
>> 
>> if ( vers < FFA_MIN_SPMC_VERSION || vers > FFA_MY_VERSION )
>> {
>> printk(XENLOG_ERR "ffa: Incompatible version %#x found\n", vers);
>> -return false;
>> +goto err_no_fw;
>> }
>> 
>> -major_vers = (vers >> FFA_VERSION_MAJOR_SHIFT) & FFA_VERSION_MAJOR_MASK;
>> +major_vers = (vers >> FFA_VERSION_MAJOR_SHIFT)
>> + & FFA_VERSION_MAJOR_MASK;
> 
> Spurious change?

Yes, I will fix that in next version.

> 
>> minor_vers = vers & FFA_VERSION_MINOR_MASK;
>> -printk(XENLOG_INFO "ARM FF-A Mediator version %u.%u\n",
>> -   FFA_MY_VERSION_MAJOR, FFA_MY_VERSION_MINOR);
> 
> It's not a big deal, but isn't it useful to know which version we're
> at? If it's too much with a separate line, how about adding "(our
> version %u.u%)" at the end of the line below?

This was moved up.

> 
>> printk(XENLOG_INFO "ARM FF-A Firmware version %u.%u\n",
>>major_vers, minor_vers);
>> 
>> @@ -546,12 +548,18 @@ static bool ffa_probe(void)
>>  !check_mandatory_feature(FFA_MEM_SHARE_32) ||
>>  !check_mandatory_feature(FFA_MEM_RECLAIM) ||
>>  !check_mandatory_feature(FFA_MSG_SEND_DIRECT_REQ_32) )
>> -return false;
>> +{
>> +printk(XENLOG_ERR "ffa: Mandatory feature not supported by fw\n");
>> +goto err_no_fw;
>> +}
>> 
>> -if ( !ffa_rxtx_init() )
>> -return false;
>> +ffa_fw_version = vers;
>> 
>> -ffa_version = vers;
>> +if ( !ffa_rxtx_init() )
>> +{
>> +printk(XENLOG_ERR "ffa: Error during RXTX buffer init\n");
> 
> With this added, wouldn't it make sense to remove the error print in
> ffa_rxtx_init()?

Definitely, I missed that.
Will fix in v3.

Cheers
Bertrand

> 
> Cheers,
> Jens
> 
>> +goto err_no_fw;
>> +}
>> 
>> if ( !ffa_partinfo_init() )
>> goto err_rxtx_destroy;
>> @@ -564,7 +572,9 @@ static bool ffa_probe(void)
>> 
>> err_rxtx_destroy:
>> ffa_rxtx_destroy();
>> -ffa_version = 0;
>> +err_no_fw:
>> +ffa_fw_version = 0;
>> +printk(XENLOG_WARNING "ARM FF-A No firmware support\n");
>> 
>> return false;
>> }
>> --
>> 2.47.0

Re: [PATCH v2 02/10] xen/arm: ffa: Rework feature discovery

2024-10-23 Thread Bertrand Marquis

Hi Jens,

> On 22 Oct 2024, at 08:30, Jens Wiklander  wrote:
> 
> Hi Bertrand,
> 
> On Wed, Oct 16, 2024 at 10:32 AM Bertrand Marquis
>  wrote:
>> 
>> Store the list of ABI we need in a list and go through the list instead
>> of having a list of conditions inside the code.
>> 
>> No functional change.
>> 
>> Signed-off-by: Bertrand Marquis 
>> ---
>> Changes in v2:
>> - Store a string version of ABI needed from firmware and print the name
>>  of the ABI not supported instead of the id
>> - Restore comment with TODO which should not have been removed at this
>>  stage
>> - fix to unsigned int the index in the loop on the array
>> - use abi instead of feature in the names of the functions and variables
>>  as we are not checking features but abis
>> ---
>> xen/arch/arm/tee/ffa.c | 57 +-
>> 1 file changed, 34 insertions(+), 23 deletions(-)
>> 
>> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c
>> index 1cc4023135d5..dde932422ecf 100644
>> --- a/xen/arch/arm/tee/ffa.c
>> +++ b/xen/arch/arm/tee/ffa.c
>> @@ -74,6 +74,31 @@
>> /* Negotiated FF-A version to use with the SPMC, 0 if not there or supported 
>> */
>> static uint32_t __ro_after_init ffa_fw_version;
>> 
>> +struct ffa_fw_abi {
>> +const uint32_t id;
> 
> I prefer removing the const attribute for this struct member since it
> doesn't add anything when the struct itself is const.

Works for me.
Will fix in v3.

Cheers
Bertrand

> 
> Cheers,
> Jens
> 
>> +const char *name;
>> +};
>> +
>> +#define FW_ABI(abi) {abi,#abi}
>> +
>> +/* List of ABI we use from the firmware */
>> +static const struct ffa_fw_abi ffa_fw_abi_needed[] = {
>> +FW_ABI(FFA_VERSION),
>> +FW_ABI(FFA_FEATURES),
>> +FW_ABI(FFA_NOTIFICATION_BITMAP_CREATE),
>> +FW_ABI(FFA_NOTIFICATION_BITMAP_DESTROY),
>> +FW_ABI(FFA_PARTITION_INFO_GET),
>> +FW_ABI(FFA_NOTIFICATION_INFO_GET_64),
>> +FW_ABI(FFA_NOTIFICATION_GET),
>> +FW_ABI(FFA_RX_RELEASE),
>> +FW_ABI(FFA_RXTX_MAP_64),
>> +FW_ABI(FFA_RXTX_UNMAP),
>> +FW_ABI(FFA_MEM_SHARE_32),
>> +FW_ABI(FFA_MEM_SHARE_64),
>> +FW_ABI(FFA_MEM_RECLAIM),
>> +FW_ABI(FFA_MSG_SEND_DIRECT_REQ_32),
>> +FW_ABI(FFA_MSG_SEND_DIRECT_REQ_64),
>> +};
>> 
>> /*
>>  * Our rx/tx buffers shared with the SPMC. FFA_RXTX_PAGE_COUNT is the
>> @@ -112,20 +137,9 @@ static bool ffa_get_version(uint32_t *vers)
>> return true;
>> }
>> 
>> -static int32_t ffa_features(uint32_t id)
>> -{
>> -return ffa_simple_call(FFA_FEATURES, id, 0, 0, 0);
>> -}
>> -
>> -static bool check_mandatory_feature(uint32_t id)
>> +static bool ffa_abi_supported(uint32_t id)
>> {
>> -int32_t ret = ffa_features(id);
>> -
>> -if ( ret )
>> -printk(XENLOG_ERR "ffa: mandatory feature id %#x missing: error 
>> %d\n",
>> -   id, ret);
>> -
>> -return !ret;
>> +return !ffa_simple_call(FFA_FEATURES, id, 0, 0, 0);
>> }
>> 
>> static void handle_version(struct cpu_user_regs *regs)
>> @@ -540,17 +554,14 @@ static bool ffa_probe(void)
>>  * TODO: Rework the code to allow domain to use a subset of the
>>  * features supported.
>>  */
>> -if ( !check_mandatory_feature(FFA_PARTITION_INFO_GET) ||
>> - !check_mandatory_feature(FFA_RX_RELEASE) ||
>> - !check_mandatory_feature(FFA_RXTX_MAP_64) ||
>> - !check_mandatory_feature(FFA_MEM_SHARE_64) ||
>> - !check_mandatory_feature(FFA_RXTX_UNMAP) ||
>> - !check_mandatory_feature(FFA_MEM_SHARE_32) ||
>> - !check_mandatory_feature(FFA_MEM_RECLAIM) ||
>> - !check_mandatory_feature(FFA_MSG_SEND_DIRECT_REQ_32) )
>> +for ( unsigned int i = 0; i < ARRAY_SIZE(ffa_fw_abi_needed); i++ )
>> {
>> -printk(XENLOG_ERR "ffa: Mandatory feature not supported by fw\n");
>> -goto err_no_fw;
>> +if ( !ffa_abi_supported(ffa_fw_abi_needed[i].id) )
>> +{
>> +printk(XENLOG_INFO "ARM FF-A Firmware does not support %s\n",
>> +   ffa_fw_abi_needed[i].name);
>> +goto err_no_fw;
>> +}
>> }
>> 
>> ffa_fw_version = vers;
>> --
>> 2.47.0

Re: [PATCH] x86/boot: Fix PVH boot during boot_info transition period

2024-10-23 Thread Andrew Cooper

On 23/10/2024 8:10 am, Alan Robinson wrote:
> Hi Andrew,
>
> A small suggestion for the commit log..
>
> On Tue, Oct 22, 2024 at 12:41:14PM +, Andrew Cooper wrote:
>> multiboot_fill_boot_info() taking the physical address of the multiboot_info
>> structure leads to a subtle use-after-free on the PVH path, with rather less
>> subtle fallout.
>>
>> The pointers used by __start_xen(), mbi and mod, are either:
>>
>>  - MB:  Directmap pointers into the trampoline, or
>>  - PVH: Xen pointers into .initdata, or
>>  - EFI: Directmap pointers into Xen.
>>
>> Critically, these either remain valid across move_xen() (MB, PVH), or rely on
>> move_xen() being inhibited (EFI).
>>
>> The conversion to multiboot_fill_boot_info(), taking only mbi_p, makes the 
>> PVH
>> path use directmap pointers into Xen, as well as move_xen() which invalidates
>> said pointers.
>>
>> Switch multiboot_fill_boot_info() to consume the same virtual addresses that
>> __start_xen() currently uses.  This keeps all the pointers valid for the
>> duration of __start_xen(), for all boot protocols.
>>
>> It can be safely untangled once multiboot_fill_boot_info() takes a full copy
>> the multiboot info data, and __start_xen() has been moved over to using the
>  of the multiboot info data, and __start_xen() has been moved over to using 
> the

Thanks for noticing.  Unfortunately this change was already taken to
unblock other pending work.

~Andrew

Re: [QEMU PATCH v8] xen/passthrough: use gsi to map pirq when dom0 is PVH

2024-10-23 Thread Chen, Jiqian

On 2024/10/22 04:55, Stewart Hildebrand wrote:
> On 10/16/24 02:28, Jiqian Chen wrote:
>> In PVH dom0, when passthrough a device to domU, QEMU code
>> xen_pt_realize->xc_physdev_map_pirq wants to use gsi, but in current codes
>> the gsi number is got from file /sys/bus/pci/devices//irq, that is
>> wrong, because irq is not equal with gsi, they are in different spaces, so
>> pirq mapping fails.
>>
>> To solve above problem, use new interface of Xen, xc_pcidev_get_gsi to get
>> gsi and use xc_physdev_map_pirq_gsi to map pirq when dom0 is PVH.
>>
>> Signed-off-by: Jiqian Chen 
>> Signed-off-by: Huang Rui 
>> Signed-off-by: Jiqian Chen 
>> ---
>> Hi All,
>> This is v8 to support passthrough on Xen when dom0 is PVH.
>> v7->v8 change:
>> * Since xc_physdev_gsi_from_dev was renamed to xc_pcidev_get_gsi, changed it.
>> * Added xen_run_qemu_on_hvm to check if Qemu run on PV dom0, if not use 
>> xc_physdev_map_pirq_gsi to map pirq.
>> * Used CONFIG_XEN_CTRL_INTERFACE_VERSION to wrap the new part for 
>> compatibility.
>> * Added "#define DOMID_RUN_QEMU 0" to represent the id of domain that Qemu 
>> run on.
>>
>>
>> Best regards,
>> Jiqian Chen
>>
>>
>>
>> v6->v7 changes:
>> * Because the function of obtaining gsi was changed on the kernel and Xen 
>> side. Changed to use
>>   xc_physdev_gsi_from_dev, that requires passing in sbdf instead of irq.
>>
>> v5->v6 changes:
>> * Because the function of obtaining gsi was changed on the kernel and Xen 
>> side. Changed to use
>>   xc_physdev_gsi_from_irq, instead of gsi sysfs.
>> * Since function changed, removed the Review-by of Stefano.
>>
>> v4->v5 changes:
>> * Added Review-by Stefano.
>>
>> v3->v4 changes:
>> * Added gsi into struct XenHostPCIDevice and used gsi number that read from 
>> gsi sysfs
>>   if it exists, if there is no gsi sysfs, still use irq.
>>
>> v2->v3 changes:
>> * Due to changes in the implementation of the second patch on kernel 
>> side(that adds
>>   a new sysfs for gsi instead of a new syscall), so read gsi number from the 
>> sysfs of gsi.
>>
>> v1 and v2:
>> We can record the relation between gsi and irq, then when userspace(qemu) 
>> want
>> to use gsi, we can do a translation. The third patch of kernel(xen/privcmd: 
>> Add new syscall
>> to get gsi from irq) records all the relations in 
>> acpi_register_gsi_xen_pvh() when dom0
>> initialize pci devices, and provide a syscall for userspace to get the gsi 
>> from irq. The
>> third patch of xen(tools: Add new function to get gsi from irq) add a new 
>> function
>> xc_physdev_gsi_from_irq() to call the new syscall added on kernel side.
>> And then userspace can use that function to get gsi. Then 
>> xc_physdev_map_pirq() will success.
>>
>> Issues we encountered:
>> 1. failed to map pirq for gsi
>> Problem: qemu will call xc_physdev_map_pirq() to map a passthrough device's 
>> gsi to pirq in
>> function xen_pt_realize(). But failed.
>>
>> Reason: According to the implement of xc_physdev_map_pirq(), it needs gsi 
>> instead of irq,
>> but qemu pass irq to it and treat irq as gsi, it is got from file
>> /sys/bus/pci/devices/:xx:xx.x/irq in function xen_host_pci_device_get(). 
>> But actually
>> the gsi number is not equal with irq. They are in different space.
>> ---
>>  hw/xen/xen_pt.c | 44 
>>  hw/xen/xen_pt.h |  1 +
>>  2 files changed, 45 insertions(+)
>>
>> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
>> index 3635d1b39f79..7f8139d20915 100644
>> --- a/hw/xen/xen_pt.c
>> +++ b/hw/xen/xen_pt.c
>> @@ -766,6 +766,41 @@ static void xen_pt_destroy(PCIDevice *d) {
>>  }
>>  /* init */
>>  
>> +#define PCI_SBDF(seg, bus, dev, func) \
>> +uint32_t)(seg)) << 16) | \
>> +(PCI_BUILD_BDF(bus, PCI_DEVFN(dev, func
> 
> Nit: This macro looks generic and useful. Would it be better defined in
> include/hw/pci/pci.h?
> 
>> +
>> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
>> +static bool xen_run_qemu_on_hvm(void)
> 
> This function name seems to imply "is qemu running on HVM?", but I think
> the question we're really trying to answer is whether the pcidev needs
> a GSI mapped. How about calling the function "xen_pt_needs_gsi" or
> similar?
> 
>> +{
>> +xc_domaininfo_t info;
>> +
>> +if (!xc_domain_getinfo_single(xen_xc, DOMID_RUN_QEMU, &info) &&
>> +(info.flags & XEN_DOMINF_hvm_guest)) {
> 
> I think reading /sys/hypervisor/guest_type would allow you to get the
> same information without another hypercall.
> 
>> +return true;
>> +}
>> +
>> +return false;
>> +}
>> +
>> +static int xen_map_pirq_for_gsi(PCIDevice *d, int *pirq)
> 
> Nit: s/xen_/xen_pt_/
> 
>> +{
>> +int gsi;
>> +XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
>> +
>> +gsi = xc_pcidev_get_gsi(xen_xc,
>> +PCI_SBDF(s->real_device.domain,
>> + s->real_device.bus,
>> + s->real_device.dev,
>> +

Re: [QEMU PATCH v8] xen/passthrough: use gsi to map pirq when dom0 is PVH

2024-10-23 Thread Chen, Jiqian

On 2024/10/23 07:07, Marek Marczykowski-Górecki wrote:
> On Wed, Oct 16, 2024 at 02:28:27PM +0800, Jiqian Chen wrote:
>> --- a/hw/xen/xen_pt.h
>> +++ b/hw/xen/xen_pt.h
>> @@ -36,6 +36,7 @@ void xen_pt_log(const PCIDevice *d, const char *f, ...) 
>> G_GNUC_PRINTF(2, 3);
>>  #  define XEN_PT_LOG_CONFIG(d, addr, val, len)
>>  #endif
>>  
>> +#define DOMID_RUN_QEMU 0
> 
> Please, don't hardcode dom0 here, QEMU can be running in a stubdomain
> too (in which case, this will hilariously explode, as it will check what
> dom0 is, instead of where QEMU is running). 
> Stewart already provided an alternative.
Got it, will use Stewart's suggestion, thanks.

> 

-- 
Best regards,
Jiqian Chen.

Re: [PATCH v2 04/10] xen/arm: ffa: Fine granular call support

2024-10-23 Thread Jens Wiklander

Hi Bertrand,

On Wed, Oct 16, 2024 at 10:32 AM Bertrand Marquis
 wrote:
>
> Create a bitmap to store which feature is supported or not by the
> firmware and use it to filter which calls are done to the firmware.
>
> While there reoder ABI definition by numbers to easily find the min and
> max ones.
>
> Signed-off-by: Bertrand Marquis 
> ---
> Changes in v2:
> - rename fw_feat to abi and macros to FFA_ABI to be coherent with the
>   abi needed change done before
> - rework the macros to be simpler by directly defining MIN and MAX using
>   only Function ids
> - check that requested function ids do not go over the bitmap size in
>   ffa_fw_supports_fid
> - add an ASSERT to make sure that we do not try to set bits outside of
>   the bitmap
> - turn off FF-A if there is not firmware support and adapt the commit
>   message to reflect this
> - add a compile time check that FFA_ABI_MIN < FFA_ABI_MAX
> - remove spurious line removal
> - restore proper cleanup of rxtx init in case of error
> - reorder ABI by numbers
> ---
>  xen/arch/arm/tee/ffa.c  | 28 +++-
>  xen/arch/arm/tee/ffa_notif.c|  7 ++
>  xen/arch/arm/tee/ffa_partinfo.c | 30 +-
>  xen/arch/arm/tee/ffa_private.h  | 38 -
>  xen/arch/arm/tee/ffa_rxtx.c |  4 
>  xen/arch/arm/tee/ffa_shm.c  | 12 +++
>  6 files changed, 103 insertions(+), 16 deletions(-)

Looks good.
Reviewed-by: Jens Wiklander 

Cheers,
Jens

>
> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c
> index 1ee6b2895e92..267d4435ac08 100644
> --- a/xen/arch/arm/tee/ffa.c
> +++ b/xen/arch/arm/tee/ffa.c
> @@ -72,7 +72,10 @@
>  #include "ffa_private.h"
>
>  /* Negotiated FF-A version to use with the SPMC, 0 if not there or supported 
> */
> -static uint32_t __ro_after_init ffa_fw_version;
> +uint32_t __ro_after_init ffa_fw_version;
> +
> +/* Features supported by the SPMC or secure world when present */
> +DECLARE_BITMAP(ffa_fw_abi_supported, FFA_ABI_BITMAP_SIZE);
>
>  struct ffa_fw_abi {
>  const uint32_t id;
> @@ -177,6 +180,13 @@ static void handle_msg_send_direct_req(struct 
> cpu_user_regs *regs, uint32_t fid)
>  else
>  mask = GENMASK_ULL(31, 0);
>
> +if ( !ffa_fw_supports_fid(fid) )
> +{
> +resp.a0 = FFA_ERROR;
> +resp.a2 = FFA_RET_NOT_SUPPORTED;
> +goto out;
> +}
> +
>  src_dst = get_user_reg(regs, 1);
>  if ( (src_dst >> 16) != ffa_get_vm_id(d) )
>  {
> @@ -577,19 +587,16 @@ static bool ffa_probe(void)
>  else
>  ffa_fw_version = vers;
>
> -/*
> - * At the moment domains must support the same features used by Xen.
> - * TODO: Rework the code to allow domain to use a subset of the
> - * features supported.
> - */
>  for ( unsigned int i = 0; i < ARRAY_SIZE(ffa_fw_abi_needed); i++ )
>  {
> -if ( !ffa_abi_supported(ffa_fw_abi_needed[i].id) )
> -{
> +ASSERT(FFA_ABI_BITNUM(ffa_fw_abi_needed[i].id) < 
> FFA_ABI_BITMAP_SIZE);
> +
> +if ( ffa_abi_supported(ffa_fw_abi_needed[i].id) )
> +set_bit(FFA_ABI_BITNUM(ffa_fw_abi_needed[i].id),
> +ffa_fw_abi_supported);
> +else
>  printk(XENLOG_INFO "ARM FF-A Firmware does not support %s\n",
> ffa_fw_abi_needed[i].name);
> -goto err_no_fw;
> -}
>  }
>
>  if ( !ffa_rxtx_init() )
> @@ -611,6 +618,7 @@ err_rxtx_destroy:
>  ffa_rxtx_destroy();
>  err_no_fw:
>  ffa_fw_version = 0;
> +bitmap_zero(ffa_fw_abi_supported, FFA_ABI_BITMAP_SIZE);
>  printk(XENLOG_WARNING "ARM FF-A No firmware support\n");
>
>  return false;
> diff --git a/xen/arch/arm/tee/ffa_notif.c b/xen/arch/arm/tee/ffa_notif.c
> index 541e61d2f606..4b3e46318f4b 100644
> --- a/xen/arch/arm/tee/ffa_notif.c
> +++ b/xen/arch/arm/tee/ffa_notif.c
> @@ -377,6 +377,13 @@ void ffa_notif_init(void)
>  unsigned int irq;
>  int ret;
>
> +/* Only enable fw notification if all ABIs we need are supported */
> +if ( !(ffa_fw_supports_fid(FFA_NOTIFICATION_BITMAP_CREATE) &&
> +   ffa_fw_supports_fid(FFA_NOTIFICATION_BITMAP_DESTROY) &&
> +   ffa_fw_supports_fid(FFA_NOTIFICATION_GET) &&
> +   ffa_fw_supports_fid(FFA_NOTIFICATION_INFO_GET_64)) )
> +return;
> +
>  arm_smccc_1_2_smc(&arg, &resp);
>  if ( resp.a0 != FFA_SUCCESS_32 )
>  return;
> diff --git a/xen/arch/arm/tee/ffa_partinfo.c b/xen/arch/arm/tee/ffa_partinfo.c
> index 93a03c6bc672..99c48f0e5c05 100644
> --- a/xen/arch/arm/tee/ffa_partinfo.c
> +++ b/xen/arch/arm/tee/ffa_partinfo.c
> @@ -77,7 +77,15 @@ int32_t ffa_handle_partition_info_get(uint32_t w1, 
> uint32_t w2, uint32_t w3,
>   */
>  if ( w5 == FFA_PARTITION_INFO_GET_COUNT_FLAG &&
>   ctx->guest_vers == FFA_VERSION_1_1 )
> -return ffa_partition_info_get(w1, w2, w3, w4, w5, count, fpi_size);
> +{
> +if ( ffa_fw_supports_fid(FFA_PARTITION

[PATCH 1/3] x86/boot: Add a temporary module_map pointer to boot_image

2024-10-23 Thread Andrew Cooper

... in order to untangle parameter handling independently from other other
logic changes.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Daniel P. Smith 
---
 xen/arch/x86/include/asm/bootinfo.h | 1 +
 xen/arch/x86/setup.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/xen/arch/x86/include/asm/bootinfo.h 
b/xen/arch/x86/include/asm/bootinfo.h
index ffa443406747..6237da7e4d86 100644
--- a/xen/arch/x86/include/asm/bootinfo.h
+++ b/xen/arch/x86/include/asm/bootinfo.h
@@ -31,6 +31,7 @@ struct boot_info {
 size_t memmap_length;
 
 unsigned int nr_modules;
+unsigned long *module_map; /* Temporary */
 struct boot_module mods[MAX_NR_BOOTMODS + 1];
 };
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index c5b37bd2112e..d8001867c925 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1086,6 +1086,7 @@ void asmlinkage __init noreturn __start_xen(unsigned long 
mbi_p)
 }
 
 bi = multiboot_fill_boot_info(mbi, mod);
+bi->module_map = module_map; /* Temporary */
 
 /* Parse the command-line options. */
 if ( (kextra = strstr(bi->cmdline, " -- ")) != NULL )
-- 
2.39.5

[PATCH 0/3] x86/boot: Yet more PVH module handling fixes

2024-10-23 Thread Andrew Cooper

It turns out microcode and XSM module handling has been broken ever since PVH
boot was introduced.  Both appear to have have gone unnoticed because their
functionality isn't typically used in PVH guests.

https://gitlab.com/xen-project/people/andyhhp/xen/-/pipelines/1508716668

Andrew Cooper (1):
  x86/boot: Add a temporary module_map pointer to boot_image

Daniel P. Smith (2):
  x86/boot: Fix microcode module handling during PVH boot
  x86/boot: Fix XSM module handling during PVH boot

 xen/arch/x86/cpu/microcode/core.c| 40 +++-
 xen/arch/x86/include/asm/bootinfo.h  |  1 +
 xen/arch/x86/include/asm/microcode.h |  8 +++---
 xen/arch/x86/setup.c | 10 ---
 xen/include/xsm/xsm.h| 12 -
 xen/xsm/xsm_core.c   |  7 +++--
 xen/xsm/xsm_policy.c | 16 +--
 7 files changed, 43 insertions(+), 51 deletions(-)

-- 
2.39.5

[PATCH 3/3] x86/boot: Fix XSM module handling during PVH boot

2024-10-23 Thread Andrew Cooper

From: "Daniel P. Smith" 

As detailed in commit 0fe607b2a144 ("x86/boot: Fix PVH boot during boot_info
transition period"), the use of __va(mbi->mods_addr) constitutes a
use-after-free on the PVH boot path.

This pattern has been in use since before PVH support was added.  This has
most likely gone unnoticed because no-one's tried using a detached Flask
policy in a PVH VM before.

Plumb the boot_info pointer down, replacing module_map and mbi.  Importantly,
bi->mods[].mod is a safe way to access the module list during PVH boot.

As this is the final non-bi use of mbi in __start_xen(), make the pointer
unusable once bi has been established, to prevent new uses creeping back in.
This is a stopgap until mbi can be fully removed.

Signed-off-by: Daniel P. Smith 
Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Daniel P. Smith 

Refectored/extracted from the hyperlaunch series.

There's no good Fixes tag for this, because it can't reasonably be "introduce
PVH", nor can the fix as-is reasonably be backported.  If we want to fix the
bug in older trees, we need to plumb the mod pointer down alongside mbi.
---
 xen/arch/x86/setup.c  |  5 -
 xen/include/xsm/xsm.h | 12 +---
 xen/xsm/xsm_core.c|  7 +++
 xen/xsm/xsm_policy.c  | 16 +++-
 4 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index c75b8f15fa5d..8974b0c6ede6 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1088,6 +1088,9 @@ void asmlinkage __init noreturn __start_xen(unsigned long 
mbi_p)
 bi = multiboot_fill_boot_info(mbi, mod);
 bi->module_map = module_map; /* Temporary */
 
+/* Use bi-> instead */
+#define mbi DO_NOT_USE
+
 /* Parse the command-line options. */
 if ( (kextra = strstr(bi->cmdline, " -- ")) != NULL )
 {
@@ -1862,7 +1865,7 @@ void asmlinkage __init noreturn __start_xen(unsigned long 
mbi_p)
 mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
   RANGESETF_prettyprint_hex);
 
-xsm_multiboot_init(module_map, mbi);
+xsm_multiboot_init(bi);
 
 /*
  * IOMMU-related ACPI table parsing may require some of the system domains
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 627c0d2731af..4dbff9d866e0 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -17,7 +17,6 @@
 
 #include 
 #include 
-#include 
 
 /* policy magic number (defined by XSM_MAGIC) */
 typedef uint32_t xsm_magic_t;
@@ -778,11 +777,10 @@ static inline int xsm_argo_send(const struct domain *d, 
const struct domain *t)
 #endif /* XSM_NO_WRAPPERS */
 
 #ifdef CONFIG_MULTIBOOT
-int xsm_multiboot_init(
-unsigned long *module_map, const multiboot_info_t *mbi);
+struct boot_info;
+int xsm_multiboot_init(struct boot_info *bi);
 int xsm_multiboot_policy_init(
-unsigned long *module_map, const multiboot_info_t *mbi,
-void **policy_buffer, size_t *policy_size);
+struct boot_info *bi, void **policy_buffer, size_t *policy_size);
 #endif
 
 #ifdef CONFIG_HAS_DEVICE_TREE
@@ -828,8 +826,8 @@ static const inline struct xsm_ops *silo_init(void)
 #include 
 
 #ifdef CONFIG_MULTIBOOT
-static inline int xsm_multiboot_init (
-unsigned long *module_map, const multiboot_info_t *mbi)
+struct boot_info;
+static inline int xsm_multiboot_init(struct boot_info *bi)
 {
 return 0;
 }
diff --git a/xen/xsm/xsm_core.c b/xen/xsm/xsm_core.c
index eaa028109bde..6e3fac68c057 100644
--- a/xen/xsm/xsm_core.c
+++ b/xen/xsm/xsm_core.c
@@ -21,6 +21,7 @@
 #ifdef CONFIG_XSM
 
 #ifdef CONFIG_MULTIBOOT
+#include 
 #include 
 #endif
 
@@ -139,8 +140,7 @@ static int __init xsm_core_init(const void *policy_buffer, 
size_t policy_size)
 }
 
 #ifdef CONFIG_MULTIBOOT
-int __init xsm_multiboot_init(
-unsigned long *module_map, const multiboot_info_t *mbi)
+int __init xsm_multiboot_init(struct boot_info *bi)
 {
 int ret = 0;
 void *policy_buffer = NULL;
@@ -150,8 +150,7 @@ int __init xsm_multiboot_init(
 
 if ( XSM_MAGIC )
 {
-ret = xsm_multiboot_policy_init(module_map, mbi, &policy_buffer,
-&policy_size);
+ret = xsm_multiboot_policy_init(bi, &policy_buffer, &policy_size);
 if ( ret )
 {
 bootstrap_map(NULL);
diff --git a/xen/xsm/xsm_policy.c b/xen/xsm/xsm_policy.c
index 8dafbc93810f..6f799dd28f5b 100644
--- a/xen/xsm/xsm_policy.c
+++ b/xen/xsm/xsm_policy.c
@@ -20,7 +20,7 @@
 
 #include 
 #ifdef CONFIG_MULTIBOOT
-#include 
+#include 
 #include 
 #endif
 #include 
@@ -31,11 +31,9 @@
 
 #ifdef CONFIG_MULTIBOOT
 int __init xsm_multiboot_policy_init(
-unsigned long *module_map, const multiboot_info_t *mbi,
-void **policy_buffer, size_t *policy_size)
+struct boot_info *bi, void **policy_buffer, size_t *policy_size)
 {
 int i;
-module_t *mod = (module_t *)__va(mbi->mods_addr);
 int rc = 0;
 u32 *_policy_start;
 unsigned long _policy_len;
@@ -44,13 +4

[PATCH 2/3] x86/boot: Fix microcode module handling during PVH boot

2024-10-23 Thread Andrew Cooper

From: "Daniel P. Smith" 

As detailed in commit 0fe607b2a144 ("x86/boot: Fix PVH boot during boot_info
transition period"), the use of __va(mbi->mods_addr) constitutes a
use-after-free on the PVH boot path.

This pattern has been in use since before PVH support was added.  Inside a PVH
VM, it will go unnoticed as long as the microcode container parser doesn't
choke on the random data it finds.

The use within early_microcode_init() happens to be safe because it's prior to
move_xen().  microcode_init_cache() is after move_xen(), and therefore unsafe.

Plumb the boot_info pointer down, replacing module_map and mbi.  Importantly,
bi->mods[].mod is a safe way to access the module list during PVH boot.

Note: microcode_scan_module() is still bogusly stashing a bootstrap_map()'d
  pointer in ucode_blob.data, which constitutes a different
  use-after-free, and only works in general because of a second bug.  This
  is unrelated to PVH, and needs untangling differently.

Signed-off-by: Daniel P. Smith 
Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Daniel P. Smith 

Refectored/extracted from the hyperlaunch series.

There's no good Fixes tag for this, because it can't reasonably be "introduce
PVH", nor can the fix as-is reasonably be backported.  If we want to fix the
bug in older trees, we need to plumb the mod pointer down alongside mbi.
---
 xen/arch/x86/cpu/microcode/core.c| 40 +++-
 xen/arch/x86/include/asm/microcode.h |  8 +++---
 xen/arch/x86/setup.c |  4 +--
 3 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index 8564e4d2c94c..1d58cb0f3bc1 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -35,6 +35,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -152,11 +153,8 @@ static int __init cf_check parse_ucode(const char *s)
 }
 custom_param("ucode", parse_ucode);
 
-static void __init microcode_scan_module(
-unsigned long *module_map,
-const multiboot_info_t *mbi)
+static void __init microcode_scan_module(struct boot_info *bi)
 {
-module_t *mod = (module_t *)__va(mbi->mods_addr);
 uint64_t *_blob_start;
 unsigned long _blob_size;
 struct cpio_data cd;
@@ -178,13 +176,13 @@ static void __init microcode_scan_module(
 /*
  * Try all modules and see whichever could be the microcode blob.
  */
-for ( i = 1 /* Ignore dom0 kernel */; i < mbi->mods_count; i++ )
+for ( i = 1 /* Ignore dom0 kernel */; i < bi->nr_modules; i++ )
 {
-if ( !test_bit(i, module_map) )
+if ( !test_bit(i, bi->module_map) )
 continue;
 
-_blob_start = bootstrap_map(&mod[i]);
-_blob_size = mod[i].mod_end;
+_blob_start = bootstrap_map(bi->mods[i].mod);
+_blob_size = bi->mods[i].mod->mod_end;
 if ( !_blob_start )
 {
 printk("Could not map multiboot module #%d (size: %ld)\n",
@@ -204,21 +202,17 @@ static void __init microcode_scan_module(
 }
 }
 
-static void __init microcode_grab_module(
-unsigned long *module_map,
-const multiboot_info_t *mbi)
+static void __init microcode_grab_module(struct boot_info *bi)
 {
-module_t *mod = (module_t *)__va(mbi->mods_addr);
-
 if ( ucode_mod_idx < 0 )
-ucode_mod_idx += mbi->mods_count;
-if ( ucode_mod_idx <= 0 || ucode_mod_idx >= mbi->mods_count ||
- !__test_and_clear_bit(ucode_mod_idx, module_map) )
+ucode_mod_idx += bi->nr_modules;
+if ( ucode_mod_idx <= 0 || ucode_mod_idx >= bi->nr_modules ||
+ !__test_and_clear_bit(ucode_mod_idx, bi->module_map) )
 goto scan;
-ucode_mod = mod[ucode_mod_idx];
+ucode_mod = *bi->mods[ucode_mod_idx].mod;
 scan:
 if ( ucode_scan )
-microcode_scan_module(module_map, mbi);
+microcode_scan_module(bi);
 }
 
 static struct microcode_ops __ro_after_init ucode_ops;
@@ -822,8 +816,7 @@ static int __init early_update_cache(const void *data, 
size_t len)
 return rc;
 }
 
-int __init microcode_init_cache(unsigned long *module_map,
-const struct multiboot_info *mbi)
+int __init microcode_init_cache(struct boot_info *bi)
 {
 int rc = 0;
 
@@ -832,7 +825,7 @@ int __init microcode_init_cache(unsigned long *module_map,
 
 if ( ucode_scan )
 /* Need to rescan the modules because they might have been relocated */
-microcode_scan_module(module_map, mbi);
+microcode_scan_module(bi);
 
 if ( ucode_mod.mod_end )
 rc = early_update_cache(bootstrap_map(&ucode_mod),
@@ -878,8 +871,7 @@ static int __init early_microcode_update_cpu(void)
 return microcode_update_cpu(patch, 0);
 }
 
-int __init early_microcode_init(unsigned long *module_map,
-const struct multiboot_info *mbi)
+int __init early_microcode_init(struct boot_info *bi)
 {

Re: [PATCH] x86/boot: Fix PVH boot during boot_info transition period

2024-10-23 Thread Alan Robinson

Hi Andrew,

A small suggestion for the commit log..

On Tue, Oct 22, 2024 at 12:41:14PM +, Andrew Cooper wrote:
> 
> multiboot_fill_boot_info() taking the physical address of the multiboot_info
> structure leads to a subtle use-after-free on the PVH path, with rather less
> subtle fallout.
> 
> The pointers used by __start_xen(), mbi and mod, are either:
> 
>  - MB:  Directmap pointers into the trampoline, or
>  - PVH: Xen pointers into .initdata, or
>  - EFI: Directmap pointers into Xen.
> 
> Critically, these either remain valid across move_xen() (MB, PVH), or rely on
> move_xen() being inhibited (EFI).
> 
> The conversion to multiboot_fill_boot_info(), taking only mbi_p, makes the PVH
> path use directmap pointers into Xen, as well as move_xen() which invalidates
> said pointers.
> 
> Switch multiboot_fill_boot_info() to consume the same virtual addresses that
> __start_xen() currently uses.  This keeps all the pointers valid for the
> duration of __start_xen(), for all boot protocols.
> 
> It can be safely untangled once multiboot_fill_boot_info() takes a full copy
> the multiboot info data, and __start_xen() has been moved over to using the

 of the multiboot info data, and __start_xen() has been moved over to using the

> new boot_info consistently.
> 

Alan

Re: [PATCH 1/3] x86/boot: Add a temporary module_map pointer to boot_image

2024-10-23 Thread Daniel P. Smith


On 10/23/24 06:57, Andrew Cooper wrote:

... in order to untangle parameter handling independently from other other
logic changes.

Signed-off-by: Andrew Cooper 
---


Reviewed-by: Daniel P. Smith

Re: [PATCH 3/3] x86/boot: Fix XSM module handling during PVH boot

2024-10-23 Thread Daniel P. Smith


On 10/23/24 06:57, Andrew Cooper wrote:

From: "Daniel P. Smith" 

As detailed in commit 0fe607b2a144 ("x86/boot: Fix PVH boot during boot_info
transition period"), the use of __va(mbi->mods_addr) constitutes a
use-after-free on the PVH boot path.

This pattern has been in use since before PVH support was added.  This has
most likely gone unnoticed because no-one's tried using a detached Flask
policy in a PVH VM before.

Plumb the boot_info pointer down, replacing module_map and mbi.  Importantly,
bi->mods[].mod is a safe way to access the module list during PVH boot.

As this is the final non-bi use of mbi in __start_xen(), make the pointer
unusable once bi has been established, to prevent new uses creeping back in.
This is a stopgap until mbi can be fully removed.

Signed-off-by: Daniel P. Smith 
Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Daniel P. Smith 


Reviewed-by: Daniel P. Smith

Re: [PATCH 2/3] x86/boot: Fix microcode module handling during PVH boot

2024-10-23 Thread Daniel P. Smith


On 10/23/24 06:57, Andrew Cooper wrote:

From: "Daniel P. Smith" 

As detailed in commit 0fe607b2a144 ("x86/boot: Fix PVH boot during boot_info
transition period"), the use of __va(mbi->mods_addr) constitutes a
use-after-free on the PVH boot path.

This pattern has been in use since before PVH support was added.  Inside a PVH
VM, it will go unnoticed as long as the microcode container parser doesn't
choke on the random data it finds.

The use within early_microcode_init() happens to be safe because it's prior to
move_xen().  microcode_init_cache() is after move_xen(), and therefore unsafe.

Plumb the boot_info pointer down, replacing module_map and mbi.  Importantly,
bi->mods[].mod is a safe way to access the module list during PVH boot.

Note: microcode_scan_module() is still bogusly stashing a bootstrap_map()'d
   pointer in ucode_blob.data, which constitutes a different
   use-after-free, and only works in general because of a second bug.  This
   is unrelated to PVH, and needs untangling differently.

Signed-off-by: Daniel P. Smith 
Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Daniel P. Smith 


Reviewed-by: Daniel P. Smith

Re: [PATCH v2 05/10] xen/arm: ffa: Rework partition info get

2024-10-23 Thread Jens Wiklander

Hi Bertrand,

On Wed, Oct 16, 2024 at 10:32 AM Bertrand Marquis
 wrote:
>
> Rework the partition info get implementation to use the correct size of
> structure depending on the version of the protocol and simplifies the
> structure copy to use only memcpy and prevent recreating the structure
> each time.
> The goal here is to have an implementation that will be easier to
> maintain in the long term as the specification is only adding fields to
> structure with versions to simplify support of several protocol
> versions and as such an SPMC implementation in the future could use this
> and return a size higher than the one we expect.
> The patch is fixing the part_info_get function for this and the
> subscriber discovery on probe.
>
> No functional changes expected.
>
> Signed-off-by: Bertrand Marquis 
> ---
> Changes in v2:
> - rebase
> ---
>  xen/arch/arm/tee/ffa.c  |  13 +--
>  xen/arch/arm/tee/ffa_partinfo.c | 185 
>  xen/arch/arm/tee/ffa_private.h  |   4 +-
>  3 files changed, 118 insertions(+), 84 deletions(-)

Looks good.
Reviewed-by: Jens Wiklander 

Cheers,
Jens

>
> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c
> index 267d4435ac08..4b55e8b48f01 100644
> --- a/xen/arch/arm/tee/ffa.c
> +++ b/xen/arch/arm/tee/ffa.c
> @@ -311,8 +311,6 @@ static bool ffa_handle_call(struct cpu_user_regs *regs)
>  uint32_t fid = get_user_reg(regs, 0);
>  struct domain *d = current->domain;
>  struct ffa_ctx *ctx = d->arch.tee;
> -uint32_t fpi_size;
> -uint32_t count;
>  int e;
>
>  if ( !ctx )
> @@ -338,16 +336,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs)
>  e = ffa_handle_rxtx_unmap();
>  break;
>  case FFA_PARTITION_INFO_GET:
> -e = ffa_handle_partition_info_get(get_user_reg(regs, 1),
> -  get_user_reg(regs, 2),
> -  get_user_reg(regs, 3),
> -  get_user_reg(regs, 4),
> -  get_user_reg(regs, 5), &count,
> -  &fpi_size);
> -if ( e )
> -ffa_set_regs_error(regs, e);
> -else
> -ffa_set_regs_success(regs, count, fpi_size);
> +ffa_handle_partition_info_get(regs);
>  return true;
>  case FFA_RX_RELEASE:
>  e = ffa_handle_rx_release();
> diff --git a/xen/arch/arm/tee/ffa_partinfo.c b/xen/arch/arm/tee/ffa_partinfo.c
> index 99c48f0e5c05..75a073d090e0 100644
> --- a/xen/arch/arm/tee/ffa_partinfo.c
> +++ b/xen/arch/arm/tee/ffa_partinfo.c
> @@ -33,21 +33,24 @@ static uint16_t subscr_vm_created_count __read_mostly;
>  static uint16_t *subscr_vm_destroyed __read_mostly;
>  static uint16_t subscr_vm_destroyed_count __read_mostly;
>
> -static int32_t ffa_partition_info_get(uint32_t w1, uint32_t w2, uint32_t w3,
> -  uint32_t w4, uint32_t w5, uint32_t 
> *count,
> -  uint32_t *fpi_size)
> +static int32_t ffa_partition_info_get(uint32_t *uuid, uint32_t flags,
> +  uint32_t *count, uint32_t *fpi_size)
>  {
> -const struct arm_smccc_1_2_regs arg = {
> +struct arm_smccc_1_2_regs arg = {
>  .a0 = FFA_PARTITION_INFO_GET,
> -.a1 = w1,
> -.a2 = w2,
> -.a3 = w3,
> -.a4 = w4,
> -.a5 = w5,
> +.a5 = flags,
>  };
>  struct arm_smccc_1_2_regs resp;
>  uint32_t ret;
>
> +if ( uuid )
> +{
> +arg.a1 = uuid[0];
> +arg.a2 = uuid[1];
> +arg.a3 = uuid[2];
> +arg.a4 = uuid[3];
> +}
> +
>  arm_smccc_1_2_smc(&arg, &resp);
>
>  ret = ffa_get_ret_code(&resp);
> @@ -60,13 +63,31 @@ static int32_t ffa_partition_info_get(uint32_t w1, 
> uint32_t w2, uint32_t w3,
>  return ret;
>  }
>
> -int32_t ffa_handle_partition_info_get(uint32_t w1, uint32_t w2, uint32_t w3,
> -  uint32_t w4, uint32_t w5, uint32_t 
> *count,
> -  uint32_t *fpi_size)
> +void ffa_handle_partition_info_get(struct cpu_user_regs *regs)
>  {
> -int32_t ret = FFA_RET_DENIED;
> +int32_t ret;
>  struct domain *d = current->domain;
>  struct ffa_ctx *ctx = d->arch.tee;
> +uint32_t flags = get_user_reg(regs, 5);
> +uint32_t uuid[4] = {
> +get_user_reg(regs, 1),
> +get_user_reg(regs, 2),
> +get_user_reg(regs, 3),
> +get_user_reg(regs, 4),
> +};
> +uint32_t src_size, dst_size;
> +void *dst_buf;
> +uint32_t ffa_sp_count = 0;
> +
> +/*
> + * If the guest is v1.0, he does not get back the entry size so we must
> + * use the v1.0 structure size in the destination buffer.
> + * Otherwise use the size of the highest version we support, here 1.1.
> + */
> +if ( ctx->guest_vers == FFA_VERSION_1_0 )
> +dst_size = sizeof

[PATCH] x86/ucode: Explain what microcode_set_module() does

2024-10-23 Thread Andrew Cooper

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 

I found this hiding in other microcode changes, and decided it was high time
it got included.
---
 xen/arch/x86/cpu/microcode/core.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index 8564e4d2c94c..dc2c064cf176 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -108,6 +108,10 @@ static bool ucode_in_nmi = true;
 /* Protected by microcode_mutex */
 static const struct microcode_patch *microcode_cache;
 
+/*
+ * Used by the EFI path only, when xen.cfg identifies an explicit microcode
+ * file.  Overrides ucode=|scan on the regular command line.
+ */
 void __init microcode_set_module(unsigned int idx)
 {
 ucode_mod_idx = idx;

base-commit: be84e7fe58b51f6b6dd907a038f0ef998a1e281e
prerequisite-patch-id: ef20898eb25a7ca1ea2d7b1d676f00b91b46d5f6
prerequisite-patch-id: e0d0c0acbe4864a00451187ef7232dcaf10b2477
prerequisite-patch-id: f6010b4a6e0b43ac837aea470b3b5e5f390ee3b2
-- 
2.39.5

Re: [PATCH 4/3] x86/boot: Remove the mbi_p parameter from __start_xen()

2024-10-23 Thread Andrew Cooper

On 23/10/2024 3:58 pm, Daniel P. Smith wrote:
> On 10/23/24 10:44, Andrew Cooper wrote:
>> The use of physical addresses in __start_xen() has proved to be
>> fertile soure
>> of bugs.
>>
>> The MB1/2 path stashes the MBI pointer in multiboot_ptr (a setup.c
>> variable
>> even), then re-loads it immediately before calling __start_xen(). 
>> For this,
>> we can just drop the function parameter and read multiboot_ptr in the
>> one
>> place it's used.
>>
>> The EFI path also passes this parameter into __start_xen().  Have the
>> EFI path
>> set up multiboot_ptr too, and move the explanation of phyiscal-mode
>> pointers.
>>
>> No functional change.
>>
>> Signed-off-by: Andrew Cooper 
>> ---
>> CC: Jan Beulich 
>> CC: Roger Pau Monné 
>> CC: Marek Marczykowski-Górecki 
>> CC: Daniel P. Smith 
>
> For EFI:
> Acked-by: Daniel P. Smith 
>
> For remainder:
> Reviewed-by: Daniel P. Smith 

Thanks.  That's usually just R-by then.

In Xen, we take A-by to mean "sure, whatever", and R-by to mean "I've
looked at this in detail and think it's good".

R-by implies A-by, and either are fine from a maintainership point of view.

~Andrew

Re: [PATCH v3 5/6] xen/arm: mpu: Enable MPU

2024-10-23 Thread Julien Grall





On 23/10/2024 17:18, Julien Grall wrote:



On 23/10/2024 17:13, Julien Grall wrote:



On 23/10/2024 17:06, Ayan Kumar Halder wrote:

Hi Luca/Julien,

On 22/10/2024 17:31, Luca Fancellu wrote:

Hi Julien,


On 22 Oct 2024, at 14:13, Julien Grall  wrote:



On 22/10/2024 10:56, Luca Fancellu wrote:

On 22 Oct 2024, at 10:47, Julien Grall  wrote:

Hi Luca,

On 22/10/2024 10:41, Luca Fancellu wrote:

On 21 Oct 2024, at 17:27, Julien Grall  wrote:
2) dsb+isb barrier after enabling the MPU, since we are enabling 
the MPU but also because we are disabling the background region
TBH, I don't understand this one. Why would disabling the 
background region requires a dsb + isb? Do you have any pointer 
in the Armv8-R specification?
I’m afraid this is only my deduction, Section C1.4 of the Arm® 
Architecture Reference Manual Supplement Armv8, for R-profile 
AArch64 architecture,
(DDI 0600B.a) explains what is the background region, it says it 
implements physical address range(s), so when we disable it, we 
would like any data
access to complete before changing this implementation defined 
range with the ranges defined by us tweaking PRBAR/PRLAR, am I right?

My mental model for the ordering is similar to a TLB flush which is:
   1/ dsb nsh
   2/ tlbi
   3/ dsb nsh
   4/ isb

Enabling the MPU is effectively 2. AFAIK, 1 is only necessary to 
ensure the update to the page-tables. But it is not a requirement 
to ensure any data access are completed (otherwise, we would have 
needed a dsb sy because we don't know how far the access are 
going...).
Uhm… I’m not sure we are on the same page, probably I explained that 
wrongly, so my point is that:


FUNC_LOCAL(enable_mpu)
 mrs   x0, SCTLR_EL2
 bic   x0, x0, #SCTLR_ELx_BR   /* Disable Background region */
 orr   x0, x0, #SCTLR_Axx_ELx_M    /* Enable MPU */
 orr   x0, x0, #SCTLR_Axx_ELx_C    /* Enable D-cache */
 orr   x0, x0, #SCTLR_Axx_ELx_WXN  /* Enable WXN */
 dsb   sy
 ^— This data barrier is needed in order to complete any data 
access, which guarantees that all explicit memory accesses before
    this instruction complete, so we can safely turn ON the 
MPU and disable the background region.


I think


Sorry I fat fingered and pressed send too early. I think this is the 
part I disagree with. All explicit accesses don't need to be complete 
(in the sense observed by everyone in the system). They only need to 
have gone through the permissions check.


I think I managed to find again the wording that would justify why a 
"dsb" is not necessary for the permission checks. From ARM DDI 0487K.a 
D23-7349:


```
Direct writes using the instructions in Table D22-2 require 
synchronization before software can rely on the effects
of changes to the System registers to affect instructions appearing in 
program order after the direct write to the
System register. Direct writes to these registers are not allowed to 
affect any instructions appearing in program order

before the direct write.
```

I understand the paragraph as enabling the MPU via SCTLR_EL2 will not 
affect any instruction before hand.


Cheers,

--
Julien Grall

Re: [PATCH 02/13] ALSA: hda_intel: Use always-managed version of pcim_intx()

2024-10-23 Thread Philipp Stanner

On Tue, 2024-10-22 at 16:08 +0200, Takashi Iwai wrote:
> On Tue, 15 Oct 2024 20:51:12 +0200,
> Philipp Stanner wrote:
> > 
> > pci_intx() is a hybrid function which can sometimes be managed
> > through
> > devres. To remove this hybrid nature from pci_intx(), it is
> > necessary to
> > port users to either an always-managed or a never-managed version.
> > 
> > hda_intel enables its PCI-Device with pcim_enable_device(). Thus,
> > it needs
> > the always-managed version.
> > 
> > Replace pci_intx() with pcim_intx().
> > 
> > Signed-off-by: Philipp Stanner 
> > ---
> >  sound/pci/hda/hda_intel.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
> > index b4540c5cd2a6..b44ca7b6e54f 100644
> > --- a/sound/pci/hda/hda_intel.c
> > +++ b/sound/pci/hda/hda_intel.c
> > @@ -786,7 +786,7 @@ static int azx_acquire_irq(struct azx *chip,
> > int do_disconnect)
> >     }
> >     bus->irq = chip->pci->irq;
> >     chip->card->sync_irq = bus->irq;
> > -   pci_intx(chip->pci, !chip->msi);
> > +   pcim_intx(chip->pci, !chip->msi);
> >     return 0;
> >  }
> >  
> 
> Hm, it's OK-ish to do this as it's practically same as what
> pci_intx()
> currently does.  But, the current code can be a bit inconsistent
> about
> the original intx value.  pcim_intx() always stores !enable to
> res->orig_intx unconditionally, and it means that the orig_intx value
> gets overridden at each time pcim_intx() gets called.

Yes.

> 
> Meanwhile, HD-audio driver does release and re-acquire the interrupt
> after disabling MSI when something goes wrong, and pci_intx() call
> above is a part of that procedure.  So, it can rewrite the
> res->orig_intx to another value by retry without MSI.  And after the
> driver removal, it'll lead to another state.

I'm not sure that I understand this paragraph completely. Still, could
a solution for the driver on the long-term just be to use pci_intx()?

> 
> In anyway, as it doesn't change the current behavior, feel free to
> take my ack for now:
> 
> Acked-by: Takashi Iwai 

Thank you,
P.

> 
> 
> thanks,
> 
> Takashi
>

Re: [PATCH] x86/ucode: Explain what microcode_set_module() does

2024-10-23 Thread Alejandro Vallejo

On Wed Oct 23, 2024 at 1:28 PM BST, Andrew Cooper wrote:
> Signed-off-by: Andrew Cooper 

  Reviewed-by: Alejandro Vallejo 

With a single nit that I don't care much about, but...

> ---
> CC: Jan Beulich 
> CC: Roger Pau Monné 
>
> I found this hiding in other microcode changes, and decided it was high time
> it got included.
> ---
>  xen/arch/x86/cpu/microcode/core.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/xen/arch/x86/cpu/microcode/core.c 
> b/xen/arch/x86/cpu/microcode/core.c
> index 8564e4d2c94c..dc2c064cf176 100644
> --- a/xen/arch/x86/cpu/microcode/core.c
> +++ b/xen/arch/x86/cpu/microcode/core.c
> @@ -108,6 +108,10 @@ static bool ucode_in_nmi = true;
>  /* Protected by microcode_mutex */
>  static const struct microcode_patch *microcode_cache;
>  
> +/*
> + * Used by the EFI path only, when xen.cfg identifies an explicit microcode
> + * file.  Overrides ucode=|scan on the regular command line.
> + */

... this it would be better at the interface in microcode.h, imo.

>  void __init microcode_set_module(unsigned int idx)
>  {
>  ucode_mod_idx = idx;
>
> base-commit: be84e7fe58b51f6b6dd907a038f0ef998a1e281e
> prerequisite-patch-id: ef20898eb25a7ca1ea2d7b1d676f00b91b46d5f6
> prerequisite-patch-id: e0d0c0acbe4864a00451187ef7232dcaf10b2477
> prerequisite-patch-id: f6010b4a6e0b43ac837aea470b3b5e5f390ee3b2

Cheers,
Alejandro

Re: [PATCH 1/6] xen: add a domain unique id to each domain

2024-10-23 Thread Alejandro Vallejo

On Wed Oct 23, 2024 at 2:10 PM BST, Juergen Gross wrote:
> Xenstore is referencing domains by their domid, but reuse of a domid
> can lead to the situation that Xenstore can't tell whether a domain
> with that domid has been deleted and created again without Xenstore
> noticing the domain is a new one now.
>
> Add a global domain creation unique id which is updated when creating
> a new domain, and store that value in struct domain of the new domain.
> The global unique id is initialized with the system time and updates
> are done via the xorshift algorithm which is used for pseudo random
> number generation, too (see https://en.wikipedia.org/wiki/Xorshift).
>
> Signed-off-by: Juergen Gross 
> Reviewed-by: Jan Beulich 
> ---
> V1:
> - make unique_id local to function (Jan Beulich)
> - add lock (Julien Grall)
> - add comment (Julien Grall)
> ---
>  xen/common/domain.c | 20 
>  xen/include/xen/sched.h |  3 +++
>  2 files changed, 23 insertions(+)
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 92263a4fbd..3948640fb0 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -562,6 +562,25 @@ static void _domain_destroy(struct domain *d)
>  free_domain_struct(d);
>  }
>  
> +static uint64_t get_unique_id(void)
> +{
> +static uint64_t unique_id;
> +static DEFINE_SPINLOCK(lock);
> +uint64_t x = unique_id ? : NOW();
> +
> +spin_lock(&lock);
> +
> +/* Pseudo-randomize id in order to avoid consumers relying on sequence. 
> */
> +x ^= x << 13;
> +x ^= x >> 7;
> +x ^= x << 17;
> +unique_id = x;
> +
> +spin_unlock(&lock);
> +
> +return x;
> +}
> +
>  static int sanitise_domain_config(struct xen_domctl_createdomain *config)
>  {
>  bool hvm = config->flags & XEN_DOMCTL_CDF_hvm;
> @@ -654,6 +673,7 @@ struct domain *domain_create(domid_t domid,
>  
>  /* Sort out our idea of is_system_domain(). */
>  d->domain_id = domid;
> +d->unique_id = get_unique_id();
>  
>  /* Holding CDF_* internal flags. */
>  d->cdf = flags;
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 90666576c2..1dd8a425f9 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -370,6 +370,9 @@ struct domain
>  domid_t  domain_id;
>  
>  unsigned int max_vcpus;
> +
> +uint64_t unique_id;   /* Unique domain identifier */
> +

Why not xen_domain_handle_t handle, defined later on? That's meant to be a
UUID, so this feels like a duplicate field.

>  struct vcpu**vcpu;
>  
>  shared_info_t   *shared_info; /* shared data area */

Cheers,
Alejandro

Re: [PATCH v3 5/6] xen/arm: mpu: Enable MPU

2024-10-23 Thread Ayan Kumar Halder


Hi Luca/Julien,

On 22/10/2024 17:31, Luca Fancellu wrote:

Hi Julien,


On 22 Oct 2024, at 14:13, Julien Grall  wrote:



On 22/10/2024 10:56, Luca Fancellu wrote:

On 22 Oct 2024, at 10:47, Julien Grall  wrote:

Hi Luca,

On 22/10/2024 10:41, Luca Fancellu wrote:

On 21 Oct 2024, at 17:27, Julien Grall  wrote:

2) dsb+isb barrier after enabling the MPU, since we are enabling the MPU but 
also because we are disabling the background region

TBH, I don't understand this one. Why would disabling the background region 
requires a dsb + isb? Do you have any pointer in the Armv8-R specification?

I’m afraid this is only my deduction, Section C1.4 of the Arm® Architecture 
Reference Manual Supplement Armv8, for R-profile AArch64 architecture,
(DDI 0600B.a) explains what is the background region, it says it implements 
physical address range(s), so when we disable it, we would like any data
access to complete before changing this implementation defined range with the 
ranges defined by us tweaking PRBAR/PRLAR, am I right?

My mental model for the ordering is similar to a TLB flush which is:
   1/ dsb nsh
   2/ tlbi
   3/ dsb nsh
   4/ isb

Enabling the MPU is effectively 2. AFAIK, 1 is only necessary to ensure the 
update to the page-tables. But it is not a requirement to ensure any data 
access are completed (otherwise, we would have needed a dsb sy because we don't 
know how far the access are going...).

Uhm… I’m not sure we are on the same page, probably I explained that wrongly, 
so my point is that:

FUNC_LOCAL(enable_mpu)
 mrs   x0, SCTLR_EL2
 bic   x0, x0, #SCTLR_ELx_BR   /* Disable Background region */
 orr   x0, x0, #SCTLR_Axx_ELx_M/* Enable MPU */
 orr   x0, x0, #SCTLR_Axx_ELx_C/* Enable D-cache */
 orr   x0, x0, #SCTLR_Axx_ELx_WXN  /* Enable WXN */
 dsb   sy
 ^— This data barrier is needed in order to complete any data access, which 
guarantees that all explicit memory accesses before
this instruction complete, so we can safely turn ON the MPU and 
disable the background region.


I think Julien agreed to this in a previous email as long as we have an 
appropriate comment. So we will keep this as it is.


- Ayan

Re: [PATCH v3 5/6] xen/arm: mpu: Enable MPU

2024-10-23 Thread Julien Grall





On 23/10/2024 17:06, Ayan Kumar Halder wrote:

Hi Luca/Julien,

On 22/10/2024 17:31, Luca Fancellu wrote:

Hi Julien,


On 22 Oct 2024, at 14:13, Julien Grall  wrote:



On 22/10/2024 10:56, Luca Fancellu wrote:

On 22 Oct 2024, at 10:47, Julien Grall  wrote:

Hi Luca,

On 22/10/2024 10:41, Luca Fancellu wrote:

On 21 Oct 2024, at 17:27, Julien Grall  wrote:
2) dsb+isb barrier after enabling the MPU, since we are enabling 
the MPU but also because we are disabling the background region
TBH, I don't understand this one. Why would disabling the 
background region requires a dsb + isb? Do you have any pointer in 
the Armv8-R specification?
I’m afraid this is only my deduction, Section C1.4 of the Arm® 
Architecture Reference Manual Supplement Armv8, for R-profile 
AArch64 architecture,
(DDI 0600B.a) explains what is the background region, it says it 
implements physical address range(s), so when we disable it, we 
would like any data
access to complete before changing this implementation defined range 
with the ranges defined by us tweaking PRBAR/PRLAR, am I right?

My mental model for the ordering is similar to a TLB flush which is:
   1/ dsb nsh
   2/ tlbi
   3/ dsb nsh
   4/ isb

Enabling the MPU is effectively 2. AFAIK, 1 is only necessary to 
ensure the update to the page-tables. But it is not a requirement to 
ensure any data access are completed (otherwise, we would have needed 
a dsb sy because we don't know how far the access are going...).
Uhm… I’m not sure we are on the same page, probably I explained that 
wrongly, so my point is that:


FUNC_LOCAL(enable_mpu)
 mrs   x0, SCTLR_EL2
 bic   x0, x0, #SCTLR_ELx_BR   /* Disable Background region */
 orr   x0, x0, #SCTLR_Axx_ELx_M    /* Enable MPU */
 orr   x0, x0, #SCTLR_Axx_ELx_C    /* Enable D-cache */
 orr   x0, x0, #SCTLR_Axx_ELx_WXN  /* Enable WXN */
 dsb   sy
 ^— This data barrier is needed in order to complete any data 
access, which guarantees that all explicit memory accesses before
    this instruction complete, so we can safely turn ON the 
MPU and disable the background region.


I think



I think Julien agreed to this in a previous email as long as we have an 
appropriate comment. So we will keep this as it is.


Sorry I didn't manage to answer yet. But yes, I am ok with the barrier 
for now, but I am not agree on the reasoning used.


Cheers,

--
Julien Grall

Re: [PATCH 3/6] xen: add new domctl get_changed_domain

2024-10-23 Thread Daniel P. Smith


On 10/23/24 09:10, Juergen Gross wrote:

Add a new domctl sub-function to get data of a domain having changed
state (this is needed by Xenstore).

The returned state just contains the domid, the domain unique id,
and some flags (existing, shutdown, dying).

In order to enable Xenstore stubdom being built for multiple Xen
versions, make this domctl stable.  For stable domctls the
interface_version is specific to the respective domctl op and it is an
in/out parameter: On input the caller is specifying the desired version
of the op, while on output the hypervisor will return the used version
(this will be at max the caller supplied version, but might be lower in
case the hypervisor doesn't support this version).

Signed-off-by: Juergen Gross 
---
V1:
- use a domctl subop for the new interface (Jan Beulich)
---
  tools/flask/policy/modules/dom0.te  |  2 +-
  xen/common/domain.c | 51 +
  xen/common/domctl.c | 19 ++-
  xen/common/event_channel.c  |  9 -
  xen/include/public/domctl.h | 33 +++
  xen/include/xen/event.h |  6 
  xen/include/xen/sched.h |  2 ++
  xen/include/xsm/dummy.h |  8 +
  xen/include/xsm/xsm.h   |  6 
  xen/xsm/dummy.c |  1 +
  xen/xsm/flask/hooks.c   |  7 
  xen/xsm/flask/policy/access_vectors |  2 ++
  12 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/tools/flask/policy/modules/dom0.te 
b/tools/flask/policy/modules/dom0.te
index 16b8c9646d..6043c01b12 100644
--- a/tools/flask/policy/modules/dom0.te
+++ b/tools/flask/policy/modules/dom0.te
@@ -40,7 +40,7 @@ allow dom0_t dom0_t:domain {
  };
  allow dom0_t dom0_t:domain2 {
set_cpu_policy gettsc settsc setscheduler set_vnumainfo
-   get_vnumainfo psr_cmt_op psr_alloc get_cpu_policy
+   get_vnumainfo psr_cmt_op psr_alloc get_cpu_policy get_domain_state


I don't think that is where you want it, as that restricts dom0 to only 
being able to make that call against dom0. The question I have is, are 
you looking for this permission to be explicitly assigned to dom0 or to 
the domain type that was allowed to create the domain. IMHO, I think you 
would want the latter, so not only should the permission go here, but 
also added to xen.if:create_domain_common.


Additionally, I would also recommend adding the following to xenstore.te:

allow xenstore_t domain_type:domain get_domain_state


  allow dom0_t dom0_t:resource { add remove };
  


...


@@ -866,6 +873,16 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
u_domctl)
  __HYPERVISOR_domctl, "h", u_domctl);
  break;
  
+case XEN_DOMCTL_get_domain_state:

+ret = xsm_get_domain_state(XSM_HOOK, d);


XSM_HOOK will allow any domain to make this call on any domain. What I 
think you want here is XSM_XS_PRIV. That will allow either a domain 
flagged as the xenstore domain or dom0 to make the call.



+if ( ret )
+break;
+
+copyback = 1;
+op->interface_version = XEN_DOMCTL_GETDOMSTATE_VERS_MAX;
+ret = get_domain_state(&op->u.get_domain_state, d);
+break;
+
  default:
  ret = arch_do_domctl(op, d, u_domctl);
  break;


...


@@ -815,6 +816,13 @@ static XSM_INLINE int cf_check xsm_argo_send(
  
  #endif /* CONFIG_ARGO */
  
+static XSM_INLINE int cf_check xsm_get_domain_state(

+XSM_DEFAULT_ARG struct domain *d)
+{
+XSM_ASSERT_ACTION(XSM_HOOK);


Per the above, this would need changed to XSM_XS_PRIV.


+return xsm_default_action(action, current->domain, d);
+}
+
  #include 
  static XSM_INLINE int cf_check xsm_xen_version(XSM_DEFAULT_ARG uint32_t op)
  {


...


@@ -1853,6 +1854,11 @@ static int cf_check flask_argo_send(
  
  #endif
  
+static int cf_check flask_get_domain_state(struct domain *d)

+{
+return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__GET_DOMAIN_STATE);


I believe you want SECCLASS_DOMAIN2 here.


+}
+
  static const struct xsm_ops __initconst_cf_clobber flask_ops = {
  .set_system_active = flask_set_system_active,
  .security_domaininfo = flask_security_domaininfo,


v/r,
dps

Re: [PATCH 2/3] x86/boot: Fix microcode module handling during PVH boot

2024-10-23 Thread Andrew Cooper

On 23/10/2024 6:01 pm, Roger Pau Monné wrote:
> On Wed, Oct 23, 2024 at 11:57:55AM +0100, Andrew Cooper wrote:
>> From: "Daniel P. Smith" 
>>
>> As detailed in commit 0fe607b2a144 ("x86/boot: Fix PVH boot during boot_info
>> transition period"), the use of __va(mbi->mods_addr) constitutes a
>> use-after-free on the PVH boot path.
>>
>> This pattern has been in use since before PVH support was added.  Inside a 
>> PVH
>> VM, it will go unnoticed as long as the microcode container parser doesn't
>> choke on the random data it finds.
>>
>> The use within early_microcode_init() happens to be safe because it's prior 
>> to
>> move_xen().  microcode_init_cache() is after move_xen(), and therefore 
>> unsafe.
>>
>> Plumb the boot_info pointer down, replacing module_map and mbi.  Importantly,
>> bi->mods[].mod is a safe way to access the module list during PVH boot.
>>
>> Note: microcode_scan_module() is still bogusly stashing a bootstrap_map()'d
>>   pointer in ucode_blob.data, which constitutes a different
>>   use-after-free, and only works in general because of a second bug.  
>> This
>>   is unrelated to PVH, and needs untangling differently.
>>
>> Signed-off-by: Daniel P. Smith 
>> Signed-off-by: Andrew Cooper 
>> ---
>> CC: Jan Beulich 
>> CC: Roger Pau Monné 
>> CC: Daniel P. Smith 
>>
>> Refectored/extracted from the hyperlaunch series.
>>
>> There's no good Fixes tag for this, because it can't reasonably be "introduce
>> PVH", nor can the fix as-is reasonably be backported.  If we want to fix the
>> bug in older trees, we need to plumb the mod pointer down alongside mbi.
>> ---
>>  xen/arch/x86/cpu/microcode/core.c| 40 +++-
>>  xen/arch/x86/include/asm/microcode.h |  8 +++---
>>  xen/arch/x86/setup.c |  4 +--
>>  3 files changed, 22 insertions(+), 30 deletions(-)
>>
>> diff --git a/xen/arch/x86/cpu/microcode/core.c 
>> b/xen/arch/x86/cpu/microcode/core.c
>> index 8564e4d2c94c..1d58cb0f3bc1 100644
>> --- a/xen/arch/x86/cpu/microcode/core.c
>> +++ b/xen/arch/x86/cpu/microcode/core.c
>> @@ -35,6 +35,7 @@
>>  #include 
>>  
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -152,11 +153,8 @@ static int __init cf_check parse_ucode(const char *s)
>>  }
>>  custom_param("ucode", parse_ucode);
>>  
>> -static void __init microcode_scan_module(
>> -unsigned long *module_map,
>> -const multiboot_info_t *mbi)
>> +static void __init microcode_scan_module(struct boot_info *bi)
> Sorry to be pedantic, can't you keep bi as const?

Not really, no.

Yes technically in this patch, but no by the end of the hyperlaunch series.

I'm uninterested in taking extra churn just to have a pointer be const
for a few more patches.

[For the list, I conferred with Roger IRL and he was happy.]

~Andrew

Re: [PATCH v2 06/10] xen/arm: ffa: Use bit 15 convention for SPs

2024-10-23 Thread Jens Wiklander

Hi Bertrand,

On Wed, Oct 16, 2024 at 10:32 AM Bertrand Marquis
 wrote:
>
> Make use and required to have bit 15 convention respected by secure
> world (having bit 15 of IDs set for secure endpoints and non-set for
> non-secure ones).
> If any secure partition has an ID with bit 15 not set, it will not be
> possible to contact or detect them.
> Print an error log during probe for each secure endpoint detected with
> bit 15 not set.
>
> We are switching to this convention because Xen is currently not using
> VMIDs with bit 15 set so we are sure that no VM will have it set (this
> is ensured by BUILD_BUG_ON in case this becomes false in the future).
> It is allowing to easily distinguish between secure and non-secure
> endpoints, preventing the need to store a list of secure endpoint IDs in
> Xen.
>
> Signed-off-by: Bertrand Marquis 
> ---
> Changes in v2:
> - rebase
> ---
>  xen/arch/arm/tee/ffa.c  | 22 +++---
>  xen/arch/arm/tee/ffa_partinfo.c | 54 +
>  xen/arch/arm/tee/ffa_private.h  |  7 +
>  xen/arch/arm/tee/ffa_shm.c  | 12 +++-
>  4 files changed, 77 insertions(+), 18 deletions(-)
>
> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c
> index 4b55e8b48f01..a292003ca9fe 100644
> --- a/xen/arch/arm/tee/ffa.c
> +++ b/xen/arch/arm/tee/ffa.c
> @@ -195,6 +195,14 @@ static void handle_msg_send_direct_req(struct 
> cpu_user_regs *regs, uint32_t fid)
>  goto out;
>  }
>
> +/* we do not support direct messages to VMs */
> +if ( !FFA_ID_IS_SECURE(src_dst & GENMASK(15,0)) )
> +{
> +resp.a0 = FFA_ERROR;
> +resp.a2 = FFA_RET_NOT_SUPPORTED;
> +goto out;
> +}
> +
>  arg.a1 = src_dst;
>  arg.a2 = get_user_reg(regs, 2) & mask;
>  arg.a3 = get_user_reg(regs, 3) & mask;
> @@ -391,11 +399,15 @@ static int ffa_domain_init(struct domain *d)
>
>  if ( !ffa_fw_version )
>  return -ENODEV;
> - /*
> -  * We can't use that last possible domain ID or ffa_get_vm_id() would
> -  * cause an overflow.
> -  */
> -if ( d->domain_id >= UINT16_MAX)
> +/*
> + * We are using the domain_id + 1 as the FF-A ID for VMs as FF-A ID 0 is
> + * reserved for the hypervisor and we only support secure endpoints using
> + * FF-A IDs with BIT 15 set to 1 so make sure those are not used by Xen.
> + */
> +BUILD_BUG_ON(DOMID_FIRST_RESERVED >= UINT16_MAX);
> +BUILD_BUG_ON((DOMID_MASK & BIT(15, U)) != 0);
> +
> +if ( d->domain_id >= DOMID_FIRST_RESERVED )
>  return -ERANGE;
>
>  ctx = xzalloc(struct ffa_ctx);
> diff --git a/xen/arch/arm/tee/ffa_partinfo.c b/xen/arch/arm/tee/ffa_partinfo.c
> index 75a073d090e0..3cf801523296 100644
> --- a/xen/arch/arm/tee/ffa_partinfo.c
> +++ b/xen/arch/arm/tee/ffa_partinfo.c
> @@ -169,14 +169,26 @@ void ffa_handle_partition_info_get(struct cpu_user_regs 
> *regs)
>
>  if ( ffa_sp_count > 0 )
>  {
> -uint32_t n;
> +uint32_t n, real_num = ffa_sp_count;

Nit: how about n_limit instead of real_num?

>  void *src_buf = ffa_rx;
>
>  /* copy the secure partitions info */
> -for ( n = 0; n < ffa_sp_count; n++ )
> +for ( n = 0; n < real_num; n++ )
>  {
> -memcpy(dst_buf, src_buf, dst_size);
> -dst_buf += dst_size;
> +struct ffa_partition_info_1_1 *fpi = src_buf;
> +
> +/* filter out SP not following bit 15 convention if any */
> +if ( FFA_ID_IS_SECURE(fpi->id) )
> +{
> +memcpy(dst_buf, src_buf, dst_size);
> +dst_buf += dst_size;
> +}
> +else
> +{
> +printk(XENLOG_INFO "ffa: sp id 0x%04x skipped, bit 15 is 
> 0\n",
> +   fpi->id);

We have already logged this in init_subscribers() below. Is there a
risk of flooding the logs with this?

Cheers,
Jens

> +ffa_sp_count--;
> +}
>  src_buf += src_size;
>  }
>  }
> @@ -276,10 +288,25 @@ static bool init_subscribers(uint16_t count, uint32_t 
> fpi_size)
>  {
>  fpi = ffa_rx + n * fpi_size;
>
> -if ( fpi->partition_properties & FFA_PART_PROP_NOTIF_CREATED )
> -subscr_vm_created_count++;
> -if ( fpi->partition_properties & FFA_PART_PROP_NOTIF_DESTROYED )
> -subscr_vm_destroyed_count++;
> +/*
> + * We need to have secure partitions using bit 15 set convention for
> + * secure partition IDs.
> + * Inform the user with a log and discard giving created or destroy
> + * event to those IDs.
> + */
> +if ( !FFA_ID_IS_SECURE(fpi->id) )
> +{
> +printk(XENLOG_ERR "ffa: Firmware is not using bit 15 convention 
> for IDs !!\n"
> +  "ffa: Secure partition with id 0x%04x cannot 
> be used\n",
> +  fpi->id);
> +}
> +else

[MINI-OS PATCH] config: add support for libxenmanage

2024-10-23 Thread Juergen Gross

Add CONFIG_LIBXENMANAGE support.

Signed-off-by: Juergen Gross 
---
Please note that this patch should be committed only after the related
Xen patch "tools/libs: add a new libxenmanage library" has been Acked,
as otherwise it should either be dropped (in case the approach of
adding a new library is being rejected) or changed (in case the name
of the new library needs to be modified)!

 Config.mk | 2 ++
 Makefile  | 4 
 2 files changed, 6 insertions(+)

diff --git a/Config.mk b/Config.mk
index f59a0cf4..e493533a 100644
--- a/Config.mk
+++ b/Config.mk
@@ -46,6 +46,7 @@ GNTTAB_PATH ?= 
$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/gnttab
 CALL_PATH ?= $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/call
 FOREIGNMEMORY_PATH ?= 
$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/foreignmemory
 DEVICEMODEL_PATH ?= $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/devicemodel
+MANAGE_PATH ?= $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/manage
 CTRL_PATH ?= $(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH)
 GUEST_PATH ?= $(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH)
 else
@@ -202,6 +203,7 @@ CONFIG-n += CONFIG_LIBXENGNTTAB
 CONFIG-n += CONFIG_LIBXENGUEST
 CONFIG-n += CONFIG_LIBXENTOOLCORE
 CONFIG-n += CONFIG_LIBXENTOOLLOG
+CONFIG-n += CONFIG_LIBXENMANAGE
 # Setting CONFIG_USE_XEN_CONSOLE copies all print output to the Xen emergency
 # console apart of standard dom0 handled console.
 CONFIG-n += CONFIG_USE_XEN_CONSOLE
diff --git a/Makefile b/Makefile
index ffa8d1a8..d094858a 100644
--- a/Makefile
+++ b/Makefile
@@ -159,6 +159,10 @@ ifeq ($(CONFIG_LIBXENCTRL),y)
 APP_LDLIBS += -L$(CTRL_PATH) -whole-archive -lxenctrl -no-whole-archive
 LIBS += $(CTRL_PATH)/libxenctrl.a
 endif
+ifeq ($(CONFIG_LIBXENMANAGE),y)
+APP_LDLIBS += -L$(MANAGE_PATH) -whole-archive -lxenmanage -no-whole-archive
+LIBS += $(MANAGE_PATH)/libxenmanage.a
+endif
 APP_LDLIBS += -lpci
 APP_LDLIBS += -lz
 APP_LDLIBS += -lm
-- 
2.43.0

Re: [PATCH 4/3] x86/boot: Remove the mbi_p parameter from __start_xen()

2024-10-23 Thread Daniel P. Smith


On 10/23/24 10:44, Andrew Cooper wrote:

The use of physical addresses in __start_xen() has proved to be fertile soure
of bugs.

The MB1/2 path stashes the MBI pointer in multiboot_ptr (a setup.c variable
even), then re-loads it immediately before calling __start_xen().  For this,
we can just drop the function parameter and read multiboot_ptr in the one
place it's used.

The EFI path also passes this parameter into __start_xen().  Have the EFI path
set up multiboot_ptr too, and move the explanation of phyiscal-mode pointers.

No functional change.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Marek Marczykowski-Górecki 
CC: Daniel P. Smith 


For EFI:
Acked-by: Daniel P. Smith 

For remainder:
Reviewed-by: Daniel P. Smith

[PATCH v2 1/3] xen/riscv: introduce setup_mm()

2024-10-23 Thread Oleksii Kurochko

Introduce the implementation of setup_mm(), which includes:
1. Adding all free regions to the boot allocator, as memory is needed
   to allocate page tables used for frame table mapping.
2. Calculating RAM size and the RAM end address.
3. Setting up direct map mappings from each RAM bank and initialize
   directmap_virt_start (also introduce XENHEAP_VIRT_START which is
   defined as directmap_virt_start) to be properly aligned with RAM
   start to use more superpages to reduce pressure on the TLB.
4. Setting up frame table mappings from physical address 0 to ram_end
   to simplify mfn_to_page() and page_to_mfn() conversions.
5. Setting up total_pages and max_page.

Update virt_to_maddr() to use introduced XENHEAP_VIRT_START.

Implement maddr_to_virt() function to convert a machine address
to a virtual address. This function is specifically designed to be used
only for the DIRECTMAP region, so a check has been added to ensure that
the address does not exceed DIRECTMAP_SIZE.

After the introduction of maddr_to_virt() the following linkage error starts
to occur and to avoid it share_xen_page_with_guest() stub is added:
  riscv64-linux-gnu-ld: prelink.o: in function `tasklet_kill':
  /build/xen/common/tasklet.c:176: undefined reference to
 `share_xen_page_with_guest'
  riscv64-linux-gnu-ld: ./.xen-syms.0: hidden symbol `share_xen_page_with_guest'
isn't defined riscv64-linux-gnu-ld: final link failed: bad value

Despite the linkger fingering tasklet.c, it's trace.o which has the undefined
refenrece:
  $ find . -name \*.o | while read F; do nm $F | grep share_xen_page_with_guest 
&&
echo $F; done
 U share_xen_page_with_guest
./xen/common/built_in.o
 U share_xen_page_with_guest
./xen/common/trace.o
 U share_xen_page_with_guest
./xen/prelink.o

Looking at trace.i, there is call of share_xen_page_with_guest() but in case of
when maddr_to_virt() is defined as "return NULL" compiler optimizes the part of
common/trace.c code where share_xen_page_with_priviliged_guest() is called
( there is no any code in dissambled common/trace.o ) so there is no real call
of share_xen_page_with_priviliged_guest().

Signed-off-by: Oleksii Kurochko 
---
 Changes in V2:
  - merge patch 2 ( xen/riscv: implement maddr_to_virt() ) to the current one
as maddr_to_virt() started to use the thing which are introduced in the
current patch.
  - merge with patch 1 ( xen/riscv: add stub for share_xen_page_with_guest() )
as this linkage issue happens during introduction of maddr_to_virt().
  - use mathematical range expressions for log messages.
  - calculate properly amount of mfns in setup_frametable_mapping() taking into
account that ps and pe can be not properly aligned.
  - drop full stop at the end of debug message.
  - use PFN_DOWN(framsetable_size) instead of frametable_size >> PAGE_SHIFT.
  - round down ram_size when it is being accumulated in setup_mm() to guarantee
that banks can never have partial pages at their start/end.
  - call setup_directmap_mappings() only for ram bank regions instead of
mapping [0, ram_end] region.
  - drop directmap_virt_end for now as it isn't used at the moment.
  - update the commit message.
---
 xen/arch/riscv/include/asm/config.h |   1 +
 xen/arch/riscv/include/asm/mm.h |  13 ++-
 xen/arch/riscv/include/asm/setup.h  |   2 +
 xen/arch/riscv/mm.c | 121 
 xen/arch/riscv/setup.c  |   3 +
 xen/arch/riscv/stubs.c  |  10 +++
 6 files changed, 146 insertions(+), 4 deletions(-)

diff --git a/xen/arch/riscv/include/asm/config.h 
b/xen/arch/riscv/include/asm/config.h
index ad75871283..3aa9afa5ad 100644
--- a/xen/arch/riscv/include/asm/config.h
+++ b/xen/arch/riscv/include/asm/config.h
@@ -90,6 +90,7 @@
 #define DIRECTMAP_SLOT_START200
 #define DIRECTMAP_VIRT_STARTSLOTN(DIRECTMAP_SLOT_START)
 #define DIRECTMAP_SIZE  (SLOTN(DIRECTMAP_SLOT_END) - 
SLOTN(DIRECTMAP_SLOT_START))
+#define XENHEAP_VIRT_START  directmap_virt_start
 
 #define FRAMETABLE_SCALE_FACTOR  (PAGE_SIZE/sizeof(struct page_info))
 #define FRAMETABLE_SIZE_IN_SLOTS (((DIRECTMAP_SIZE / SLOTN(1)) / 
FRAMETABLE_SCALE_FACTOR) + 1)
diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index ebb142502e..bff4e763d9 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -12,6 +12,8 @@
 
 #include 
 
+extern vaddr_t directmap_virt_start;
+
 #define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
 #define paddr_to_pfn(pa)  ((unsigned long)((pa) >> PAGE_SHIFT))
 
@@ -25,8 +27,11 @@
 
 static inline void *maddr_to_virt(paddr_t ma)
 {
-BUG_ON("unimplemented");
-return NULL;
+unsigned long va_offset = maddr_to_directmapoff(ma);
+
+ASSERT(va_offset < DIRECTMAP_SIZE);
+
+return (void *)(XENHEAP_VIRT_START + va_offset);
 }
 
 /*
@@ -37,9 +42,9 @@ static inline void *maddr_to_virt(paddr_t ma)
  */
 st

[PATCH v2 3/3] xen/riscv: finalize boot allocator and transition to boot state

2024-10-23 Thread Oleksii Kurochko

Add a call to end_boot_allocator() in start_xen() to finalize the
boot memory allocator, moving free pages to the domain sub-allocator.

After initializing the memory subsystem, update `system_state` from
`SYS_STATE_early_boot` to `SYS_STATE_boot`, signifying the end of the
early boot phase.

Signed-off-by: Oleksii Kurochko 
Acked-by: Jan Beulich 
---
Change in V2:
 - Acked-by: Jan Beulich 
---
 xen/arch/riscv/setup.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 3652cb056d..9680332fee 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -65,6 +65,14 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
 
 vm_init();
 
+end_boot_allocator();
+
+/*
+ * The memory subsystem has been initialized, we can now switch from
+ * early_boot -> boot.
+ */
+system_state = SYS_STATE_boot;
+
 printk("All set up\n");
 
 machine_halt();
-- 
2.47.0

[PATCH v2 0/3] Setup memory management for RISC-V

2024-10-23 Thread Oleksii Kurochko

Finish initializing the memory subsystem by mapping the direct map and
frame table.
In the case of RISC-V 64, which has a large virtual address space
(the minimum supported MMU mode is Sv39, providing TB of VA space),
so frame table is mapped starting from physical address
0 to ram_end.
This simplifies the calculations and thereby improves performance for
page_to_mfn(), mfn_to_page() as there is no frametable_base_pdx or
frametable_base_mfn ( if CONFIG_PDX_COMPRESSION=n).
XENHEAP_VIRT_START is introduced which is equal to directmap_virt_start which
is aligned with ram start to have superpages mapping and reduce pressure only
TLB.
In addition, initialize the VMAP_DEFAULT region, finalize the boot allocator,
and update the system state from early_boot to boot.

Introduce share_xen_page_with_guest() to deal with linkage error:
  riscv64-linux-gnu-ld: prelink.o: in function `tasklet_kill':
  /build/xen/common/tasklet.c:176: undefined reference to
 `share_xen_page_with_guest'
  riscv64-linux-gnu-ld: ./.xen-syms.0: hidden symbol `share_xen_page_with_guest'
isn't defined riscv64-linux-gnu-ld: final link failed: bad value

The function maddr_to_virt() is introduced as part of this patch series, as
setup_directmap_mappings() uses it indirectly through mfn_to_virt().

virt_to_maddr() is updated as it is started to use XENHEAP_VIRT_START which is
introduced in this patch series.

---
Changes in V2:
 - update the cover letter message.
 - merge first 3 patches to "introduce setup_mm()" patch as after setup_mm()
   rework all the things in first two patches of v1 started to use changes
   introduced in "introduce setup_mm()" patch.
 - add Acked-by for some patch series.
 - All other details please look at the specific patch.
---

Oleksii Kurochko (3):
  xen/riscv: introduce setup_mm()
  xen/riscv: initialize the VMAP_DEFAULT virtual range
  xen/riscv: finalize boot allocator and transition to boot state

 xen/arch/riscv/include/asm/config.h |   1 +
 xen/arch/riscv/include/asm/mm.h |  13 ++-
 xen/arch/riscv/include/asm/setup.h  |   2 +
 xen/arch/riscv/mm.c | 132 ++--
 xen/arch/riscv/pt.c |   6 ++
 xen/arch/riscv/setup.c  |  14 +++
 xen/arch/riscv/stubs.c  |  10 +++
 7 files changed, 168 insertions(+), 10 deletions(-)

-- 
2.47.0

[PATCH 3/6] xen: add new domctl get_changed_domain

2024-10-23 Thread Juergen Gross

Add a new domctl sub-function to get data of a domain having changed
state (this is needed by Xenstore).

The returned state just contains the domid, the domain unique id,
and some flags (existing, shutdown, dying).

In order to enable Xenstore stubdom being built for multiple Xen
versions, make this domctl stable.  For stable domctls the
interface_version is specific to the respective domctl op and it is an
in/out parameter: On input the caller is specifying the desired version
of the op, while on output the hypervisor will return the used version
(this will be at max the caller supplied version, but might be lower in
case the hypervisor doesn't support this version).

Signed-off-by: Juergen Gross 
---
V1:
- use a domctl subop for the new interface (Jan Beulich)
---
 tools/flask/policy/modules/dom0.te  |  2 +-
 xen/common/domain.c | 51 +
 xen/common/domctl.c | 19 ++-
 xen/common/event_channel.c  |  9 -
 xen/include/public/domctl.h | 33 +++
 xen/include/xen/event.h |  6 
 xen/include/xen/sched.h |  2 ++
 xen/include/xsm/dummy.h |  8 +
 xen/include/xsm/xsm.h   |  6 
 xen/xsm/dummy.c |  1 +
 xen/xsm/flask/hooks.c   |  7 
 xen/xsm/flask/policy/access_vectors |  2 ++
 12 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/tools/flask/policy/modules/dom0.te 
b/tools/flask/policy/modules/dom0.te
index 16b8c9646d..6043c01b12 100644
--- a/tools/flask/policy/modules/dom0.te
+++ b/tools/flask/policy/modules/dom0.te
@@ -40,7 +40,7 @@ allow dom0_t dom0_t:domain {
 };
 allow dom0_t dom0_t:domain2 {
set_cpu_policy gettsc settsc setscheduler set_vnumainfo
-   get_vnumainfo psr_cmt_op psr_alloc get_cpu_policy
+   get_vnumainfo psr_cmt_op psr_alloc get_cpu_policy get_domain_state
 };
 allow dom0_t dom0_t:resource { add remove };
 
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 61b7899cb8..e55c6f6c5e 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -154,6 +154,57 @@ void domain_reset_states(void)
 rcu_read_unlock(&domlist_read_lock);
 }
 
+static void set_domain_state_info(struct xen_domctl_get_domain_state *info,
+  const struct domain *d)
+{
+info->state = XEN_DOMCTL_GETDOMSTATE_STATE_EXIST;
+if ( d->is_shut_down )
+info->state |= XEN_DOMCTL_GETDOMSTATE_STATE_SHUTDOWN;
+if ( d->is_dying == DOMDYING_dead )
+info->state |= XEN_DOMCTL_GETDOMSTATE_STATE_DYING;
+info->unique_id = d->unique_id;
+}
+
+int get_domain_state(struct xen_domctl_get_domain_state *info, struct domain 
*d)
+{
+unsigned int dom;
+
+memset(info, 0, sizeof(*info));
+
+if ( d )
+{
+set_domain_state_info(info, d);
+
+return 0;
+}
+
+while ( (dom = find_first_bit(dom_state_changed, DOMID_MASK + 1)) <
+DOMID_FIRST_RESERVED )
+{
+d = rcu_lock_domain_by_id(dom);
+
+if ( test_and_clear_bit(dom, dom_state_changed) )
+{
+info->domid = dom;
+if ( d )
+{
+set_domain_state_info(info, d);
+
+rcu_unlock_domain(d);
+}
+
+return 0;
+}
+
+if ( d )
+{
+rcu_unlock_domain(d);
+}
+}
+
+return -ENOENT;
+}
+
 static void __domain_finalise_shutdown(struct domain *d)
 {
 struct vcpu *v;
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index ea16b75910..e030cce2ec 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -278,6 +278,11 @@ static struct vnuma_info *vnuma_init(const struct 
xen_domctl_vnuma *uinfo,
 return ERR_PTR(ret);
 }
 
+static bool is_stable_domctl(uint32_t cmd)
+{
+return cmd == XEN_DOMCTL_get_domain_state;
+}
+
 long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 {
 long ret = 0;
@@ -288,7 +293,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
u_domctl)
 if ( copy_from_guest(op, u_domctl, 1) )
 return -EFAULT;
 
-if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
+if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION &&
+ !is_stable_domctl(op->cmd) )
 return -EACCES;
 
 switch ( op->cmd )
@@ -309,6 +315,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
u_domctl)
 fallthrough;
 case XEN_DOMCTL_test_assign_device:
 case XEN_DOMCTL_vm_event_op:
+case XEN_DOMCTL_get_domain_state:
 if ( op->domain == DOMID_INVALID )
 {
 d = NULL;
@@ -866,6 +873,16 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
u_domctl)
 __HYPERVISOR_domctl, "h", u_domctl);
 break;
 
+case XEN_DOMCTL_get_domain_state:
+ret = xsm_get_domain_state(XSM_HOOK, d);
+if ( ret )
+break;
+
+copyback = 1;
+op->interface_version = XEN_DOMCT

[PATCH 5/6] tools: add a dedicated header file for barrier definitions

2024-10-23 Thread Juergen Gross

Instead of having to include xenctrl.h for getting definitions of cpu
barriers, add a dedicated header for that purpose.

Switch the xen-9pfsd daemon to use the new header instead of xenctrl.h.

This is in preparation of making Xenstore independent from libxenctrl.

Signed-off-by: Juergen Gross 
---
V1:
- new patch
---
 tools/9pfsd/io.c|  5 +++-
 tools/include/xen-barrier.h | 51 +
 tools/include/xenctrl.h | 28 +---
 tools/libs/ctrl/Makefile|  2 +-
 4 files changed, 57 insertions(+), 29 deletions(-)
 create mode 100644 tools/include/xen-barrier.h

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 468e0241f5..14cfcaf568 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -13,15 +13,18 @@
 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
-#include/* For cpu barriers. */
+#include 
 #include 
 
 #include "xen-9pfsd.h"
diff --git a/tools/include/xen-barrier.h b/tools/include/xen-barrier.h
new file mode 100644
index 00..62036f528b
--- /dev/null
+++ b/tools/include/xen-barrier.h
@@ -0,0 +1,51 @@
+/**
+ * xen-barrier.h
+ *
+ * Definition of CPU barriers, part of libxenctrl.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see .
+ */
+
+#ifndef XENBARRIER_H
+#define XENBARRIER_H
+
+/*
+ *  DEFINITIONS FOR CPU BARRIERS
+ */
+
+#define xen_barrier() asm volatile ( "" : : : "memory")
+
+#if defined(__i386__)
+#define xen_mb()  asm volatile ( "lock addl $0, -4(%%esp)" ::: "memory" )
+#define xen_rmb() xen_barrier()
+#define xen_wmb() xen_barrier()
+#elif defined(__x86_64__)
+#define xen_mb()  asm volatile ( "lock addl $0, -32(%%rsp)" ::: "memory" )
+#define xen_rmb() xen_barrier()
+#define xen_wmb() xen_barrier()
+#elif defined(__arm__)
+#define xen_mb()   asm volatile ("dmb" : : : "memory")
+#define xen_rmb()  asm volatile ("dmb" : : : "memory")
+#define xen_wmb()  asm volatile ("dmb" : : : "memory")
+#elif defined(__aarch64__)
+#define xen_mb()   asm volatile ("dmb sy" : : : "memory")
+#define xen_rmb()  asm volatile ("dmb sy" : : : "memory")
+#define xen_wmb()  asm volatile ("dmb sy" : : : "memory")
+#else
+#error "Define barriers"
+#endif
+
+#endif /* XENBARRIER_H */
diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 29617585c5..ea57e9dbb9 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -48,6 +48,7 @@
 #include 
 
 #include "xentoollog.h"
+#include "xen-barrier.h"
 
 #if defined(__i386__) || defined(__x86_64__)
 #include 
@@ -61,33 +62,6 @@
 
 #define INVALID_MFN  (~0UL)
 
-/*
- *  DEFINITIONS FOR CPU BARRIERS
- */
-
-#define xen_barrier() asm volatile ( "" : : : "memory")
-
-#if defined(__i386__)
-#define xen_mb()  asm volatile ( "lock addl $0, -4(%%esp)" ::: "memory" )
-#define xen_rmb() xen_barrier()
-#define xen_wmb() xen_barrier()
-#elif defined(__x86_64__)
-#define xen_mb()  asm volatile ( "lock addl $0, -32(%%rsp)" ::: "memory" )
-#define xen_rmb() xen_barrier()
-#define xen_wmb() xen_barrier()
-#elif defined(__arm__)
-#define xen_mb()   asm volatile ("dmb" : : : "memory")
-#define xen_rmb()  asm volatile ("dmb" : : : "memory")
-#define xen_wmb()  asm volatile ("dmb" : : : "memory")
-#elif defined(__aarch64__)
-#define xen_mb()   asm volatile ("dmb sy" : : : "memory")
-#define xen_rmb()  asm volatile ("dmb sy" : : : "memory")
-#define xen_wmb()  asm volatile ("dmb sy" : : : "memory")
-#else
-#error "Define barriers"
-#endif
-
-
 #define XENCTRL_HAS_XC_INTERFACE 1
 /* In Xen 4.0 and earlier, xc_interface_open and xc_evtchn_open would
  * both return ints being the file descriptor.  In 4.1 and later, they
diff --git a/tools/libs/ctrl/Makefile b/tools/libs/ctrl/Makefile
index 5fe0bfad0c..acce8639d3 100644
--- a/tools/libs/ctrl/Makefile
+++ b/tools/libs/ctrl/Makefile
@@ -3,7 +3,7 @@ include $(XEN_ROOT)/tools/Rules.mk
 
 include Makefile.common
 
-LIBHEADER := xenctrl.h xenctrl_compat.h
+LIBHEADER := xenctrl.h xenctrl_compat.h xen-barrier.h
 PKG_CONFIG_FILE := xencontrol.pc
 PKG_CONFIG_NAME := Xencontrol
 
-- 
2.43.0

[PATCH 1/6] xen: add a domain unique id to each domain

2024-10-23 Thread Juergen Gross

Xenstore is referencing domains by their domid, but reuse of a domid
can lead to the situation that Xenstore can't tell whether a domain
with that domid has been deleted and created again without Xenstore
noticing the domain is a new one now.

Add a global domain creation unique id which is updated when creating
a new domain, and store that value in struct domain of the new domain.
The global unique id is initialized with the system time and updates
are done via the xorshift algorithm which is used for pseudo random
number generation, too (see https://en.wikipedia.org/wiki/Xorshift).

Signed-off-by: Juergen Gross 
Reviewed-by: Jan Beulich 
---
V1:
- make unique_id local to function (Jan Beulich)
- add lock (Julien Grall)
- add comment (Julien Grall)
---
 xen/common/domain.c | 20 
 xen/include/xen/sched.h |  3 +++
 2 files changed, 23 insertions(+)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 92263a4fbd..3948640fb0 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -562,6 +562,25 @@ static void _domain_destroy(struct domain *d)
 free_domain_struct(d);
 }
 
+static uint64_t get_unique_id(void)
+{
+static uint64_t unique_id;
+static DEFINE_SPINLOCK(lock);
+uint64_t x = unique_id ? : NOW();
+
+spin_lock(&lock);
+
+/* Pseudo-randomize id in order to avoid consumers relying on sequence. */
+x ^= x << 13;
+x ^= x >> 7;
+x ^= x << 17;
+unique_id = x;
+
+spin_unlock(&lock);
+
+return x;
+}
+
 static int sanitise_domain_config(struct xen_domctl_createdomain *config)
 {
 bool hvm = config->flags & XEN_DOMCTL_CDF_hvm;
@@ -654,6 +673,7 @@ struct domain *domain_create(domid_t domid,
 
 /* Sort out our idea of is_system_domain(). */
 d->domain_id = domid;
+d->unique_id = get_unique_id();
 
 /* Holding CDF_* internal flags. */
 d->cdf = flags;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 90666576c2..1dd8a425f9 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -370,6 +370,9 @@ struct domain
 domid_t  domain_id;
 
 unsigned int max_vcpus;
+
+uint64_t unique_id;   /* Unique domain identifier */
+
 struct vcpu**vcpu;
 
 shared_info_t   *shared_info; /* shared data area */
-- 
2.43.0

[PATCH 2/6] xen: add bitmap to indicate per-domain state changes

2024-10-23 Thread Juergen Gross

Add a bitmap with one bit per possible domid indicating the respective
domain has changed its state (created, deleted, dying, crashed,
shutdown).

Registering the VIRQ_DOM_EXC event will result in setting the bits for
all existing domains and resetting all other bits.

Resetting a bit will be done in a future patch.

This information is needed for Xenstore to keep track of all domains.

Signed-off-by: Juergen Gross 
---
 xen/common/domain.c| 21 +
 xen/common/event_channel.c |  2 ++
 xen/include/xen/sched.h|  2 ++
 3 files changed, 25 insertions(+)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 3948640fb0..61b7899cb8 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -138,6 +138,22 @@ bool __read_mostly vmtrace_available;
 
 bool __read_mostly vpmu_is_available;
 
+static DECLARE_BITMAP(dom_state_changed, DOMID_MASK + 1);
+
+void domain_reset_states(void)
+{
+struct domain *d;
+
+bitmap_zero(dom_state_changed, DOMID_MASK + 1);
+
+rcu_read_lock(&domlist_read_lock);
+
+for_each_domain ( d )
+set_bit(d->domain_id, dom_state_changed);
+
+rcu_read_unlock(&domlist_read_lock);
+}
+
 static void __domain_finalise_shutdown(struct domain *d)
 {
 struct vcpu *v;
@@ -152,6 +168,7 @@ static void __domain_finalise_shutdown(struct domain *d)
 return;
 
 d->is_shut_down = 1;
+set_bit(d->domain_id, dom_state_changed);
 if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
 evtchn_send(d, d->suspend_evtchn);
 else
@@ -832,6 +849,7 @@ struct domain *domain_create(domid_t domid,
  */
 domlist_insert(d);
 
+set_bit(d->domain_id, dom_state_changed);
 memcpy(d->handle, config->handle, sizeof(d->handle));
 
 return d;
@@ -1097,6 +1115,7 @@ int domain_kill(struct domain *d)
 /* Mem event cleanup has to go here because the rings 
  * have to be put before we call put_domain. */
 vm_event_cleanup(d);
+set_bit(d->domain_id, dom_state_changed);
 put_domain(d);
 send_global_virq(VIRQ_DOM_EXC);
 /* fallthrough */
@@ -1286,6 +1305,8 @@ static void cf_check complete_domain_destroy(struct 
rcu_head *head)
 
 xfree(d->vcpu);
 
+set_bit(d->domain_id, dom_state_changed);
+
 _domain_destroy(d);
 
 send_global_virq(VIRQ_DOM_EXC);
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 8db2ca4ba2..9b87d29968 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -1296,6 +1296,8 @@ long do_event_channel_op(int cmd, 
XEN_GUEST_HANDLE_PARAM(void) arg)
 rc = evtchn_bind_virq(&bind_virq, 0);
 if ( !rc && __copy_to_guest(arg, &bind_virq, 1) )
 rc = -EFAULT; /* Cleaning up here would be a mess! */
+if ( !rc && bind_virq.virq == VIRQ_DOM_EXC )
+domain_reset_states();
 break;
 }
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 1dd8a425f9..667863263d 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -800,6 +800,8 @@ void domain_resume(struct domain *d);
 
 int domain_soft_reset(struct domain *d, bool resuming);
 
+void domain_reset_states(void);
+
 int vcpu_start_shutdown_deferral(struct vcpu *v);
 void vcpu_end_shutdown_deferral(struct vcpu *v);
 
-- 
2.43.0

[PATCH 0/6] remove libxenctrl usage from xenstored

2024-10-23 Thread Juergen Gross

Xenstored is using libxenctrl for only one purpose: to get information
about state of domains.

This patch series is removing that dependency by introducing a new
stable interface which can be used by xenstored instead.

There was a RFC series sent out 3 years ago, which I have taken as a
base and by addressing all comments from back then.

The main differences since that RFC series are:

- Instead of introducing an new main hypercall for a stable management
  interface I have just added a new domctl sub-op, as requested in 2021.

- I have added a new library libxenmanage for easy use of the new
  stable hypervisor interface. Main motivation for adding the library
  was the recent attempt to decouple oxenstored from the Xen git tree.
  By using the new library, oxenstored could benefit in the same way as
  xenstored from the new interface: it would be possible to rely on
  stable libraries only.

- Mini-OS has gained some more config options recently, so it was rather
  easy to make xenstore[pvh]-stubdom independent from libxenctrl, too.

- By moving the CPU barrier definitions out of xenctrl.h into a new
  dedicated header xenstored code no longer needs to #include xenctrl.h,
  thus removing any xenctrl reference from xenstored code.

Please note that the last patch can be committed only after the related
Mini-OS patch "config: add support for libxenmanage" has gone in AND
the Mini-OS commit-id has been updated in Config.mk accordingly!

Juergen Gross (6):
  xen: add a domain unique id to each domain
  xen: add bitmap to indicate per-domain state changes
  xen: add new domctl get_changed_domain
  tools/libs: add a new libxenmanage library
  tools: add a dedicated header file for barrier definitions
  tools/xenstored: use new stable interface instead of libxenctrl

 stubdom/Makefile|   8 +-
 stubdom/mini-os.mk  |   1 +
 tools/9pfsd/io.c|   5 +-
 tools/flask/policy/modules/dom0.te  |   2 +-
 tools/include/xen-barrier.h |  51 +
 tools/include/xenctrl.h |  28 +
 tools/include/xenmanage.h   |  98 
 tools/libs/Makefile |   1 +
 tools/libs/ctrl/Makefile|   2 +-
 tools/libs/manage/Makefile  |  10 ++
 tools/libs/manage/Makefile.common   |   1 +
 tools/libs/manage/core.c| 170 
 tools/libs/manage/libxenmanage.map  |   8 ++
 tools/libs/uselibs.mk   |   2 +
 tools/xenstored/Makefile|   2 +-
 tools/xenstored/Makefile.common |   2 +-
 tools/xenstored/core.h  |   1 -
 tools/xenstored/domain.c|  52 -
 tools/xenstored/lu.c|   1 +
 tools/xenstored/lu_daemon.c |   1 +
 xen/common/domain.c |  92 +++
 xen/common/domctl.c |  19 +++-
 xen/common/event_channel.c  |  11 +-
 xen/include/public/domctl.h |  33 ++
 xen/include/xen/event.h |   6 +
 xen/include/xen/sched.h |   7 ++
 xen/include/xsm/dummy.h |   8 ++
 xen/include/xsm/xsm.h   |   6 +
 xen/xsm/dummy.c |   1 +
 xen/xsm/flask/hooks.c   |   7 ++
 xen/xsm/flask/policy/access_vectors |   2 +
 31 files changed, 566 insertions(+), 72 deletions(-)
 create mode 100644 tools/include/xen-barrier.h
 create mode 100644 tools/include/xenmanage.h
 create mode 100644 tools/libs/manage/Makefile
 create mode 100644 tools/libs/manage/Makefile.common
 create mode 100644 tools/libs/manage/core.c
 create mode 100644 tools/libs/manage/libxenmanage.map

-- 
2.43.0

[PATCH 4/6] tools/libs: add a new libxenmanage library

2024-10-23 Thread Juergen Gross

In order to have a stable interface in user land for using stable
domctl and possibly later sysctl interfaces, add a new library
libxenmanage.

Signed-off-by: Juergen Gross 
---
V1:
- new patch
---
 tools/include/xenmanage.h  |  98 +
 tools/libs/Makefile|   1 +
 tools/libs/manage/Makefile |  10 ++
 tools/libs/manage/Makefile.common  |   1 +
 tools/libs/manage/core.c   | 170 +
 tools/libs/manage/libxenmanage.map |   8 ++
 tools/libs/uselibs.mk  |   2 +
 7 files changed, 290 insertions(+)
 create mode 100644 tools/include/xenmanage.h
 create mode 100644 tools/libs/manage/Makefile
 create mode 100644 tools/libs/manage/Makefile.common
 create mode 100644 tools/libs/manage/core.c
 create mode 100644 tools/libs/manage/libxenmanage.map

diff --git a/tools/include/xenmanage.h b/tools/include/xenmanage.h
new file mode 100644
index 00..2e6c3dedaa
--- /dev/null
+++ b/tools/include/xenmanage.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2024 SUSE Software Solutions Germany GmbH
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see .
+ */
+#ifndef XENMANAGE_H
+#define XENMANAGE_H
+
+#include 
+
+/* Callers who don't care don't need to #include  */
+struct xentoollog_logger;
+
+typedef struct xenmanage_handle xenmanage_handle;
+
+/*
+ * Open libxenmanage.
+ *
+ * Get a handle of the xenmanage library. The handle is required for all
+ * further operations of the library.
+ * Parameters:
+ *   logger: Logging function to use. If NULL logging is done to stderr.
+ *   open_flags: Only 0 supported.
+ * Return value: Handle or NULL if error.
+ */
+xenmanage_handle *xenmanage_open(struct xentoollog_logger *logger,
+ unsigned int open_flags);
+
+/*
+ * Close libxenmanage.
+ *
+ * Return a handle of the xenmanage library.
+ * Parameters:
+ *hdl: Handle obtained by xenmanage_open().
+ * Return value: always 0.
+ */
+int xenmanage_close(xenmanage_handle *hdl);
+
+#define XENMANAGE_GETDOMSTATE_STATE_EXIST 0x0001  /* Domain is existing. */
+#define XENMANAGE_GETDOMSTATE_STATE_SHUTDOWN  0x0002  /* Shutdown finished. */
+#define XENMANAGE_GETDOMSTATE_STATE_DYING 0x0004  /* Domain dying. */
+
+/*
+ * Return state information of an existing domain.
+ *
+ * Returns the domain state and unique id of the given domain.
+ * Parameters:
+ *   hdl:   handle returned by xenmanage_open()
+ *   domid: domain id of the domain to get the information for
+ *   state: where to store the state (XENMANAGE_GETDOMSTATE_STATE_ flags,
+ *  nothing stored if NULL)
+ *   unique_id: where to store the unique id of the domain (nothing stored if
+ *  NULL)
+ * Return value: 0 if information was stored, -1 else (errno is set)
+ */
+int xenmanage_get_domain_info(xenmanage_handle *hdl, unsigned int domid,
+  unsigned int *state, uint64_t *unique_id);
+
+/*
+ * Return information of a domain having changed state recently.
+ *
+ * Returns the domain id, state and unique id of a domain having changed
+ * state (any of the state bits was modified) since the last time information
+ * for that domain was returned by this function. Only usable by callers who
+ * have registered the VIRQ_DOM_EXC event (normally Xenstore).
+ * Parameters:
+ *   hdl:   handle returned by xenmanage_open()
+ *   domid: where to store the domid of the domain (not NULL)
+ *   state: where to store the state (XENMANAGE_GETDOMSTATE_STATE_ flags,
+ *  nothing stored if NULL)
+ *   unique_id: where to store the unique id of the domain (nothing stored if
+ *  NULL)
+ * Return value: 0 if information was stored, -1 else (errno is set)
+ */
+int xenmanage_get_changed_domain(xenmanage_handle *hdl, unsigned int *domid,
+ unsigned int *state, uint64_t *unique_id);
+#endif /* XENMANAGE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/Makefile b/tools/libs/Makefile
index 1afcd12e2b..d39516c1b3 100644
--- a/tools/libs/Makefile
+++ b/tools/libs/Makefile
@@ -12,6 +12,7 @@ SUBDIRS-y += devicemodel
 SUBDIRS-y += ctrl
 SUBDIRS-y += guest
 SUBDIRS-y += hypfs
+SUBDIRS-y += manage
 SUBDIRS-y += store
 SUBDIRS-y += stat
 SUBDIRS-$(CONFIG_Linux) += vcha

[PATCH 6/6] tools/xenstored: use new stable interface instead of libxenctrl

2024-10-23 Thread Juergen Gross

Replace the current use of the unstable xc_domain_getinfo_single()
interface with the stable domctl XEN_DOMCTL_get_domain_state call
via the new libxenmanage library.

This will remove the last usage of libxenctrl by Xenstore, so update
the library dependencies accordingly.

For now only do a direct replacement without using the functionality
of obtaining information about domains having changed the state.

Signed-off-by: Juergen Gross 
---
V1:
- use library instead of direct hypercall, only replace current
  libxenctrl use case

Please note that this patch can be committed only after the related
Mini-OS patch "config: add support for libxenmanage" has gone in AND
the Mini-OS commit-id has been updated in Config.mk accordingly!
---
 stubdom/Makefile|  8 ++---
 stubdom/mini-os.mk  |  1 +
 tools/xenstored/Makefile|  2 +-
 tools/xenstored/Makefile.common |  2 +-
 tools/xenstored/core.h  |  1 -
 tools/xenstored/domain.c| 52 -
 tools/xenstored/lu.c|  1 +
 tools/xenstored/lu_daemon.c |  1 +
 8 files changed, 28 insertions(+), 40 deletions(-)

diff --git a/stubdom/Makefile b/stubdom/Makefile
index 2a81af28a1..ca800b243c 100644
--- a/stubdom/Makefile
+++ b/stubdom/Makefile
@@ -307,7 +307,7 @@ endif
 # libraries under tools/libs
 ###
 
-STUB_LIBS := toolcore toollog evtchn gnttab call foreignmemory devicemodel 
ctrl guest
+STUB_LIBS := toolcore toollog evtchn gnttab call foreignmemory devicemodel 
ctrl guest manage
 
 LIBDEP_guest := cross-zlib
 
@@ -465,7 +465,7 @@ grub: cross-polarssl grub-upstream $(CROSS_ROOT) 
grub-$(XEN_TARGET_ARCH)-minios-
 # xenstore
 ##
 
-xenstore-minios.gen.cfg: APP_LIBS = gnttab evtchn toollog ctrl
+xenstore-minios.gen.cfg: APP_LIBS = gnttab evtchn toollog manage
 xenstore-minios.gen.cfg: xenstore-minios.cfg Makefile
$(GEN_config) >$@
 
@@ -480,7 +480,7 @@ xenstore: $(CROSS_ROOT) xenstore-minios-config.mk
 # xenstorepvh
 #
 
-xenstorepvh-minios.gen.cfg: APP_LIBS = gnttab evtchn toollog ctrl
+xenstorepvh-minios.gen.cfg: APP_LIBS = gnttab evtchn toollog manage
 xenstorepvh-minios.gen.cfg: xenstorepvh-minios.cfg Makefile
$(GEN_config) >$@
 
@@ -523,7 +523,7 @@ else
 pv-grub-if-enabled:
 endif
 
-XENSTORE_DEPS := libxenevtchn libxengnttab libxenctrl
+XENSTORE_DEPS := libxenevtchn libxengnttab libxenmanage
 
 .PHONY: xenstore-stubdom
 xenstore-stubdom: mini-os-$(XEN_TARGET_ARCH)-xenstore $(XENSTORE_DEPS) xenstore
diff --git a/stubdom/mini-os.mk b/stubdom/mini-os.mk
index 7e4968e026..be32302f9e 100644
--- a/stubdom/mini-os.mk
+++ b/stubdom/mini-os.mk
@@ -13,5 +13,6 @@ GNTTAB_PATH = 
$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/gnttab
 CALL_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/call
 FOREIGNMEMORY_PATH = 
$(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/foreignmemory
 DEVICEMODEL_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/devicemodel
+MANAGE_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/manage
 CTRL_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/ctrl
 GUEST_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/guest
diff --git a/tools/xenstored/Makefile b/tools/xenstored/Makefile
index 09adfe1d50..81c42838e0 100644
--- a/tools/xenstored/Makefile
+++ b/tools/xenstored/Makefile
@@ -5,7 +5,7 @@ include Makefile.common
 
 xenstored: LDLIBS += $(LDLIBS_libxenevtchn)
 xenstored: LDLIBS += $(LDLIBS_libxengnttab)
-xenstored: LDLIBS += $(LDLIBS_libxenctrl)
+xenstored: LDLIBS += $(LDLIBS_libxenmanage)
 xenstored: LDLIBS += -lrt
 xenstored: LDLIBS += $(SOCKET_LIBS)
 
diff --git a/tools/xenstored/Makefile.common b/tools/xenstored/Makefile.common
index 27fdb3b49e..271134fcc1 100644
--- a/tools/xenstored/Makefile.common
+++ b/tools/xenstored/Makefile.common
@@ -12,7 +12,7 @@ XENSTORED_OBJS-$(CONFIG_MiniOS) += minios.o lu_minios.o
 # Include configure output (config.h)
 CFLAGS += -include $(XEN_ROOT)/tools/config.h
 CFLAGS += $(CFLAGS_libxenevtchn)
-CFLAGS += $(CFLAGS_libxenctrl)
+CFLAGS += $(CFLAGS_libxenmanage)
 CFLAGS += $(CFLAGS_libxentoolcore)
 
 $(XENSTORED_OBJS-y): CFLAGS += $(CFLAGS_libxengnttab)
diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index e58779e88c..632886cecf 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -19,7 +19,6 @@
 #ifndef _XENSTORED_CORE_H
 #define _XENSTORED_CORE_H
 
-#include 
 #include 
 
 #include 
diff --git a/tools/xenstored/domain.c b/tools/xenstored/domain.c
index 64c8fd0cc3..c0264d9477 100644
--- a/tools/xenstored/domain.c
+++ b/tools/xenstored/domain.c
@@ -34,14 +34,15 @@
 #include "control.h"
 
 #include 
-#include 
+#include 
+#include 
 #include 
 
 #ifdef __MINIOS__
 #include 
 #endif
 
-static xc_interface **xc_handle;
+static xenmanage_handle *xm_handle;
 xengnttab_handle **xgt_handle;
 static evtchn_port_t virq_port;
 
@@ -619,32 +620,28 @@ static int destroy_domain(void *_domain)
return 0;
 }
 
-static bool get_domain_info(unsigned int domid,

Re: [PATCH 1/6] xen: add a domain unique id to each domain

2024-10-23 Thread Juergen Gross


On 23.10.24 16:08, Alejandro Vallejo wrote:

On Wed Oct 23, 2024 at 2:10 PM BST, Juergen Gross wrote:

Xenstore is referencing domains by their domid, but reuse of a domid
can lead to the situation that Xenstore can't tell whether a domain
with that domid has been deleted and created again without Xenstore
noticing the domain is a new one now.

Add a global domain creation unique id which is updated when creating
a new domain, and store that value in struct domain of the new domain.
The global unique id is initialized with the system time and updates
are done via the xorshift algorithm which is used for pseudo random
number generation, too (see https://en.wikipedia.org/wiki/Xorshift).

Signed-off-by: Juergen Gross 
Reviewed-by: Jan Beulich 
---
V1:
- make unique_id local to function (Jan Beulich)
- add lock (Julien Grall)
- add comment (Julien Grall)
---
  xen/common/domain.c | 20 
  xen/include/xen/sched.h |  3 +++
  2 files changed, 23 insertions(+)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 92263a4fbd..3948640fb0 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -562,6 +562,25 @@ static void _domain_destroy(struct domain *d)
  free_domain_struct(d);
  }
  
+static uint64_t get_unique_id(void)

+{
+static uint64_t unique_id;
+static DEFINE_SPINLOCK(lock);
+uint64_t x = unique_id ? : NOW();
+
+spin_lock(&lock);
+
+/* Pseudo-randomize id in order to avoid consumers relying on sequence. */
+x ^= x << 13;
+x ^= x >> 7;
+x ^= x << 17;
+unique_id = x;
+
+spin_unlock(&lock);
+
+return x;
+}
+
  static int sanitise_domain_config(struct xen_domctl_createdomain *config)
  {
  bool hvm = config->flags & XEN_DOMCTL_CDF_hvm;
@@ -654,6 +673,7 @@ struct domain *domain_create(domid_t domid,
  
  /* Sort out our idea of is_system_domain(). */

  d->domain_id = domid;
+d->unique_id = get_unique_id();
  
  /* Holding CDF_* internal flags. */

  d->cdf = flags;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 90666576c2..1dd8a425f9 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -370,6 +370,9 @@ struct domain
  domid_t  domain_id;
  
  unsigned int max_vcpus;

+
+uint64_t unique_id;   /* Unique domain identifier */
+


Why not xen_domain_handle_t handle, defined later on? That's meant to be a
UUID, so this feels like a duplicate field.


It is an input value for create domain. So there is absolutely no
guarantee that it is unique.

It can especially be specified in the xl config file, so you could have
a host running multiple guests all with the same UUID (even if this might
be rejected by libxl, destroying a guest and then recreating it with the
same UUID is possible, but Xenstore would need to see different unique Ids
for those 2 guests).


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature

Re: [PATCH v2 07/10] xen/arm: ffa: Transmit RXTX buffers to the SPMC

2024-10-23 Thread Jens Wiklander

Hi Bertrand,

On Wed, Oct 16, 2024 at 10:32 AM Bertrand Marquis
 wrote:
>
> When an RXTX buffer is mapped by a VM transmit it to the SPMC when it
> supports RX_ACQUIRE.
> As a consequence of that, we must acquire the RX buffer of a VM from the
> SPMC when we want to use it:
> - create a generic acquire and release function to get the rx buffer of
>   a VM which gets it from the SPMC when supported
> - rename the rx_acquire to hyp_rx_acquire to remove confusion
> - rework the rx_lock to only lock access to rx_is_free and only allow
>   usage of the rx buffer to one who managed to acquire it, thus removing
>   the trylock and returning busy if rx_is_free is false
>
> As part of this change move some structure definition to ffa_private
> from ffa_shm as those are need for the MAP call with the SPMC.
>
> Signed-off-by: Bertrand Marquis 
> ---
> Changes in v2:
> - unmap VM rxtx buffer in SPMC on unmap call or on VM destroy
> - rework the unmap call to the SPMC to properly pass the VM ID
> ---
>  xen/arch/arm/tee/ffa.c  |   2 +-
>  xen/arch/arm/tee/ffa_partinfo.c |  29 ++
>  xen/arch/arm/tee/ffa_private.h  |  22 -
>  xen/arch/arm/tee/ffa_rxtx.c | 158 ++--
>  xen/arch/arm/tee/ffa_shm.c  |  15 ---
>  5 files changed, 161 insertions(+), 65 deletions(-)
>
> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c
> index a292003ca9fe..40ea5398fa21 100644
> --- a/xen/arch/arm/tee/ffa.c
> +++ b/xen/arch/arm/tee/ffa.c
> @@ -347,7 +347,7 @@ static bool ffa_handle_call(struct cpu_user_regs *regs)
>  ffa_handle_partition_info_get(regs);
>  return true;
>  case FFA_RX_RELEASE:
> -e = ffa_handle_rx_release();
> +e = ffa_rx_release(d);
>  break;
>  case FFA_MSG_SEND_DIRECT_REQ_32:
>  case FFA_MSG_SEND_DIRECT_REQ_64:
> diff --git a/xen/arch/arm/tee/ffa_partinfo.c b/xen/arch/arm/tee/ffa_partinfo.c
> index 3cf801523296..fde187dba4e5 100644
> --- a/xen/arch/arm/tee/ffa_partinfo.c
> +++ b/xen/arch/arm/tee/ffa_partinfo.c
> @@ -121,11 +121,9 @@ void ffa_handle_partition_info_get(struct cpu_user_regs 
> *regs)
>  goto out;
>  }
>
> -if ( !spin_trylock(&ctx->rx_lock) )
> -{
> -ret = FFA_RET_BUSY;
> +ret = ffa_rx_acquire(d);
> +if ( ret != FFA_RET_OK )
>  goto out;
> -}
>
>  dst_buf = ctx->rx;
>
> @@ -135,22 +133,16 @@ void ffa_handle_partition_info_get(struct cpu_user_regs 
> *regs)
>  goto out_rx_release;
>  }
>
> -if ( !ctx->page_count || !ctx->rx_is_free )
> -{
> -ret = FFA_RET_DENIED;
> -goto out_rx_release;
> -}
> -
>  spin_lock(&ffa_rx_buffer_lock);
>
>  ret = ffa_partition_info_get(uuid, 0, &ffa_sp_count, &src_size);
>
>  if ( ret )
> -goto out_rx_buf_unlock;
> +goto out_rx_hyp_unlock;
>
>  /*
>   * ffa_partition_info_get() succeeded so we now own the RX buffer we
> - * share with the SPMC. We must give it back using ffa_rx_release()
> + * share with the SPMC. We must give it back using ffa_hyp_rx_release()
>   * once we've copied the content.
>   */
>
> @@ -193,15 +185,13 @@ void ffa_handle_partition_info_get(struct cpu_user_regs 
> *regs)
>  }
>  }
>
> -ctx->rx_is_free = false;
> -
>  out_rx_hyp_release:
> -ffa_rx_release();
> -out_rx_buf_unlock:
> +ffa_hyp_rx_release();
> +out_rx_hyp_unlock:
>  spin_unlock(&ffa_rx_buffer_lock);
>  out_rx_release:
> -spin_unlock(&ctx->rx_lock);
> -
> +if ( ret != FFA_RET_OK )
> +ffa_rx_release(d);

Please comment on why ffa_rx_release() must only be called on failure.

>  out:
>  if ( ret )
>  ffa_set_regs_error(regs, ret);
> @@ -368,8 +358,7 @@ bool ffa_partinfo_init(void)
>  ret = init_subscribers(count, fpi_size);
>
>  out:
> -ffa_rx_release();
> -
> +ffa_hyp_rx_release();
>  return ret;
>  }
>
> diff --git a/xen/arch/arm/tee/ffa_private.h b/xen/arch/arm/tee/ffa_private.h
> index afe69b43dbef..9adfe687c3c9 100644
> --- a/xen/arch/arm/tee/ffa_private.h
> +++ b/xen/arch/arm/tee/ffa_private.h
> @@ -265,6 +265,21 @@
>  #define FFA_ABI_BITNUM(id)((FFA_ABI_ID(id) - FFA_ABI_MIN) << 1 | \
> FFA_ABI_CONV(id))
>
> +/* Constituent memory region descriptor */
> +struct ffa_address_range {
> +uint64_t address;
> +uint32_t page_count;
> +uint32_t reserved;
> +};
> +
> +/* Composite memory region descriptor */
> +struct ffa_mem_region {
> +uint32_t total_page_count;
> +uint32_t address_range_count;
> +uint64_t reserved;
> +struct ffa_address_range address_range_array[];
> +};
> +
>  struct ffa_ctx_notif {
>  bool enabled;
>
> @@ -292,7 +307,7 @@ struct ffa_ctx {
>  struct ffa_ctx_notif notif;
>  /*
>   * tx_lock is used to serialize access to tx
> - * rx_lock is used to serialize access to rx
> + * rx_lock is used to serialize access to rx_is_free
>   * lock is used for the rest in this struct
>

[PATCH 4/3] x86/boot: Remove the mbi_p parameter from __start_xen()

2024-10-23 Thread Andrew Cooper

The use of physical addresses in __start_xen() has proved to be fertile soure
of bugs.

The MB1/2 path stashes the MBI pointer in multiboot_ptr (a setup.c variable
even), then re-loads it immediately before calling __start_xen().  For this,
we can just drop the function parameter and read multiboot_ptr in the one
place it's used.

The EFI path also passes this parameter into __start_xen().  Have the EFI path
set up multiboot_ptr too, and move the explanation of phyiscal-mode pointers.

No functional change.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Marek Marczykowski-Górecki 
CC: Daniel P. Smith 

https://gitlab.com/xen-project/people/andyhhp/xen/-/pipelines/1509072662
---
 xen/arch/x86/boot/x86_64.S   |  2 --
 xen/arch/x86/efi/efi-boot.h  |  9 +++--
 xen/arch/x86/include/asm/setup.h |  1 +
 xen/arch/x86/setup.c | 14 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index 04bb62ae8680..26b9d1c2df9a 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -77,8 +77,6 @@ ENTRY(__high_start)
 tailcall start_secondary
 
 .L_bsp:
-/* Pass off the Multiboot info structure to C land (if applicable). */
-mov multiboot_ptr(%rip),%edi
 tailcall __start_xen
 
 .section .data.page_aligned, "aw", @progbits
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index 94f34433640f..3b26f0b0f500 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -248,6 +248,12 @@ static void __init noreturn efi_arch_post_exit_boot(void)
 efi_arch_relocate_image(__XEN_VIRT_START - xen_phys_start);
 memcpy((void *)trampoline_phys, trampoline_start, cfg.size);
 
+/*
+ * We're in physical mode right now (i.e. identity map), so a regular
+ * pointer is also a phyiscal address to the rest of Xen.
+ */
+multiboot_ptr = (unsigned long)&mbi;
+
 /* Set system registers and transfer control. */
 asm volatile("pushq $0\n\tpopfq");
 rdmsrl(MSR_EFER, efer);
@@ -279,8 +285,7 @@ static void __init noreturn efi_arch_post_exit_boot(void)
  [cr4] "+&r" (cr4)
: [cr3] "r" (idle_pg_table),
  [cs] "i" (__HYPERVISOR_CS),
- [ds] "r" (__HYPERVISOR_DS),
- "D" (&mbi)
+ [ds] "r" (__HYPERVISOR_DS)
: "memory" );
 unreachable();
 }
diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
index 3d189521189d..811855e57478 100644
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -14,6 +14,7 @@ extern unsigned long xenheap_initial_phys_start;
 extern uint64_t boot_tsc_stamp;
 
 extern void *stack_start;
+extern unsigned int multiboot_ptr;
 
 void early_cpu_init(bool verbose);
 void early_time_init(void);
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index c5b37bd2112e..c9129c21787b 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -157,8 +157,8 @@ char asmlinkage __section(".init.bss.stack_aligned") 
__aligned(STACK_SIZE)
 /* Used by the BSP/AP paths to find the higher half stack mapping to use. */
 void *stack_start = cpu0_stack + STACK_SIZE - sizeof(struct cpu_info);
 
-/* Used by the boot asm to stash the relocated multiboot info pointer. */
-unsigned int asmlinkage __initdata multiboot_ptr;
+/* Used by the boot asm and EFI to stash the multiboot_info paddr. */
+unsigned int __initdata multiboot_ptr;
 
 struct cpuinfo_x86 __read_mostly boot_cpu_data = { 0, 0, 0, 0, -1 };
 
@@ -1014,7 +1014,7 @@ static struct domain *__init create_dom0(const module_t 
*image,
 /* How much of the directmap is prebuilt at compile time. */
 #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
 
-void asmlinkage __init noreturn __start_xen(unsigned long mbi_p)
+void asmlinkage __init noreturn __start_xen(void)
 {
 const char *memmap_type = NULL;
 char *kextra;
@@ -1059,7 +1059,6 @@ void asmlinkage __init noreturn __start_xen(unsigned long 
mbi_p)
 
 if ( pvh_boot )
 {
-ASSERT(mbi_p == 0);
 pvh_init(&mbi, &mod);
 /*
  * mbi and mod are regular pointers to .initdata.  These remain valid
@@ -1068,7 +1067,7 @@ void asmlinkage __init noreturn __start_xen(unsigned long 
mbi_p)
 }
 else
 {
-mbi = __va(mbi_p);
+mbi = __va(multiboot_ptr);
 mod = __va(mbi->mods_addr);
 
 /*
@@ -1078,11 +1077,8 @@ void asmlinkage __init noreturn __start_xen(unsigned 
long mbi_p)
  * For EFI, these are directmap pointers into the Xen image.  They do
  * not remain valid across move_xen().  EFI boot only functions
  * because a non-zero xen_phys_start inhibits move_xen().
- *
- * Don't be fooled by efi_arch_post_exit_boot() passing "D" (&mbi).
- * This is a EFI physical-mode (i.

Re: [PATCH 02/13] ALSA: hda_intel: Use always-managed version of pcim_intx()

2024-10-23 Thread Takashi Iwai

On Wed, 23 Oct 2024 15:50:09 +0200,
Philipp Stanner wrote:
> 
> On Tue, 2024-10-22 at 16:08 +0200, Takashi Iwai wrote:
> > On Tue, 15 Oct 2024 20:51:12 +0200,
> > Philipp Stanner wrote:
> > > 
> > > pci_intx() is a hybrid function which can sometimes be managed
> > > through
> > > devres. To remove this hybrid nature from pci_intx(), it is
> > > necessary to
> > > port users to either an always-managed or a never-managed version.
> > > 
> > > hda_intel enables its PCI-Device with pcim_enable_device(). Thus,
> > > it needs
> > > the always-managed version.
> > > 
> > > Replace pci_intx() with pcim_intx().
> > > 
> > > Signed-off-by: Philipp Stanner 
> > > ---
> > >  sound/pci/hda/hda_intel.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
> > > index b4540c5cd2a6..b44ca7b6e54f 100644
> > > --- a/sound/pci/hda/hda_intel.c
> > > +++ b/sound/pci/hda/hda_intel.c
> > > @@ -786,7 +786,7 @@ static int azx_acquire_irq(struct azx *chip,
> > > int do_disconnect)
> > >   }
> > >   bus->irq = chip->pci->irq;
> > >   chip->card->sync_irq = bus->irq;
> > > - pci_intx(chip->pci, !chip->msi);
> > > + pcim_intx(chip->pci, !chip->msi);
> > >   return 0;
> > >  }
> > >  
> > 
> > Hm, it's OK-ish to do this as it's practically same as what
> > pci_intx()
> > currently does.  But, the current code can be a bit inconsistent
> > about
> > the original intx value.  pcim_intx() always stores !enable to
> > res->orig_intx unconditionally, and it means that the orig_intx value
> > gets overridden at each time pcim_intx() gets called.
> 
> Yes.
> 
> > 
> > Meanwhile, HD-audio driver does release and re-acquire the interrupt
> > after disabling MSI when something goes wrong, and pci_intx() call
> > above is a part of that procedure.  So, it can rewrite the
> > res->orig_intx to another value by retry without MSI.  And after the
> > driver removal, it'll lead to another state.
> 
> I'm not sure that I understand this paragraph completely. Still, could
> a solution for the driver on the long-term just be to use pci_intx()?

pci_intx() misses the restore of the original value, so it's no
long-term solution, either.

What I meant is that pcim_intx() blindly assumes the negative of the
passed argument as the original state, which isn't always true.  e.g.
when the driver calls it twice with different values, a wrong value
may be remembered.

That said, I thought of something like below.


thanks,

Takashi

-- 8< --
--- a/drivers/pci/devres.c
+++ b/drivers/pci/devres.c
@@ -438,8 +438,17 @@ static void pcim_intx_restore(struct device *dev, void 
*data)
__pcim_intx(pdev, res->orig_intx);
 }
 
-static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev)
+static void save_orig_intx(struct pci_dev *pdev)
 {
+   u16 pci_command;
+
+   pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+   res->orig_intx = !(pci_command & PCI_COMMAND_INTX_DISABLE);
+}
+
+static struct pcim_intx_devres *get_or_create_intx_devres(struct pci_dev *pdev)
+{
+   struct device *dev = &pdev->dev;
struct pcim_intx_devres *res;
 
res = devres_find(dev, pcim_intx_restore, NULL, NULL);
@@ -447,8 +456,10 @@ static struct pcim_intx_devres 
*get_or_create_intx_devres(struct device *dev)
return res;
 
res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL);
-   if (res)
+   if (res) {
+   save_orig_intx(pdev);
devres_add(dev, res);
+   }
 
return res;
 }
@@ -467,11 +478,10 @@ int pcim_intx(struct pci_dev *pdev, int enable)
 {
struct pcim_intx_devres *res;
 
-   res = get_or_create_intx_devres(&pdev->dev);
+   res = get_or_create_intx_devres(pdev);
if (!res)
return -ENOMEM;
 
-   res->orig_intx = !enable;
__pcim_intx(pdev, enable);
 
return 0;

Goodbye to our Xen Project Colo

2024-10-23 Thread Kelly Choi

Hi all,

A while back, the Xen Project was notified by the data centre facility
hosting the physical hardware for OSSTest and some of the GitLab runners,
were due to be shut down.

Since then we have had extensive discussions and explored multiple options
to mitigate the impact of this on our project.

At the end of July, we sent out a communication explaining our decision to
best focus our efforts on GitLab, which offers a more modern and
sustainable infrastructure.  As we prepare to leave the colo by the end of
the month, the remainder servers we are saving will be shipped to AMD
to help host our future Xen Project operations.

These are listed below:

   - 1x Zen3: verdesse 1U AMD EPYC 7543P 32-Core Milan (Zen 3) 2021
   - 1x ThunderX: rochester 1U Cavium ThunderX-88XX ARMv8.1 2014
   - 1x Zen3: verdesse 1U AMD EPYC 7543P 32-Core Milan (Zen 3) 2021
   - 1x ThunderX: rochester 1U Cavium ThunderX-88XX ARMv8.1 2014
   - 1x serial port concentrator
   - 1x PDU
   - 2x Zen2 espadeiro: espadeiro 1U AMD EPYC 7502P 32-Core Rome (Zen 2)
   2019
   - 2x Zen2 rubintos: rubintos 2U AMD Ryzen 9 3900X 12-Core Matisse (Zen
   2) 2019

As we say goodbye to our colo, I'd like to reiterate that OSSTest has been
instrumental to the Xen Project for many years, and we'd like to extend our
gratitude to everyone who has contributed.

Thank you again to our community for your continued support

Kelly Choi
Community Manager
Xen Project

Re: [MINI-OS PATCH] config: add support for libxenmanage

2024-10-23 Thread Samuel Thibault

Juergen Gross, le mer. 23 oct. 2024 15:10:48 +0200, a ecrit:
> Add CONFIG_LIBXENMANAGE support.
> 
> Signed-off-by: Juergen Gross 
> ---
> Please note that this patch should be committed only after the related
> Xen patch "tools/libs: add a new libxenmanage library" has been Acked,
> as otherwise it should either be dropped (in case the approach of
> adding a new library is being rejected) or changed (in case the name
> of the new library needs to be modified)!

For the positive case,

Reviewed-by: Samuel Thibault 

> 
>  Config.mk | 2 ++
>  Makefile  | 4 
>  2 files changed, 6 insertions(+)
> 
> diff --git a/Config.mk b/Config.mk
> index f59a0cf4..e493533a 100644
> --- a/Config.mk
> +++ b/Config.mk
> @@ -46,6 +46,7 @@ GNTTAB_PATH ?= 
> $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/gnttab
>  CALL_PATH ?= $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/call
>  FOREIGNMEMORY_PATH ?= 
> $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/foreignmemory
>  DEVICEMODEL_PATH ?= 
> $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/devicemodel
> +MANAGE_PATH ?= $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/manage
>  CTRL_PATH ?= $(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH)
>  GUEST_PATH ?= $(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH)
>  else
> @@ -202,6 +203,7 @@ CONFIG-n += CONFIG_LIBXENGNTTAB
>  CONFIG-n += CONFIG_LIBXENGUEST
>  CONFIG-n += CONFIG_LIBXENTOOLCORE
>  CONFIG-n += CONFIG_LIBXENTOOLLOG
> +CONFIG-n += CONFIG_LIBXENMANAGE
>  # Setting CONFIG_USE_XEN_CONSOLE copies all print output to the Xen emergency
>  # console apart of standard dom0 handled console.
>  CONFIG-n += CONFIG_USE_XEN_CONSOLE
> diff --git a/Makefile b/Makefile
> index ffa8d1a8..d094858a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -159,6 +159,10 @@ ifeq ($(CONFIG_LIBXENCTRL),y)
>  APP_LDLIBS += -L$(CTRL_PATH) -whole-archive -lxenctrl -no-whole-archive
>  LIBS += $(CTRL_PATH)/libxenctrl.a
>  endif
> +ifeq ($(CONFIG_LIBXENMANAGE),y)
> +APP_LDLIBS += -L$(MANAGE_PATH) -whole-archive -lxenmanage -no-whole-archive
> +LIBS += $(MANAGE_PATH)/libxenmanage.a
> +endif
>  APP_LDLIBS += -lpci
>  APP_LDLIBS += -lz
>  APP_LDLIBS += -lm
> -- 
> 2.43.0
> 

-- 
Samuel
 tohi.cybercable.fr (212.198.0.3) si une personne se reconnait derriere
 cette adresse que ce soit un pirate ou une victime qu'il se manifeste,
 cette personne pourrait bien etre un petit malin
 -+- Fred in NPC : Maman, y a le routeur qui veut me hacker -+-

Re: [PATCH 3/3] x86/boot: Fix XSM module handling during PVH boot

2024-10-23 Thread Roger Pau Monné

On Wed, Oct 23, 2024 at 11:57:56AM +0100, Andrew Cooper wrote:
> From: "Daniel P. Smith" 
> 
> As detailed in commit 0fe607b2a144 ("x86/boot: Fix PVH boot during boot_info
> transition period"), the use of __va(mbi->mods_addr) constitutes a
> use-after-free on the PVH boot path.
> 
> This pattern has been in use since before PVH support was added.  This has
> most likely gone unnoticed because no-one's tried using a detached Flask
> policy in a PVH VM before.
> 
> Plumb the boot_info pointer down, replacing module_map and mbi.  Importantly,
> bi->mods[].mod is a safe way to access the module list during PVH boot.
> 
> As this is the final non-bi use of mbi in __start_xen(), make the pointer
> unusable once bi has been established, to prevent new uses creeping back in.
> This is a stopgap until mbi can be fully removed.
> 
> Signed-off-by: Daniel P. Smith 
> Signed-off-by: Andrew Cooper 
> ---
> CC: Jan Beulich 
> CC: Roger Pau Monné 
> CC: Daniel P. Smith 
> 
> Refectored/extracted from the hyperlaunch series.
> 
> There's no good Fixes tag for this, because it can't reasonably be "introduce
> PVH", nor can the fix as-is reasonably be backported.  If we want to fix the
> bug in older trees, we need to plumb the mod pointer down alongside mbi.
> ---
>  xen/arch/x86/setup.c  |  5 -
>  xen/include/xsm/xsm.h | 12 +---
>  xen/xsm/xsm_core.c|  7 +++
>  xen/xsm/xsm_policy.c  | 16 +++-
>  4 files changed, 19 insertions(+), 21 deletions(-)
> 
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index c75b8f15fa5d..8974b0c6ede6 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1088,6 +1088,9 @@ void asmlinkage __init noreturn __start_xen(unsigned 
> long mbi_p)
>  bi = multiboot_fill_boot_info(mbi, mod);
>  bi->module_map = module_map; /* Temporary */
>  
> +/* Use bi-> instead */
> +#define mbi DO_NOT_USE
> +
>  /* Parse the command-line options. */
>  if ( (kextra = strstr(bi->cmdline, " -- ")) != NULL )
>  {
> @@ -1862,7 +1865,7 @@ void asmlinkage __init noreturn __start_xen(unsigned 
> long mbi_p)
>  mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
>RANGESETF_prettyprint_hex);
>  
> -xsm_multiboot_init(module_map, mbi);
> +xsm_multiboot_init(bi);
>  
>  /*
>   * IOMMU-related ACPI table parsing may require some of the system 
> domains
> diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
> index 627c0d2731af..4dbff9d866e0 100644
> --- a/xen/include/xsm/xsm.h
> +++ b/xen/include/xsm/xsm.h
> @@ -17,7 +17,6 @@
>  
>  #include 
>  #include 
> -#include 
>  
>  /* policy magic number (defined by XSM_MAGIC) */
>  typedef uint32_t xsm_magic_t;
> @@ -778,11 +777,10 @@ static inline int xsm_argo_send(const struct domain *d, 
> const struct domain *t)
>  #endif /* XSM_NO_WRAPPERS */
>  
>  #ifdef CONFIG_MULTIBOOT
> -int xsm_multiboot_init(
> -unsigned long *module_map, const multiboot_info_t *mbi);
> +struct boot_info;
> +int xsm_multiboot_init(struct boot_info *bi);

This one seems to also drop a const?

This also propagates to the functions below.

With that:

Acked-by: Roger Pau Monné 

Thanks, Roger.

Re: [PATCH v3 5/6] xen/arm: mpu: Enable MPU

2024-10-23 Thread Julien Grall





On 23/10/2024 17:13, Julien Grall wrote:



On 23/10/2024 17:06, Ayan Kumar Halder wrote:

Hi Luca/Julien,

On 22/10/2024 17:31, Luca Fancellu wrote:

Hi Julien,


On 22 Oct 2024, at 14:13, Julien Grall  wrote:



On 22/10/2024 10:56, Luca Fancellu wrote:

On 22 Oct 2024, at 10:47, Julien Grall  wrote:

Hi Luca,

On 22/10/2024 10:41, Luca Fancellu wrote:

On 21 Oct 2024, at 17:27, Julien Grall  wrote:
2) dsb+isb barrier after enabling the MPU, since we are enabling 
the MPU but also because we are disabling the background region
TBH, I don't understand this one. Why would disabling the 
background region requires a dsb + isb? Do you have any pointer in 
the Armv8-R specification?
I’m afraid this is only my deduction, Section C1.4 of the Arm® 
Architecture Reference Manual Supplement Armv8, for R-profile 
AArch64 architecture,
(DDI 0600B.a) explains what is the background region, it says it 
implements physical address range(s), so when we disable it, we 
would like any data
access to complete before changing this implementation defined 
range with the ranges defined by us tweaking PRBAR/PRLAR, am I right?

My mental model for the ordering is similar to a TLB flush which is:
   1/ dsb nsh
   2/ tlbi
   3/ dsb nsh
   4/ isb

Enabling the MPU is effectively 2. AFAIK, 1 is only necessary to 
ensure the update to the page-tables. But it is not a requirement to 
ensure any data access are completed (otherwise, we would have 
needed a dsb sy because we don't know how far the access are going...).
Uhm… I’m not sure we are on the same page, probably I explained that 
wrongly, so my point is that:


FUNC_LOCAL(enable_mpu)
 mrs   x0, SCTLR_EL2
 bic   x0, x0, #SCTLR_ELx_BR   /* Disable Background region */
 orr   x0, x0, #SCTLR_Axx_ELx_M    /* Enable MPU */
 orr   x0, x0, #SCTLR_Axx_ELx_C    /* Enable D-cache */
 orr   x0, x0, #SCTLR_Axx_ELx_WXN  /* Enable WXN */
 dsb   sy
 ^— This data barrier is needed in order to complete any data 
access, which guarantees that all explicit memory accesses before
    this instruction complete, so we can safely turn ON the 
MPU and disable the background region.


I think


Sorry I fat fingered and pressed send too early. I think this is the 
part I disagree with. All explicit accesses don't need to be complete 
(in the sense observed by everyone in the system). They only need to 
have gone through the permissions check.


We also need the change in the MPU registers to be visible to the MPU.

For instance, for the MMU, we only need to use "dsb nsh" because the 
walker is within the non-shareable domain. Personally, I think "sy" is a 
bit excessive here, but as this is boot code, that's ok.


Cheers,

--
Julien Grall

Re: [PATCH 2/3] x86/boot: Fix microcode module handling during PVH boot

2024-10-23 Thread Roger Pau Monné

On Wed, Oct 23, 2024 at 11:57:55AM +0100, Andrew Cooper wrote:
> From: "Daniel P. Smith" 
> 
> As detailed in commit 0fe607b2a144 ("x86/boot: Fix PVH boot during boot_info
> transition period"), the use of __va(mbi->mods_addr) constitutes a
> use-after-free on the PVH boot path.
> 
> This pattern has been in use since before PVH support was added.  Inside a PVH
> VM, it will go unnoticed as long as the microcode container parser doesn't
> choke on the random data it finds.
> 
> The use within early_microcode_init() happens to be safe because it's prior to
> move_xen().  microcode_init_cache() is after move_xen(), and therefore unsafe.
> 
> Plumb the boot_info pointer down, replacing module_map and mbi.  Importantly,
> bi->mods[].mod is a safe way to access the module list during PVH boot.
> 
> Note: microcode_scan_module() is still bogusly stashing a bootstrap_map()'d
>   pointer in ucode_blob.data, which constitutes a different
>   use-after-free, and only works in general because of a second bug.  This
>   is unrelated to PVH, and needs untangling differently.
> 
> Signed-off-by: Daniel P. Smith 
> Signed-off-by: Andrew Cooper 
> ---
> CC: Jan Beulich 
> CC: Roger Pau Monné 
> CC: Daniel P. Smith 
> 
> Refectored/extracted from the hyperlaunch series.
> 
> There's no good Fixes tag for this, because it can't reasonably be "introduce
> PVH", nor can the fix as-is reasonably be backported.  If we want to fix the
> bug in older trees, we need to plumb the mod pointer down alongside mbi.
> ---
>  xen/arch/x86/cpu/microcode/core.c| 40 +++-
>  xen/arch/x86/include/asm/microcode.h |  8 +++---
>  xen/arch/x86/setup.c |  4 +--
>  3 files changed, 22 insertions(+), 30 deletions(-)
> 
> diff --git a/xen/arch/x86/cpu/microcode/core.c 
> b/xen/arch/x86/cpu/microcode/core.c
> index 8564e4d2c94c..1d58cb0f3bc1 100644
> --- a/xen/arch/x86/cpu/microcode/core.c
> +++ b/xen/arch/x86/cpu/microcode/core.c
> @@ -35,6 +35,7 @@
>  #include 
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -152,11 +153,8 @@ static int __init cf_check parse_ucode(const char *s)
>  }
>  custom_param("ucode", parse_ucode);
>  
> -static void __init microcode_scan_module(
> -unsigned long *module_map,
> -const multiboot_info_t *mbi)
> +static void __init microcode_scan_module(struct boot_info *bi)

Sorry to be pedantic, can't you keep bi as const?

>  {
> -module_t *mod = (module_t *)__va(mbi->mods_addr);
>  uint64_t *_blob_start;
>  unsigned long _blob_size;
>  struct cpio_data cd;
> @@ -178,13 +176,13 @@ static void __init microcode_scan_module(
>  /*
>   * Try all modules and see whichever could be the microcode blob.
>   */
> -for ( i = 1 /* Ignore dom0 kernel */; i < mbi->mods_count; i++ )
> +for ( i = 1 /* Ignore dom0 kernel */; i < bi->nr_modules; i++ )
>  {
> -if ( !test_bit(i, module_map) )
> +if ( !test_bit(i, bi->module_map) )
>  continue;
>  
> -_blob_start = bootstrap_map(&mod[i]);
> -_blob_size = mod[i].mod_end;
> +_blob_start = bootstrap_map(bi->mods[i].mod);
> +_blob_size = bi->mods[i].mod->mod_end;
>  if ( !_blob_start )
>  {
>  printk("Could not map multiboot module #%d (size: %ld)\n",
> @@ -204,21 +202,17 @@ static void __init microcode_scan_module(
>  }
>  }
>  
> -static void __init microcode_grab_module(
> -unsigned long *module_map,
> -const multiboot_info_t *mbi)
> +static void __init microcode_grab_module(struct boot_info *bi)

Same here re bi being const?

There are some further below, that I think all should keep the const
keywoard?

Otherwise looks good:

Acked-by: Roger Pau Monné 

Thanks, Roger.

[PATCH v2 2/3] xen/riscv: initialize the VMAP_DEFAULT virtual range

2024-10-23 Thread Oleksii Kurochko

Call vm_init() to initialize the VMAP_DEFAULT virtual range.

To support this, introduce the populate_pt_range() and
arch_vmap_virt_end() functions, which are used by
vm_init()->vm_init_type().

Signed-off-by: Oleksii Kurochko 
Acked-by: Jan Beulich 
---
Change in V2:
 - Acked-by: Jan Beulich 
---
 xen/arch/riscv/mm.c| 11 +--
 xen/arch/riscv/pt.c|  6 ++
 xen/arch/riscv/setup.c |  3 +++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
index 5be5a7b52a..262cec811e 100644
--- a/xen/arch/riscv/mm.c
+++ b/xen/arch/riscv/mm.c
@@ -352,12 +352,6 @@ void arch_dump_shared_mem_info(void)
 BUG_ON("unimplemented");
 }
 
-int populate_pt_range(unsigned long virt, unsigned long nr_mfns)
-{
-BUG_ON("unimplemented");
-return -1;
-}
-
 int xenmem_add_to_physmap_one(struct domain *d, unsigned int space,
   union add_to_physmap_extra extra,
   unsigned long idx, gfn_t gfn)
@@ -544,3 +538,8 @@ void __init setup_mm(void)
 #else /* CONFIG_RISCV_32 */
 #error setup_mm(), setup_{directmap,frametable}_mapping() should be 
implemented for RV_32
 #endif
+
+void *__init arch_vmap_virt_end(void)
+{
+return (void *)(VMAP_VIRT_START + VMAP_VIRT_SIZE);
+}
diff --git a/xen/arch/riscv/pt.c b/xen/arch/riscv/pt.c
index cc5e2d3266..d62aceb36c 100644
--- a/xen/arch/riscv/pt.c
+++ b/xen/arch/riscv/pt.c
@@ -1,6 +1,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -419,3 +420,8 @@ int map_pages_to_xen(unsigned long virt,
 
 return pt_update(virt, mfn, nr_mfns, flags);
 }
+
+int __init populate_pt_range(unsigned long virt, unsigned long nr_mfns)
+{
+return pt_update(virt, INVALID_MFN, nr_mfns, PTE_POPULATE);
+}
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 2887a18c0c..3652cb056d 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -62,6 +63,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
 
 setup_mm();
 
+vm_init();
+
 printk("All set up\n");
 
 machine_halt();
-- 
2.47.0

48 matches

Mail list logo