date:20210421

Re: [PATCH 2/9] arm/domain: Get rid of READ/WRITE_SYSREG32

2021-04-21 Thread Michal Orzel

Hi Julien,

On 20.04.2021 15:12, Julien Grall wrote:
> Hi Michal,
> 
> On 20/04/2021 08:08, Michal Orzel wrote:
>> AArch64 system registers are 64bit whereas AArch32 ones
>> are 32bit or 64bit. MSR/MRS are expecting 64bit values thus
>> we should get rid of helpers READ/WRITE_SYSREG32
>> in favour of using READ/WRITE_SYSREG.
>> We should also use register_t type when reading sysregs
>> which can correspond to uint64_t or uint32_t.
>> Even though many AArch64 sysregs have upper 32bit reserved
>> it does not mean that they can't be widen in the future.
>>
>> Modify type of registers: actlr, cntkctl to register_t.
> 
> ACTLR is implementation defined, so in theory there might already bits 
> already defined in the range [32:63]. So I would consider to split it from 
> the patch so we can backport it.
> 
You mean not to touch it at all in this series or to create a seperate patch 
only for ACTLR?
>>
>> Signed-off-by: Michal Orzel 
>> ---
>>   xen/arch/arm/domain.c    | 20 ++--
>>   xen/include/asm-arm/domain.h |  4 ++--
>>   2 files changed, 12 insertions(+), 12 deletions(-)
>>
>> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
>> index bdd3d3e5b5..c021a03c61 100644
>> --- a/xen/arch/arm/domain.c
>> +++ b/xen/arch/arm/domain.c
>> @@ -113,13 +113,13 @@ static void ctxt_switch_from(struct vcpu *p)
>>   p->arch.tpidr_el1 = READ_SYSREG(TPIDR_EL1);
>>     /* Arch timer */
>> -    p->arch.cntkctl = READ_SYSREG32(CNTKCTL_EL1);
>> +    p->arch.cntkctl = READ_SYSREG(CNTKCTL_EL1);
>>   virt_timer_save(p);
>>     if ( is_32bit_domain(p->domain) && cpu_has_thumbee )
>>   {
>> -    p->arch.teecr = READ_SYSREG32(TEECR32_EL1);
>> -    p->arch.teehbr = READ_SYSREG32(TEEHBR32_EL1);
>> +    p->arch.teecr = READ_SYSREG(TEECR32_EL1);
>> +    p->arch.teehbr = READ_SYSREG(TEEHBR32_EL1);
> 
> It feels strange you converted cntkctl and actlr to use register_t but not 
> teecr and teehbr. Can you explain why?
> 
Because thumbee and its registers do not appear in any of ARMv8 ARM version.
This means that this code could be protected even with #ifdef CONFIG_ARM_32 as 
AArch64 do not have them.
> Cheers,
> 
Cheers,
Michal

Re: [PATCH] tools/xenstored: Remove unused prototype

2021-04-21 Thread Juergen Gross


On 20.04.21 15:49, Julien Grall wrote:

From: Julien Grall 

A prototype for dump_conn() has been present for quite a long time
but there are no implementation. Even, AFAICT in the patch that
introduced it. So drop it.

Signed-off-by: Julien Grall 


Reviewed-by: Juergen Gross 


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: application/pgp-keys


OpenPGP_signature
Description: OpenPGP digital signature

Re: [PATCH 3/9] arm/gic: Get rid of READ/WRITE_SYSREG32

2021-04-21 Thread Michal Orzel

Hi Julien,

On 20.04.2021 15:28, Julien Grall wrote:
> Hi Michal,
> 
> On 20/04/2021 08:08, Michal Orzel wrote:
>> AArch64 system registers are 64bit whereas AArch32 ones
>> are 32bit or 64bit. MSR/MRS are expecting 64bit values thus
>> we should get rid of helpers READ/WRITE_SYSREG32
>> in favour of using READ/WRITE_SYSREG.
>> We should also use register_t type when reading sysregs
>> which can correspond to uint64_t or uint32_t.
>> Even though many AArch64 sysregs have upper 32bit reserved
>> it does not mean that they can't be widen in the future.
>>
>> Modify types of following members of struct gic_v3 to register_t:
>> -hcr(not used at all in Xen)
> 
> It looks like we never used it (even in the patch introducing it). So I would 
> suggest to remove it in a patch before this one.
>
Ok. In v2 I will add a patch before this one removing hcr from gic_v3.
>> -vmcr
>> -sre_el1
>> -apr0
>> -apr1
>>
>> Signed-off-by: Michal Orzel 
>> ---
>>   xen/arch/arm/gic-v3-lpi.c |  2 +-
>>   xen/arch/arm/gic-v3.c | 96 ---
>>   xen/include/asm-arm/gic.h |  6 +--
>>   3 files changed, 54 insertions(+), 50 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
>> index 869bc97fa1..e1594dd20e 100644
>> --- a/xen/arch/arm/gic-v3-lpi.c
>> +++ b/xen/arch/arm/gic-v3-lpi.c
>> @@ -178,7 +178,7 @@ void gicv3_do_LPI(unsigned int lpi)
>>   irq_enter();
>>     /* EOI the LPI already. */
>> -    WRITE_SYSREG32(lpi, ICC_EOIR1_EL1);
>> +    WRITE_SYSREG(lpi, ICC_EOIR1_EL1);
>>     /* Find out if a guest mapped something to this physical LPI. */
>>   hlpip = gic_get_host_lpi(lpi);
>> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
>> index ac28013c19..0634013a67 100644
>> --- a/xen/arch/arm/gic-v3.c
>> +++ b/xen/arch/arm/gic-v3.c
>> @@ -246,12 +246,12 @@ static void gicv3_ich_write_lr(int lr, uint64_t val)
>>    */
>>   static void gicv3_enable_sre(void)
>>   {
>> -    uint32_t val;
>> +    register_t val;
>>   -    val = READ_SYSREG32(ICC_SRE_EL2);
>> +    val = READ_SYSREG(ICC_SRE_EL2);
>>   val |= GICC_SRE_EL2_SRE;
>>   -    WRITE_SYSREG32(val, ICC_SRE_EL2);
>> +    WRITE_SYSREG(val, ICC_SRE_EL2);
>>   isb();
>>   }
>>   @@ -315,16 +315,16 @@ static void restore_aprn_regs(const union 
>> gic_state_data *d)
>>   switch ( gicv3.nr_priorities )
>>   {
>>   case 7:
>> -    WRITE_SYSREG32(d->v3.apr0[2], ICH_AP0R2_EL2);
>> -    WRITE_SYSREG32(d->v3.apr1[2], ICH_AP1R2_EL2);
>> +    WRITE_SYSREG(d->v3.apr0[2], ICH_AP0R2_EL2);
>> +    WRITE_SYSREG(d->v3.apr1[2], ICH_AP1R2_EL2);
>>   /* Fall through */
>>   case 6:
>> -    WRITE_SYSREG32(d->v3.apr0[1], ICH_AP0R1_EL2);
>> -    WRITE_SYSREG32(d->v3.apr1[1], ICH_AP1R1_EL2);
>> +    WRITE_SYSREG(d->v3.apr0[1], ICH_AP0R1_EL2);
>> +    WRITE_SYSREG(d->v3.apr1[1], ICH_AP1R1_EL2);
>>   /* Fall through */
>>   case 5:
>> -    WRITE_SYSREG32(d->v3.apr0[0], ICH_AP0R0_EL2);
>> -    WRITE_SYSREG32(d->v3.apr1[0], ICH_AP1R0_EL2);
>> +    WRITE_SYSREG(d->v3.apr0[0], ICH_AP0R0_EL2);
>> +    WRITE_SYSREG(d->v3.apr1[0], ICH_AP1R0_EL2);
>>   break;
>>   default:
>>   BUG();
>> @@ -338,16 +338,16 @@ static void save_aprn_regs(union gic_state_data *d)
>>   switch ( gicv3.nr_priorities )
>>   {
>>   case 7:
>> -    d->v3.apr0[2] = READ_SYSREG32(ICH_AP0R2_EL2);
>> -    d->v3.apr1[2] = READ_SYSREG32(ICH_AP1R2_EL2);
>> +    d->v3.apr0[2] = READ_SYSREG(ICH_AP0R2_EL2);
>> +    d->v3.apr1[2] = READ_SYSREG(ICH_AP1R2_EL2);
>>   /* Fall through */
>>   case 6:
>> -    d->v3.apr0[1] = READ_SYSREG32(ICH_AP0R1_EL2);
>> -    d->v3.apr1[1] = READ_SYSREG32(ICH_AP1R1_EL2);
>> +    d->v3.apr0[1] = READ_SYSREG(ICH_AP0R1_EL2);
>> +    d->v3.apr1[1] = READ_SYSREG(ICH_AP1R1_EL2);
>>   /* Fall through */
>>   case 5:
>> -    d->v3.apr0[0] = READ_SYSREG32(ICH_AP0R0_EL2);
>> -    d->v3.apr1[0] = READ_SYSREG32(ICH_AP1R0_EL2);
>> +    d->v3.apr0[0] = READ_SYSREG(ICH_AP0R0_EL2);
>> +    d->v3.apr1[0] = READ_SYSREG(ICH_AP1R0_EL2);
>>   break;
>>   default:
>>   BUG();
>> @@ -371,15 +371,15 @@ static void gicv3_save_state(struct vcpu *v)
>>   dsb(sy);
>>   gicv3_save_lrs(v);
>>   save_aprn_regs(&v->arch.gic);
>> -    v->arch.gic.v3.vmcr = READ_SYSREG32(ICH_VMCR_EL2);
>> -    v->arch.gic.v3.sre_el1 = READ_SYSREG32(ICC_SRE_EL1);
>> +    v->arch.gic.v3.vmcr = READ_SYSREG(ICH_VMCR_EL2);
>> +    v->arch.gic.v3.sre_el1 = READ_SYSREG(ICC_SRE_EL1);
>>   }
>>     static void gicv3_restore_state(const struct vcpu *v)
>>   {
>> -    uint32_t val;
>> +    register_t val;
>>   -    val = READ_SYSREG32(ICC_SRE_EL2);
>> +    val = READ_SYSREG(ICC_SRE_EL2);
>>   /*
>>    * Don't give access to system registers when the guest is using
>>    * GICv2
>> @@ -388,7 +388,7 @@ static void gicv3_restore_state(const struct vcpu *v)
>>

Re: [PATCH v2] xen/pci: Refactor PCI MSI interrupts related code

2021-04-21 Thread Rahul Singh

Hi Jan,

> On 20 Apr 2021, at 4:36 pm, Jan Beulich  wrote:
> 
> On 20.04.2021 15:45, Rahul Singh wrote:
>>> On 19 Apr 2021, at 1:33 pm, Jan Beulich  wrote:
>>> On 19.04.2021 13:54, Julien Grall wrote:
 For the time being, I think move this code in x86 is a lot better than 
 #ifdef or keep the code in common code.
>>> 
>>> Well, I would perhaps agree if it ended up being #ifdef CONFIG_X86.
>>> I would perhaps not agree if there was a new CONFIG_* which other
>>> (future) arch-es could select if desired.
>> 
>> I agree with Julien moving the code to x86 file as currently it is 
>> referenced only in x86 code
>> and as of now we are not sure how other architecture will implement the 
>> Interrupt remapping
>> (via IOMMU or any other means).  
>> 
>> Let me know if you are ok with moving the code to x86.
> 
> I can't answer this with "yes" or "no" without knowing what the alternative
> would be. As said, if the alternative is CONFIG_X86 #ifdef-ary, then yes.
> If a separate CONFIG_* gets introduced (and selected by X86), then a
> separate file (getting built only when that new setting is y) would seem
> better to me.

I just made a quick patch. Please let me know if below patch is ok. I move the 
definition to  "passthrough/x86/iommu.c” file.

diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 705137f8be..199ce08612 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1303,13 +1303,6 @@ static int __init setup_dump_pcidevs(void)
 }
 __initcall(setup_dump_pcidevs);
 
-int iommu_update_ire_from_msi(
-struct msi_desc *msi_desc, struct msi_msg *msg)
-{
-return iommu_intremap
-   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
-}
-
 static int iommu_add_device(struct pci_dev *pdev)
 {
 const struct domain_iommu *hd;
diff --git a/xen/drivers/passthrough/x86/iommu.c 
b/xen/drivers/passthrough/x86/iommu.c
index b90bb31bfe..cf51dec564 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -340,6 +340,13 @@ bool arch_iommu_use_permitted(const struct domain *d)
 likely(!p2m_get_hostp2m(d)->global_logdirty));
 }
 
+int iommu_update_ire_from_msi(
+struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+return iommu_intremap
+   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index ea0cd0f1a2..bd42d87b72 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -243,7 +243,6 @@ struct iommu_ops {
u8 devfn, device_t *dev);
 #ifdef CONFIG_HAS_PCI
 int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
-int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
 #endif /* HAS_PCI */
 
 void (*teardown)(struct domain *d);
@@ -272,6 +271,7 @@ struct iommu_ops {
 int (*adjust_irq_affinities)(void);
 void (*sync_cache)(const void *addr, unsigned int size);
 void (*clear_root_pgtable)(struct domain *d);
+int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
 #endif /* CONFIG_X86 */
 
 int __must_check (*suspend)(void)

Regards,
Rahul
> 
> Jan

Re: [PATCH v2] xen/pci: Refactor PCI MSI interrupts related code

2021-04-21 Thread Roger Pau Monné

On Wed, Apr 21, 2021 at 08:07:08AM +, Rahul Singh wrote:
> Hi Jan,
> 
> > On 20 Apr 2021, at 4:36 pm, Jan Beulich  wrote:
> > 
> > On 20.04.2021 15:45, Rahul Singh wrote:
> >>> On 19 Apr 2021, at 1:33 pm, Jan Beulich  wrote:
> >>> On 19.04.2021 13:54, Julien Grall wrote:
>  For the time being, I think move this code in x86 is a lot better than 
>  #ifdef or keep the code in common code.
> >>> 
> >>> Well, I would perhaps agree if it ended up being #ifdef CONFIG_X86.
> >>> I would perhaps not agree if there was a new CONFIG_* which other
> >>> (future) arch-es could select if desired.
> >> 
> >> I agree with Julien moving the code to x86 file as currently it is 
> >> referenced only in x86 code
> >> and as of now we are not sure how other architecture will implement the 
> >> Interrupt remapping
> >> (via IOMMU or any other means).  
> >> 
> >> Let me know if you are ok with moving the code to x86.
> > 
> > I can't answer this with "yes" or "no" without knowing what the alternative
> > would be. As said, if the alternative is CONFIG_X86 #ifdef-ary, then yes.
> > If a separate CONFIG_* gets introduced (and selected by X86), then a
> > separate file (getting built only when that new setting is y) would seem
> > better to me.
> 
> I just made a quick patch. Please let me know if below patch is ok. I move 
> the definition to  "passthrough/x86/iommu.c” file.
> 
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 705137f8be..199ce08612 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -1303,13 +1303,6 @@ static int __init setup_dump_pcidevs(void)
>  }
>  __initcall(setup_dump_pcidevs);
>  
> -int iommu_update_ire_from_msi(
> -struct msi_desc *msi_desc, struct msi_msg *msg)
> -{
> -return iommu_intremap
> -   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
> -}
> -
>  static int iommu_add_device(struct pci_dev *pdev)
>  {
>  const struct domain_iommu *hd;
> diff --git a/xen/drivers/passthrough/x86/iommu.c 
> b/xen/drivers/passthrough/x86/iommu.c
> index b90bb31bfe..cf51dec564 100644
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -340,6 +340,13 @@ bool arch_iommu_use_permitted(const struct domain *d)
>  likely(!p2m_get_hostp2m(d)->global_logdirty));
>  }
>  
> +int iommu_update_ire_from_msi(
> +struct msi_desc *msi_desc, struct msi_msg *msg)
> +{
> +return iommu_intremap
> +   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
> index ea0cd0f1a2..bd42d87b72 100644
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -243,7 +243,6 @@ struct iommu_ops {
> u8 devfn, device_t *dev);
>  #ifdef CONFIG_HAS_PCI
>  int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
> -int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
> *msg);
>  #endif /* HAS_PCI */
>  
>  void (*teardown)(struct domain *d);
> @@ -272,6 +271,7 @@ struct iommu_ops {
>  int (*adjust_irq_affinities)(void);
>  void (*sync_cache)(const void *addr, unsigned int size);
>  void (*clear_root_pgtable)(struct domain *d);
> +int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
> *msg);

You also need to move the function prototype
(iommu_update_ire_from_msi) from iommu.h into asm-x86/iommu.h, or
maybe you could just define the function itself as static inline in
asm-x86/iommu.h?

Thanks, Roger.

Re: [PATCH 2/8] x86/EFI: sections may not live at VA 0 in PE binaries

2021-04-21 Thread Roger Pau Monné

On Thu, Apr 01, 2021 at 11:44:45AM +0200, Jan Beulich wrote:
> PE binaries specify section addresses by (32-bit) RVA. GNU ld up to at
> least 2.36 would silently truncate the (negative) difference when a
> section is placed below the image base. Such sections would also be
> wrongly placed ahead of all "normal" ones. Since, for the time being,
> we build xen.efi with --strip-debug anyway, .stab* can't appear. And
> .comment has an entry in /DISCARD/ already anyway in the EFI case.
> 
> Because of their unclear origin, keep the directives for the ELF case
> though.

It's my understadng thonse sections are only there for debug purposes,
and never part of the final xen binary as they are stripped?

Could we maybe remove the section load address of 0 and instead just
use the (NOLOAD) directive?

Does it really matter to place them at address 0?

I also wonder, is this change fixing some existing bug, or it's just a
cleanup change?

I also only see the .comment section in my binary output, so maybe
it's fine to just remove them from the script?

Does the Arm linker script need a similar treatment?

Thanks, Roger.

Re: [PATCH v2] xen/pci: Refactor PCI MSI interrupts related code

2021-04-21 Thread Jan Beulich

On 21.04.2021 10:07, Rahul Singh wrote:
> Hi Jan,
> 
>> On 20 Apr 2021, at 4:36 pm, Jan Beulich  wrote:
>>
>> On 20.04.2021 15:45, Rahul Singh wrote:
 On 19 Apr 2021, at 1:33 pm, Jan Beulich  wrote:
 On 19.04.2021 13:54, Julien Grall wrote:
> For the time being, I think move this code in x86 is a lot better than 
> #ifdef or keep the code in common code.

 Well, I would perhaps agree if it ended up being #ifdef CONFIG_X86.
 I would perhaps not agree if there was a new CONFIG_* which other
 (future) arch-es could select if desired.
>>>
>>> I agree with Julien moving the code to x86 file as currently it is 
>>> referenced only in x86 code
>>> and as of now we are not sure how other architecture will implement the 
>>> Interrupt remapping
>>> (via IOMMU or any other means).  
>>>
>>> Let me know if you are ok with moving the code to x86.
>>
>> I can't answer this with "yes" or "no" without knowing what the alternative
>> would be. As said, if the alternative is CONFIG_X86 #ifdef-ary, then yes.
>> If a separate CONFIG_* gets introduced (and selected by X86), then a
>> separate file (getting built only when that new setting is y) would seem
>> better to me.
> 
> I just made a quick patch. Please let me know if below patch is ok. I move 
> the definition to  "passthrough/x86/iommu.c” file.

This patch on its own looks okay, but assumes you've already taken the
decision that no proper new CONFIG_* would want introducing. That
decision, however, touches (aiui) more than just this one hook.

Jan

Re: Discussion of Xenheap problems on AArch64

2021-04-21 Thread Julien Grall





On 21/04/2021 07:28, Henry Wang wrote:

Hi,


Hi Henry,



We are trying to implement the static memory allocation on AArch64. Part of
this feature is the reserved heap memory allocation, where a specific range of
memory is reserved only for heap. In the development process, we found a
pitfall in current AArch64 setup_xenheap_mappings() function.

According to a previous discussion in community
https://lore.kernel.org/xen-devel/20190216134456.10681-1-peng@nxp.com/,
on AArch64, bootmem is initialized after setup_xenheap_mappings(),
setup_xenheap_mappings() may try to allocate memory before memory has been
handed over to the boot allocator. If the reserved heap memory allocation is
introduced, either of below 2 cases will trigger a crash:

1. If the reserved heap memory is at the end of the memory block list and the
gap between reserved and unreserved memory is bigger than 512GB, when we setup
mappings from the beginning of the memory block list, we will get OOM caused
by lack of pages in boot allocator. This is because the memory that is reserved
for heap has not been mapped and added to the boot allocator.

2. If we add the memory that is reserved for heap to boot allocator first, and
then setup mappings for banks in the memory block list, we may get a page which
has not been setup mapping, causing a data abort.


There are a few issues with setup_xenheap_mappings(). I have been 
reworking the code on my spare time and started to upstream bits of it. 
A PoC can be found here:


https://xenbits.xen.org/gitweb/?p=people/julieng/xen-unstable.git;a=shortlog;h=refs/heads/pt/dev

Cheers,

--
Julien Grall

Re: [PATCH 3/8] x86/EFI: program headers are an ELF concept

2021-04-21 Thread Roger Pau Monné

On Thu, Apr 01, 2021 at 11:45:09AM +0200, Jan Beulich wrote:
> While they apparently do no harm when building xen.efi, their use is
> potentially misleading. Conditionalize their use to be for just the ELF
> binary we produce.
> 
> No change to the resulting binaries.

The GNU Linker manual notes that program headers would be ignored when
not generating an ELF file, so I'm not sure it's worth us adding more
churn to the linker script to hide something that's already ignored by
ld already.

Maybe adding a comment noting program headers are ignored when not
generating an ELF output would be enough?

Thanks, Roger.

Re: [PATCH v2] xen/pci: Refactor PCI MSI interrupts related code

2021-04-21 Thread Rahul Singh

Hi Roger,

> On 21 Apr 2021, at 9:16 am, Roger Pau Monné  wrote:
> 
> On Wed, Apr 21, 2021 at 08:07:08AM +, Rahul Singh wrote:
>> Hi Jan,
>> 
>>> On 20 Apr 2021, at 4:36 pm, Jan Beulich  wrote:
>>> 
>>> On 20.04.2021 15:45, Rahul Singh wrote:
> On 19 Apr 2021, at 1:33 pm, Jan Beulich  wrote:
> On 19.04.2021 13:54, Julien Grall wrote:
>> For the time being, I think move this code in x86 is a lot better than 
>> #ifdef or keep the code in common code.
> 
> Well, I would perhaps agree if it ended up being #ifdef CONFIG_X86.
> I would perhaps not agree if there was a new CONFIG_* which other
> (future) arch-es could select if desired.
 
 I agree with Julien moving the code to x86 file as currently it is 
 referenced only in x86 code
 and as of now we are not sure how other architecture will implement the 
 Interrupt remapping
 (via IOMMU or any other means).  
 
 Let me know if you are ok with moving the code to x86.
>>> 
>>> I can't answer this with "yes" or "no" without knowing what the alternative
>>> would be. As said, if the alternative is CONFIG_X86 #ifdef-ary, then yes.
>>> If a separate CONFIG_* gets introduced (and selected by X86), then a
>>> separate file (getting built only when that new setting is y) would seem
>>> better to me.
>> 
>> I just made a quick patch. Please let me know if below patch is ok. I move 
>> the definition to  "passthrough/x86/iommu.c” file.
>> 
>> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
>> index 705137f8be..199ce08612 100644
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -1303,13 +1303,6 @@ static int __init setup_dump_pcidevs(void)
>> }
>> __initcall(setup_dump_pcidevs);
>> 
>> -int iommu_update_ire_from_msi(
>> -struct msi_desc *msi_desc, struct msi_msg *msg)
>> -{
>> -return iommu_intremap
>> -   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
>> -}
>> -
>> static int iommu_add_device(struct pci_dev *pdev)
>> {
>> const struct domain_iommu *hd;
>> diff --git a/xen/drivers/passthrough/x86/iommu.c 
>> b/xen/drivers/passthrough/x86/iommu.c
>> index b90bb31bfe..cf51dec564 100644
>> --- a/xen/drivers/passthrough/x86/iommu.c
>> +++ b/xen/drivers/passthrough/x86/iommu.c
>> @@ -340,6 +340,13 @@ bool arch_iommu_use_permitted(const struct domain *d)
>> likely(!p2m_get_hostp2m(d)->global_logdirty));
>> }
>> 
>> +int iommu_update_ire_from_msi(
>> +struct msi_desc *msi_desc, struct msi_msg *msg)
>> +{
>> +return iommu_intremap
>> +   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
>> +}
>> +
>> /*
>>  * Local variables:
>>  * mode: C
>> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
>> index ea0cd0f1a2..bd42d87b72 100644
>> --- a/xen/include/xen/iommu.h
>> +++ b/xen/include/xen/iommu.h
>> @@ -243,7 +243,6 @@ struct iommu_ops {
>>u8 devfn, device_t *dev);
>> #ifdef CONFIG_HAS_PCI
>> int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
>> -int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
>> *msg);
>> #endif /* HAS_PCI */
>> 
>> void (*teardown)(struct domain *d);
>> @@ -272,6 +271,7 @@ struct iommu_ops {
>> int (*adjust_irq_affinities)(void);
>> void (*sync_cache)(const void *addr, unsigned int size);
>> void (*clear_root_pgtable)(struct domain *d);
>> +int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
>> *msg);
> 
> You also need to move the function prototype
> (iommu_update_ire_from_msi) from iommu.h into asm-x86/iommu.h, or
> maybe you could just define the function itself as static inline in
> asm-x86/iommu.h?


Ok. I will move function prototype (iommu_update_ire_from_msi) from 
iommu.h into asm-x86/iommu.h.

I first tried to make the function as static inline in "asm-x86/iommu.h" but 
after 
that I observe the compilation error for debug build because "asm/iommu.h” 
is included in "xen/iommu.h” before iommu_call() is defined in "xen/iommu.h”.
That's why I decided to move it to the "passthrough/x86/iommu.c” file.

http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/xen/iommu.h;h=ea0cd0f1a2a4558eab4488468272f3ed35dd1dc0;hb=HEAD#l298

Regards,
Rahul
> 
> Thanks, Roger.

Re: [PATCH v2] VT-d: Don't assume register-based invalidation is always supported

2021-04-21 Thread Jan Beulich

On 20.04.2021 18:17, Roger Pau Monné wrote:
> On Tue, Apr 20, 2021 at 05:38:51PM +0200, Jan Beulich wrote:
>> On 20.04.2021 17:08, Roger Pau Monné wrote:
>>> On Thu, Apr 02, 2020 at 04:06:06AM +0800, Chao Gao wrote:
 --- a/xen/drivers/passthrough/vtd/qinval.c
 +++ b/xen/drivers/passthrough/vtd/qinval.c
 @@ -442,6 +442,23 @@ int enable_qinval(struct vtd_iommu *iommu)
  return 0;
  }
  
 +static int vtd_flush_context_noop(struct vtd_iommu *iommu, uint16_t did,
 +  uint16_t source_id, uint8_t 
 function_mask,
 +  uint64_t type, bool 
 flush_non_present_entry)
 +{
 +dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: Cannot flush CONTEXT.\n");
 +return -EIO;
 +}
 +
 +static int vtd_flush_iotlb_noop(struct vtd_iommu *iommu, uint16_t did,
 +uint64_t addr, unsigned int size_order,
 +uint64_t type, bool 
 flush_non_present_entry,
 +bool flush_dev_iotlb)
 +{
 +dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: Cannot flush IOTLB.\n");
 +return -EIO;
 +}
>>>
>>> I think I would add an ASSERT_UNREACHABLE() to both noop handlers
>>> above, as I would expect trying to use them without the proper mode
>>> being configured would point to an error elsewhere?
>>
>> If such an assertion triggered e.g. during S3 suspend/resume, it may
>> lead to the box simply not doing anything useful, without there being
>> any way to know what went wrong. If instead the system at least
>> managed to resume, the log message could be observed.
> 
> Oh, OK then. I'm simply worried that people might ignore such one line
> messages, maybe add a WARN?

Hmm, yes, perhaps - would allow seeing right away where the call
came from. Chao, I'd again be fine to flip the dprintk()-s to
WARN()-s while committing. But of course only provided you and
Kevin (as the maintainer) agree.

> Would it make sense to mark as tainted which could help identify the
> issue on production builds? Maybe that's too much.

Yeah, for something we expect shouldn't ever happen ...

Jan

[PATCH] CI: Drop TravisCI

2021-04-21 Thread Andrew Cooper

Travis-ci.org is shutting down shortly.  The arm cross-compile testing has
been broken for a long time now, and all testing has now been superseded by
our Gitlab infrastructure.

Signed-off-by: Andrew Cooper 
---
CC: George Dunlap 
CC: Ian Jackson 
CC: Jan Beulich 
CC: Stefano Stabellini 
CC: Wei Liu 
CC: Julien Grall 
CC: Doug Goldstein 
---
 .travis.yml | 86 -
 1 file changed, 86 deletions(-)
 delete mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index f3cd15b79f..00
--- a/.travis.yml
+++ /dev/null
@@ -1,86 +0,0 @@
-language: c
-dist: trusty
-sudo: required
-# don't test master, smoke and coverity branches
-branches:
-except:
-- master
-- smoke
-- /^coverity-tested\/.*/
-- /^stable-.*/
-matrix:
-include:
-- compiler: gcc
-  env: XEN_TARGET_ARCH=x86_64 debug=n
-- compiler: gcc
-  env: XEN_TARGET_ARCH=x86_64 XEN_CONFIG_EXPERT=y RANDCONFIG=y debug=n
-- compiler: gcc-5
-  env: XEN_TARGET_ARCH=x86_64 debug=n
-- compiler: gcc
-  env: XEN_TARGET_ARCH=x86_64 debug=y
-- compiler: gcc-5
-  env: XEN_TARGET_ARCH=x86_64 debug=y
-- compiler: clang
-  env: XEN_TARGET_ARCH=x86_64 clang=y debug=n
-- compiler: clang
-  env: XEN_TARGET_ARCH=x86_64 clang=y debug=y
-- compiler: gcc
-  env: XEN_TARGET_ARCH=arm32 CROSS_COMPILE=arm-linux-gnueabihf- debug=n
-- compiler: gcc
-  env: XEN_TARGET_ARCH=arm32 CROSS_COMPILE=arm-linux-gnueabihf- 
XEN_CONFIG_EXPERT=y RANDCONFIG=y debug=n
-- compiler: gcc
-  env: XEN_TARGET_ARCH=arm32 CROSS_COMPILE=arm-linux-gnueabihf- debug=y
-- compiler: gcc
-  env: XEN_TARGET_ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- debug=n
-- compiler: gcc
-  env: XEN_TARGET_ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- 
XEN_CONFIG_EXPERT=y RANDCONFIG=y debug=n
-- compiler: gcc
-  env: XEN_TARGET_ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- debug=y
-addons:
-apt:
-sources:
-- ubuntu-toolchain-r-test
-packages:
-- zlib1g-dev
-- libncurses5-dev
-- libssl-dev
-- python-dev
-- xorg-dev
-- uuid-dev
-- libyajl-dev
-- libaio-dev
-- libglib2.0-dev
-- libpixman-1-dev
-- pkg-config
-- flex
-- bison
-- acpica-tools
-- bin86
-- bcc
-- libnl-3-dev
-- ocaml-nox
-- libfindlib-ocaml-dev
-- transfig
-- pandoc
-- gcc-arm-linux-gnueabihf
-- gcc-aarch64-linux-gnu
-- gcc-5
-- g++-5
-- seabios
-- checkpolicy
-- ghostscript
-# we must set CXX manually instead of using 'language: cpp' due to
-# travis-ci/travis-ci#3871
-before_script:
-- export CXX=${CC/cc/++}
-- export CXX=${CXX/clang/clang++}
-script:
-- ./scripts/travis-build
-after_script:
-- cat xen/.config
-- cat tools/config.log
-- cat docs/config.log
-notifications:
-irc:
-channels:
-- secure: 
"mPIFllF6eW3F3talvccMy55Tfcid66IPkkXZYCxDKRF2DQrMyvmg4qt0xN6gGZsdfOBMNr+/YfO5PxusBCUkVdBGBzd3QhFoIDYZbJZgzVh3yNDQ+x4L7p1cZNrwJ2loMmSX6KxGKZxZX9NRStrTUkVyp0jGZB9xkwT8Rl6jXj7EQkgQ95K1Wqafx0ycLfyDQmzX9bzi/3KIBFKMGmK18AFMh+R30zK0FPUUsS4+VhepIkVqO5puU3OYePd34wRnWlt7hjU2Vj5vYmVXp3UOE+E8/Lf9IGVAhitDi+EC35b8zo2BHJ9z6xZARYPvfSqbXcXV20RycabI+e3ufZJ40eatssly5QjWH+HhKS42C4gV1psmQhkTCNCM62Ty5uf6R1hsZJQuiOZrc8ojdje8ey2MxJk4R+Xz+Igg1/kD6+WX9/Y6Y3iRuj5HL1xCYfpTbK4mC7ofw0SofW2aAGI68jHpCqJdQCDzMl6748PlDMM0eKe0MPKIEenYHcoBnOEC/jciXUDa6wduV75EEip7oq2i+m44MopcsEDTpdliH077GhKapF0ActjvBTLpyoTRSfkKm0NZol/dgwd3PGG/mY8clIoeXWRb4opk93ejPC967KmSNC68SlfwaJmFZS5T9vAgb6k7r6i9G3dmYtrLKzws8IV1CPWqLzk58+v4pRk="
-- 
2.11.0

Re: [PATCH v2] x86/shim: Simplify compat handling in write_start_info()

2021-04-21 Thread Jan Beulich

On 20.04.2021 19:41, Andrew Cooper wrote:
> Factor out a compat boolean to remove the lfence overhead from multiple
> is_pv_32bit_domain() calls.
> 
> Signed-off-by: Andrew Cooper 
> ---
> CC: Jan Beulich 
> CC: Roger Pau Monné 
> CC: Wei Liu 
> 
> v2:
>  * Reinstate the conditional for the start info pointer, in case Intel/AMD
>can't be persuaded to adjust the architectural guarentees for upper bits in
>compatibility mode transitions.

As indicated before, with this adjustment
Reviewed-by: Jan Beulich 

Jan

Re: [PATCH 0/7] xen: Switch to using -Og for debug builds

2021-04-21 Thread Andrew Cooper

On 19/04/2021 16:45, Jan Beulich wrote:
> On 19.04.2021 16:01, Andrew Cooper wrote:
>> As with the toolstack side, we ought to use -Og for debug builds.
>>
>> All fixes are trivial.  The first 3 are understandable, given reduced
>> optimisations.  The next 3 are, AFAICT, bogus diagnostics.
>>
>> Andrew Cooper (7):
>>   xen/arm: Make make_cpus_node() compile at -Og
>>   x86/shim: Fix compilation at -Og
>>   x86/sysctl: Make arch_do_sysctl() compile at -Og
>>   x86/irq: Make create_irq() compile at -Og
>>   xen/efi: Make efi_start() compile at -Og
>>   x86/shadow: Make _shadow_prealloc() compile at -Og
>>   xen: Use -Og for debug builds when available
> Acked-by: Jan Beulich 
>
> I'd like to ask though that at least for the bogus warnings you
> amend the commit messages with the gcc version these were observed
> with. Perhaps even those seemingly bogus initializers would
> benefit from a very brief comment (or else there's a fair chance
> of them getting removed again at some point).

I'm afraid I don't have that information easily to hand, but all issues
were found by distro-provided compilers included in our Gitlab
infrastructure.

~Andrew

Re: [PATCH v2] xen/pci: Refactor PCI MSI interrupts related code

2021-04-21 Thread Jan Beulich

On 21.04.2021 10:16, Roger Pau Monné wrote:
> On Wed, Apr 21, 2021 at 08:07:08AM +, Rahul Singh wrote:
>> Hi Jan,
>>
>>> On 20 Apr 2021, at 4:36 pm, Jan Beulich  wrote:
>>>
>>> On 20.04.2021 15:45, Rahul Singh wrote:
> On 19 Apr 2021, at 1:33 pm, Jan Beulich  wrote:
> On 19.04.2021 13:54, Julien Grall wrote:
>> For the time being, I think move this code in x86 is a lot better than 
>> #ifdef or keep the code in common code.
>
> Well, I would perhaps agree if it ended up being #ifdef CONFIG_X86.
> I would perhaps not agree if there was a new CONFIG_* which other
> (future) arch-es could select if desired.

 I agree with Julien moving the code to x86 file as currently it is 
 referenced only in x86 code
 and as of now we are not sure how other architecture will implement the 
 Interrupt remapping
 (via IOMMU or any other means).  

 Let me know if you are ok with moving the code to x86.
>>>
>>> I can't answer this with "yes" or "no" without knowing what the alternative
>>> would be. As said, if the alternative is CONFIG_X86 #ifdef-ary, then yes.
>>> If a separate CONFIG_* gets introduced (and selected by X86), then a
>>> separate file (getting built only when that new setting is y) would seem
>>> better to me.
>>
>> I just made a quick patch. Please let me know if below patch is ok. I move 
>> the definition to  "passthrough/x86/iommu.c” file.
>>
>> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
>> index 705137f8be..199ce08612 100644
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -1303,13 +1303,6 @@ static int __init setup_dump_pcidevs(void)
>>  }
>>  __initcall(setup_dump_pcidevs);
>>  
>> -int iommu_update_ire_from_msi(
>> -struct msi_desc *msi_desc, struct msi_msg *msg)
>> -{
>> -return iommu_intremap
>> -   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
>> -}
>> -
>>  static int iommu_add_device(struct pci_dev *pdev)
>>  {
>>  const struct domain_iommu *hd;
>> diff --git a/xen/drivers/passthrough/x86/iommu.c 
>> b/xen/drivers/passthrough/x86/iommu.c
>> index b90bb31bfe..cf51dec564 100644
>> --- a/xen/drivers/passthrough/x86/iommu.c
>> +++ b/xen/drivers/passthrough/x86/iommu.c
>> @@ -340,6 +340,13 @@ bool arch_iommu_use_permitted(const struct domain *d)
>>  likely(!p2m_get_hostp2m(d)->global_logdirty));
>>  }
>>  
>> +int iommu_update_ire_from_msi(
>> +struct msi_desc *msi_desc, struct msi_msg *msg)
>> +{
>> +return iommu_intremap
>> +   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 0;
>> +}
>> +
>>  /*
>>   * Local variables:
>>   * mode: C
>> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
>> index ea0cd0f1a2..bd42d87b72 100644
>> --- a/xen/include/xen/iommu.h
>> +++ b/xen/include/xen/iommu.h
>> @@ -243,7 +243,6 @@ struct iommu_ops {
>> u8 devfn, device_t *dev);
>>  #ifdef CONFIG_HAS_PCI
>>  int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
>> -int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
>> *msg);
>>  #endif /* HAS_PCI */
>>  
>>  void (*teardown)(struct domain *d);
>> @@ -272,6 +271,7 @@ struct iommu_ops {
>>  int (*adjust_irq_affinities)(void);
>>  void (*sync_cache)(const void *addr, unsigned int size);
>>  void (*clear_root_pgtable)(struct domain *d);
>> +int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
>> *msg);
> 
> You also need to move the function prototype
> (iommu_update_ire_from_msi) from iommu.h into asm-x86/iommu.h,

The prototype can, in principle at least, remain where it is.

> or maybe you could just define the function itself as static inline in
> asm-x86/iommu.h?

Possibly (and in that case it would perhaps indeed be better to
move it there, compared to needing another #ifdef).

Jan

RE: Discussion of Xenheap problems on AArch64

2021-04-21 Thread Henry Wang

Hi Julien,

> -Original Message-
> From: Julien Grall 
> Sent: Wednesday, April 21, 2021 5:04 PM
> To: Henry Wang ; sstabell...@kernel.org; xen-
> de...@lists.xenproject.org
> Cc: Wei Chen ; Penny Zheng
> ; Bertrand Marquis 
> Subject: Re: Discussion of Xenheap problems on AArch64
> 
> 
> 
> On 21/04/2021 07:28, Henry Wang wrote:
> > Hi,
> 
> Hi Henry,
> 
> >
> > We are trying to implement the static memory allocation on AArch64. Part
> of
> > this feature is the reserved heap memory allocation, where a specific range
> of
> > memory is reserved only for heap. In the development process, we found a
> > pitfall in current AArch64 setup_xenheap_mappings() function.
> >
> > According to a previous discussion in community
> > https://lore.kernel.org/xen-devel/20190216134456.10681-1-
> peng@nxp.com/,
> > on AArch64, bootmem is initialized after setup_xenheap_mappings(),
> > setup_xenheap_mappings() may try to allocate memory before memory
> has been
> > handed over to the boot allocator. If the reserved heap memory allocation
> is
> > introduced, either of below 2 cases will trigger a crash:
> >
> > 1. If the reserved heap memory is at the end of the memory block list and
> the
> > gap between reserved and unreserved memory is bigger than 512GB, when
> we setup
> > mappings from the beginning of the memory block list, we will get OOM
> caused
> > by lack of pages in boot allocator. This is because the memory that is
> reserved
> > for heap has not been mapped and added to the boot allocator.
> >
> > 2. If we add the memory that is reserved for heap to boot allocator first,
> and
> > then setup mappings for banks in the memory block list, we may get a page
> which
> > has not been setup mapping, causing a data abort.
> 
> There are a few issues with setup_xenheap_mappings(). I have been
> reworking the code on my spare time and started to upstream bits of it.
> A PoC can be found here:
> 
> https://xenbits.xen.org/gitweb/?p=people/julieng/xen-
> unstable.git;a=shortlog;h=refs/heads/pt/dev
> 

Really great news! Thanks you very much for the information and your hard
work on the PoC :) I will start to go through your PoC code then.

Kind regards,
Henry

> Cheers,
> 
> --
> Julien Grall

Re: [PATCH v2] xen/pci: Refactor PCI MSI interrupts related code

2021-04-21 Thread Jan Beulich

On 21.04.2021 11:15, Rahul Singh wrote:
> Hi Roger,
> 
>> On 21 Apr 2021, at 9:16 am, Roger Pau Monné  wrote:
>>
>> On Wed, Apr 21, 2021 at 08:07:08AM +, Rahul Singh wrote:
>>> Hi Jan,
>>>
 On 20 Apr 2021, at 4:36 pm, Jan Beulich  wrote:

 On 20.04.2021 15:45, Rahul Singh wrote:
>> On 19 Apr 2021, at 1:33 pm, Jan Beulich  wrote:
>> On 19.04.2021 13:54, Julien Grall wrote:
>>> For the time being, I think move this code in x86 is a lot better than 
>>> #ifdef or keep the code in common code.
>>
>> Well, I would perhaps agree if it ended up being #ifdef CONFIG_X86.
>> I would perhaps not agree if there was a new CONFIG_* which other
>> (future) arch-es could select if desired.
>
> I agree with Julien moving the code to x86 file as currently it is 
> referenced only in x86 code
> and as of now we are not sure how other architecture will implement the 
> Interrupt remapping
> (via IOMMU or any other means).  
>
> Let me know if you are ok with moving the code to x86.

 I can't answer this with "yes" or "no" without knowing what the alternative
 would be. As said, if the alternative is CONFIG_X86 #ifdef-ary, then yes.
 If a separate CONFIG_* gets introduced (and selected by X86), then a
 separate file (getting built only when that new setting is y) would seem
 better to me.
>>>
>>> I just made a quick patch. Please let me know if below patch is ok. I move 
>>> the definition to  "passthrough/x86/iommu.c” file.
>>>
>>> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
>>> index 705137f8be..199ce08612 100644
>>> --- a/xen/drivers/passthrough/pci.c
>>> +++ b/xen/drivers/passthrough/pci.c
>>> @@ -1303,13 +1303,6 @@ static int __init setup_dump_pcidevs(void)
>>> }
>>> __initcall(setup_dump_pcidevs);
>>>
>>> -int iommu_update_ire_from_msi(
>>> -struct msi_desc *msi_desc, struct msi_msg *msg)
>>> -{
>>> -return iommu_intremap
>>> -   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 
>>> 0;
>>> -}
>>> -
>>> static int iommu_add_device(struct pci_dev *pdev)
>>> {
>>> const struct domain_iommu *hd;
>>> diff --git a/xen/drivers/passthrough/x86/iommu.c 
>>> b/xen/drivers/passthrough/x86/iommu.c
>>> index b90bb31bfe..cf51dec564 100644
>>> --- a/xen/drivers/passthrough/x86/iommu.c
>>> +++ b/xen/drivers/passthrough/x86/iommu.c
>>> @@ -340,6 +340,13 @@ bool arch_iommu_use_permitted(const struct domain *d)
>>> likely(!p2m_get_hostp2m(d)->global_logdirty));
>>> }
>>>
>>> +int iommu_update_ire_from_msi(
>>> +struct msi_desc *msi_desc, struct msi_msg *msg)
>>> +{
>>> +return iommu_intremap
>>> +   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 
>>> 0;
>>> +}
>>> +
>>> /*
>>>  * Local variables:
>>>  * mode: C
>>> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
>>> index ea0cd0f1a2..bd42d87b72 100644
>>> --- a/xen/include/xen/iommu.h
>>> +++ b/xen/include/xen/iommu.h
>>> @@ -243,7 +243,6 @@ struct iommu_ops {
>>>u8 devfn, device_t *dev);
>>> #ifdef CONFIG_HAS_PCI
>>> int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
>>> -int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
>>> *msg);
>>> #endif /* HAS_PCI */
>>>
>>> void (*teardown)(struct domain *d);
>>> @@ -272,6 +271,7 @@ struct iommu_ops {
>>> int (*adjust_irq_affinities)(void);
>>> void (*sync_cache)(const void *addr, unsigned int size);
>>> void (*clear_root_pgtable)(struct domain *d);
>>> +int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
>>> *msg);
>>
>> You also need to move the function prototype
>> (iommu_update_ire_from_msi) from iommu.h into asm-x86/iommu.h, or
>> maybe you could just define the function itself as static inline in
>> asm-x86/iommu.h?
> 
> 
> Ok. I will move function prototype (iommu_update_ire_from_msi) from 
> iommu.h into asm-x86/iommu.h.
> 
> I first tried to make the function as static inline in "asm-x86/iommu.h" but 
> after 
> that I observe the compilation error for debug build because "asm/iommu.h” 
> is included in "xen/iommu.h” before iommu_call() is defined in "xen/iommu.h”.
> That's why I decided to move it to the "passthrough/x86/iommu.c” file.

Which in turn would be an argument for keeping it in xen/iommu.h and
wrap it in an #ifdef.

Jan

Re: [PATCH] CI: Drop TravisCI

2021-04-21 Thread Jan Beulich

On 21.04.2021 11:27, Andrew Cooper wrote:
> Travis-ci.org is shutting down shortly.  The arm cross-compile testing has
> been broken for a long time now, and all testing has now been superseded by
> our Gitlab infrastructure.
> 
> Signed-off-by: Andrew Cooper 

FWIW
Acked-by: Jan Beulich

[xen-4.13-testing test] 161323: tolerable FAIL - PUSHED

2021-04-21 Thread osstest service owner

flight 161323 xen-4.13-testing real [real]
flight 161348 xen-4.13-testing real-retest [real]
http://logs.test-lab.xenproject.org/osstest/logs/161323/
http://logs.test-lab.xenproject.org/osstest/logs/161348/

Failures :-/ but no regressions.

Tests which are failing intermittently (not blocking):
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 8 xen-boot fail pass in 
161348-retest

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-xl-rtds18 guest-start/debian.repeat fail REGR. vs. 160326

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail in 161348 never pass
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 160326
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 160326
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 160326
 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail  like 160326
 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 160326
 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 160326
 test-armhf-armhf-libvirt 16 saverestore-support-checkfail  like 160326
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 160326
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 160326
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 160326
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 160326
 test-amd64-i386-xl-pvshim14 guest-start  fail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt  15 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 15 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-seattle  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-seattle  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  14 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  15 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail  never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 15 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 16 saverestore-support-checkfail never pass
 test-armhf-armhf-xl-rtds 15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt 15 migrate-support-checkfail   never pass

version targeted for testing:
 xen  33049e3ad9c3a0f7432969f4b5d7b1a287b5b8f9
baseline version:
 xen  57a60c1f2779ce6d6ab5c2f677c4d0c66b09b08b

Last test of basis   160326  2021-03-22 17:06:05 Z   29 days
Testing same since   161323  2021-04-20 10:36:16 Z0 days

Re: [PATCH 4/8] x86/EFI: redo .reloc section bounds determination

2021-04-21 Thread Roger Pau Monné

On Thu, Apr 01, 2021 at 11:45:38AM +0200, Jan Beulich wrote:
> There's no need to link relocs-dummy.o into the ELF binary. The two
> symbols needed can as well be provided by the linker script. Then our
> mkreloc tool also doesn't need to put them in the generated assembler
> source.

Maybe I'm just dense today, but I think the message needs to be
expanded a bit to mention that while the __base_relocs_{start,end} are
not used when loaded as an EFI application, they are used by the EFI
code in Xen when booted using the multiboot2 protocol for example, as
they are used by efi_arch_relocate_image.

I think relocation is not needed when natively loaded as an EFI
application, as then the load address matches the one expected by
Xen?

I also wonder, at some point there where plans for providing a single
binary that would work as multiboot{1,2} and also be capable of being
loaded as an EFI application (ie: have a PE/COFF header also I assume
together with the ELF one), won't the changes here make it more
difficult to reach that goal or require reverting later on, as I feel
they are adding more differences between the PE binary and the ELF
one.

The code LGTM, but I think at least I would like the commit message to
be expanded.

Thanks, Roger.

[xen-unstable-coverity test] 161350: all pass - PUSHED

2021-04-21 Thread osstest service owner

flight 161350 xen-unstable-coverity real [real]
http://logs.test-lab.xenproject.org/osstest/logs/161350/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 xen  aaa3eafb3ba8b544d19ca41cda1477640b22b8fc
baseline version:
 xen  dd22a64de7e02b48312839a15179528c8f7db5c6

Last test of basis   161257  2021-04-18 09:19:39 Z3 days
Testing same since   161350  2021-04-21 09:19:38 Z0 days1 attempts


People who touched revisions under test:
  Andrew Cooper 
  Bertrand Marquish 
  Jan Beulich 
  Julien Grall 
  Rahul Singh 
  Roger Pau Monné 
  Stefano Stabellini 
  Stefano Stabellini 
  Tamas K Lengyel 
  Tim Deegan 
  Wei Liu 

jobs:
 coverity-amd64   pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   dd22a64de7..aaa3eafb3b  aaa3eafb3ba8b544d19ca41cda1477640b22b8fc -> 
coverity-tested/smoke

Re: [PATCH 5/8] x86: drop use of prelink-efi.o

2021-04-21 Thread Roger Pau Monné

On Thu, Apr 01, 2021 at 11:46:00AM +0200, Jan Beulich wrote:
> Now that its contents matches prelink.o, use that one uniformly.
> 
> Signed-off-by: Jan Beulich 

Reviewed-by: Roger Pau Monné 

Thanks, Roger.

Re: [PATCH 0/7] xen: Switch to using -Og for debug builds

2021-04-21 Thread Jan Beulich

On 21.04.2021 11:31, Andrew Cooper wrote:
> On 19/04/2021 16:45, Jan Beulich wrote:
>> On 19.04.2021 16:01, Andrew Cooper wrote:
>>> As with the toolstack side, we ought to use -Og for debug builds.
>>>
>>> All fixes are trivial.  The first 3 are understandable, given reduced
>>> optimisations.  The next 3 are, AFAICT, bogus diagnostics.
>>>
>>> Andrew Cooper (7):
>>>   xen/arm: Make make_cpus_node() compile at -Og
>>>   x86/shim: Fix compilation at -Og
>>>   x86/sysctl: Make arch_do_sysctl() compile at -Og
>>>   x86/irq: Make create_irq() compile at -Og
>>>   xen/efi: Make efi_start() compile at -Og
>>>   x86/shadow: Make _shadow_prealloc() compile at -Og
>>>   xen: Use -Og for debug builds when available
>> Acked-by: Jan Beulich 
>>
>> I'd like to ask though that at least for the bogus warnings you
>> amend the commit messages with the gcc version these were observed
>> with. Perhaps even those seemingly bogus initializers would
>> benefit from a very brief comment (or else there's a fair chance
>> of them getting removed again at some point).
> 
> I'm afraid I don't have that information easily to hand, but all issues
> were found by distro-provided compilers included in our Gitlab
> infrastructure.

Well, okay, then we'll need to live with said risk. As an aside I'd
like to remind you though that on past occasions of working around
compiler oddities, I was asked as well to provide compiler version
information ...

Jan

Re: [PATCH v4 2/3] x86/time: yield to hyperthreads after updating TSC during rendezvous

2021-04-21 Thread Jan Beulich

On 20.04.2021 17:59, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:54:27AM +0200, Jan Beulich wrote:
>> Since we'd like the updates to be done as synchronously as possible,
>> make an attempt at yielding immediately after the TSC write.
>>
>> Signed-off-by: Jan Beulich 
> 
> Reviewed-by: Roger Pau Monné 

Thanks.

> Did you observe any difference with the pause inserted?

I wouldn't even know how to measure it precisely enough. So - no,
I haven't. In fact ...

> I wonder whether that's enough to give a chance the hyperthread to
> also perform the TSC write. In any case there's no harm from it
> certainly.

... I have an inquiry pending with Intel as to better (more
reliable) ways to yield, for quite a bit longer than the 8259 one,
yet with the same lack of any outcome so far. Until then it really
is going to be no more than "make an attempt", as said in the
commit message.

Jan

Re: [PATCH] CI: Drop TravisCI

2021-04-21 Thread Wei Liu

On Wed, Apr 21, 2021 at 10:27:05AM +0100, Andrew Cooper wrote:
> Travis-ci.org is shutting down shortly.  The arm cross-compile testing has
> been broken for a long time now, and all testing has now been superseded by
> our Gitlab infrastructure.
> 
> Signed-off-by: Andrew Cooper 

Acked-by: Wei Liu 

I guess this also needs backporting?

Re: [PATCH] tools: Drop XGETTEXT from Tools.mk.in

2021-04-21 Thread Wei Liu

On Fri, Apr 16, 2021 at 04:59:41PM +0100, Andrew Cooper wrote:
> This hunk was missing from the work to drop gettext as a build dependency.
> 
> Fixes: e21a6a4f96 ("tools: Drop gettext as a build dependency")
> Signed-off-by: Andrew Cooper 

Acked-by: Wei Liu 

> ---
> CC: George Dunlap 
> CC: Ian Jackson 
> CC: Jan Beulich 
> CC: Stefano Stabellini 
> CC: Wei Liu 
> CC: Roger Pau Monné 
> CC: Julien Grall 
> CC: Juergen Gross 
> ---
>  config/Tools.mk.in | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/config/Tools.mk.in b/config/Tools.mk.in
> index d47936686b..934d899967 100644
> --- a/config/Tools.mk.in
> +++ b/config/Tools.mk.in
> @@ -12,7 +12,6 @@ PYTHON  := @PYTHON@
>  PYTHON_PATH := @PYTHONPATH@
>  PY_NOOPT_CFLAGS := @PY_NOOPT_CFLAGS@
>  PERL:= @PERL@
> -XGETTTEXT   := @XGETTEXT@
>  AS86:= @AS86@
>  LD86:= @LD86@
>  BCC := @BCC@
> -- 
> 2.11.0
>

Re: [PATCH v4 3/3] x86/time: avoid reading the platform timer in rendezvous functions

2021-04-21 Thread Jan Beulich

On 20.04.2021 18:12, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:55:10AM +0200, Jan Beulich wrote:
>> Reading the platform timer isn't cheap, so we'd better avoid it when the
>> resulting value is of no interest to anyone.
>>
>> The consumer of master_stime, obtained by
>> time_calibration_{std,tsc}_rendezvous() and propagated through
>> this_cpu(cpu_calibration), is local_time_calibration(). With
>> CONSTANT_TSC the latter function uses an early exit path, which doesn't
>> explicitly use the field. While this_cpu(cpu_calibration) (including the
>> master_stime field) gets propagated to this_cpu(cpu_time).stamp on that
>> path, both structures' fields get consumed only by the !CONSTANT_TSC
>> logic of the function.
>>
>> Signed-off-by: Jan Beulich 
>> ---
>> v4: New.
>> ---
>> I realize there's some risk associated with potential new uses of the
>> field down the road. What would people think about compiling time.c a
>> 2nd time into a dummy object file, with a conditional enabled to force
>> assuming CONSTANT_TSC, and with that conditional used to suppress
>> presence of the field as well as all audited used of it (i.e. in
>> particular that large part of local_time_calibration())? Unexpected new
>> users of the field would then cause build time errors.
> 
> Wouldn't that add quite a lot of churn to the file itself in the form
> of pre-processor conditionals?

Possibly - I didn't try yet, simply because of fearing this might
not be liked even without presenting it in patch form.

> Could we instead set master_stime to an invalid value that would make
> the consumers explode somehow?

No idea whether there is any such "reliable" value.

> I know there might be new consumers, but those should be able to
> figure whether the value is sane by looking at the existing ones.

This could be the hope, yes. But the effort of auditing the code to
confirm the potential of optimizing this (after vaguely getting the
impression there might be room) was non-negligible (in fact I did
three runs just to be really certain). This in particular means
that I'm in no way certain that looking at existing consumers would
point out the possible pitfall.

> Also, since this is only done on the BSP on the last iteration I
> wonder if it really makes such a difference performance-wise to
> warrant all this trouble.

By "all this trouble", do you mean the outlined further steps or
the patch itself? In the latter case, while it's only the BSP to
read the value, all other CPUs are waiting for the BSP to get its
part done. So the extra time it takes to read the platform clock
affects the overall duration of the rendezvous, and hence the time
not "usefully" spent by _all_ of the CPUs.

Jan

Re: [PATCH] tools/libs/light: Remove unnecessary libxl_list_vm() call

2021-04-21 Thread Wei Liu

On Mon, Apr 19, 2021 at 04:01:42PM +0300, Costin Lupu wrote:
> The removed lines were initially added by commit 314e64084d31, but the
> subsequent code which was using the nb_vm variable was later removed by
> commit 2ba368d13893, which makes these lines of code an overlooked
> reminiscence. Moreover, the call becomes very expensive when there is a
> considerable number of VMs (~1000 instances) running on the host.
> 

Nice catch.

> Signed-off-by: Costin Lupu 

Acked-by: Wei Liu 

> ---
>  tools/libs/light/libxl_create.c | 11 +--
>  1 file changed, 1 insertion(+), 10 deletions(-)
> 
> diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
> index 0c64268f66..43e9ba9c63 100644
> --- a/tools/libs/light/libxl_create.c
> +++ b/tools/libs/light/libxl_create.c
> @@ -578,7 +578,7 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config 
> *d_config,
> uint32_t *domid, bool soft_reset)
>  {
>  libxl_ctx *ctx = libxl__gc_owner(gc);
> -int ret, rc, nb_vm;
> +int ret, rc;
>  const char *dom_type;
>  char *uuid_string;
>  char *dom_path, *vm_path, *libxl_path;
> @@ -586,7 +586,6 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config 
> *d_config,
>  struct xs_permissions rwperm[1];
>  struct xs_permissions noperm[1];
>  xs_transaction_t t = 0;
> -libxl_vminfo *vm_list;
>  
>  /* convenience aliases */
>  libxl_domain_create_info *info = &d_config->c_info;
> @@ -869,14 +868,6 @@ retry_transaction:
>  ARRAY_SIZE(rwperm));
>  }
>  
> -vm_list = libxl_list_vm(ctx, &nb_vm);
> -if (!vm_list) {
> -LOGD(ERROR, *domid, "cannot get number of running guests");
> -rc = ERROR_FAIL;
> -goto out;
> -}
> -libxl_vminfo_list_free(vm_list, nb_vm);
> -
>  xs_write(ctx->xsh, t, GCSPRINTF("%s/uuid", vm_path), uuid_string, 
> strlen(uuid_string));
>  xs_write(ctx->xsh, t, GCSPRINTF("%s/name", vm_path), info->name, 
> strlen(info->name));
>  
> -- 
> 2.20.1
>

Re: [PATCH] CI: Drop TravisCI

2021-04-21 Thread Andrew Cooper

On 21/04/2021 11:04, Wei Liu wrote:
> On Wed, Apr 21, 2021 at 10:27:05AM +0100, Andrew Cooper wrote:
>> Travis-ci.org is shutting down shortly.  The arm cross-compile testing has
>> been broken for a long time now, and all testing has now been superseded by
>> our Gitlab infrastructure.
>>
>> Signed-off-by: Andrew Cooper 
> Acked-by: Wei Liu 

Thanks.

> I guess this also needs backporting?

Unsure.  It logically depends on "automation: add arm32 cross-build
tests for Xen" which is currently the top commit on staging, but said
cross-compile tests were also the ones broken in Travis.

When travis shuts down, it will simply stop caring about repos/branches
containing a .travis.yml, so I don't think anything is going to break as
a consequence.

Then again, it is probably bad form to retain CI configuration for
something which we know doesn't work.

Jan - thoughts?

~Andrew

Re: [PATCH v2 2/6] tools/libs/ctrl: fix xc_core_arch_map_p2m() to support linear p2m table

2021-04-21 Thread Wei Liu

On Mon, Apr 12, 2021 at 05:22:32PM +0200, Juergen Gross wrote:
> The core of a pv linux guest produced via "xl dump-core" is not usable
> as since kernel 4.14 only the linear p2m table is kept if Xen indicates
> it is supporting that. Unfortunately xc_core_arch_map_p2m() is still
> supporting the 3-level p2m tree only.
> 
> Fix that by copying the functionality of map_p2m() from libxenguest to
> libxenctrl.
> 

So there are now two copies of the same logic? Is it possible to reduce
it to only one?

Wei.

Re: [PATCH v2 1/6] tools/libs/guest: fix max_pfn setting in map_p2m()

2021-04-21 Thread Wei Liu

On Mon, Apr 12, 2021 at 05:22:31PM +0200, Juergen Gross wrote:
> When setting the highest pfn used in the guest, don't subtract 1 from
> the value read from the shared_info data. The value read already is
> the correct pfn.
> 
> Fixes: 91e204d37f449 ("libxc: try to find last used pfn when migrating")
> Signed-off-by: Juergen Gross 

Acked-by: Wei Liu

Re: [PATCH v2 3/6] tools/libs/ctrl: use common p2m mapping code in xc_domain_resume_any()

2021-04-21 Thread Wei Liu

On Mon, Apr 12, 2021 at 05:22:33PM +0200, Juergen Gross wrote:
> Instead of open coding the mapping of the p2m list use the already
> existing xc_core_arch_map_p2m() call, especially as the current code
> does not support guests with the linear p2m map. It should be noted
> that this code is needed for colo/remus only.
> 
> Switching to xc_core_arch_map_p2m() drops the need to bail out for
> bitness of tool stack and guest differing.
> 
> Signed-off-by: Juergen Gross 

Acked-by: Wei Liu

Re: [PATCH] CI: Drop TravisCI

2021-04-21 Thread Jan Beulich

On 21.04.2021 12:08, Andrew Cooper wrote:
> On 21/04/2021 11:04, Wei Liu wrote:
>> On Wed, Apr 21, 2021 at 10:27:05AM +0100, Andrew Cooper wrote:
>>> Travis-ci.org is shutting down shortly.  The arm cross-compile testing has
>>> been broken for a long time now, and all testing has now been superseded by
>>> our Gitlab infrastructure.
>>>
>>> Signed-off-by: Andrew Cooper 
>> Acked-by: Wei Liu 
> 
> Thanks.
> 
>> I guess this also needs backporting?
> 
> Unsure.  It logically depends on "automation: add arm32 cross-build
> tests for Xen" which is currently the top commit on staging, but said
> cross-compile tests were also the ones broken in Travis.
> 
> When travis shuts down, it will simply stop caring about repos/branches
> containing a .travis.yml, so I don't think anything is going to break as
> a consequence.
> 
> Then again, it is probably bad form to retain CI configuration for
> something which we know doesn't work.
> 
> Jan - thoughts?

Since the patch ought to be trivial to backport, for the latter of your
arguments I think I'd prefer backporting it.

Jan

Re: [PATCH v2 4/6] tools/libs: move xc_resume.c to libxenguest

2021-04-21 Thread Wei Liu

On Mon, Apr 12, 2021 at 05:22:34PM +0200, Juergen Gross wrote:
> The guest suspend functionality is already part of libxenguest. Move
> the resume functionality from libxenctrl to libxenguest, too.
> 
> Signed-off-by: Juergen Gross 

Acked-by: Wei Liu

Re: [PATCH v2 5/6] tools/libs: move xc_core* from libxenctrl to libxenguest

2021-04-21 Thread Wei Liu

On Mon, Apr 12, 2021 at 05:22:35PM +0200, Juergen Gross wrote:
> The functionality in xc_core* should be part of libxenguest instead
> of libxenctrl. Users are already either in libxenguest, or in xl.
> There is one single exception: xc_core_arch_auto_translated_physmap()
> is being used by xc_domain_memory_mapping(), which is used by qemu.
> So leave the xc_core_arch_auto_translated_physmap() functionality in
> libxenctrl.
> 
> This will make it easier to merge common functionality of xc_core*
> and xg_sr_save*.
> 
> Signed-off-by: Juergen Gross 

Acked-by: Wei Liu

Re: [PATCH v2 6/6] tools/libs/guest: make some definitions private to libxenguest

2021-04-21 Thread Wei Liu

On Mon, Apr 12, 2021 at 05:22:36PM +0200, Juergen Gross wrote:
> There are some definitions which are used in libxenguest only now.
> Move them from libxenctrl over to libxenguest.
> 
> Remove an unused macro.
> 
> Signed-off-by: Juergen Gross 

Acked-by: Wei Liu

Re: [PATCH v2 2/6] tools/libs/ctrl: fix xc_core_arch_map_p2m() to support linear p2m table

2021-04-21 Thread Juergen Gross


On 21.04.21 12:13, Wei Liu wrote:

On Mon, Apr 12, 2021 at 05:22:32PM +0200, Juergen Gross wrote:

The core of a pv linux guest produced via "xl dump-core" is not usable
as since kernel 4.14 only the linear p2m table is kept if Xen indicates
it is supporting that. Unfortunately xc_core_arch_map_p2m() is still
supporting the 3-level p2m tree only.

Fix that by copying the functionality of map_p2m() from libxenguest to
libxenctrl.



So there are now two copies of the same logic? Is it possible to reduce
it to only one?


Yes. See the intro mail of the series.

I wanted to fix the issue first, before doing the major cleanup work.


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: application/pgp-keys


OpenPGP_signature
Description: OpenPGP digital signature

Re: [PATCH 2/9] arm/domain: Get rid of READ/WRITE_SYSREG32

2021-04-21 Thread Julien Grall





On 21/04/2021 08:36, Michal Orzel wrote:

Hi Julien,


Hi Michal,



On 20.04.2021 15:12, Julien Grall wrote:

Hi Michal,

On 20/04/2021 08:08, Michal Orzel wrote:

AArch64 system registers are 64bit whereas AArch32 ones
are 32bit or 64bit. MSR/MRS are expecting 64bit values thus
we should get rid of helpers READ/WRITE_SYSREG32
in favour of using READ/WRITE_SYSREG.
We should also use register_t type when reading sysregs
which can correspond to uint64_t or uint32_t.
Even though many AArch64 sysregs have upper 32bit reserved
it does not mean that they can't be widen in the future.

Modify type of registers: actlr, cntkctl to register_t.


ACTLR is implementation defined, so in theory there might already bits already 
defined in the range [32:63]. So I would consider to split it from the patch so 
we can backport it.


You mean not to touch it at all in this series or to create a seperate patch 
only for ACTLR?


I meant creating a separate patch for ACTLR as part of this series.



Signed-off-by: Michal Orzel 
---
   xen/arch/arm/domain.c    | 20 ++--
   xen/include/asm-arm/domain.h |  4 ++--
   2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index bdd3d3e5b5..c021a03c61 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -113,13 +113,13 @@ static void ctxt_switch_from(struct vcpu *p)
   p->arch.tpidr_el1 = READ_SYSREG(TPIDR_EL1);
     /* Arch timer */
-    p->arch.cntkctl = READ_SYSREG32(CNTKCTL_EL1);
+    p->arch.cntkctl = READ_SYSREG(CNTKCTL_EL1);
   virt_timer_save(p);
     if ( is_32bit_domain(p->domain) && cpu_has_thumbee )
   {
-    p->arch.teecr = READ_SYSREG32(TEECR32_EL1);
-    p->arch.teehbr = READ_SYSREG32(TEEHBR32_EL1);
+    p->arch.teecr = READ_SYSREG(TEECR32_EL1);
+    p->arch.teehbr = READ_SYSREG(TEEHBR32_EL1);


It feels strange you converted cntkctl and actlr to use register_t but not 
teecr and teehbr. Can you explain why?


Because thumbee and its registers do not appear in any of ARMv8 ARM version.


This was defined on the very first version of the ARMv8-A spec (issue 
a.a) but was dropped on subsequent versinos. Hence why the compiler 
doesn't shout at you when finding TEECR32_EL1 and TEEHBR32_EL1.



This means that this code could be protected even with #ifdef CONFIG_ARM_32 as 
AArch64 do not have them.


+1. The feature was completely dropped for Armv8 (even if it appeared in 
earlier version of the spec). I have checked AMD Seattle, QEMU and the 
Foundation model to confirm the feature is not exposed.


Cheers,

--
Julien Grall

[PATCH v4 0/3] zstd decompression fallout / consolidation

2021-04-21 Thread Jan Beulich

1: unzstd: replace INIT{,DATA} and STATIC
2: xen/decompress: drop STATIC and INIT
3: unzstd: make helper symbols static

Jan

Re: [PATCH 3/9] arm/gic: Get rid of READ/WRITE_SYSREG32

2021-04-21 Thread Julien Grall


Hi Michal,

On 21/04/2021 08:48, Michal Orzel wrote:

On 20.04.2021 15:28, Julien Grall wrote:

On 20/04/2021 08:08, Michal Orzel wrote:

Signed-off-by: Michal Orzel 
     static unsigned int gicv3_read_irq(void)
   {
-    unsigned int irq = READ_SYSREG32(ICC_IAR1_EL1);
+    register_t irq = READ_SYSREG(ICC_IAR1_EL1);
     dsb(sy);
   -    return irq;
+    /* Number of IRQs do not exceed 32bit. */


If we want to be pedantic, the IRQs are encoded using 23-bit. So maybe we want 
to mask them below.


+    return (unsigned int)irq;


NIT: We tend to avoid explicit cast unless they are strictly necessary because 
they can be more harmful than implicit cast (the compiler may not cast every 
conversion). So I would drop it and just keep the comment.


Ok. I will do:
return (irq & 0xff);


Sounds good, so long we the value is not hardoced but provided through a 
define :).


Cheers,

--
Julien Grall

[PATCH v4 1/3] unzstd: replace INIT and STATIC

2021-04-21 Thread Jan Beulich

With xen/common/decompress.h now agreeing in both build modes about
what STATIC expands to, there's no need for these abstractions anymore.

Requested-by: Andrew Cooper 
Signed-off-by: Jan Beulich 
---
v4: Minor wording adjustment to description.
v3: New.

--- a/xen/common/unzstd.c
+++ b/xen/common/unzstd.c
@@ -71,7 +71,7 @@
  */
 #define ZSTD_IOBUF_SIZE(1 << 17)
 
-static int INIT handle_zstd_error(size_t ret, void (*error)(const char *x))
+static int __init handle_zstd_error(size_t ret, void (*error)(const char *x))
 {
const int err = ZSTD_getErrorCode(ret);
 
@@ -102,9 +102,9 @@ static int INIT handle_zstd_error(size_t
  * We can allocate less memory (no circular buffer for the sliding window),
  * and avoid some memcpy() calls.
  */
-static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf,
- long out_len, unsigned int *in_pos,
- void (*error)(const char *x))
+static int __init decompress_single(const u8 *in_buf, long in_len, u8 *out_buf,
+   long out_len, unsigned int *in_pos,
+   void (*error)(const char *x))
 {
const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
void *wksp = large_malloc(wksp_size);
@@ -142,11 +142,11 @@ out:
return err;
 }
 
-int INIT unzstd(unsigned char *in_buf, unsigned int in_len,
-   int (*fill)(void*, unsigned int),
-   int (*flush)(void*, unsigned int),
-   unsigned char *out_buf, unsigned int *in_pos,
-   void (*error)(const char *x))
+int __init unzstd(unsigned char *in_buf, unsigned int in_len,
+ int (*fill)(void*, unsigned int),
+ int (*flush)(void*, unsigned int),
+ unsigned char *out_buf, unsigned int *in_pos,
+ void (*error)(const char *x))
 {
ZSTD_inBuffer in;
ZSTD_outBuffer out;
--- a/xen/common/zstd/decompress.c
+++ b/xen/common/zstd/decompress.c
@@ -46,7 +46,7 @@
 /*_***
 *  Memory operations
 **/
-static void INIT ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); 
}
+static void __init ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 
4); }
 
 /*-*
 *   Context management
@@ -98,12 +98,12 @@ struct ZSTD_DCtx_s {
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
 }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
 
-STATIC size_t INIT ZSTD_DCtxWorkspaceBound(void)
+static size_t __init ZSTD_DCtxWorkspaceBound(void)
 {
return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx));
 }
 
-STATIC size_t INIT ZSTD_decompressBegin(ZSTD_DCtx *dctx)
+static size_t __init ZSTD_decompressBegin(ZSTD_DCtx *dctx)
 {
dctx->expected = ZSTD_frameHeaderSize_prefix;
dctx->stage = ZSTDds_getFrameHeaderSize;
@@ -123,7 +123,7 @@ STATIC size_t INIT ZSTD_decompressBegin(
return 0;
 }
 
-STATIC ZSTD_DCtx *INIT ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+static ZSTD_DCtx *__init ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
 {
ZSTD_DCtx *dctx;
 
@@ -138,13 +138,13 @@ STATIC ZSTD_DCtx *INIT ZSTD_createDCtx_a
return dctx;
 }
 
-STATIC ZSTD_DCtx *INIT ZSTD_initDCtx(void *workspace, size_t workspaceSize)
+static ZSTD_DCtx *__init ZSTD_initDCtx(void *workspace, size_t workspaceSize)
 {
ZSTD_customMem const stackMem = ZSTD_initStack(workspace, 
workspaceSize);
return ZSTD_createDCtx_advanced(stackMem);
 }
 
-size_t INIT ZSTD_freeDCtx(ZSTD_DCtx *dctx)
+size_t __init ZSTD_freeDCtx(ZSTD_DCtx *dctx)
 {
if (dctx == NULL)
return 0; /* support free on NULL */
@@ -153,15 +153,15 @@ size_t INIT ZSTD_freeDCtx(ZSTD_DCtx *dct
 }
 
 #ifdef BUILD_DEAD_CODE
-void INIT ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx)
+void __init ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx)
 {
size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + 
WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max;
memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need 
to copy workspace */
 }
 #endif
 
-STATIC size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
-STATIC size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
+static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
+static size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
size_t dictSize);
 
 static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict);
@@ -176,7 +176,7 @@ static void ZSTD_refDDict(ZSTD_DCtx *dst
  *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 
0.
  *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy 
Support is enabled.
  *  Note 3 : Skippable

[PATCH v4 2/3] xen/decompress: drop STATIC and INIT

2021-04-21 Thread Jan Beulich

Except for one last instance, all users have been removed in earlier
changes.

Requested-by: Andrew Cooper 
Signed-off-by: Jan Beulich 
Acked-by: Julien Grall 
---
v3: New.

--- a/xen/arch/arm/efi/efi-dom0.c
+++ b/xen/arch/arm/efi/efi-dom0.c
@@ -28,7 +28,7 @@
 #include 
 #include 
 #include "../../../common/decompress.h"
-#define XZ_EXTERN STATIC
+#define XZ_EXTERN static
 #include "../../../common/xz/crc32.c"
 
 /* Constant to indicate "Xen" in unicode u16 format */
--- a/xen/common/decompress.h
+++ b/xen/common/decompress.h
@@ -7,9 +7,6 @@
 #include 
 #include 
 
-#define STATIC static
-#define INIT __init
-
 #define malloc xmalloc_bytes
 #define free xfree
 
@@ -18,9 +15,6 @@
 
 #else
 
-#define STATIC static
-#define INIT
-
 #undef __init /* tools/libs/guest/xg_private.h has its own one */
 #define __init
 #define __initdata

Re: [PATCH 6/8] x86/EFI: avoid use of GNU ld's --disable-reloc-section when possible

2021-04-21 Thread Roger Pau Monné

On Thu, Apr 01, 2021 at 11:46:44AM +0200, Jan Beulich wrote:
> As of commit 6fa7408d72b3 ("ld: don't generate base relocations in PE
> output for absolute symbols") I'm feeling sufficiently confident in GNU
> ld to use its logic for generating base relocations, which was enabled
> for executables at some point last year (prior to that this would have
> got done only for DLLs).
> 
> GNU ld, seeing the original relocations coming from the ELF object files,
> generates different relocation types for our page tables (64-bit ones,
> while mkreloc produces 32-bit ones). This requires also permitting and
> handling that type in efi_arch_relocate_image().
> 
> Signed-off-by: Jan Beulich 
> 
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -120,18 +120,37 @@ $(TARGET): $(TARGET)-syms $(efi-y) boot/
>   mv $(TMP) $(TARGET)
>  
>  ifneq ($(efi-y),)
> +
>  # Check if the compiler supports the MS ABI.
>  export XEN_BUILD_EFI := $(shell $(CC) $(XEN_CFLAGS) -c efi/check.c -o 
> efi/check.o 2>/dev/null && echo y)
> +CFLAGS-$(XEN_BUILD_EFI) += -DXEN_BUILD_EFI
> +
>  # Check if the linker supports PE.
>  EFI_LDFLAGS = $(patsubst -m%,-mi386pep,$(XEN_LDFLAGS)) --subsystem=10 
> --strip-debug
>  XEN_BUILD_PE := $(if $(XEN_BUILD_EFI),$(shell $(LD) $(EFI_LDFLAGS) -o 
> efi/check.efi efi/check.o 2>/dev/null && echo y))
> -CFLAGS-$(XEN_BUILD_EFI) += -DXEN_BUILD_EFI
> -# Check if the linker produces fixups in PE by default (we need to disable 
> it doing so for now).
> -XEN_NO_PE_FIXUPS := $(if $(XEN_BUILD_EFI), \
> - $(shell $(LD) $(EFI_LDFLAGS) 
> --disable-reloc-section -o efi/check.efi efi/check.o 2>/dev/null && \
> - echo --disable-reloc-section))
> +
> +ifeq ($(XEN_BUILD_PE),y)
> +
> +# Check if the linker produces fixups in PE by default
> +nr-fixups := $(shell $(OBJDUMP) -p efi/check.efi | grep 
> '^[[:blank:]]*reloc[[:blank:]]*[0-9][[:blank:]].*DIR64$$' | wc -l)
> +ifeq ($(nr-fixups),2)
> +MKRELOC := :
> +relocs-dummy :=
> +else
> +MKRELOC := efi/mkreloc
> +relocs-dummy := efi/relocs-dummy.o
> +# If the linker produced fixups but not precisely two of them, we need to
> +# disable it doing so.  But if it didn't produce any fixups, it also wouldn't
> +# recognize the option.
> +ifneq ($(nr-fixups),0)
> +EFI_LDFLAGS += --disable-reloc-section
> +endif
>  endif
>  
> +endif # $(XEN_BUILD_PE)
> +
> +endif # $(efi-y)
> +
>  ALL_OBJS := $(BASEDIR)/arch/x86/boot/built_in.o 
> $(BASEDIR)/arch/x86/efi/built_in.o $(ALL_OBJS)
>  
>  ifeq ($(CONFIG_LTO),y)
> @@ -175,7 +194,7 @@ note.o: $(TARGET)-syms
>   --rename-section=.data=.note.gnu.build-id -S $@.bin $@
>   rm -f $@.bin
>  
> -EFI_LDFLAGS += --image-base=$(1) --stack=0,0 --heap=0,0 $(XEN_NO_PE_FIXUPS)
> +EFI_LDFLAGS += --image-base=$(1) --stack=0,0 --heap=0,0
>  EFI_LDFLAGS += --section-alignment=0x20 --file-alignment=0x20
>  EFI_LDFLAGS += --major-image-version=$(XEN_VERSION)
>  EFI_LDFLAGS += --minor-image-version=$(XEN_SUBVERSION)
> @@ -189,7 +208,11 @@ EFI_LDFLAGS += --no-insert-timestamp
>  endif
>  
>  $(TARGET).efi: VIRT_BASE = 0x$(shell $(NM) efi/relocs-dummy.o | sed -n 's, A 
> VIRT_START$$,,p')
> +ifeq ($(MKRELOC),:)
> +$(TARGET).efi: ALT_BASE :=
> +else
>  $(TARGET).efi: ALT_BASE = 0x$(shell $(NM) efi/relocs-dummy.o | sed -n 's, A 
> ALT_START$$,,p')

Could you maybe check whether $(relocs-dummy) is set as the condition
here and use it here instead of efi/relocs-dummy.o?

> +endif
>  
>  ifneq ($(build_id_linker),)
>  ifeq ($(call ld-ver-build-id,$(LD) $(filter -m%,$(EFI_LDFLAGS))),y)
> @@ -210,16 +233,16 @@ note_file_option ?= $(note_file)
>  ifeq ($(XEN_BUILD_PE),y)
>  $(TARGET).efi: prelink.o $(note_file) efi.lds efi/relocs-dummy.o efi/mkreloc

Do you need to also replace the target prerequisite to use $(relocs-dummy)?

>   $(foreach base, $(VIRT_BASE) $(ALT_BASE), \
> -   $(LD) $(call EFI_LDFLAGS,$(base)) -T efi.lds -N $< 
> efi/relocs-dummy.o \
> +   $(LD) $(call EFI_LDFLAGS,$(base)) -T efi.lds -N $< 
> $(relocs-dummy) \
>   $(BASEDIR)/common/symbols-dummy.o $(note_file_option) 
> -o $(@D)/.$(@F).$(base).0 &&) :
> - efi/mkreloc $(foreach base,$(VIRT_BASE) 
> $(ALT_BASE),$(@D)/.$(@F).$(base).0) >$(@D)/.$(@F).0r.S
> + $(MKRELOC) $(foreach base,$(VIRT_BASE) 
> $(ALT_BASE),$(@D)/.$(@F).$(base).0) >$(@D)/.$(@F).0r.S
>   $(NM) -pa --format=sysv $(@D)/.$(@F).$(VIRT_BASE).0 \
>   | $(BASEDIR)/tools/symbols $(all_symbols) --sysv --sort 
> >$(@D)/.$(@F).0s.S
>   $(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).0r.o $(@D)/.$(@F).0s.o
>   $(foreach base, $(VIRT_BASE) $(ALT_BASE), \
> $(LD) $(call EFI_LDFLAGS,$(base)) -T efi.lds -N $< \
>   $(@D)/.$(@F).0r.o $(@D)/.$(@F).0s.o $(note_file_option) 
> -o $(@D)/.$(@F).$(base).1 &&) :
> - efi/mkreloc $(foreach base,$(VIRT_BASE) 
> $(ALT_BASE),$(@D)/.$(@F).$(base).1) >$(@D)/.$(@F).1r.S
> + $(MKRELOC) $(foreach base,$(VIRT_

[PATCH v4 3/3] unzstd: make helper symbols static

2021-04-21 Thread Jan Beulich

While for the original library's purposes these functions of course want
to be externally exposed, we don't need this, and we also don't want
this both to prevent unintended use and to keep the name space tidy.
(When functions have no callers at all, wrap them with a suitable
#ifdef.) This has the added benefit of reducing the resulting binary
size - while this is all .init code, it's still desirable to not carry
dead code.

Additionally in the hypervisor we don't need the bulk of unzstd(), so
insert a conditional allowing the compiler to DCE the rest (including
the called functions).

Signed-off-by: Jan Beulich 
---
v3: New.

--- a/xen/common/unzstd.c
+++ b/xen/common/unzstd.c
@@ -53,6 +53,10 @@
  * <= 22 + (uncompressed_size >> 15) + 131072
  */
 
+#ifdef __XEN__
+#include 
+#endif
+
 #include "decompress.h"
 
 #include "zstd/entropy_common.c"
@@ -173,6 +177,11 @@ int __init unzstd(unsigned char *in_buf,
return decompress_single(in_buf, in_len, out_buf, out_len,
 in_pos, error);
 
+#ifdef __XEN__
+   ASSERT_UNREACHABLE();
+   return -1;
+#endif
+
/*
 * If in_buf is not provided, we must be using fill(), so allocate
 * a large enough buffer. If it is provided, it must be at least
--- a/xen/common/zstd/entropy_common.c
+++ b/xen/common/zstd/entropy_common.c
@@ -45,8 +45,10 @@
 #include "huf.h"
 #include "mem.h"
 
+#ifdef BUILD_DEAD_CODE
 /*===   Version   ===*/
 unsigned __init FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+#endif
 
 /*===   Error Management   ===*/
 unsigned __init FSE_isError(size_t code) { return ERR_isError(code); }
--- a/xen/common/zstd/fse.h
+++ b/xen/common/zstd/fse.h
@@ -64,7 +64,7 @@ FSE_PUBLIC_API unsigned FSE_versionNumbe
 FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed 
size */
 
 /* Error Management */
-FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value 
is an error code */
+static unsigned FSE_isError(size_t code); /* tells if a return value is an 
error code */
 
 /*-*
 *  FSE detailed API
@@ -173,7 +173,7 @@ If there is an error, the function will
@return : size read from 'rBuffer',
  or an errorCode, which can be tested using 
FSE_isError().
  maxSymbolValuePtr[0] and tableLogPtr[0] will also be 
updated with their respective values */
-FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned 
*maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t 
rBuffSize);
+static size_t FSE_readNCount(short *normalizedCounter, unsigned 
*maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t 
rBuffSize);
 
 /*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
@@ -182,14 +182,14 @@ typedef unsigned FSE_DTable; /* don't al
 /*! FSE_buildDTable():
Builds 'dt', which must be already allocated, using FSE_createDTable().
return : 0, or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short 
*normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void 
*workspace, size_t workspaceSize);
+static size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short 
*normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void 
*workspace, size_t workspaceSize);
 
 /*! FSE_decompress_usingDTable():
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
into `dst` which must be already allocated.
@return : size of regenerated data (necessarily <= `dstCapacity`),
  or an errorCode, which can be tested using 
FSE_isError() */
-FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t 
dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt);
+static size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const 
void *cSrc, size_t cSrcSize, const FSE_DTable *dt);
 
 /*!
 Tutorial :
@@ -273,10 +273,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable *
 size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits);
 /**< build a fake FSE_DTable, designed to read a flat distribution where each 
symbol uses nbBits */
 
-size_t FSE_buildDTable_rle(FSE_DTable *dt, unsigned char symbolValue);
+static size_t FSE_buildDTable_rle(FSE_DTable *dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue 
*/
 
-size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, 
size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize);
+static size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void 
*cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` 
produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
 
 /* ***

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

2021-04-21 Thread Wei Liu

On Tue, Apr 13, 2021 at 04:01:33PM +0200, Roger Pau Monne wrote:
> Introduce a helper to obtain a compatible cpu policy based on two
> input cpu policies. Currently this is done by and'ing all CPUID leaves
> and MSR entries, except for MSR_ARCH_CAPABILITIES which has the RSBA
> bit or'ed.
> 

I thought canonical source for compatibility was to be put into the
hypervisor, thus all this functionality would be in the hypervisor. Am I
misremembering?

Wei.

Re: [PATCH v3] tools: create libxensaverestore

2021-04-21 Thread Wei Liu

On Tue, Apr 13, 2021 at 07:20:27PM +0200, Olaf Hering wrote:
> Move all save/restore related code from libxenguest.so into a separate
> library libxensaverestore.so. The only consumer is libxl-save-helper.
> There is no need to have the moved code mapped all the time in binaries
> where libxenguest.so is used.

Good idea. Please address Jan and Juergen's comment regarding the
prefix.

Wei.

Re: [PATCH v4] tools: create libxensaverestore

2021-04-21 Thread Wei Liu

On Thu, Apr 15, 2021 at 03:11:38PM +0200, Olaf Hering wrote:
> Move all save/restore related code from libxenguest.so into a separate
> library libxensaverestore.so. The only consumer is libxl-save-helper.
> There is no need to have the moved code mapped all the time in binaries
> where libxenguest.so is used.
> 
> According to size(1) the change is:
>text  data bss dec hex filename
>  187183  4304  48  191535   2ec2f guest/libxenguest.so.4.15.0
> 
>  124106  3376  48  127530   1f22a guest/libxenguest.so.4.15.0
>   67841  1872   8   69721   11059 
> saverestore/libxensaverestore.so.4.15.0
> 
> While touching the files anyway, take the opportunity to drop the
> reduntant xg_sr_ filename prefix.
> 
> Signed-off-by: Olaf Hering 

Acked-by: Wei Liu

Re: [PATCH 2/8] x86/EFI: sections may not live at VA 0 in PE binaries

2021-04-21 Thread Jan Beulich

On 21.04.2021 10:52, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:44:45AM +0200, Jan Beulich wrote:
>> PE binaries specify section addresses by (32-bit) RVA. GNU ld up to at
>> least 2.36 would silently truncate the (negative) difference when a
>> section is placed below the image base. Such sections would also be
>> wrongly placed ahead of all "normal" ones. Since, for the time being,
>> we build xen.efi with --strip-debug anyway, .stab* can't appear. And
>> .comment has an entry in /DISCARD/ already anyway in the EFI case.
>>
>> Because of their unclear origin, keep the directives for the ELF case
>> though.
> 
> It's my understadng thonse sections are only there for debug purposes,
> and never part of the final xen binary as they are stripped?
> 
> Could we maybe remove the section load address of 0 and instead just
> use the (NOLOAD) directive?

(NOLOAD) is meaningless for PE.

> Does it really matter to place them at address 0?

That's the convention for ELF, and also what ld defaults to for debugging
sections.

> I also wonder, is this change fixing some existing bug, or it's just a
> cleanup change?

If there were sections at 0, the resulting PE binary would end up broken.
Prior to binutils 2.37 this brokenness is silent, i.e. the linker doesn't
issue any form of diagnostic. The change therefore is addressing a latent
issue - if any such section started being non-empty, we'd be in trouble.

> I also only see the .comment section in my binary output, so maybe
> it's fine to just remove them from the script?

Which binary are you referring to - ELF or PE? In the former case, yes,
that's what the statement is for. In the latter case I can't see how this
would be, with .comment being explicitly part of /DISCARD/ in that case.

> Does the Arm linker script need a similar treatment?

No idea - they don't use ld to produce a PE binary. In fact during my
work on the binutils side for all of this, I was given a hint that on Arm
linking ELF objects into PE output may currently not be possible at all.

Jan

Re: [PATCH 3/8] x86/EFI: program headers are an ELF concept

2021-04-21 Thread Jan Beulich

On 21.04.2021 11:11, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:45:09AM +0200, Jan Beulich wrote:
>> While they apparently do no harm when building xen.efi, their use is
>> potentially misleading. Conditionalize their use to be for just the ELF
>> binary we produce.
>>
>> No change to the resulting binaries.
> 
> The GNU Linker manual notes that program headers would be ignored when
> not generating an ELF file, so I'm not sure it's worth us adding more
> churn to the linker script to hide something that's already ignored by
> ld already.
> 
> Maybe adding a comment noting program headers are ignored when not
> generating an ELF output would be enough?

Maybe, but I'd prefer this to be explicit, and I'd prefer for efi.lds
to not have any PE-unrelated baggage. The churn by this patch isn't
all this significant, is it? In fact in two cases it actually deletes
meaningless stuff.

Jan

Re: [PATCH 4/8] x86/EFI: redo .reloc section bounds determination

2021-04-21 Thread Jan Beulich

On 21.04.2021 11:46, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:45:38AM +0200, Jan Beulich wrote:
>> There's no need to link relocs-dummy.o into the ELF binary. The two
>> symbols needed can as well be provided by the linker script. Then our
>> mkreloc tool also doesn't need to put them in the generated assembler
>> source.
> 
> Maybe I'm just dense today, but I think the message needs to be
> expanded a bit to mention that while the __base_relocs_{start,end} are
> not used when loaded as an EFI application, they are used by the EFI
> code in Xen when booted using the multiboot2 protocol for example, as
> they are used by efi_arch_relocate_image.
> 
> I think relocation is not needed when natively loaded as an EFI
> application, as then the load address matches the one expected by
> Xen?

It's quite the other way around: The EFI loader applies relocations
to put the binary at its loaded _physical_ address (the image gets
linked for the final virtual address). Hence we need to apply the
same relocations a 2nd time (undoing what the EFI loader did)
before we can branch from the physical (identity mapped) address
range where xen.efi was loaded to the intended virtual address
range where we mean to run Xen from.

For the ELF binary the symbols are needed solely to make ld happy.

> I also wonder, at some point there where plans for providing a single
> binary that would work as multiboot{1,2} and also be capable of being
> loaded as an EFI application (ie: have a PE/COFF header also I assume
> together with the ELF one), won't the changes here make it more
> difficult to reach that goal or require reverting later on, as I feel
> they are adding more differences between the PE binary and the ELF
> one.

There were such plans, yes, but from the last round of that series
I seem to recall that there was at least one issue breaking this
idea. So no, at this point I'm not intending to take precautions to
make that work easier (or not further complicate it). This said, I
don't think the change here complicates anything there.

> The code LGTM, but I think at least I would like the commit message to
> be expanded.

Well, once I know what exactly you're missing there, I can certainly
try to expand it.

Jan

Re: [PATCH 7/8] x86/EFI: keep debug info in xen.efi

2021-04-21 Thread Roger Pau Monné

On Thu, Apr 01, 2021 at 11:47:03AM +0200, Jan Beulich wrote:
> ... provided the linker supports it (which it does as of commit
> 2dfa8341e079 ["ELF DWARF in PE output"]).
> 
> Without mentioning debugging sections, the linker would put them at
> VA 0, thus making them unreachable by 32-bit (relative or absolute)
> relocations. If relocations were resolvable (or absent) the resulting
> binary would have invalid section RVAs (0 - __image_base__, truncated to
> 32 bits). Mentioning debugging sections without specifying an address
> will result in the linker putting them all on the same RVA. A loader is,
> afaict, free to reject loading such an image, as sections shouldn't
> overlap. (The above describes GNU ld 2.36 behavior, which - if deemed
> buggy - could change.)

Isn't just using (NOLOAD) to signal those sections shouldn't be
loaded enough, and thus don't care about it's RVA?

> 
> Make sure our up-to-16Mb padding doesn't unnecessarily further extend
> the image.
> 
> Take the opportunity and also switch to using $(call ld-option,...).
> 
> Requested-by: Andrew Cooper 
> Signed-off-by: Jan Beulich 
> ---
> This way we could also avoid discarding .comment for xen.efi.
> 
> I'd like to point out that the linking of the debug info takes far
> longer than the linking of the "normal" parts of the image. The change
> therefore has the downside of slowing down debug builds.
> 
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -126,8 +126,14 @@ export XEN_BUILD_EFI := $(shell $(CC) $(
>  CFLAGS-$(XEN_BUILD_EFI) += -DXEN_BUILD_EFI
>  
>  # Check if the linker supports PE.
> -EFI_LDFLAGS = $(patsubst -m%,-mi386pep,$(XEN_LDFLAGS)) --subsystem=10 
> --strip-debug
> -XEN_BUILD_PE := $(if $(XEN_BUILD_EFI),$(shell $(LD) $(EFI_LDFLAGS) -o 
> efi/check.efi efi/check.o 2>/dev/null && echo y))
> +EFI_LDFLAGS = $(patsubst -m%,-mi386pep,$(XEN_LDFLAGS)) --subsystem=10
> +XEN_BUILD_PE := $(if $(XEN_BUILD_EFI),$(call ld-option,$(EFI_LDFLAGS) 
> --image-base=0x1 -o efi/check.efi efi/check.o))
> +# If the above failed, it may be merely because of the linker not dealing 
> well
> +# with debug info. Try again with stripping it.
> +ifeq ($(CONFIG_DEBUG_INFO)-$(XEN_BUILD_PE),y-n)
> +EFI_LDFLAGS += --strip-debug
> +XEN_BUILD_PE := $(call ld-option,$(EFI_LDFLAGS) --image-base=0x1 -o 
> efi/check.efi efi/check.o)
> +endif
>  
>  ifeq ($(XEN_BUILD_PE),y)
>  
> @@ -232,6 +238,9 @@ note_file_option ?= $(note_file)
>  
>  ifeq ($(XEN_BUILD_PE),y)
>  $(TARGET).efi: prelink.o $(note_file) efi.lds efi/relocs-dummy.o efi/mkreloc
> +ifeq ($(CONFIG_DEBUG_INFO),y)
> + $(if $(filter --strip-debug,$(EFI_LDFLAGS)),echo,:) "Will strip debug 
> info from $(@F)"
> +endif
>   $(foreach base, $(VIRT_BASE) $(ALT_BASE), \
> $(LD) $(call EFI_LDFLAGS,$(base)) -T efi.lds -N $< 
> $(relocs-dummy) \
>   $(BASEDIR)/common/symbols-dummy.o $(note_file_option) 
> -o $(@D)/.$(@F).$(base).0 &&) :
> --- a/xen/arch/x86/xen.lds.S
> +++ b/xen/arch/x86/xen.lds.S
> @@ -312,10 +312,60 @@ SECTIONS
>  *(.reloc)
>  __base_relocs_end = .;
>}
> -  /* Trick the linker into setting the image size to exactly 16Mb. */
> -  . = ALIGN(__section_alignment__);
> -  DECL_SECTION(.pad) {
> -. = ALIGN(MB(16));
> +  .debug_abbrev ALIGN(1) (NOLOAD) : {
> + *(.debug_abbrev)
> +  }
> +  .debug_info ALIGN(1) (NOLOAD) : {
> +*(.debug_info)
> +*(.gnu.linkonce.wi.*)
> +  }
> +  .debug_types ALIGN(1) (NOLOAD) : {
> +*(.debug_types)
> +  }
> +  .debug_str ALIGN(1) (NOLOAD) : {
> +*(.debug_str)
> +  }
> +  .debug_line ALIGN(1) (NOLOAD) : {
> +*(.debug_line)
> +*(.debug_line.*)
> +  }
> +  .debug_line_str ALIGN(1) (NOLOAD) : {
> +*(.debug_line_str)
> +  }
> +  .debug_names ALIGN(4) (NOLOAD) : {
> +*(.debug_names)
> +  }
> +  .debug_frame ALIGN(4) (NOLOAD) : {
> +*(.debug_frame)
> +  }
> +  .debug_loc ALIGN(1) (NOLOAD) : {
> +*(.debug_loc)
> +  }
> +  .debug_loclists ALIGN(4) (NOLOAD) : {
> +*(.debug_loclists)
> +  }
> +  .debug_ranges ALIGN(8) (NOLOAD) : {
> +*(.debug_ranges)
> +  }
> +  .debug_rnglists ALIGN(4) (NOLOAD) : {
> +*(.debug_rnglists)
> +  }
> +  .debug_addr ALIGN(8) (NOLOAD) : {
> +*(.debug_addr)
> +  }
> +  .debug_aranges ALIGN(1) (NOLOAD) : {
> +*(.debug_aranges)
> +  }
> +  .debug_pubnames ALIGN(1) (NOLOAD) : {
> +*(.debug_pubnames)
> +  }
> +  .debug_pubtypes ALIGN(1) (NOLOAD) : {
> +*(.debug_pubtypes)
> +  }
> +  /* Trick the linker into setting the image size to no less than 16Mb. */
> +  __image_end__ = .;
> +  .pad ALIGN(__section_alignment__) : {
> +. = __image_end__ < __image_base__ + MB(16) ? ALIGN(MB(16)) : .;

I think this is inside an ifdef EFI region, since this is DWARF info
couldn't it also be present when building with EFI disabled?

Maybe I'm missing part of the point here, sorry.

Thanks, Roger.

Re: [PATCH 8/8] x86/EFI: don't have an overly large image size

2021-04-21 Thread Roger Pau Monné

On Thu, Apr 01, 2021 at 11:47:35AM +0200, Jan Beulich wrote:
> While without debug info the difference is benign (so far), since we pad
> the image to 16Mb anyway, forcing the .reloc section to a 2Mb boundary
> causes subsequent .debug_* sections to go farther beyond 16Mb than
> needed. There's no reason to advance . for establishing __2M_rwdata_end,
> as all data past _end is of no interest at runtime anymore anyway.

So you just expand the load size.

> 
> Signed-off-by: Jan Beulich 

Reviewed-by: Roger Pau Monné 

> ---
> This makes more explicit a possible latent problem with the ELF image:
> It ends at _end, not __2M_rwdata_end (advancing . as was done here does
> not have the effect of increasing the image size). Interestingly the
> conversion xen-syms => xen rounds up the program header specified size
> suitably, as per the comment "Do not use p_memsz: it does not include
> BSS alignment padding" in mkelf32.c. I do think this would instead want
> taking care of in the linker script. Commit 7a95e0a2c572 ("x86: properly
> calculate xen ELF end of image address") clearly only hacked an existing
> hack rather than addressing the root cause. Thoughts?

We should likely define _end after __2M_rwdata_end to account for this
padding?

Thanks, Roger.

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

2021-04-21 Thread Roger Pau Monné

On Wed, Apr 21, 2021 at 10:22:39AM +, Wei Liu wrote:
> On Tue, Apr 13, 2021 at 04:01:33PM +0200, Roger Pau Monne wrote:
> > Introduce a helper to obtain a compatible cpu policy based on two
> > input cpu policies. Currently this is done by and'ing all CPUID leaves
> > and MSR entries, except for MSR_ARCH_CAPABILITIES which has the RSBA
> > bit or'ed.
> > 
> 
> I thought canonical source for compatibility was to be put into the
> hypervisor, thus all this functionality would be in the hypervisor. Am I
> misremembering?

Andrew said something similar on v1, but I'm not able to figure how
this would be used by the hypervisor.

It's my understating that the toolstack will attempt to generate a CPU
policy and forward it to the hypervisor, which will either accept or
reject it based on the capabilities of the system. I'm not sure I see
why we would need to give the hypervisor two policies in order to
generate a resulting compatible one - it should all be done by the
toolstack AFAICT.

If there's a use case for this being in the hypervisor I'm happy to
add it there, but so far I haven't been able to come up with one
myself, and hence I don't see the need to make the code available.

Thanks, Roger.

Re: [PATCH v2] VT-d: Don't assume register-based invalidation is always supported

2021-04-21 Thread Chao Gao

On Wed, Apr 21, 2021 at 11:23:13AM +0200, Jan Beulich wrote:
>On 20.04.2021 18:17, Roger Pau Monné wrote:
>> On Tue, Apr 20, 2021 at 05:38:51PM +0200, Jan Beulich wrote:
>>> On 20.04.2021 17:08, Roger Pau Monné wrote:
 On Thu, Apr 02, 2020 at 04:06:06AM +0800, Chao Gao wrote:
> --- a/xen/drivers/passthrough/vtd/qinval.c
> +++ b/xen/drivers/passthrough/vtd/qinval.c
> @@ -442,6 +442,23 @@ int enable_qinval(struct vtd_iommu *iommu)
>  return 0;
>  }
>  
> +static int vtd_flush_context_noop(struct vtd_iommu *iommu, uint16_t did,
> +  uint16_t source_id, uint8_t 
> function_mask,
> +  uint64_t type, bool 
> flush_non_present_entry)
> +{
> +dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: Cannot flush CONTEXT.\n");
> +return -EIO;
> +}
> +
> +static int vtd_flush_iotlb_noop(struct vtd_iommu *iommu, uint16_t did,
> +uint64_t addr, unsigned int size_order,
> +uint64_t type, bool 
> flush_non_present_entry,
> +bool flush_dev_iotlb)
> +{
> +dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: Cannot flush IOTLB.\n");
> +return -EIO;
> +}

 I think I would add an ASSERT_UNREACHABLE() to both noop handlers
 above, as I would expect trying to use them without the proper mode
 being configured would point to an error elsewhere?
>>>
>>> If such an assertion triggered e.g. during S3 suspend/resume, it may
>>> lead to the box simply not doing anything useful, without there being
>>> any way to know what went wrong. If instead the system at least
>>> managed to resume, the log message could be observed.
>> 
>> Oh, OK then. I'm simply worried that people might ignore such one line
>> messages, maybe add a WARN?
>
>Hmm, yes, perhaps - would allow seeing right away where the call
>came from. Chao, I'd again be fine to flip the dprintk()-s to
>WARN()-s while committing. But of course only provided you and
>Kevin (as the maintainer) agree.

Sure, please go ahead.

Thanks
Chao

Re: [PATCH v9 01/13] x86/mm: rewrite virt_to_xen_l*e

2021-04-21 Thread Hongyan Xia

On Tue, 2021-04-20 at 14:17 +0200, Jan Beulich wrote:
> On 06.04.2021 13:05, Hongyan Xia wrote:
> > From: Wei Liu 
> > 
> > Rewrite those functions to use the new APIs. Modify its callers to
> > unmap
> > the pointer returned. Since alloc_xen_pagetable_new() is almost
> > never
> > useful unless accompanied by page clearing and a mapping, introduce
> > a
> > helper alloc_map_clear_xen_pt() for this sequence.
> > 
> > Signed-off-by: Wei Liu 
> > Signed-off-by: Hongyan Xia 
> > 
> > ---
> > Changed in v9:
> > - use domain_page_map_to_mfn() around the L3 table locking logic.
> > - remove vmap_to_mfn() changes since we now use xen_map_to_mfn().
> > 
> > Changed in v8:
> > - s/virtual address/linear address/.
> > - BUG_ON() on NULL return in vmap_to_mfn().
> > 
> > Changed in v7:
> > - remove a comment.
> > - use l1e_get_mfn() instead of converting things back and forth.
> > - add alloc_map_clear_xen_pt().
> 
> I realize this was in v7 already, but at v6 time the name I suggested
> was
> 
> void *alloc_mapped_pagetable(mfn_t *mfn);
> 
> "alloc_map_clear_xen", for my taste at least, is too strange. It
> doesn't really matter whether it's a page table for Xen's own use
> (it typically will be), so "xen" could be dropped. Clearing of a
> page table ought to also be the rule rather than the exception, so
> I'd see "clear" also dropped. I'd be fine with alloc_map_pt() or
> about any intermediate variant between that and my originally
> suggested name.

Sounds reasonable. I will go with alloc_mapped_pagetable().

> 
> > @@ -5108,7 +5140,8 @@ int map_pages_to_xen(
> >  unsigned int flags)
> >  {
> >  bool locking = system_state > SYS_STATE_boot;
> > -l2_pgentry_t *pl2e, ol2e;
> > +l3_pgentry_t *pl3e = NULL, ol3e;
> > +l2_pgentry_t *pl2e = NULL, ol2e;
> >  l1_pgentry_t *pl1e, ol1e;
> >  unsigned int  i;
> >  int rc = -ENOMEM;
> > @@ -5132,15 +5165,16 @@ int map_pages_to_xen(
> >  
> >  while ( nr_mfns != 0 )
> >  {
> > -l3_pgentry_t *pl3e, ol3e;
> > -
> > +/* Clean up the previous iteration. */
> >  L3T_UNLOCK(current_l3page);
> > +UNMAP_DOMAIN_PAGE(pl3e);
> > +UNMAP_DOMAIN_PAGE(pl2e);
> 
> Doing this here suggests the lower-case equivalent is needed at the
> out label, even without looking at the rest of the function (doing
> so confirms the suspicion, as there's at least one "goto out" with
> pl2e clearly still mapped).
> 
> > @@ -5305,6 +5339,8 @@ int map_pages_to_xen(
> >  pl1e = virt_to_xen_l1e(virt);
> >  if ( pl1e == NULL )
> >  goto out;
> > +
> > +UNMAP_DOMAIN_PAGE(pl1e);
> >  }
> 
> Unmapping the page right after mapping it looks suspicious. I see
> that
> further down we have
> 
> pl1e  = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
> 
> but don't you need to also change that? Actually, you do in patch 2,
> but the latest then the double mapping should imo be avoided.

I would say the code was already suspicious to begin with, since pl1e
would be overwritten immediately below even before this patch. The
purpose of the virt_to_xen_l1e() is only to populate the L1 table.

Performance-wise the double map should be pretty harmless since the
mapping is in the cache, so I actually prefer it as is. Alternatively,
I can initialise pl1e to NULL at the beginning of the block and only do
the

pl1e  = l2e_to_l1e(*pl2e) + l1_table_offset(virt);

when the pl1e is still NULL. If you are okay I will go with this.

> 
> > @@ -5505,6 +5542,7 @@ int populate_pt_range(unsigned long virt,
> > unsigned long nr_mfns)
> >  int modify_xen_mappings(unsigned long s, unsigned long e, unsigned
> > int nf)
> >  {
> >  bool locking = system_state > SYS_STATE_boot;
> > +l3_pgentry_t *pl3e = NULL;
> >  l2_pgentry_t *pl2e;
> >  l1_pgentry_t *pl1e;
> >  unsigned int  i;
> 
> And here we have the opposite situation: You don't set pl2e to NULL
> and the function only uses l3e_to_l2e() to initialize the variable,
> yet ...
> 
> > @@ -5761,6 +5799,8 @@ int modify_xen_mappings(unsigned long s,
> > unsigned long e, unsigned int nf)
> >  
> >   out:
> >  L3T_UNLOCK(current_l3page);
> > +unmap_domain_page(pl2e);
> > +unmap_domain_page(pl3e);
> 
> ... here you try to unmap it. Did the two respective hunks somehow
> magically get swapped?

Since the +-3 contexts of the two hunks are exactly the same, I have
strong suspicion what you said is exactly what happened. Thank you for
spotting this.

> 
> > --- a/xen/common/vmap.c
> > +++ b/xen/common/vmap.c
> > @@ -1,6 +1,7 @@
> >  #ifdef VMAP_VIRT_START
> >  #include 
> >  #include 
> > +#include 
> 
> Why is this needed? (Looks like a now stale change.)

Stale change indeed. Will be removed.

Hongyan

Re: [PATCH v9 01/13] x86/mm: rewrite virt_to_xen_l*e

2021-04-21 Thread Jan Beulich

On 21.04.2021 13:33, Hongyan Xia wrote:
> On Tue, 2021-04-20 at 14:17 +0200, Jan Beulich wrote:
>> On 06.04.2021 13:05, Hongyan Xia wrote:
>>> @@ -5305,6 +5339,8 @@ int map_pages_to_xen(
>>>  pl1e = virt_to_xen_l1e(virt);
>>>  if ( pl1e == NULL )
>>>  goto out;
>>> +
>>> +UNMAP_DOMAIN_PAGE(pl1e);
>>>  }
>>
>> Unmapping the page right after mapping it looks suspicious. I see
>> that
>> further down we have
>>
>> pl1e  = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
>>
>> but don't you need to also change that? Actually, you do in patch 2,
>> but the latest then the double mapping should imo be avoided.
> 
> I would say the code was already suspicious to begin with, since pl1e
> would be overwritten immediately below even before this patch. The
> purpose of the virt_to_xen_l1e() is only to populate the L1 table.
> 
> Performance-wise the double map should be pretty harmless since the
> mapping is in the cache, so I actually prefer it as is. Alternatively,
> I can initialise pl1e to NULL at the beginning of the block and only do
> the
> 
> pl1e  = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
> 
> when the pl1e is still NULL. If you are okay I will go with this.

I'd prefer this alternative, indeed, as it'll make the overall
code look less odd. Albeit maybe not here, but in the subsequent
patch.

Jan

Re: [PATCH v2] xen/pci: Refactor PCI MSI interrupts related code

2021-04-21 Thread Rahul Singh

Hi Jan,

> On 21 Apr 2021, at 10:33 am, Jan Beulich  wrote:
> 
> On 21.04.2021 11:15, Rahul Singh wrote:
>> Hi Roger,
>> 
>>> On 21 Apr 2021, at 9:16 am, Roger Pau Monné  wrote:
>>> 
>>> On Wed, Apr 21, 2021 at 08:07:08AM +, Rahul Singh wrote:
 Hi Jan,
 
> On 20 Apr 2021, at 4:36 pm, Jan Beulich  wrote:
> 
> On 20.04.2021 15:45, Rahul Singh wrote:
>>> On 19 Apr 2021, at 1:33 pm, Jan Beulich  wrote:
>>> On 19.04.2021 13:54, Julien Grall wrote:
 For the time being, I think move this code in x86 is a lot better than 
 #ifdef or keep the code in common code.
>>> 
>>> Well, I would perhaps agree if it ended up being #ifdef CONFIG_X86.
>>> I would perhaps not agree if there was a new CONFIG_* which other
>>> (future) arch-es could select if desired.
>> 
>> I agree with Julien moving the code to x86 file as currently it is 
>> referenced only in x86 code
>> and as of now we are not sure how other architecture will implement the 
>> Interrupt remapping
>> (via IOMMU or any other means).  
>> 
>> Let me know if you are ok with moving the code to x86.
> 
> I can't answer this with "yes" or "no" without knowing what the 
> alternative
> would be. As said, if the alternative is CONFIG_X86 #ifdef-ary, then yes.
> If a separate CONFIG_* gets introduced (and selected by X86), then a
> separate file (getting built only when that new setting is y) would seem
> better to me.
 
 I just made a quick patch. Please let me know if below patch is ok. I move 
 the definition to  "passthrough/x86/iommu.c” file.
 
 diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
 index 705137f8be..199ce08612 100644
 --- a/xen/drivers/passthrough/pci.c
 +++ b/xen/drivers/passthrough/pci.c
 @@ -1303,13 +1303,6 @@ static int __init setup_dump_pcidevs(void)
 }
 __initcall(setup_dump_pcidevs);
 
 -int iommu_update_ire_from_msi(
 -struct msi_desc *msi_desc, struct msi_msg *msg)
 -{
 -return iommu_intremap
 -   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 
 0;
 -}
 -
 static int iommu_add_device(struct pci_dev *pdev)
 {
const struct domain_iommu *hd;
 diff --git a/xen/drivers/passthrough/x86/iommu.c 
 b/xen/drivers/passthrough/x86/iommu.c
 index b90bb31bfe..cf51dec564 100644
 --- a/xen/drivers/passthrough/x86/iommu.c
 +++ b/xen/drivers/passthrough/x86/iommu.c
 @@ -340,6 +340,13 @@ bool arch_iommu_use_permitted(const struct domain *d)
likely(!p2m_get_hostp2m(d)->global_logdirty));
 }
 
 +int iommu_update_ire_from_msi(
 +struct msi_desc *msi_desc, struct msi_msg *msg)
 +{
 +return iommu_intremap
 +   ? iommu_call(&iommu_ops, update_ire_from_msi, msi_desc, msg) : 
 0;
 +}
 +
 /*
 * Local variables:
 * mode: C
 diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
 index ea0cd0f1a2..bd42d87b72 100644
 --- a/xen/include/xen/iommu.h
 +++ b/xen/include/xen/iommu.h
 @@ -243,7 +243,6 @@ struct iommu_ops {
   u8 devfn, device_t *dev);
 #ifdef CONFIG_HAS_PCI
int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
 -int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
 *msg);
 #endif /* HAS_PCI */
 
void (*teardown)(struct domain *d);
 @@ -272,6 +271,7 @@ struct iommu_ops {
int (*adjust_irq_affinities)(void);
void (*sync_cache)(const void *addr, unsigned int size);
void (*clear_root_pgtable)(struct domain *d);
 +int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg 
 *msg);
>>> 
>>> You also need to move the function prototype
>>> (iommu_update_ire_from_msi) from iommu.h into asm-x86/iommu.h, or
>>> maybe you could just define the function itself as static inline in
>>> asm-x86/iommu.h?
>> 
>> 
>> Ok. I will move function prototype (iommu_update_ire_from_msi) from 
>> iommu.h into asm-x86/iommu.h.
>> 
>> I first tried to make the function as static inline in "asm-x86/iommu.h" but 
>> after 
>> that I observe the compilation error for debug build because "asm/iommu.h” 
>> is included in "xen/iommu.h” before iommu_call() is defined in "xen/iommu.h”.
>> That's why I decided to move it to the "passthrough/x86/iommu.c” file.
> 
> Which in turn would be an argument for keeping it in xen/iommu.h and
> wrap it in an #ifdef.
> 

Ok. I will move the definition of iommu_update_ire_from_msi() to 
"xen/iommu.h” and will wrap it under CONFIG_X86

If everyone is ok with the above approach then I will send the next
version of the patch series for review.

Regards,
Rahul

> Jan

Re: [PATCH 6/8] x86/EFI: avoid use of GNU ld's --disable-reloc-section when possible

2021-04-21 Thread Jan Beulich

On 21.04.2021 12:21, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:46:44AM +0200, Jan Beulich wrote:
>> @@ -189,7 +208,11 @@ EFI_LDFLAGS += --no-insert-timestamp
>>  endif
>>  
>>  $(TARGET).efi: VIRT_BASE = 0x$(shell $(NM) efi/relocs-dummy.o | sed -n 's, 
>> A VIRT_START$$,,p')
>> +ifeq ($(MKRELOC),:)
>> +$(TARGET).efi: ALT_BASE :=
>> +else
>>  $(TARGET).efi: ALT_BASE = 0x$(shell $(NM) efi/relocs-dummy.o | sed -n 's, A 
>> ALT_START$$,,p')
> 
> Could you maybe check whether $(relocs-dummy) is set as the condition
> here and use it here instead of efi/relocs-dummy.o?

I can use it in the ifeq() if you think that's neater (the current way
is minimally shorter), but using it in the ALT_BASE assignment would
make this differ more from the VIRT_BASE one, which I'd like to avoid.

>> @@ -210,16 +233,16 @@ note_file_option ?= $(note_file)
>>  ifeq ($(XEN_BUILD_PE),y)
>>  $(TARGET).efi: prelink.o $(note_file) efi.lds efi/relocs-dummy.o efi/mkreloc
> 
> Do you need to also replace the target prerequisite to use $(relocs-dummy)?

No - without the dependency the file might not be generated (if this ends
up being the only real dependency on $(BASEDIR)/arch/x86/efi/built_in.o).
We can't rely on $(note_file) resolving to efi/buildid.o, and hence
recursing into $(BASEDIR)/arch/x86/efi/ may not otherwise get triggered.
Yet to calculate VIRT_BASE we need efi/relocs-dummy.o.

>> --- a/xen/arch/x86/efi/efi-boot.h
>> +++ b/xen/arch/x86/efi/efi-boot.h
>> @@ -86,10 +86,12 @@ static void __init efi_arch_relocate_ima
>>  }
>>  break;
>>  case PE_BASE_RELOC_DIR64:
>> -if ( in_page_tables(addr) )
>> -blexit(L"Unexpected relocation type");
>>  if ( delta )
>> +{
>>  *(u64 *)addr += delta;
>> +if ( in_page_tables(addr) )
>> +*(u64 *)addr += xen_phys_start;
> 
> Doesn't the in_page_tables check and modification also apply when
> delta == 0?

No, it would be wrong to do so: efi_arch_load_addr_check() sets
xen_phys_start, and subsequently (to still be able to produce human
visible output) we invoke efi_arch_relocate_image() with an argument
of 0. Later we'll invoke efi_arch_relocate_image() a 2nd time (when
having exited boot services already, and hence when we can't produce
output via EFI anymore, and we can't produce output yet via Xen's
normal mechanisms), with a non-zero argument. Thus we'd add in
xen_phys_start twice.

> Maybe you could just break on !delta to reduce indentation if none of
> this applies then?

Could be done, sure, and if you think this makes sufficiently much
of a difference I can add a patch. The purpose here though it to
have this and the preceding case block look as similar as possible,
yet also not re-format that earlier one (which would be an unrelated
change).

Jan

Re: [PATCH] xen/arm: Ensure the vCPU context is seen before clearing the _VPF_down

2021-04-21 Thread Julien Grall


Hi Stefano,

On 21/04/2021 01:38, Stefano Stabellini wrote:

On Tue, 20 Apr 2021, Julien Grall wrote:

AFAICT, there is nothing in the implementation of XEN_DOMCTL_getvcpucontext
that justify the extra barrier (assuming we consider vcpu_pause() as a full
memory barrier).

 From your e-mail, I also could not infer what is your exact concern regarding
the memory ordering. If you have something in mind, then would you mind to
provide a sketch showing what could go wrong?


Let me start with what I had in mind:

initialized = v->is_initialized;
smp_rmb();
if ( !initialized )
   goto getvcpucontext_out;


Without smp_rmb there are no guarantees that we see an up-to-date value
of v->is_initialized, right?


No. The smp_*mb() barriers only guarantee an ordering, it doesn't say 
anything about when the values will be seen. The write may in fact still 
be in the write buffer of the other CPU.



This READ of v->is_initialized is supposed
to pair with the WRITE in arch_set_info_guest.


I am assuming you meant the access and not the barrier, right? 
Regardless, the barriers used, the access will happen in any order. IOW 
the following two ordering is possible:


Sequence 1:

CPU0: v->vcpu_initialized = 1
CPU1: read v->vcpu_initialized

Sequence 2:

CPU1: read v->vcpu_initialized
CPU0: v->vcpu_initialized = 0

In the first sequence, CPU1 will observe 0 and therefore bailout. In the 
second sequence, we will be observing 1 and continue.




Relying on the the barrier in vcpu_pause is OK only if is_initialised
was already read as "1".

I think it might be OK in this case, because probably nothing wrong
happens if we read the old value of v->is_initialized as "0" but it
doesn't seem to be a good idea.


The smp_rmb() is not going to give you that guarantee. If you need it, 
then you mostly likely want to use a synchronization primitives such as 
spin_lock().



Theoretically it could pass a very long
time before we see v->is_initialized as "1" without the smp_rmb.


I have the impression that there might be confusion about what I am 
aiming to achieve.


The code today looks roughly like:

1) if ( v->is_initialized )
2)   return;
3) vcpu_pause();
4) /* Access registers */

My aim is to ensure that a processor will not READ any value for 4) are 
not happening before 1). If this happens, then we may provide garbagge 
to the domctl.


Returning an error is a lot better because the caller of the domctl will 
know the vCPU is not yet setup and can provide the infrastructure to try 
again.


From this PoV, your proposal and my two proposals are functionally 
equivalent. The main differences is whether there is an extra barrier 
and how easy is it to figure out the ordering.


Speaking with Ash, your proposal is probably the easiest one to 
understand. However, it also adds a barrier for the failure path (which 
is pointless).


I would like to avoid barriers when they are not necessary. Hence why I 
prefer the vcpu_pause() solution (we may want to add a comment).


Although, I could live with your proposal if the others are happy with 
it (Andrew? Jan?) because this is not an hot path.


Cheers,

--
Julien Grall

[xen-4.14-testing test] 161324: tolerable FAIL - PUSHED

2021-04-21 Thread osstest service owner

flight 161324 xen-4.14-testing real [real]
http://logs.test-lab.xenproject.org/osstest/logs/161324/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-libvirt 16 saverestore-support-checkfail  like 161048
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 161048
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 161048
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 161048
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 161048
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 161048
 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail  like 161048
 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 161048
 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 161048
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 161048
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 161048
 test-amd64-i386-libvirt-xsm  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail  never pass
 test-amd64-amd64-libvirt 15 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail   never pass
 test-amd64-i386-xl-pvshim14 guest-start  fail   never pass
 test-arm64-arm64-xl-seattle  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-seattle  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-checkfail   never pass
 test-amd64-i386-libvirt  15 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-armhf-armhf-xl-arndale  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit1  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-armhf-armhf-xl-credit2  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  16 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 15 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 16 saverestore-support-checkfail never pass
 test-armhf-armhf-xl  15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 15 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  14 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  15 saverestore-support-checkfail   never pass

version targeted for testing:
 xen  b14d8345e530a54710700ec5fcb0879c2e056d31
baseline version:
 xen  1ebc077a5608bd8eff7e6401696f16c9a266b73f

Last test of basis   161048  2021-04-12 10:36:19 Z9 days
Testing same since   161324  2021-04-20 10:36:29 Z1 days1 attempts


People who touched revisions under test:
  Andrew Cooper 
  Boris Ostrovsky 
  Frédéric Pierret 
  Jan Beulich 

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass

Re: [PATCH 2/8] x86/EFI: sections may not live at VA 0 in PE binaries

2021-04-21 Thread Roger Pau Monné

On Wed, Apr 21, 2021 at 12:32:42PM +0200, Jan Beulich wrote:
> On 21.04.2021 10:52, Roger Pau Monné wrote:
> > On Thu, Apr 01, 2021 at 11:44:45AM +0200, Jan Beulich wrote:
> >> PE binaries specify section addresses by (32-bit) RVA. GNU ld up to at
> >> least 2.36 would silently truncate the (negative) difference when a
> >> section is placed below the image base. Such sections would also be
> >> wrongly placed ahead of all "normal" ones. Since, for the time being,
> >> we build xen.efi with --strip-debug anyway, .stab* can't appear. And
> >> .comment has an entry in /DISCARD/ already anyway in the EFI case.
> >>
> >> Because of their unclear origin, keep the directives for the ELF case
> >> though.
> > 
> > It's my understadng thonse sections are only there for debug purposes,
> > and never part of the final xen binary as they are stripped?
> > 
> > Could we maybe remove the section load address of 0 and instead just
> > use the (NOLOAD) directive?
> 
> (NOLOAD) is meaningless for PE.
> 
> > Does it really matter to place them at address 0?
> 
> That's the convention for ELF, and also what ld defaults to for debugging
> sections.
> 
> > I also wonder, is this change fixing some existing bug, or it's just a
> > cleanup change?
> 
> If there were sections at 0, the resulting PE binary would end up broken.
> Prior to binutils 2.37 this brokenness is silent, i.e. the linker doesn't
> issue any form of diagnostic. The change therefore is addressing a latent
> issue - if any such section started being non-empty, we'd be in trouble.
> 
> > I also only see the .comment section in my binary output, so maybe
> > it's fine to just remove them from the script?
> 
> Which binary are you referring to - ELF or PE? In the former case, yes,
> that's what the statement is for. In the latter case I can't see how this
> would be, with .comment being explicitly part of /DISCARD/ in that case.

So from a bit of searching I just did it seems like stab sections
where used during the 90s with ELF, but that this has long been
superseded by DWARF 2 becoming the default in the late 90s, hence I
think it would be fine to just remove those sections even in the ELF
case?

Thanks, Roger.

Re: [PATCH 7/8] x86/EFI: keep debug info in xen.efi

2021-04-21 Thread Jan Beulich

On 21.04.2021 13:15, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:47:03AM +0200, Jan Beulich wrote:
>> ... provided the linker supports it (which it does as of commit
>> 2dfa8341e079 ["ELF DWARF in PE output"]).
>>
>> Without mentioning debugging sections, the linker would put them at
>> VA 0, thus making them unreachable by 32-bit (relative or absolute)
>> relocations. If relocations were resolvable (or absent) the resulting
>> binary would have invalid section RVAs (0 - __image_base__, truncated to
>> 32 bits). Mentioning debugging sections without specifying an address
>> will result in the linker putting them all on the same RVA. A loader is,
>> afaict, free to reject loading such an image, as sections shouldn't
>> overlap. (The above describes GNU ld 2.36 behavior, which - if deemed
>> buggy - could change.)
> 
> Isn't just using (NOLOAD) to signal those sections shouldn't be
> loaded enough, and thus don't care about it's RVA?

As said in a reply earlier on another sub-thread, (NOLOAD) is meaningless
for PE. The fact that I add them nevertheless is just for docs purposes
(or if, in the future, the item gains significance).

The main problem though isn't "load" vs "no-load" but, as I thought I
have expressed in the description, that there's no "don't care about it's
RVA" in PE. All sections have to have a non-zero VA above the image base.
This is the only way via which sane RVA values can result. If sections
get placed at VA 0 (which is perfectly fine for ELF), the RVA would be a
huge negative number truncated to 32 bits. (Again, prior to binutils 2.37
this will go all silently.) Plus the respective sections would come first
(rather than last) in the binary (which by itself may or may not be a
problem for the EFI loader, but I wouldn't want to chance it).

>> --- a/xen/arch/x86/xen.lds.S
>> +++ b/xen/arch/x86/xen.lds.S
>> @@ -312,10 +312,60 @@ SECTIONS
>>  *(.reloc)
>>  __base_relocs_end = .;
>>}
>> -  /* Trick the linker into setting the image size to exactly 16Mb. */
>> -  . = ALIGN(__section_alignment__);
>> -  DECL_SECTION(.pad) {
>> -. = ALIGN(MB(16));
>> +  .debug_abbrev ALIGN(1) (NOLOAD) : {
>> + *(.debug_abbrev)
>> +  }
>> +  .debug_info ALIGN(1) (NOLOAD) : {
>> +*(.debug_info)
>> +*(.gnu.linkonce.wi.*)
>> +  }
>> +  .debug_types ALIGN(1) (NOLOAD) : {
>> +*(.debug_types)
>> +  }
>> +  .debug_str ALIGN(1) (NOLOAD) : {
>> +*(.debug_str)
>> +  }
>> +  .debug_line ALIGN(1) (NOLOAD) : {
>> +*(.debug_line)
>> +*(.debug_line.*)
>> +  }
>> +  .debug_line_str ALIGN(1) (NOLOAD) : {
>> +*(.debug_line_str)
>> +  }
>> +  .debug_names ALIGN(4) (NOLOAD) : {
>> +*(.debug_names)
>> +  }
>> +  .debug_frame ALIGN(4) (NOLOAD) : {
>> +*(.debug_frame)
>> +  }
>> +  .debug_loc ALIGN(1) (NOLOAD) : {
>> +*(.debug_loc)
>> +  }
>> +  .debug_loclists ALIGN(4) (NOLOAD) : {
>> +*(.debug_loclists)
>> +  }
>> +  .debug_ranges ALIGN(8) (NOLOAD) : {
>> +*(.debug_ranges)
>> +  }
>> +  .debug_rnglists ALIGN(4) (NOLOAD) : {
>> +*(.debug_rnglists)
>> +  }
>> +  .debug_addr ALIGN(8) (NOLOAD) : {
>> +*(.debug_addr)
>> +  }
>> +  .debug_aranges ALIGN(1) (NOLOAD) : {
>> +*(.debug_aranges)
>> +  }
>> +  .debug_pubnames ALIGN(1) (NOLOAD) : {
>> +*(.debug_pubnames)
>> +  }
>> +  .debug_pubtypes ALIGN(1) (NOLOAD) : {
>> +*(.debug_pubtypes)
>> +  }
>> +  /* Trick the linker into setting the image size to no less than 16Mb. */
>> +  __image_end__ = .;
>> +  .pad ALIGN(__section_alignment__) : {
>> +. = __image_end__ < __image_base__ + MB(16) ? ALIGN(MB(16)) : .;
> 
> I think this is inside an ifdef EFI region, since this is DWARF info
> couldn't it also be present when building with EFI disabled?

Of course (and it's not just "could" but "will"), yet the linker will
do fine (and perhaps even better) without when building ELF. Also
note that we'll be responsible for keeping the list of sections up-to-
date. The linker will recognize Dwarf sections by looking for a
.debug_ prefix. We can't use such here (or at least I'm not aware of
a suitable mechanism); .debug_* would mean munging together all the
different kinds of Dwarf sections. Hence by limiting the explicit
enumeration to PE, I'm trying to avoid anomalies in ELF down the road.

Jan

Re: [PATCH 8/8] x86/EFI: don't have an overly large image size

2021-04-21 Thread Jan Beulich

On 21.04.2021 13:18, Roger Pau Monné wrote:
> On Thu, Apr 01, 2021 at 11:47:35AM +0200, Jan Beulich wrote:
>> While without debug info the difference is benign (so far), since we pad
>> the image to 16Mb anyway, forcing the .reloc section to a 2Mb boundary
>> causes subsequent .debug_* sections to go farther beyond 16Mb than
>> needed. There's no reason to advance . for establishing __2M_rwdata_end,
>> as all data past _end is of no interest at runtime anymore anyway.
> 
> So you just expand the load size.

Shrink. Or maybe I'm misunderstanding you.

>> Signed-off-by: Jan Beulich 
> 
> Reviewed-by: Roger Pau Monné 

Thanks.

>> ---
>> This makes more explicit a possible latent problem with the ELF image:
>> It ends at _end, not __2M_rwdata_end (advancing . as was done here does
>> not have the effect of increasing the image size). Interestingly the
>> conversion xen-syms => xen rounds up the program header specified size
>> suitably, as per the comment "Do not use p_memsz: it does not include
>> BSS alignment padding" in mkelf32.c. I do think this would instead want
>> taking care of in the linker script. Commit 7a95e0a2c572 ("x86: properly
>> calculate xen ELF end of image address") clearly only hacked an existing
>> hack rather than addressing the root cause. Thoughts?
> 
> We should likely define _end after __2M_rwdata_end to account for this
> padding?

I don't think this would help - we'd need to arrange for the image size
to cover that extra padding. Like advancing . doesn't grow the image
size, I also don't think placing _end later would do.

Jan

Re: [PATCH 2/8] x86/EFI: sections may not live at VA 0 in PE binaries

2021-04-21 Thread Jan Beulich

On 21.04.2021 14:57, Roger Pau Monné wrote:
> So from a bit of searching I just did it seems like stab sections
> where used during the 90s with ELF, but that this has long been
> superseded by DWARF 2 becoming the default in the late 90s, hence I
> think it would be fine to just remove those sections even in the ELF
> case?

Well, maybe. Even up-to-date gcc still supports -gstabs. Plus I seem
to have a vague recollection that Andrew objected to their removal
at some (not overly distant) point. I can't find any reference there
though, so Andrew: Do you have any specific thoughts here?

Of course with the stabs sections gone, .comment would remain. I'm
very firm in not wanting to leave any statements there putting
sections at VA zero. Irrespective of .comment (to take this example)
being listed under /DISCARD/ for PE. This would only be acceptable
(to me) if ld would always have at least warned about such sections.

Jan

Re: [PATCH v9 10/13] x86/smpboot: switch clone_mapping() to new APIs

2021-04-21 Thread Hongyan Xia

On Tue, 2021-04-20 at 14:32 +0200, Jan Beulich wrote:
> On 06.04.2021 13:05, Hongyan Xia wrote:
> > @@ -742,51 +742,58 @@ static int clone_mapping(const void *ptr,
> > root_pgentry_t *rpt)
> >  }
> >  }
> >  
> > +UNMAP_DOMAIN_PAGE(pl1e);
> > +UNMAP_DOMAIN_PAGE(pl2e);
> > +UNMAP_DOMAIN_PAGE(pl3e);
> 
> Just one minor remark: A pedantic(?) compiler might warn about the
> setting to NULL of pl3e here, when
> 
> >  if ( !(root_get_flags(rpt[root_table_offset(linear)]) &
> > _PAGE_PRESENT) )
> >  {
> > -pl3e = alloc_xen_pagetable();
> > +mfn_t l3mfn;
> > +
> > +pl3e = alloc_map_clear_xen_pt(&l3mfn);
> >  rc = -ENOMEM;
> >  if ( !pl3e )
> >  goto out;
> > -clear_page(pl3e);
> >  l4e_write(&rpt[root_table_offset(linear)],
> > -  l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
> > +  l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR));
> >  }
> >  else
> > -pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]);
> > +pl3e = map_l3t_from_l4e(rpt[root_table_offset(linear)]);
> 
> ... it is guaranteed to get initialized again before any further
> consumption. IOW strictly speaking the last of those three would
> want to be unmap_domain_page(), just like you have ...
> 
> > @@ -802,6 +809,9 @@ static int clone_mapping(const void *ptr,
> > root_pgentry_t *rpt)
> >  
> >  rc = 0;
> >   out:
> > +unmap_domain_page(pl1e);
> > +unmap_domain_page(pl2e);
> > +unmap_domain_page(pl3e);
> >  return rc;
> >  }
> 
> ... here.

True. I will switch to lower case for pl3e just in case.

Hongyan

Re: x86: memset() / clear_page() / page scrubbing

2021-04-21 Thread Jan Beulich

On 15.04.2021 18:21, Andrew Cooper wrote:
> On 14/04/2021 09:12, Jan Beulich wrote:
>> On 13.04.2021 15:17, Andrew Cooper wrote:
>>> Do you have actual numbers from these experiments?
>> Attached is the collected raw output from a number of systems.
> 
> Wow Tulsa is vintage.  Is that new enough to have nonstop_tsc ?

No.

>>> For memset(), please don't move in the direction of memcpy().  memcpy()
>>> is problematic because the common case is likely to be a multiple of 8
>>> bytes, meaning that we feed 0 into the REP MOVSB, and this a hit wanting
>>> avoiding.
>> And you say this despite me having pointed out that REP STOSL may
>> be faster in a number of cases? Or do you mean to suggest we should
>> branch around the trailing REP {MOV,STO}SB?
>>
>>>   The "Fast Zero length $FOO" bits on future parts indicate
>>> when passing %ecx=0 is likely to be faster than branching around the
>>> invocation.
>> IOW down the road we could use alternatives patching to remove such
>> branches. But this of course is only if we don't end up using
>> exclusively REP MOVSB / REP STOSB there anyway, as you seem to be
>> suggesting ...
>>
>>> With ERMS/etc, our logic should be a REP MOVSB/STOSB only, without any
>>> cleverness about larger word sizes.  The Linux forms do this fairly well
>>> already, and probably better than Xen, although there might be some room
>>> for improvement IMO.
>> ... here.
>>
>> As to the Linux implementations - for memcpy_erms() I don't think
>> I see any room for improvement in the function itself. We could do
>> alternatives patching somewhat differently (and I probably would).
>> For memset_erms() the tiny bit of improvement over Linux'es code
>> that I would see is to avoid the partial register access when
>> loading %al. But to be honest - in both cases I wouldn't have
>> bothered looking at their code anyway, if you hadn't pointed me
>> there.
> 
> Answering multiple of the points together.
> 
> Yes, the partial register access on %al was one thing I spotted, and
> movbzl would be an improvement.  The alternatives are a bit weird, but
> they're best as they are IMO.  It makes a useful enough difference to
> backtraces/etc, and unconditional jmp's are about as close to free as
> you can get these days.
> 
> On an ERMS system, we want to use REP MOVSB unilaterally.  It is my
> understanding that it is faster across the board than any algorithm
> variation trying to use wider accesses.

Not according to the numbers I've collected. There are cases where
clearing a full page via REP STOS{L,Q} is (often just a little)
faster. Whether this also applies to MOVS I can't tell.

>>> It is worth nothing that we have extra variations of memset/memcpy where
>>> __builtin_memcpy() gets expanded inline, and the result is a
>>> compiler-chosen sequence, and doesn't hit any of our optimised
>>> sequences.  I'm not sure what to do about this, because there is surely
>>> a larger win from the cases which can be turned into a single mov, or an
>>> elided store/copy, than using a potentially inefficient sequence in the
>>> rare cases.  Maybe there is room for a fine-tuning option to say "just
>>> call memset() if you're going to expand it inline".
>> You mean "just call memset() instead of expanding it inline"?
> 
> I think want I really mean is "if the result of optimising memset() is
> going to result in a REP instruction, call memset() instead".
> 
> You want the compiler to do conversion to single mov's/etc, but you
> don't want is ...
> 
>> If the inline expansion is merely REP STOS, I'm not sure we'd
>> actually gain anything from keeping the compiler from expanding it
>> inline. But if the inline construct was more complicated (as I
>> observe e.g. in map_vcpu_info() with gcc 10), then it would likely
>> be nice if there was such a control. I'll take note to see if I
>> can find anything.
> 
> ... this.  What GCC currently expands inline is a REP MOVS{L,Q}, with
> the first and final element done manually ahead of the REP, presumably
> for prefetching/pagewalk reasons.

Not sure about the reasons, but the compiler doesn't always do it
like this - there are also cases of plain REP STOSQ. My initial
guess the splitting of the first and last elements was when the
compiler couldn't prove the buffer is 8-byte aligned and a
multiple of 8 bytes in size.

>>> For all set/copy operations, whether you want non-temporal or not
>>> depends on when/where the lines are next going to be consumed.  Page
>>> scrubbing in idle context is the only example I can think of where we
>>> aren't plausibly going to consume the destination imminently.  Even
>>> clear/copy page in a hypercall doesn't want to be non-temporal, because
>>> chances are good that the vcpu is going to touch the page on return.
>> I'm afraid the situation isn't as black-and-white. Take HAP or
>> IOMMU page table allocations, for example: They need to clear the
>> full page, yes. But often this is just to then insert one single
>> entry, i.e. re-use exactly o

[PATCH] tools/xenstored: Wire properly the command line option -M/--path-max

2021-04-21 Thread Julien Grall

From: Julien Grall 

The command line option -M/--path-max was meant to be added by
commit 924bf8c793cb "tools/xenstore: rework path length check" but this
wasn't wired through properly.

Fix it by adding the missing "case 'M':".

Fixes: 924bf8c793cb ("tools/xenstore: rework path length check")
Signed-off-by: Julien Grall 
---
 tools/xenstore/xenstored_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 591b28e4552f..c638e46221eb 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -2148,6 +2148,7 @@ int main(int argc, char *argv[])
case 'A':
quota_nb_perms_per_node = strtol(optarg, NULL, 10);
break;
+   case 'M':
quota_max_path_len = strtol(optarg, NULL, 10);
quota_max_path_len = min(XENSTORE_REL_PATH_MAX,
 quota_max_path_len);
-- 
2.17.1

Re: [PATCH] build: centralize / unify asm-offsets generation

2021-04-21 Thread Jan Beulich

On 20.04.2021 18:20, Roger Pau Monné wrote:
> On Tue, Apr 20, 2021 at 05:47:49PM +0200, Jan Beulich wrote:
>> On 20.04.2021 17:29, Roger Pau Monné wrote:
>>> On Thu, Apr 01, 2021 at 10:33:47AM +0200, Jan Beulich wrote:
 @@ -399,7 +399,11 @@ include/xen/compile.h: include/xen/compi
@sed -rf tools/process-banner.sed < .banner >> $@.new
@mv -f $@.new $@
  
 -include/asm-$(TARGET_ARCH)/asm-offsets.h: 
 arch/$(TARGET_ARCH)/asm-offsets.s
 +asm-offsets.s: arch/$(TARGET_ARCH)/$(TARGET_SUBARCH)/asm-offsets.c
 +  $(CC) $(filter-out -Wa$(comma)% -flto,$(c_flags)) -S -g0 -o $@.new -MQ 
 $@ $<
 +  $(call move-if-changed,$@.new,$@)
>>>
>>> Won't it be more natural to keep the .s file in arch/$(TARGET_ARCH)?
>>
>> Yes and no: Yes as far as the actual file location is concerned.
>> No when considering where it gets generated: I generally consider
>> it risky to generate files outside of the directory where make
>> currently runs. There may be good reasons for certain exceptions,
>> but personally I don't see this case as good enough a reason.
>>
>> Somewhat related - if doing as you suggest, which Makefile's
>> clean: rule should clean up that file in your opinion?
> 
> The clean rule should be in the makefile where it's generated IMO,
> same as asm-offsets.h clean rule currently in xen/Makefile.
> 
>> Nevertheless, if there's general agreement that keeping the file
>> there is better, I'd make the change and simply ignore my unhappy
>> feelings about it.
> 
> I don't have a strong opinion, it just feels weird to have this IMO
> stray asm-offsets.s outside of it's arch directory, taking into
> account that we have asm-offsets.h generated from xen/Makefile into an
> arch specific directory already as a precedent in that makefile.

Well, asm-offsets.h generation doesn't involve the compiler, hence
no .*.d files get generated and want including later. For
asm-offsets.s to have dependencies properly honored, if we
generated it in xen/arch/, .asm-offsets.d would also end up
there, and hence including of it would need separately taking care
of.

Jan

Re: [PATCH] tools/xenstored: Wire properly the command line option -M/--path-max

2021-04-21 Thread Luca Fancellu




> On 21 Apr 2021, at 15:02, Julien Grall  wrote:
> 
> From: Julien Grall 
> 
> The command line option -M/--path-max was meant to be added by
> commit 924bf8c793cb "tools/xenstore: rework path length check" but this
> wasn't wired through properly.
> 
> Fix it by adding the missing "case 'M':".
> 
> Fixes: 924bf8c793cb ("tools/xenstore: rework path length check")
> Signed-off-by: Julien Grall 
> ---
> tools/xenstore/xenstored_core.c | 1 +
> 1 file changed, 1 insertion(+)
> 
> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
> index 591b28e4552f..c638e46221eb 100644
> --- a/tools/xenstore/xenstored_core.c
> +++ b/tools/xenstore/xenstored_core.c
> @@ -2148,6 +2148,7 @@ int main(int argc, char *argv[])
>   case 'A':
>   quota_nb_perms_per_node = strtol(optarg, NULL, 10);
>   break;
> + case 'M':
>   quota_max_path_len = strtol(optarg, NULL, 10);
>   quota_max_path_len = min(XENSTORE_REL_PATH_MAX,
>quota_max_path_len);
> — 
> 2.17.1
> 
> 

Reviewed-by: Luca Fancellu

[PATCH v2 00/20] further population of xen/lib/

2021-04-21 Thread Jan Beulich

This is to dissolve / move xen/common/string.c. One benefit of moving
these functions into an archive is that we can drop some of the related
__HAVE_ARCH_* #define-s: By living in an archive, the per-arch functions
will preempt any loading of the respective functions (objects) from the
archive. (Down the road we may want to move the per-arch functions into
archives as well, at which point the per-arch archive(s) would need to
be specified ahead of the common one(s) to the linker.)

The only change in v2 is adjustment to all of the commit messages.

01: lib: move memset()
02: lib: move memcpy()
03: lib: move memmove()
04: lib: move memcmp()
05: lib: move memchr()
06: lib: move memchr_inv()
07: lib: move strlen()
08: lib: move strnlen()
09: lib: move strcmp()
10: lib: move strncmp()
11: lib: move strlcpy()
12: lib: move strlcat()
13: lib: move strchr()
14: lib: move strrchr()
15: lib: move strstr()
16: lib: move strcasecmp()
17: lib: move/rename strnicmp() to strncasecmp()
18: lib: move strspn()
19: lib: move strpbrk()
20: lib: move strsep()

Jan

[PATCH v10 00/13] switch to domheap for Xen page tables

2021-04-21 Thread Hongyan Xia

From: Hongyan Xia 

This series rewrites all the remaining functions and finally makes the
switch from xenheap to domheap for Xen page tables, so that they no
longer need to rely on the direct map, which is a big step towards
removing the direct map.

---
Changed in v10:
- rebase.
- address comments in 01/13, which propagates a change into 02/13.

Changed in v9:
- drop first 2 patches which have been merged in XSA-345.
- adjust code around L3 page locking in mm.c.

Changed in v8:
- address comments in v7.
- rebase

Changed in v7:
- rebase and cleanup.
- address comments in v6.
- add alloc_map_clear_xen_pt() helper to simplify the patches in this
  series.

Changed in v6:
- drop the patches that have already been merged.
- rebase and cleanup.
- rewrite map_pages_to_xen() and modify_xen_mappings() in a way that
  does not require an end_of_loop goto label.

Hongyan Xia (2):
  x86/mm: drop old page table APIs
  x86: switch to use domheap page for page tables

Wei Liu (11):
  x86/mm: rewrite virt_to_xen_l*e
  x86/mm: switch to new APIs in map_pages_to_xen
  x86/mm: switch to new APIs in modify_xen_mappings
  x86_64/mm: introduce pl2e in paging_init
  x86_64/mm: switch to new APIs in paging_init
  x86_64/mm: switch to new APIs in setup_m2p_table
  efi: use new page table APIs in copy_mapping
  efi: switch to new APIs in EFI code
  x86/smpboot: add exit path for clone_mapping()
  x86/smpboot: switch clone_mapping() to new APIs
  x86/mm: drop _new suffix for page table APIs

 xen/arch/x86/efi/runtime.h |  13 +-
 xen/arch/x86/mm.c  | 247 ++---
 xen/arch/x86/setup.c   |   4 +-
 xen/arch/x86/smpboot.c |  70 +++
 xen/arch/x86/x86_64/mm.c   |  80 +++-
 xen/common/efi/boot.c  |  83 -
 xen/common/efi/efi.h   |   3 +-
 xen/common/efi/runtime.c   |   8 +-
 xen/include/asm-x86/mm.h   |   7 +-
 xen/include/asm-x86/page.h |   5 -
 10 files changed, 314 insertions(+), 206 deletions(-)

-- 
2.23.4

[PATCH v10 01/13] x86/mm: rewrite virt_to_xen_l*e

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

Rewrite those functions to use the new APIs. Modify its callers to unmap
the pointer returned. Since alloc_xen_pagetable_new() is almost never
useful unless accompanied by page clearing and a mapping, introduce a
helper alloc_map_clear_xen_pt() for this sequence.

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 

---
Changed in v10:
- remove stale include.
- s/alloc_map_clear_xen_pt/alloc_mapped_pagetable/g.
- fix mis-hunks.

Changed in v9:
- use domain_page_map_to_mfn() around the L3 table locking logic.
- remove vmap_to_mfn() changes since we now use xen_map_to_mfn().

Changed in v8:
- s/virtual address/linear address/.
- BUG_ON() on NULL return in vmap_to_mfn().

Changed in v7:
- remove a comment.
- use l1e_get_mfn() instead of converting things back and forth.
- add alloc_map_clear_xen_pt().
- unmap before the next mapping to reduce mapcache pressure.
- use normal unmap calls instead of the macro in error paths because
  unmap can handle NULL now.
---
 xen/arch/x86/mm.c| 102 +++
 xen/include/asm-x86/mm.h |   1 +
 2 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index b7a10bbdd401..5944ef19dc50 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4931,8 +4931,28 @@ void free_xen_pagetable_new(mfn_t mfn)
 free_xenheap_page(mfn_to_virt(mfn_x(mfn)));
 }
 
+void *alloc_mapped_pagetable(mfn_t *pmfn)
+{
+mfn_t mfn = alloc_xen_pagetable_new();
+void *ret;
+
+if ( mfn_eq(mfn, INVALID_MFN) )
+return NULL;
+
+if ( pmfn )
+*pmfn = mfn;
+ret = map_domain_page(mfn);
+clear_page(ret);
+
+return ret;
+}
+
 static DEFINE_SPINLOCK(map_pgdir_lock);
 
+/*
+ * For virt_to_xen_lXe() functions, they take a linear address and return a
+ * pointer to Xen's LX entry. Caller needs to unmap the pointer.
+ */
 static l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
 {
 l4_pgentry_t *pl4e;
@@ -4941,33 +4961,33 @@ static l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
 if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
 {
 bool locking = system_state > SYS_STATE_boot;
-l3_pgentry_t *l3t = alloc_xen_pagetable();
+mfn_t l3mfn;
+l3_pgentry_t *l3t = alloc_mapped_pagetable(&l3mfn);
 
 if ( !l3t )
 return NULL;
-clear_page(l3t);
+UNMAP_DOMAIN_PAGE(l3t);
 if ( locking )
 spin_lock(&map_pgdir_lock);
 if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
 {
-l4_pgentry_t l4e = l4e_from_paddr(__pa(l3t), __PAGE_HYPERVISOR);
+l4_pgentry_t l4e = l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
 
 l4e_write(pl4e, l4e);
 efi_update_l4_pgtable(l4_table_offset(v), l4e);
-l3t = NULL;
+l3mfn = INVALID_MFN;
 }
 if ( locking )
 spin_unlock(&map_pgdir_lock);
-if ( l3t )
-free_xen_pagetable(l3t);
+free_xen_pagetable_new(l3mfn);
 }
 
-return l4e_to_l3e(*pl4e) + l3_table_offset(v);
+return map_l3t_from_l4e(*pl4e) + l3_table_offset(v);
 }
 
 static l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
 {
-l3_pgentry_t *pl3e;
+l3_pgentry_t *pl3e, l3e;
 
 pl3e = virt_to_xen_l3e(v);
 if ( !pl3e )
@@ -4976,31 +4996,37 @@ static l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
 {
 bool locking = system_state > SYS_STATE_boot;
-l2_pgentry_t *l2t = alloc_xen_pagetable();
+mfn_t l2mfn;
+l2_pgentry_t *l2t = alloc_mapped_pagetable(&l2mfn);
 
 if ( !l2t )
+{
+unmap_domain_page(pl3e);
 return NULL;
-clear_page(l2t);
+}
+UNMAP_DOMAIN_PAGE(l2t);
 if ( locking )
 spin_lock(&map_pgdir_lock);
 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
 {
-l3e_write(pl3e, l3e_from_paddr(__pa(l2t), __PAGE_HYPERVISOR));
-l2t = NULL;
+l3e_write(pl3e, l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR));
+l2mfn = INVALID_MFN;
 }
 if ( locking )
 spin_unlock(&map_pgdir_lock);
-if ( l2t )
-free_xen_pagetable(l2t);
+free_xen_pagetable_new(l2mfn);
 }
 
 BUG_ON(l3e_get_flags(*pl3e) & _PAGE_PSE);
-return l3e_to_l2e(*pl3e) + l2_table_offset(v);
+l3e = *pl3e;
+unmap_domain_page(pl3e);
+
+return map_l2t_from_l3e(l3e) + l2_table_offset(v);
 }
 
 l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
 {
-l2_pgentry_t *pl2e;
+l2_pgentry_t *pl2e, l2e;
 
 pl2e = virt_to_xen_l2e(v);
 if ( !pl2e )
@@ -5009,26 +5035,32 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
 {
 bool locking = system_state > SYS_STATE_boot;
-l1_pgentry_t *l1t = alloc_xen_pagetable();
+mfn_t l1mfn;
+l1_pgentry_t *l1t = alloc_mapped_pa

[PATCH v10 03/13] x86/mm: switch to new APIs in modify_xen_mappings

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

Page tables allocated in that function should be mapped and unmapped
now.

Note that pl2e now maybe mapped and unmapped in different iterations, so
we need to add clean-ups for that.

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 
Reviewed-by: Jan Beulich 

---
Changed in v7:
- use normal unmap in the error path.
---
 xen/arch/x86/mm.c | 57 ++-
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 8a68da26f45f..832e654294b4 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5546,6 +5546,7 @@ int map_pages_to_xen(
 
  out:
 L3T_UNLOCK(current_l3page);
+unmap_domain_page(pl2e);
 unmap_domain_page(pl3e);
 unmap_domain_page(pl2e);
 return rc;
@@ -5572,7 +5573,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, 
unsigned int nf)
 {
 bool locking = system_state > SYS_STATE_boot;
 l3_pgentry_t *pl3e = NULL;
-l2_pgentry_t *pl2e;
+l2_pgentry_t *pl2e = NULL;
 l1_pgentry_t *pl1e;
 unsigned int  i;
 unsigned long v = s;
@@ -5592,6 +5593,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, 
unsigned int nf)
 {
 /* Clean up the previous iteration. */
 L3T_UNLOCK(current_l3page);
+UNMAP_DOMAIN_PAGE(pl2e);
 UNMAP_DOMAIN_PAGE(pl3e);
 
 pl3e = virt_to_xen_l3e(v);
@@ -5614,6 +5616,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, 
unsigned int nf)
 if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
 {
 l2_pgentry_t *l2t;
+mfn_t l2mfn;
 
 if ( l2_table_offset(v) == 0 &&
  l1_table_offset(v) == 0 &&
@@ -5630,35 +5633,38 @@ int modify_xen_mappings(unsigned long s, unsigned long 
e, unsigned int nf)
 }
 
 /* PAGE1GB: shatter the superpage and fall through. */
-l2t = alloc_xen_pagetable();
-if ( !l2t )
+l2mfn = alloc_xen_pagetable_new();
+if ( mfn_eq(l2mfn, INVALID_MFN) )
 goto out;
 
+l2t = map_domain_page(l2mfn);
 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
 l2e_write(l2t + i,
   l2e_from_pfn(l3e_get_pfn(*pl3e) +
(i << PAGETABLE_ORDER),
l3e_get_flags(*pl3e)));
+UNMAP_DOMAIN_PAGE(l2t);
+
 if ( locking )
 spin_lock(&map_pgdir_lock);
 if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
  (l3e_get_flags(*pl3e) & _PAGE_PSE) )
 {
-l3e_write_atomic(pl3e, l3e_from_mfn(virt_to_mfn(l2t),
-__PAGE_HYPERVISOR));
-l2t = NULL;
+l3e_write_atomic(pl3e,
+ l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR));
+l2mfn = INVALID_MFN;
 }
 if ( locking )
 spin_unlock(&map_pgdir_lock);
-if ( l2t )
-free_xen_pagetable(l2t);
+
+free_xen_pagetable_new(l2mfn);
 }
 
 /*
  * The L3 entry has been verified to be present, and we've dealt with
  * 1G pages as well, so the L2 table cannot require allocation.
  */
-pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
+pl2e = map_l2t_from_l3e(*pl3e) + l2_table_offset(v);
 
 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
 {
@@ -5686,41 +5692,45 @@ int modify_xen_mappings(unsigned long s, unsigned long 
e, unsigned int nf)
 else
 {
 l1_pgentry_t *l1t;
-
 /* PSE: shatter the superpage and try again. */
-l1t = alloc_xen_pagetable();
-if ( !l1t )
+mfn_t l1mfn = alloc_xen_pagetable_new();
+
+if ( mfn_eq(l1mfn, INVALID_MFN) )
 goto out;
 
+l1t = map_domain_page(l1mfn);
 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
 l1e_write(&l1t[i],
   l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
l2e_get_flags(*pl2e) & ~_PAGE_PSE));
+UNMAP_DOMAIN_PAGE(l1t);
+
 if ( locking )
 spin_lock(&map_pgdir_lock);
 if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
  (l2e_get_flags(*pl2e) & _PAGE_PSE) )
 {
-l2e_write_atomic(pl2e, l2e_from_mfn(virt_to_mfn(l1t),
+l2e_write_atomic(pl2e, l2e_from_mfn(l1mfn,
 __PAGE_HYPERVISOR));
-l1t = NULL;
+l1mfn = INVALID_MFN;
 }
 if ( locking )
 spin_unlock(&map_pgdir_lock);
-if ( l

[PATCH v10 02/13] x86/mm: switch to new APIs in map_pages_to_xen

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

Page tables allocated in that function should be mapped and unmapped
now.

Take the opportunity to avoid a potential double map in
map_pages_to_xen() by initialising pl1e to NULL and only map it if it
was not mapped earlier.

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 

---
Changed in v10:
- avoid a potential double map.
- drop RoB due to this change.
---
 xen/arch/x86/mm.c | 64 ---
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 5944ef19dc50..8a68da26f45f 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5215,7 +5215,7 @@ int map_pages_to_xen(
 }
 else
 {
-l2_pgentry_t *l2t = l3e_to_l2e(ol3e);
+l2_pgentry_t *l2t = map_l2t_from_l3e(ol3e);
 
 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
 {
@@ -5227,10 +5227,11 @@ int map_pages_to_xen(
 else
 {
 unsigned int j;
-const l1_pgentry_t *l1t = l2e_to_l1e(ol2e);
+const l1_pgentry_t *l1t = map_l1t_from_l2e(ol2e);
 
 for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ )
 flush_flags(l1e_get_flags(l1t[j]));
+unmap_domain_page(l1t);
 }
 }
 flush_area(virt, flush_flags);
@@ -5239,9 +5240,10 @@ int map_pages_to_xen(
 ol2e = l2t[i];
 if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) &&
  !(l2e_get_flags(ol2e) & _PAGE_PSE) )
-free_xen_pagetable(l2e_to_l1e(ol2e));
+free_xen_pagetable_new(l2e_get_mfn(ol2e));
 }
-free_xen_pagetable(l2t);
+unmap_domain_page(l2t);
+free_xen_pagetable_new(l3e_get_mfn(ol3e));
 }
 }
 
@@ -5258,6 +5260,7 @@ int map_pages_to_xen(
 unsigned int flush_flags =
 FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER);
 l2_pgentry_t *l2t;
+mfn_t l2mfn;
 
 /* Skip this PTE if there is no change. */
 if ( ((l3e_get_pfn(ol3e) & ~(L2_PAGETABLE_ENTRIES *
@@ -5279,15 +5282,17 @@ int map_pages_to_xen(
 continue;
 }
 
-l2t = alloc_xen_pagetable();
-if ( l2t == NULL )
+l2mfn = alloc_xen_pagetable_new();
+if ( mfn_eq(l2mfn, INVALID_MFN) )
 goto out;
 
+l2t = map_domain_page(l2mfn);
 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
 l2e_write(l2t + i,
   l2e_from_pfn(l3e_get_pfn(ol3e) +
(i << PAGETABLE_ORDER),
l3e_get_flags(ol3e)));
+UNMAP_DOMAIN_PAGE(l2t);
 
 if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
 flush_flags |= FLUSH_TLB_GLOBAL;
@@ -5297,15 +5302,15 @@ int map_pages_to_xen(
 if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
  (l3e_get_flags(*pl3e) & _PAGE_PSE) )
 {
-l3e_write_atomic(pl3e, l3e_from_mfn(virt_to_mfn(l2t),
-__PAGE_HYPERVISOR));
-l2t = NULL;
+l3e_write_atomic(pl3e,
+ l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR));
+l2mfn = INVALID_MFN;
 }
 if ( locking )
 spin_unlock(&map_pgdir_lock);
 flush_area(virt, flush_flags);
-if ( l2t )
-free_xen_pagetable(l2t);
+
+free_xen_pagetable_new(l2mfn);
 }
 
 pl2e = virt_to_xen_l2e(virt);
@@ -5333,12 +5338,13 @@ int map_pages_to_xen(
 }
 else
 {
-l1_pgentry_t *l1t = l2e_to_l1e(ol2e);
+l1_pgentry_t *l1t = map_l1t_from_l2e(ol2e);
 
 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
 flush_flags(l1e_get_flags(l1t[i]));
 flush_area(virt, flush_flags);
-free_xen_pagetable(l1t);
+unmap_domain_page(l1t);
+free_xen_pagetable_new(l2e_get_mfn(ol2e));
 }
 }
 
@@ -5349,20 +5355,20 @@ int map_pages_to_xen(
 }
 else
 {
+pl1e = NULL;
 /* Normal page mapping. */
 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
 {
 pl1e = virt_to_xen_l1e(virt);
 if ( pl1e == NULL )
 goto out;
-
-UNMAP_DOMAIN_PA

[PATCH v10 04/13] x86_64/mm: introduce pl2e in paging_init

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

We will soon map and unmap pages in paging_init(). Introduce pl2e so
that we can use l2_ro_mpt to point to the page table itself.

No functional change.

Signed-off-by: Wei Liu 
Acked-by: Jan Beulich 

---
Changed in v7:
- reword commit message.
---
 xen/arch/x86/x86_64/mm.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index d7e67311fa69..59049bdf8e83 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -496,7 +496,7 @@ void __init paging_init(void)
 unsigned long i, mpt_size, va;
 unsigned int n, memflags;
 l3_pgentry_t *l3_ro_mpt;
-l2_pgentry_t *l2_ro_mpt = NULL;
+l2_pgentry_t *pl2e = NULL, *l2_ro_mpt = NULL;
 struct page_info *l1_pg;
 
 /*
@@ -546,7 +546,7 @@ void __init paging_init(void)
 (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT)));
 
 if ( cpu_has_page1gb &&
- !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
+ !((unsigned long)pl2e & ~PAGE_MASK) &&
  (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) )
 {
 unsigned int k, holes;
@@ -606,7 +606,7 @@ void __init paging_init(void)
 memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
0xFF, 1UL << L2_PAGETABLE_SHIFT);
 }
-if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
+if ( !((unsigned long)pl2e & ~PAGE_MASK) )
 {
 if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL )
 goto nomem;
@@ -614,13 +614,14 @@ void __init paging_init(void)
 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
   l3e_from_paddr(__pa(l2_ro_mpt),
  __PAGE_HYPERVISOR_RO | _PAGE_USER));
+pl2e = l2_ro_mpt;
 ASSERT(!l2_table_offset(va));
 }
 /* NB. Cannot be GLOBAL: guest user mode should not see it. */
 if ( l1_pg )
-l2e_write(l2_ro_mpt, l2e_from_page(
+l2e_write(pl2e, l2e_from_page(
 l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
-l2_ro_mpt++;
+pl2e++;
 }
 #undef CNT
 #undef MFN
@@ -632,6 +633,7 @@ void __init paging_init(void)
 goto nomem;
 compat_idle_pg_table_l2 = l2_ro_mpt;
 clear_page(l2_ro_mpt);
+pl2e = l2_ro_mpt;
 
 /* Allocate and map the compatibility mode machine-to-phys table. */
 mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
@@ -649,7 +651,7 @@ void __init paging_init(void)
  sizeof(*compat_machine_to_phys_mapping))
 BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
  sizeof(*compat_machine_to_phys_mapping));
-for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ )
+for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, pl2e++ )
 {
 memflags = MEMF_node(phys_to_nid(i <<
 (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
@@ -671,7 +673,7 @@ void __init paging_init(void)
 (i << L2_PAGETABLE_SHIFT)),
0xFF, 1UL << L2_PAGETABLE_SHIFT);
 /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
-l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
+l2e_write(pl2e, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
 }
 #undef CNT
 #undef MFN
-- 
2.23.4

[PATCH v10 06/13] x86_64/mm: switch to new APIs in setup_m2p_table

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

While doing so, avoid repetitive mapping of l2_ro_mpt by keeping it
across loops, and only unmap and map it when crossing 1G boundaries.

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 
Reviewed-by: Jan Beulich 

---
Changed in v8:
- re-structure if condition around l2_ro_mpt.
- reword the commit message.

Changed in v7:
- avoid repetitive mapping of l2_ro_mpt.
- edit commit message.
- switch to alloc_map_clear_xen_pt().
---
 xen/arch/x86/x86_64/mm.c | 32 +++-
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 3e40d529bbf3..c625075695e0 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -402,7 +402,8 @@ static int setup_m2p_table(struct mem_hotadd_info *info)
 
 ASSERT(l4e_get_flags(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)])
 & _PAGE_PRESENT);
-l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]);
+l3_ro_mpt = map_l3t_from_l4e(
+idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]);
 
 smap = (info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1)));
 emap = ((info->epfn + ((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1 )) &
@@ -420,6 +421,10 @@ static int setup_m2p_table(struct mem_hotadd_info *info)
 i = smap;
 while ( i < emap )
 {
+if ( (RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping)) &
+ ((1UL << L3_PAGETABLE_SHIFT) - 1) )
+UNMAP_DOMAIN_PAGE(l2_ro_mpt);
+
 switch ( m2p_mapped(i) )
 {
 case M2P_1G_MAPPED:
@@ -455,32 +460,31 @@ static int setup_m2p_table(struct mem_hotadd_info *info)
 
 ASSERT(!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
   _PAGE_PSE));
-if ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
-  _PAGE_PRESENT )
-l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]) +
-  l2_table_offset(va);
+if ( l2_ro_mpt )
+/* nothing */;
+else if ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
+  _PAGE_PRESENT )
+l2_ro_mpt = map_l2t_from_l3e(l3_ro_mpt[l3_table_offset(va)]);
 else
 {
-l2_ro_mpt = alloc_xen_pagetable();
+mfn_t l2_ro_mpt_mfn;
+
+l2_ro_mpt = alloc_mapped_pagetable(&l2_ro_mpt_mfn);
 if ( !l2_ro_mpt )
 {
 ret = -ENOMEM;
 goto error;
 }
 
-clear_page(l2_ro_mpt);
 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
-  l3e_from_paddr(__pa(l2_ro_mpt),
- __PAGE_HYPERVISOR_RO | _PAGE_USER));
-l2_ro_mpt += l2_table_offset(va);
+  l3e_from_mfn(l2_ro_mpt_mfn,
+   __PAGE_HYPERVISOR_RO | _PAGE_USER));
 }
 
 /* NB. Cannot be GLOBAL: guest user mode should not see it. */
-l2e_write(l2_ro_mpt, l2e_from_mfn(mfn,
+l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_from_mfn(mfn,
/*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
 }
-if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
-l2_ro_mpt = NULL;
 i += ( 1UL << (L2_PAGETABLE_SHIFT - 3));
 }
 #undef CNT
@@ -488,6 +492,8 @@ static int setup_m2p_table(struct mem_hotadd_info *info)
 
 ret = setup_compat_m2p_table(info);
 error:
+unmap_domain_page(l2_ro_mpt);
+unmap_domain_page(l3_ro_mpt);
 return ret;
 }
 
-- 
2.23.4

[PATCH v10 05/13] x86_64/mm: switch to new APIs in paging_init

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

Map and unmap pages instead of relying on the direct map.

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 
Reviewed-by: Jan Beulich 

---
Changed in v9:
- remove an unnecessary l3mfn variable.

Changed in v8:
- replace l3/2_ro_mpt_mfn with just mfn since their lifetimes do not
  overlap

Changed in v7:
- use the new alloc_map_clear_xen_pt() helper.
- move the unmap of pl3t up a bit.
- remove the unmaps in the nomem path.
---
 xen/arch/x86/x86_64/mm.c | 34 --
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 59049bdf8e83..3e40d529bbf3 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -498,6 +498,7 @@ void __init paging_init(void)
 l3_pgentry_t *l3_ro_mpt;
 l2_pgentry_t *pl2e = NULL, *l2_ro_mpt = NULL;
 struct page_info *l1_pg;
+mfn_t mfn;
 
 /*
  * We setup the L3s for 1:1 mapping if host support memory hotplug
@@ -510,22 +511,22 @@ void __init paging_init(void)
 if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) &
   _PAGE_PRESENT) )
 {
-l3_pgentry_t *pl3t = alloc_xen_pagetable();
+l3_pgentry_t *pl3t = alloc_mapped_pagetable(&mfn);
 
 if ( !pl3t )
 goto nomem;
-clear_page(pl3t);
+UNMAP_DOMAIN_PAGE(pl3t);
 l4e_write(&idle_pg_table[l4_table_offset(va)],
-  l4e_from_paddr(__pa(pl3t), __PAGE_HYPERVISOR_RW));
+  l4e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
 }
 }
 
 /* Create user-accessible L2 directory to map the MPT for guests. */
-if ( (l3_ro_mpt = alloc_xen_pagetable()) == NULL )
+l3_ro_mpt = alloc_mapped_pagetable(&mfn);
+if ( !l3_ro_mpt )
 goto nomem;
-clear_page(l3_ro_mpt);
 l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
-  l4e_from_paddr(__pa(l3_ro_mpt), __PAGE_HYPERVISOR_RO | 
_PAGE_USER));
+  l4e_from_mfn(mfn, __PAGE_HYPERVISOR_RO | _PAGE_USER));
 
 /*
  * Allocate and map the machine-to-phys table.
@@ -608,12 +609,14 @@ void __init paging_init(void)
 }
 if ( !((unsigned long)pl2e & ~PAGE_MASK) )
 {
-if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL )
+UNMAP_DOMAIN_PAGE(l2_ro_mpt);
+
+l2_ro_mpt = alloc_mapped_pagetable(&mfn);
+if ( !l2_ro_mpt )
 goto nomem;
-clear_page(l2_ro_mpt);
+
 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
-  l3e_from_paddr(__pa(l2_ro_mpt),
- __PAGE_HYPERVISOR_RO | _PAGE_USER));
+  l3e_from_mfn(mfn, __PAGE_HYPERVISOR_RO | _PAGE_USER));
 pl2e = l2_ro_mpt;
 ASSERT(!l2_table_offset(va));
 }
@@ -625,15 +628,18 @@ void __init paging_init(void)
 }
 #undef CNT
 #undef MFN
+UNMAP_DOMAIN_PAGE(l2_ro_mpt);
+UNMAP_DOMAIN_PAGE(l3_ro_mpt);
 
 /* Create user-accessible L2 directory to map the MPT for compat guests. */
 if ( opt_pv32 )
 {
-if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL )
+mfn = alloc_xen_pagetable_new();
+if ( mfn_eq(mfn, INVALID_MFN) )
 goto nomem;
-compat_idle_pg_table_l2 = l2_ro_mpt;
-clear_page(l2_ro_mpt);
-pl2e = l2_ro_mpt;
+compat_idle_pg_table_l2 = map_domain_page_global(mfn);
+clear_page(compat_idle_pg_table_l2);
+pl2e = compat_idle_pg_table_l2;
 
 /* Allocate and map the compatibility mode machine-to-phys table. */
 mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
-- 
2.23.4

[PATCH v10 08/13] efi: switch to new APIs in EFI code

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 
Reviewed-by: Jan Beulich 

---
Changed in v7:
- add blank line after declaration.
- rename efi_l4_pgtable into efi_l4t.
- pass the mapped efi_l4t to copy_mapping() instead of map it again.
- use the alloc_map_clear_xen_pt() API.
- unmap pl3e, pl2e, l1t earlier.
---
 xen/arch/x86/efi/runtime.h | 13 ++---
 xen/common/efi/boot.c  | 55 ++
 xen/common/efi/efi.h   |  3 ++-
 xen/common/efi/runtime.c   |  8 +++---
 4 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/xen/arch/x86/efi/runtime.h b/xen/arch/x86/efi/runtime.h
index d9eb8f5c270f..77866c5f2178 100644
--- a/xen/arch/x86/efi/runtime.h
+++ b/xen/arch/x86/efi/runtime.h
@@ -1,12 +1,19 @@
+#include 
+#include 
 #include 
 #include 
 
 #ifndef COMPAT
-l4_pgentry_t *__read_mostly efi_l4_pgtable;
+mfn_t __read_mostly efi_l4_mfn = INVALID_MFN_INITIALIZER;
 
 void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t l4e)
 {
-if ( efi_l4_pgtable )
-l4e_write(efi_l4_pgtable + l4idx, l4e);
+if ( !mfn_eq(efi_l4_mfn, INVALID_MFN) )
+{
+l4_pgentry_t *efi_l4t = map_domain_page(efi_l4_mfn);
+
+l4e_write(efi_l4t + l4idx, l4e);
+unmap_domain_page(efi_l4t);
+}
 }
 #endif
diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
index 539d86c6e8c2..758f9d74d20f 100644
--- a/xen/common/efi/boot.c
+++ b/xen/common/efi/boot.c
@@ -1437,14 +1437,15 @@ custom_param("efi", parse_efi_param);
 
 static __init void copy_mapping(unsigned long mfn, unsigned long end,
 bool (*is_valid)(unsigned long smfn,
- unsigned long emfn))
+ unsigned long emfn),
+l4_pgentry_t *efi_l4t)
 {
 unsigned long next;
 l3_pgentry_t *l3src = NULL, *l3dst = NULL;
 
 for ( ; mfn < end; mfn = next )
 {
-l4_pgentry_t l4e = efi_l4_pgtable[l4_table_offset(mfn << PAGE_SHIFT)];
+l4_pgentry_t l4e = efi_l4t[l4_table_offset(mfn << PAGE_SHIFT)];
 unsigned long va = (unsigned long)mfn_to_virt(mfn);
 
 if ( !(mfn & ((1UL << (L4_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) )
@@ -1463,7 +1464,7 @@ static __init void copy_mapping(unsigned long mfn, 
unsigned long end,
 
 l3dst = alloc_mapped_pagetable(&l3mfn);
 BUG_ON(!l3dst);
-efi_l4_pgtable[l4_table_offset(mfn << PAGE_SHIFT)] =
+efi_l4t[l4_table_offset(mfn << PAGE_SHIFT)] =
 l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
 }
 else
@@ -1496,6 +1497,7 @@ static bool __init rt_range_valid(unsigned long smfn, 
unsigned long emfn)
 void __init efi_init_memory(void)
 {
 unsigned int i;
+l4_pgentry_t *efi_l4t;
 struct rt_extra {
 struct rt_extra *next;
 unsigned long smfn, emfn;
@@ -1610,11 +1612,10 @@ void __init efi_init_memory(void)
  * Set up 1:1 page tables for runtime calls. See SetVirtualAddressMap() in
  * efi_exit_boot().
  */
-efi_l4_pgtable = alloc_xen_pagetable();
-BUG_ON(!efi_l4_pgtable);
-clear_page(efi_l4_pgtable);
+efi_l4t = alloc_mapped_pagetable(&efi_l4_mfn);
+BUG_ON(!efi_l4t);
 
-copy_mapping(0, max_page, ram_range_valid);
+copy_mapping(0, max_page, ram_range_valid, efi_l4t);
 
 /* Insert non-RAM runtime mappings inside the direct map. */
 for ( i = 0; i < efi_memmap_size; i += efi_mdesc_size )
@@ -1630,58 +1631,64 @@ void __init efi_init_memory(void)
 copy_mapping(PFN_DOWN(desc->PhysicalStart),
  PFN_UP(desc->PhysicalStart +
 (desc->NumberOfPages << EFI_PAGE_SHIFT)),
- rt_range_valid);
+ rt_range_valid, efi_l4t);
 }
 
 /* Insert non-RAM runtime mappings outside of the direct map. */
 while ( (extra = extra_head) != NULL )
 {
 unsigned long addr = extra->smfn << PAGE_SHIFT;
-l4_pgentry_t l4e = efi_l4_pgtable[l4_table_offset(addr)];
+l4_pgentry_t l4e = efi_l4t[l4_table_offset(addr)];
 l3_pgentry_t *pl3e;
 l2_pgentry_t *pl2e;
 l1_pgentry_t *l1t;
 
 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
 {
-pl3e = alloc_xen_pagetable();
+mfn_t l3mfn;
+
+pl3e = alloc_mapped_pagetable(&l3mfn);
 BUG_ON(!pl3e);
-clear_page(pl3e);
-efi_l4_pgtable[l4_table_offset(addr)] =
-l4e_from_paddr(virt_to_maddr(pl3e), __PAGE_HYPERVISOR);
+efi_l4t[l4_table_offset(addr)] =
+l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
 }
 else
-pl3e = l4e_to_l3e(l4e);
+pl3e = map_l3t_from_l4e(l4e);
 pl3e += l3_table_offset(addr);
 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
 {
-pl2e = alloc_xen_pagetable();
+

[PATCH v10 07/13] efi: use new page table APIs in copy_mapping

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 
Reviewed-by: Jan Beulich 

---
Changed in v8:
- remove redundant commit message.
- unmap l3src based on va instead of mfn.
- re-structure if condition around l3dst.

Changed in v7:
- hoist l3 variables out of the loop to avoid repetitive mappings.
---
 xen/common/efi/boot.c | 28 +---
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
index 63e289ab8506..539d86c6e8c2 100644
--- a/xen/common/efi/boot.c
+++ b/xen/common/efi/boot.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1439,29 +1440,42 @@ static __init void copy_mapping(unsigned long mfn, 
unsigned long end,
  unsigned long emfn))
 {
 unsigned long next;
+l3_pgentry_t *l3src = NULL, *l3dst = NULL;
 
 for ( ; mfn < end; mfn = next )
 {
 l4_pgentry_t l4e = efi_l4_pgtable[l4_table_offset(mfn << PAGE_SHIFT)];
-l3_pgentry_t *l3src, *l3dst;
 unsigned long va = (unsigned long)mfn_to_virt(mfn);
 
+if ( !(mfn & ((1UL << (L4_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) )
+UNMAP_DOMAIN_PAGE(l3dst);
+if ( !(va & ((1UL << L4_PAGETABLE_SHIFT) - 1)) )
+UNMAP_DOMAIN_PAGE(l3src);
 next = mfn + (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT));
 if ( !is_valid(mfn, min(next, end)) )
 continue;
-if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+
+if ( l3dst )
+/* nothing */;
+else if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
 {
-l3dst = alloc_xen_pagetable();
+mfn_t l3mfn;
+
+l3dst = alloc_mapped_pagetable(&l3mfn);
 BUG_ON(!l3dst);
-clear_page(l3dst);
 efi_l4_pgtable[l4_table_offset(mfn << PAGE_SHIFT)] =
-l4e_from_paddr(virt_to_maddr(l3dst), __PAGE_HYPERVISOR);
+l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
 }
 else
-l3dst = l4e_to_l3e(l4e);
-l3src = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]);
+l3dst = map_l3t_from_l4e(l4e);
+
+if ( !l3src )
+l3src = map_l3t_from_l4e(idle_pg_table[l4_table_offset(va)]);
 l3dst[l3_table_offset(mfn << PAGE_SHIFT)] = l3src[l3_table_offset(va)];
 }
+
+unmap_domain_page(l3src);
+unmap_domain_page(l3dst);
 }
 
 static bool __init ram_range_valid(unsigned long smfn, unsigned long emfn)
-- 
2.23.4

[PATCH v10 09/13] x86/smpboot: add exit path for clone_mapping()

2021-04-21 Thread Hongyan Xia

From: Wei Liu 

We will soon need to clean up page table mappings in the exit path.

No functional change.

Signed-off-by: Wei Liu 
Signed-off-by: Hongyan Xia 
Acked-by: Jan Beulich 

---
Changed in v7:
- edit commit message.
- begin with rc = 0 and set it to -ENOMEM ahead of if().
---
 xen/arch/x86/smpboot.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 82c1012e892f..e90c4dfa8a88 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -696,6 +696,7 @@ static int clone_mapping(const void *ptr, root_pgentry_t 
*rpt)
 l3_pgentry_t *pl3e;
 l2_pgentry_t *pl2e;
 l1_pgentry_t *pl1e;
+int rc = 0;
 
 /*
  * Sanity check 'linear'.  We only allow cloning from the Xen virtual
@@ -736,7 +737,7 @@ static int clone_mapping(const void *ptr, root_pgentry_t 
*rpt)
 pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear);
 flags = l1e_get_flags(*pl1e);
 if ( !(flags & _PAGE_PRESENT) )
-return 0;
+goto out;
 pfn = l1e_get_pfn(*pl1e);
 }
 }
@@ -744,8 +745,9 @@ static int clone_mapping(const void *ptr, root_pgentry_t 
*rpt)
 if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) )
 {
 pl3e = alloc_xen_pagetable();
+rc = -ENOMEM;
 if ( !pl3e )
-return -ENOMEM;
+goto out;
 clear_page(pl3e);
 l4e_write(&rpt[root_table_offset(linear)],
   l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
@@ -758,8 +760,9 @@ static int clone_mapping(const void *ptr, root_pgentry_t 
*rpt)
 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
 {
 pl2e = alloc_xen_pagetable();
+rc = -ENOMEM;
 if ( !pl2e )
-return -ENOMEM;
+goto out;
 clear_page(pl2e);
 l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
 }
@@ -774,8 +777,9 @@ static int clone_mapping(const void *ptr, root_pgentry_t 
*rpt)
 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
 {
 pl1e = alloc_xen_pagetable();
+rc = -ENOMEM;
 if ( !pl1e )
-return -ENOMEM;
+goto out;
 clear_page(pl1e);
 l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR));
 }
@@ -796,7 +800,9 @@ static int clone_mapping(const void *ptr, root_pgentry_t 
*rpt)
 else
 l1e_write(pl1e, l1e_from_pfn(pfn, flags));
 
-return 0;
+rc = 0;
+ out:
+return rc;
 }
 
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
-- 
2.23.4

[PATCH v2 01/20] lib: move memset()

2021-04-21 Thread Jan Beulich

By moving the function into an archive, x86 doesn't need to announce
anymore that is has its own implementation - symbol resolution by the
linker will now guarantee that the generic function remains unused, and
the forwarding to the compiler built-in gets done by the common header
anyway.

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -311,26 +311,6 @@ char *(strstr)(const char *s1, const cha
 }
 #endif
 
-#ifndef __HAVE_ARCH_MEMSET
-/**
- * memset - Fill a region of memory with the given value
- * @s: Pointer to the start of the area.
- * @c: The byte to fill the area with
- * @count: The size of the area.
- *
- * Do not use memset() to access IO space, use memset_io() instead.
- */
-void *(memset)(void *s, int c, size_t count)
-{
-   char *xs = (char *) s;
-
-   while (count--)
-   *xs++ = c;
-
-   return s;
-}
-#endif
-
 #ifndef __HAVE_ARCH_MEMCPY
 /**
  * memcpy - Copy one area of memory to another
--- a/xen/include/asm-x86/string.h
+++ b/xen/include/asm-x86/string.h
@@ -7,9 +7,6 @@
 #define __HAVE_ARCH_MEMMOVE
 #define memmove(d, s, n) __builtin_memmove(d, s, n)
 
-#define __HAVE_ARCH_MEMSET
-#define memset(s, c, n) __builtin_memset(s, c, n)
-
 #endif /* __X86_STRING_H__ */
 /*
  * Local variables:
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -4,6 +4,7 @@ lib-y += bsearch.o
 lib-y += ctors.o
 lib-y += ctype.o
 lib-y += list-sort.o
+lib-y += memset.o
 lib-y += muldiv64.o
 lib-y += parse-size.o
 lib-y += rbtree.o
--- /dev/null
+++ b/xen/lib/memset.c
@@ -0,0 +1,33 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * memset - Fill a region of memory with the given value
+ * @s: Pointer to the start of the area.
+ * @c: The byte to fill the area with
+ * @count: The size of the area.
+ *
+ * Do not use memset() to access IO space, use memset_io() instead.
+ */
+void *(memset)(void *s, int c, size_t count)
+{
+   char *xs = (char *) s;
+
+   while (count--)
+   *xs++ = c;
+
+   return s;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 02/20] lib: move memcpy()

2021-04-21 Thread Jan Beulich

By moving the function into an archive, x86 doesn't need to announce
anymore that is has its own implementation - symbol resolution by the
linker will now guarantee that the generic function remains unused, and
the forwarding to the compiler built-in gets done by the common header
anyway.

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -311,27 +311,6 @@ char *(strstr)(const char *s1, const cha
 }
 #endif
 
-#ifndef __HAVE_ARCH_MEMCPY
-/**
- * memcpy - Copy one area of memory to another
- * @dest: Where to copy to
- * @src: Where to copy from
- * @count: The size of the area.
- *
- * You should not use this function to access IO space, use memcpy_toio()
- * or memcpy_fromio() instead.
- */
-void *(memcpy)(void *dest, const void *src, size_t count)
-{
-   char *tmp = (char *) dest, *s = (char *) src;
-
-   while (count--)
-   *tmp++ = *s++;
-
-   return dest;
-}
-#endif
-
 #ifndef __HAVE_ARCH_MEMMOVE
 /**
  * memmove - Copy one area of memory to another
--- a/xen/include/asm-x86/string.h
+++ b/xen/include/asm-x86/string.h
@@ -1,9 +1,6 @@
 #ifndef __X86_STRING_H__
 #define __X86_STRING_H__
 
-#define __HAVE_ARCH_MEMCPY
-#define memcpy(d, s, n) __builtin_memcpy(d, s, n)
-
 #define __HAVE_ARCH_MEMMOVE
 #define memmove(d, s, n) __builtin_memmove(d, s, n)
 
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -4,6 +4,7 @@ lib-y += bsearch.o
 lib-y += ctors.o
 lib-y += ctype.o
 lib-y += list-sort.o
+lib-y += memcpy.o
 lib-y += memset.o
 lib-y += muldiv64.o
 lib-y += parse-size.o
--- /dev/null
+++ b/xen/lib/memcpy.c
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * memcpy - Copy one area of memory to another
+ * @dest: Where to copy to
+ * @src: Where to copy from
+ * @count: The size of the area.
+ *
+ * You should not use this function to access IO space, use memcpy_toio()
+ * or memcpy_fromio() instead.
+ */
+void *(memcpy)(void *dest, const void *src, size_t count)
+{
+   char *tmp = (char *) dest, *s = (char *) src;
+
+   while (count--)
+   *tmp++ = *s++;
+
+   return dest;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 03/20] lib: move memmove()

2021-04-21 Thread Jan Beulich

By moving the function into an archive, x86 doesn't need to announce
anymore that is has its own implementation - symbol resolution by the
linker will now guarantee that the generic function remains unused, and
the forwarding to the compiler built-in gets done by the common header
anyway.

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -311,36 +311,6 @@ char *(strstr)(const char *s1, const cha
 }
 #endif
 
-#ifndef __HAVE_ARCH_MEMMOVE
-/**
- * memmove - Copy one area of memory to another
- * @dest: Where to copy to
- * @src: Where to copy from
- * @count: The size of the area.
- *
- * Unlike memcpy(), memmove() copes with overlapping areas.
- */
-void *(memmove)(void *dest, const void *src, size_t count)
-{
-   char *tmp, *s;
-
-   if (dest <= src) {
-   tmp = (char *) dest;
-   s = (char *) src;
-   while (count--)
-   *tmp++ = *s++;
-   }
-   else {
-   tmp = (char *) dest + count;
-   s = (char *) src + count;
-   while (count--)
-   *--tmp = *--s;
-   }
-
-   return dest;
-}
-#endif
-
 #ifndef __HAVE_ARCH_MEMCMP
 /**
  * memcmp - Compare two areas of memory
--- a/xen/include/asm-x86/string.h
+++ b/xen/include/asm-x86/string.h
@@ -1,9 +1,6 @@
 #ifndef __X86_STRING_H__
 #define __X86_STRING_H__
 
-#define __HAVE_ARCH_MEMMOVE
-#define memmove(d, s, n) __builtin_memmove(d, s, n)
-
 #endif /* __X86_STRING_H__ */
 /*
  * Local variables:
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -5,6 +5,7 @@ lib-y += ctors.o
 lib-y += ctype.o
 lib-y += list-sort.o
 lib-y += memcpy.o
+lib-y += memmove.o
 lib-y += memset.o
 lib-y += muldiv64.o
 lib-y += parse-size.o
--- /dev/null
+++ b/xen/lib/memmove.c
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * memmove - Copy one area of memory to another
+ * @dest: Where to copy to
+ * @src: Where to copy from
+ * @count: The size of the area.
+ *
+ * Unlike memcpy(), memmove() copes with overlapping areas.
+ */
+void *(memmove)(void *dest, const void *src, size_t count)
+{
+   char *tmp, *s;
+
+   if (dest <= src) {
+   tmp = (char *) dest;
+   s = (char *) src;
+   while (count--)
+   *tmp++ = *s++;
+   } else {
+   tmp = (char *) dest + count;
+   s = (char *) src + count;
+   while (count--)
+   *--tmp = *--s;
+   }
+
+   return dest;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 04/20] lib: move memcmp()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -311,25 +311,6 @@ char *(strstr)(const char *s1, const cha
 }
 #endif
 
-#ifndef __HAVE_ARCH_MEMCMP
-/**
- * memcmp - Compare two areas of memory
- * @cs: One area of memory
- * @ct: Another area of memory
- * @count: The size of the area.
- */
-int (memcmp)(const void *cs, const void *ct, size_t count)
-{
-   const unsigned char *su1, *su2;
-   int res = 0;
-
-   for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
-   if ((res = *su1 - *su2) != 0)
-   break;
-   return res;
-}
-#endif
-
 #ifndef __HAVE_ARCH_MEMCHR
 /**
  * memchr - Find a character in an area of memory.
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -4,6 +4,7 @@ lib-y += bsearch.o
 lib-y += ctors.o
 lib-y += ctype.o
 lib-y += list-sort.o
+lib-y += memcmp.o
 lib-y += memcpy.o
 lib-y += memmove.o
 lib-y += memset.o
--- /dev/null
+++ b/xen/lib/memcmp.c
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * memcmp - Compare two areas of memory
+ * @cs: One area of memory
+ * @ct: Another area of memory
+ * @count: The size of the area.
+ */
+int (memcmp)(const void *cs, const void *ct, size_t count)
+{
+   const unsigned char *su1, *su2;
+   int res = 0;
+
+   for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+   if ((res = *su1 - *su2) != 0)
+   break;
+   return res;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 05/20] lib: move memchr()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -311,28 +311,6 @@ char *(strstr)(const char *s1, const cha
 }
 #endif
 
-#ifndef __HAVE_ARCH_MEMCHR
-/**
- * memchr - Find a character in an area of memory.
- * @s: The memory area
- * @c: The byte to search for
- * @n: The size of the area.
- *
- * returns the address of the first occurrence of @c, or %NULL
- * if @c is not found
- */
-void *(memchr)(const void *s, int c, size_t n)
-{
-   const unsigned char *p = s;
-
-   while (n--)
-   if ((unsigned char)c == *p++)
-   return (void *)(p - 1);
-
-   return NULL;
-}
-#endif
-
 /**
  * memchr_inv - Find an unmatching character in an area of memory.
  * @s: The memory area
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -4,6 +4,7 @@ lib-y += bsearch.o
 lib-y += ctors.o
 lib-y += ctype.o
 lib-y += list-sort.o
+lib-y += memchr.o
 lib-y += memcmp.o
 lib-y += memcpy.o
 lib-y += memmove.o
--- /dev/null
+++ b/xen/lib/memchr.c
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * memchr - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of @c, or %NULL
+ * if @c is not found
+ */
+void *(memchr)(const void *s, int c, size_t n)
+{
+   const unsigned char *p = s;
+
+   while (n--)
+   if ((unsigned char)c == *p++)
+   return (void *)(p - 1);
+
+   return NULL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 06/20] lib: move memchr_inv()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -311,26 +311,6 @@ char *(strstr)(const char *s1, const cha
 }
 #endif
 
-/**
- * memchr_inv - Find an unmatching character in an area of memory.
- * @s: The memory area
- * @c: The byte that is expected
- * @n: The size of the area.
- *
- * returns the address of the first occurrence of a character other than @c,
- * or %NULL if the whole buffer contains just @c.
- */
-void *memchr_inv(const void *s, int c, size_t n)
-{
-   const unsigned char *p = s;
-
-   while (n--)
-   if ((unsigned char)c != *p++)
-   return (void *)(p - 1);
-
-   return NULL;
-}
-
 /*
  * Local variables:
  * mode: C
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -5,6 +5,7 @@ lib-y += ctors.o
 lib-y += ctype.o
 lib-y += list-sort.o
 lib-y += memchr.o
+lib-y += memchr_inv.o
 lib-y += memcmp.o
 lib-y += memcpy.o
 lib-y += memmove.o
--- /dev/null
+++ b/xen/lib/memchr_inv.c
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * memchr_inv - Find an unmatching character in an area of memory.
+ * @s: The memory area
+ * @c: The byte that is expected
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of a character other than @c,
+ * or %NULL if the whole buffer contains just @c.
+ */
+void *memchr_inv(const void *s, int c, size_t n)
+{
+   const unsigned char *p = s;
+
+   while (n--)
+   if ((unsigned char)c != *p++)
+   return (void *)(p - 1);
+
+   return NULL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

Re: [PATCH 3/8] x86/EFI: program headers are an ELF concept

2021-04-21 Thread Roger Pau Monné

On Wed, Apr 21, 2021 at 12:36:16PM +0200, Jan Beulich wrote:
> On 21.04.2021 11:11, Roger Pau Monné wrote:
> > On Thu, Apr 01, 2021 at 11:45:09AM +0200, Jan Beulich wrote:
> >> While they apparently do no harm when building xen.efi, their use is
> >> potentially misleading. Conditionalize their use to be for just the ELF
> >> binary we produce.
> >>
> >> No change to the resulting binaries.
> > 
> > The GNU Linker manual notes that program headers would be ignored when
> > not generating an ELF file, so I'm not sure it's worth us adding more
> > churn to the linker script to hide something that's already ignored by
> > ld already.
> > 
> > Maybe adding a comment noting program headers are ignored when not
> > generating an ELF output would be enough?
> 
> Maybe, but I'd prefer this to be explicit, and I'd prefer for efi.lds
> to not have any PE-unrelated baggage. The churn by this patch isn't
> all this significant, is it? In fact in two cases it actually deletes
> meaningless stuff.

Acked-by: Roger Pau Monné 

I would prefer if the new PHDR macro was used for all program headers
directives for consistency though.

Thanks, Roger.

[PATCH v2 07/20] lib: move strlen()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -184,21 +184,6 @@ char *(strrchr)(const char *s, int c)
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRLEN
-/**
- * strlen - Find the length of a string
- * @s: The string to be sized
- */
-size_t (strlen)(const char * s)
-{
-   const char *sc;
-
-   for (sc = s; *sc != '\0'; ++sc)
-   /* nothing */;
-   return sc - s;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRNLEN
 /**
  * strnlen - Find the length of a length-limited string
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -14,6 +14,7 @@ lib-y += muldiv64.o
 lib-y += parse-size.o
 lib-y += rbtree.o
 lib-y += sort.o
+lib-y += strlen.o
 lib-$(CONFIG_X86) += xxhash32.o
 lib-$(CONFIG_X86) += xxhash64.o
 
--- /dev/null
+++ b/xen/lib/strlen.c
@@ -0,0 +1,28 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t (strlen)(const char * s)
+{
+   const char *sc;
+
+   for (sc = s; *sc != '\0'; ++sc)
+   /* nothing */;
+   return sc - s;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 08/20] lib: move strnlen()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -184,22 +184,6 @@ char *(strrchr)(const char *s, int c)
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRNLEN
-/**
- * strnlen - Find the length of a length-limited string
- * @s: The string to be sized
- * @count: The maximum number of bytes to search
- */
-size_t strnlen(const char * s, size_t count)
-{
-   const char *sc;
-
-   for (sc = s; count-- && *sc != '\0'; ++sc)
-   /* nothing */;
-   return sc - s;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRSPN
 /**
  * strspn - Calculate the length of the initial substring of @s which only
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -15,6 +15,7 @@ lib-y += parse-size.o
 lib-y += rbtree.o
 lib-y += sort.o
 lib-y += strlen.o
+lib-y += strnlen.o
 lib-$(CONFIG_X86) += xxhash32.o
 lib-$(CONFIG_X86) += xxhash64.o
 
--- /dev/null
+++ b/xen/lib/strnlen.c
@@ -0,0 +1,29 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strnlen - Find the length of a length-limited string
+ * @s: The string to be sized
+ * @count: The maximum number of bytes to search
+ */
+size_t strnlen(const char * s, size_t count)
+{
+   const char *sc;
+
+   for (sc = s; count-- && *sc != '\0'; ++sc)
+   /* nothing */;
+   return sc - s;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 09/20] lib: move strcmp()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -111,25 +111,6 @@ size_t strlcat(char *dest, const char *s
 EXPORT_SYMBOL(strlcat);
 #endif
 
-#ifndef __HAVE_ARCH_STRCMP
-/**
- * strcmp - Compare two strings
- * @cs: One string
- * @ct: Another string
- */
-int (strcmp)(const char *cs, const char *ct)
-{
-   register signed char __res;
-
-   while (1) {
-   if ((__res = *cs - *ct++) != 0 || !*cs++)
-   break;
-   }
-
-   return __res;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRNCMP
 /**
  * strncmp - Compare two length-limited strings
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -14,6 +14,7 @@ lib-y += muldiv64.o
 lib-y += parse-size.o
 lib-y += rbtree.o
 lib-y += sort.o
+lib-y += strcmp.o
 lib-y += strlen.o
 lib-y += strnlen.o
 lib-$(CONFIG_X86) += xxhash32.o
--- /dev/null
+++ b/xen/lib/strcmp.c
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strcmp - Compare two strings
+ * @cs: One string
+ * @ct: Another string
+ */
+int (strcmp)(const char *cs, const char *ct)
+{
+   register signed char __res;
+
+   while (1) {
+   if ((__res = *cs - *ct++) != 0 || !*cs++)
+   break;
+   }
+
+   return __res;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 10/20] lib: move strncmp()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -111,27 +111,6 @@ size_t strlcat(char *dest, const char *s
 EXPORT_SYMBOL(strlcat);
 #endif
 
-#ifndef __HAVE_ARCH_STRNCMP
-/**
- * strncmp - Compare two length-limited strings
- * @cs: One string
- * @ct: Another string
- * @count: The maximum number of bytes to compare
- */
-int (strncmp)(const char *cs, const char *ct, size_t count)
-{
-   register signed char __res = 0;
-
-   while (count) {
-   if ((__res = *cs - *ct++) != 0 || !*cs++)
-   break;
-   count--;
-   }
-
-   return __res;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRCHR
 /**
  * strchr - Find the first occurrence of a character in a string
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -16,6 +16,7 @@ lib-y += rbtree.o
 lib-y += sort.o
 lib-y += strcmp.o
 lib-y += strlen.o
+lib-y += strncmp.o
 lib-y += strnlen.o
 lib-$(CONFIG_X86) += xxhash32.o
 lib-$(CONFIG_X86) += xxhash64.o
--- /dev/null
+++ b/xen/lib/strncmp.c
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strncmp - Compare two length-limited strings
+ * @cs: One string
+ * @ct: Another string
+ * @count: The maximum number of bytes to compare
+ */
+int (strncmp)(const char *cs, const char *ct, size_t count)
+{
+   register signed char __res = 0;
+
+   while (count) {
+   if ((__res = *cs - *ct++) != 0 || !*cs++)
+   break;
+   count--;
+   }
+
+   return __res;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 11/20] lib: move strlcpy()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -56,32 +56,6 @@ int (strcasecmp)(const char *s1, const c
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRLCPY
-/**
- * strlcpy - Copy a %NUL terminated string into a sized buffer
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @size: size of destination buffer
- *
- * Compatible with *BSD: the result is always a valid
- * NUL-terminated string that fits in the buffer (unless,
- * of course, the buffer size is zero). It does not pad
- * out the result like strncpy() does.
- */
-size_t strlcpy(char *dest, const char *src, size_t size)
-{
-   size_t ret = strlen(src);
-
-   if (size) {
-   size_t len = (ret >= size) ? size-1 : ret;
-   memcpy(dest, src, len);
-   dest[len] = '\0';
-   }
-   return ret;
-}
-EXPORT_SYMBOL(strlcpy);
-#endif
-
 #ifndef __HAVE_ARCH_STRLCAT
 /**
  * strlcat - Append a %NUL terminated string into a sized buffer
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -15,6 +15,7 @@ lib-y += parse-size.o
 lib-y += rbtree.o
 lib-y += sort.o
 lib-y += strcmp.o
+lib-y += strlcpy.o
 lib-y += strlen.o
 lib-y += strncmp.o
 lib-y += strnlen.o
--- /dev/null
+++ b/xen/lib/strlcpy.c
@@ -0,0 +1,38 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strlcpy - Copy a %NUL terminated string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero). It does not pad
+ * out the result like strncpy() does.
+ */
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+   size_t ret = strlen(src);
+
+   if (size) {
+   size_t len = (ret >= size) ? size-1 : ret;
+   memcpy(dest, src, len);
+   dest[len] = '\0';
+   }
+   return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 12/20] lib: move strlcat()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -56,35 +56,6 @@ int (strcasecmp)(const char *s1, const c
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRLCAT
-/**
- * strlcat - Append a %NUL terminated string into a sized buffer
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @size: size of destination buffer
- *
- * Compatible with *BSD: the result is always a valid
- * NUL-terminated string that fits in the buffer (unless,
- * of course, the buffer size is zero).
- */
-size_t strlcat(char *dest, const char *src, size_t size)
-{
-   size_t slen = strlen(src);
-   size_t dlen = strnlen(dest, size);
-   char *p = dest + dlen;
-
-   while ((p - dest) < size)
-   if ((*p++ = *src++) == '\0')
-   break;
-
-   if (dlen < size)
-   *(p-1) = '\0';
-
-   return slen + dlen;
-}
-EXPORT_SYMBOL(strlcat);
-#endif
-
 #ifndef __HAVE_ARCH_STRCHR
 /**
  * strchr - Find the first occurrence of a character in a string
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -15,6 +15,7 @@ lib-y += parse-size.o
 lib-y += rbtree.o
 lib-y += sort.o
 lib-y += strcmp.o
+lib-y += strlcat.o
 lib-y += strlcpy.o
 lib-y += strlen.o
 lib-y += strncmp.o
--- /dev/null
+++ b/xen/lib/strlcat.c
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strlcat - Append a %NUL terminated string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero).
+ */
+size_t strlcat(char *dest, const char *src, size_t size)
+{
+   size_t slen = strlen(src);
+   size_t dlen = strnlen(dest, size);
+   char *p = dest + dlen;
+
+   while ((p - dest) < size)
+   if ((*p++ = *src++) == '\0')
+   break;
+
+   if (dlen < size)
+   *(p-1) = '\0';
+
+   return slen + dlen;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 13/20] lib: move strchr()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -56,21 +56,6 @@ int (strcasecmp)(const char *s1, const c
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRCHR
-/**
- * strchr - Find the first occurrence of a character in a string
- * @s: The string to be searched
- * @c: The character to search for
- */
-char *(strchr)(const char *s, int c)
-{
-   for(; *s != (char) c; ++s)
-   if (*s == '\0')
-   return NULL;
-   return (char *) s;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRRCHR
 /**
  * strrchr - Find the last occurrence of a character in a string
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -14,6 +14,7 @@ lib-y += muldiv64.o
 lib-y += parse-size.o
 lib-y += rbtree.o
 lib-y += sort.o
+lib-y += strchr.o
 lib-y += strcmp.o
 lib-y += strlcat.o
 lib-y += strlcpy.o
--- /dev/null
+++ b/xen/lib/strchr.c
@@ -0,0 +1,28 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strchr - Find the first occurrence of a character in a string
+ * @s: The string to be searched
+ * @c: The character to search for
+ */
+char *(strchr)(const char *s, int c)
+{
+   for(; *s != (char) c; ++s)
+   if (*s == '\0')
+   return NULL;
+   return (char *) s;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 14/20] lib: move strrchr()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -56,24 +56,6 @@ int (strcasecmp)(const char *s1, const c
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRRCHR
-/**
- * strrchr - Find the last occurrence of a character in a string
- * @s: The string to be searched
- * @c: The character to search for
- */
-char *(strrchr)(const char *s, int c)
-{
-   const char *p = s + strlen(s);
-
-   for (; *p != (char)c; --p)
-   if (p == s)
-   return NULL;
-
-   return (char *)p;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRSPN
 /**
  * strspn - Calculate the length of the initial substring of @s which only
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -21,6 +21,7 @@ lib-y += strlcpy.o
 lib-y += strlen.o
 lib-y += strncmp.o
 lib-y += strnlen.o
+lib-y += strrchr.o
 lib-$(CONFIG_X86) += xxhash32.o
 lib-$(CONFIG_X86) += xxhash64.o
 
--- /dev/null
+++ b/xen/lib/strrchr.c
@@ -0,0 +1,31 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strrchr - Find the last occurrence of a character in a string
+ * @s: The string to be searched
+ * @c: The character to search for
+ */
+char *(strrchr)(const char *s, int c)
+{
+   const char *p = s + strlen(s);
+
+   for (; *p != (char)c; --p)
+   if (p == s)
+   return NULL;
+
+   return (char *)p;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 15/20] lib: move strstr()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -131,27 +131,6 @@ char * strsep(char **s, const char *ct)
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRSTR
-/**
- * strstr - Find the first substring in a %NUL terminated string
- * @s1: The string to be searched
- * @s2: The string to search for
- */
-char *(strstr)(const char *s1, const char *s2)
-{
-   size_t l1, l2 = strlen(s2);
-
-   if (!l2)
-   return (char *)s1;
-
-   for (l1 = strlen(s1); l1 >= l2; --l1, ++s1)
-   if (!memcmp(s1, s2, l2))
-   return (char *)s1;
-
-   return NULL;
-}
-#endif
-
 /*
  * Local variables:
  * mode: C
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -22,6 +22,7 @@ lib-y += strlen.o
 lib-y += strncmp.o
 lib-y += strnlen.o
 lib-y += strrchr.o
+lib-y += strstr.o
 lib-$(CONFIG_X86) += xxhash32.o
 lib-$(CONFIG_X86) += xxhash64.o
 
--- /dev/null
+++ b/xen/lib/strstr.c
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *(strstr)(const char *s1, const char *s2)
+{
+   size_t l1, l2 = strlen(s2);
+
+   if (!l2)
+   return (char *)s1;
+
+   for (l1 = strlen(s1); l1 >= l2; --l1, ++s1)
+   if (!memcmp(s1, s2, l2))
+   return (char *)s1;
+
+   return NULL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 16/20] lib: move strcasecmp()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -41,21 +41,6 @@ int strnicmp(const char *s1, const char
 }
 #endif
 
-#ifndef __HAVE_ARCH_STRCASECMP
-int (strcasecmp)(const char *s1, const char *s2)
-{
-int c1, c2;
-
-do
-{
-c1 = tolower(*s1++);
-c2 = tolower(*s2++);
-} while ( c1 == c2 && c1 != 0 );
-
-return c1 - c2;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRSPN
 /**
  * strspn - Calculate the length of the initial substring of @s which only
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -14,6 +14,7 @@ lib-y += muldiv64.o
 lib-y += parse-size.o
 lib-y += rbtree.o
 lib-y += sort.o
+lib-y += strcasecmp.o
 lib-y += strchr.o
 lib-y += strcmp.o
 lib-y += strlcat.o
--- /dev/null
+++ b/xen/lib/strcasecmp.c
@@ -0,0 +1,29 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+#include 
+
+int (strcasecmp)(const char *s1, const char *s2)
+{
+int c1, c2;
+
+do
+{
+c1 = tolower(*s1++);
+c2 = tolower(*s2++);
+} while ( c1 == c2 && c1 != 0 );
+
+return c1 - c2;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 17/20] lib: move/rename strnicmp() to strncasecmp()

2021-04-21 Thread Jan Beulich

While moving the implementation, also rename it to match strcasecmp(),
allowing the similar use of a compiler builting in this case as well.

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -1224,8 +1224,8 @@ static int __init map_range_to_domain(co
  * They are not MMIO and therefore a domain should not be able to
  * manage them via the IOMEM interface.
  */
-if ( strnicmp(dt_node_full_name(dev), "/reserved-memory/",
- strlen("/reserved-memory/")) != 0 )
+if ( strncasecmp(dt_node_full_name(dev), "/reserved-memory/",
+ strlen("/reserved-memory/")) != 0 )
 {
 res = iomem_permit_access(d, paddr_to_pfn(addr),
 paddr_to_pfn(PAGE_ALIGN(addr + len - 1)));
--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -8,39 +8,6 @@
 #include 
 #include 
 
-#ifndef __HAVE_ARCH_STRNICMP
-/**
- * strnicmp - Case insensitive, length-limited string comparison
- * @s1: One string
- * @s2: The other string
- * @len: the maximum number of characters to compare
- */
-int strnicmp(const char *s1, const char *s2, size_t len)
-{
-   /* Yes, Virginia, it had better be unsigned */
-   unsigned char c1, c2;
-
-   c1 = 0; c2 = 0;
-   if (len) {
-   do {
-   c1 = *s1; c2 = *s2;
-   s1++; s2++;
-   if (!c1)
-   break;
-   if (!c2)
-   break;
-   if (c1 == c2)
-   continue;
-   c1 = tolower(c1);
-   c2 = tolower(c2);
-   if (c1 != c2)
-   break;
-   } while (--len);
-   }
-   return (int)c1 - (int)c2;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRSPN
 /**
  * strspn - Calculate the length of the initial substring of @s which only
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -275,14 +275,14 @@ static int get_cpufreq_para(struct xen_s
 strlcpy(op->u.get_para.scaling_governor, "Unknown", CPUFREQ_NAME_LEN);
 
 /* governor specific para */
-if ( !strnicmp(op->u.get_para.scaling_governor, 
-   "userspace", CPUFREQ_NAME_LEN) )
+if ( !strncasecmp(op->u.get_para.scaling_governor,
+  "userspace", CPUFREQ_NAME_LEN) )
 {
 op->u.get_para.u.userspace.scaling_setspeed = policy->cur;
 }
 
-if ( !strnicmp(op->u.get_para.scaling_governor, 
-   "ondemand", CPUFREQ_NAME_LEN) )
+if ( !strncasecmp(op->u.get_para.scaling_governor,
+  "ondemand", CPUFREQ_NAME_LEN) )
 {
 ret = get_cpufreq_ondemand_para(
 &op->u.get_para.u.ondemand.sampling_rate_max,
@@ -350,8 +350,8 @@ static int set_cpufreq_para(struct xen_s
 {
 unsigned int freq =op->u.set_para.ctrl_value;
 
-if ( !strnicmp(policy->governor->name,
-   "userspace", CPUFREQ_NAME_LEN) )
+if ( !strncasecmp(policy->governor->name,
+  "userspace", CPUFREQ_NAME_LEN) )
 ret = write_userspace_scaling_setspeed(op->cpuid, freq);
 else
 ret = -EINVAL;
@@ -363,8 +363,8 @@ static int set_cpufreq_para(struct xen_s
 {
 unsigned int sampling_rate = op->u.set_para.ctrl_value;
 
-if ( !strnicmp(policy->governor->name,
-   "ondemand", CPUFREQ_NAME_LEN) )
+if ( !strncasecmp(policy->governor->name,
+  "ondemand", CPUFREQ_NAME_LEN) )
 ret = write_ondemand_sampling_rate(sampling_rate);
 else
 ret = -EINVAL;
@@ -376,8 +376,8 @@ static int set_cpufreq_para(struct xen_s
 {
 unsigned int up_threshold = op->u.set_para.ctrl_value;
 
-if ( !strnicmp(policy->governor->name,
-   "ondemand", CPUFREQ_NAME_LEN) )
+if ( !strncasecmp(policy->governor->name,
+  "ondemand", CPUFREQ_NAME_LEN) )
 ret = write_ondemand_up_threshold(up_threshold);
 else
 ret = -EINVAL;
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -111,7 +111,7 @@ struct cpufreq_governor *__find_governor
 return NULL;
 
 list_for_each_entry(t, &cpufreq_governor_list, governor_list)
-if (!strnicmp(governor, t->name, CPUFREQ_NAME_LEN))
+if (!strncasecmp(governor, t->name, CPUFREQ_NAME_LEN))
 return t;
 
 return NULL;
--- a/xen/include/xen/string.h
+++ b/xen/include/xen/string.h
@@ -16,8 +16,8 @@ size_t strlcpy(char *, const char *, siz
 size_t strlcat(char *, const char *, size_t);
 int strcmp(const char *, const char *);
 int strncmp(const char *, const char *, size_t);
-int strnicmp(const char *, const char *, size_t);
 int strcasecmp(const cha

[PATCH v2 18/20] lib: move strspn()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable. In fact the function is unused at present, and hence will
now get omitted from the final binaries.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -8,33 +8,6 @@
 #include 
 #include 
 
-#ifndef __HAVE_ARCH_STRSPN
-/**
- * strspn - Calculate the length of the initial substring of @s which only
- * contain letters in @accept
- * @s: The string to be searched
- * @accept: The string to search for
- */
-size_t strspn(const char *s, const char *accept)
-{
-   const char *p;
-   const char *a;
-   size_t count = 0;
-
-   for (p = s; *p != '\0'; ++p) {
-   for (a = accept; *a != '\0'; ++a) {
-   if (*p == *a)
-   break;
-   }
-   if (*a == '\0')
-   return count;
-   ++count;
-   }
-
-   return count;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRPBRK
 /**
  * strpbrk - Find the first occurrence of a set of characters
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -24,6 +24,7 @@ lib-y += strncasecmp.o
 lib-y += strncmp.o
 lib-y += strnlen.o
 lib-y += strrchr.o
+lib-y += strspn.o
 lib-y += strstr.o
 lib-$(CONFIG_X86) += xxhash32.o
 lib-$(CONFIG_X86) += xxhash64.o
--- /dev/null
+++ b/xen/lib/strspn.c
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strspn - Calculate the length of the initial substring of @s which only
+ * contain letters in @accept
+ * @s: The string to be searched
+ * @accept: The string to search for
+ */
+size_t strspn(const char *s, const char *accept)
+{
+   const char *p;
+   const char *a;
+   size_t count = 0;
+
+   for (p = s; *p != '\0'; ++p) {
+   for (a = accept; *a != '\0'; ++a) {
+   if (*p == *a)
+   break;
+   }
+   if (*a == '\0')
+   return count;
+   ++count;
+   }
+
+   return count;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

[PATCH v2 19/20] lib: move strpbrk()

2021-04-21 Thread Jan Beulich

Allow the function to be individually linkable, discardable, and
overridable.

Signed-off-by: Jan Beulich 

--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -8,26 +8,6 @@
 #include 
 #include 
 
-#ifndef __HAVE_ARCH_STRPBRK
-/**
- * strpbrk - Find the first occurrence of a set of characters
- * @cs: The string to be searched
- * @ct: The characters to search for
- */
-char * strpbrk(const char * cs,const char * ct)
-{
-   const char *sc1,*sc2;
-
-   for( sc1 = cs; *sc1 != '\0'; ++sc1) {
-   for( sc2 = ct; *sc2 != '\0'; ++sc2) {
-   if (*sc1 == *sc2)
-   return (char *) sc1;
-   }
-   }
-   return NULL;
-}
-#endif
-
 #ifndef __HAVE_ARCH_STRSEP
 /**
  * strsep - Split a string into tokens
--- a/xen/lib/Makefile
+++ b/xen/lib/Makefile
@@ -23,6 +23,7 @@ lib-y += strlen.o
 lib-y += strncasecmp.o
 lib-y += strncmp.o
 lib-y += strnlen.o
+lib-y += strpbrk.o
 lib-y += strrchr.o
 lib-y += strspn.o
 lib-y += strstr.o
--- /dev/null
+++ b/xen/lib/strpbrk.c
@@ -0,0 +1,33 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+
+/**
+ * strpbrk - Find the first occurrence of a set of characters
+ * @cs: The string to be searched
+ * @ct: The characters to search for
+ */
+char *strpbrk(const char * cs,const char * ct)
+{
+   const char *sc1,*sc2;
+
+   for( sc1 = cs; *sc1 != '\0'; ++sc1) {
+   for( sc2 = ct; *sc2 != '\0'; ++sc2) {
+   if (*sc1 == *sc2)
+   return (char *) sc1;
+   }
+   }
+   return NULL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */

1 2 >

1 - 100 of 141 matches

Mail list logo