Re: [Xen-devel] [PATCH v7 08/36] x86/mm: Add support to enable SME in early boot processing

2017-06-21 Thread Thomas Gleixner
On Fri, 16 Jun 2017, Tom Lendacky wrote:
> diff --git a/arch/x86/include/asm/mem_encrypt.h 
> b/arch/x86/include/asm/mem_encrypt.h
> index a105796..988b336 100644
> --- a/arch/x86/include/asm/mem_encrypt.h
> +++ b/arch/x86/include/asm/mem_encrypt.h
> @@ -15,16 +15,24 @@
>  
>  #ifndef __ASSEMBLY__
>  
> +#include 
> +
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>  
>  extern unsigned long sme_me_mask;
>  
> +void __init sme_enable(void);
> +
>  #else/* !CONFIG_AMD_MEM_ENCRYPT */
>  
>  #define sme_me_mask  0UL
>  
> +static inline void __init sme_enable(void) { }
> +
>  #endif   /* CONFIG_AMD_MEM_ENCRYPT */
>  
> +unsigned long sme_get_me_mask(void);

Why is this an unconditional function? Isn't the mask simply 0 when the MEM
ENCRYPT support is disabled?

> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 6225550..ef12729 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -78,7 +78,29 @@ startup_64:
>   call__startup_64
>   popq%rsi
>  
> - movq$(early_top_pgt - __START_KERNEL_map), %rax
> + /*
> +  * Encrypt the kernel if SME is active.
> +  * The real_mode_data address is in %rsi and that register can be
> +  * clobbered by the called function so be sure to save it.
> +  */
> + push%rsi
> + callsme_encrypt_kernel
> + pop %rsi

That does not make any sense. Neither the call to sme_encrypt_kernel() nor
the following call to sme_get_me_mask().

__startup_64() is already C code, so why can't you simply call that from
__startup_64() in C and return the mask from there?

> @@ -98,7 +120,20 @@ ENTRY(secondary_startup_64)
>   /* Sanitize CPU configuration */
>   call verify_cpu
>  
> - movq$(init_top_pgt - __START_KERNEL_map), %rax
> + /*
> +  * Get the SME encryption mask.
> +  *  The encryption mask will be returned in %rax so we do an ADD
> +  *  below to be sure that the encryption mask is part of the
> +  *  value that will stored in %cr3.
> +  *
> +  * The real_mode_data address is in %rsi and that register can be
> +  * clobbered by the called function so be sure to save it.
> +  */
> + push%rsi
> + callsme_get_me_mask
> + pop %rsi

Do we really need a call here? The mask is established at this point, so
it's either 0 when the encryption stuff is not compiled in or it can be
retrieved from a variable which is accessible at this point.

> +
> + addq$(init_top_pgt - __START_KERNEL_map), %rax
>  1:
>  
>   /* Enable PAE mode, PGE and LA57 */

Thanks,

tglx

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v7 10/36] x86/mm: Provide general kernel support for memory encryption

2017-06-21 Thread Thomas Gleixner
On Fri, 16 Jun 2017, Tom Lendacky wrote:
>  
> +#ifndef pgprot_encrypted
> +#define pgprot_encrypted(prot)   (prot)
> +#endif
> +
> +#ifndef pgprot_decrypted

That looks wrong. It's not decrypted it's rather unencrypted, right?

Thanks,

tglx

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH RFC] Implement hypercall for tracing of program counters

2017-06-21 Thread Felix Schmoll
This commit makes the changes to the hypervisor, the build system as
well as libxc necessary in order to facilitate tracing of program counters.

A discussion of the design can be found in the mailing list:
https://lists.xen.org/archives/html/xen-devel/2017-05/threads.html#02210

The list of files to be included for tracing might still be too extensive,
resulting in indeterministic tracing output for some use cases. It is also
not clear what other causes of indeterminism there might be.

Signed-off-by: Felix Schmoll 
---
 tools/libxc/include/xenctrl.h |  2 ++
 tools/libxc/xc_private.c  | 22 +
 tools/libxc/xc_private.h  |  8 +
 xen/Kconfig   |  4 +++
 xen/Rules.mk  |  4 +++
 xen/arch/arm/traps.c  |  1 +
 xen/arch/x86/hvm/hypercall.c  |  1 +
 xen/arch/x86/hypercall.c  |  1 +
 xen/arch/x86/pv/hypercall.c   |  1 +
 xen/common/Makefile   | 13 
 xen/common/edge_trace.c   | 77 +++
 xen/common/edge_tracer.c  | 25 ++
 xen/include/public/xen.h  |  1 +
 xen/include/xen/edge_trace.h  | 19 +++
 xen/include/xen/hypercall.h   |  7 
 xen/include/xen/sched.h   |  6 
 16 files changed, 192 insertions(+)
 create mode 100644 xen/common/edge_trace.c
 create mode 100644 xen/common/edge_tracer.c
 create mode 100644 xen/include/xen/edge_trace.h

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 8c26cb4141..75e03337f9 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1572,6 +1572,8 @@ int xc_domctl(xc_interface *xch, struct xen_domctl 
*domctl);
 int xc_sysctl(xc_interface *xch, struct xen_sysctl *sysctl);
 
 int xc_version(xc_interface *xch, int cmd, void *arg);
+int xc_edge_trace(xc_interface *xch, domid_t dom_id, int mode,
+unsigned int size, uint64_t *buf);
 
 int xc_flask_op(xc_interface *xch, xen_flask_op_t *op);
 
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index f395594a8f..97663f219b 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -530,6 +530,28 @@ int xc_version(xc_interface *xch, int cmd, void *arg)
 return rc;
 }
 
+int xc_edge_trace(xc_interface *xch,
+domid_t dom_id, int mode, unsigned int size, uint64_t* arg)
+{
+int rc;
+
+DECLARE_HYPERCALL_BOUNCE(arg, size * sizeof(uint64_t),
+XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+
+if ( xc_hypercall_bounce_pre(xch, arg) )
+{
+PERROR("Could not bounce buffer for edge_trace hypercall");
+return -ENOMEM;
+}
+
+rc = do_edge_trace(xch, dom_id, mode, size, HYPERCALL_BUFFER(arg));
+
+xc_hypercall_bounce_post(xch, arg);
+
+return rc;
+}
+
+
 unsigned long xc_make_page_below_4G(
 xc_interface *xch, uint32_t domid, unsigned long mfn)
 {
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index 1c27b0fded..60b0d8ebe3 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -229,6 +229,14 @@ static inline int do_xen_version(xc_interface *xch, int 
cmd, xc_hypercall_buffer
 cmd, HYPERCALL_BUFFER_AS_ARG(dest));
 }
 
+static inline int do_edge_trace(xc_interface *xch, domid_t dom_id, int mode,
+unsigned int size, xc_hypercall_buffer_t *buf)
+{
+DECLARE_HYPERCALL_BUFFER_ARGUMENT(buf);
+return xencall4(xch->xcall, __HYPERVISOR_edge_trace, dom_id, mode,
+size, HYPERCALL_BUFFER_AS_ARG(buf));
+}
+
 static inline int do_physdev_op(xc_interface *xch, int cmd, void *op, size_t 
len)
 {
 int ret = -1;
diff --git a/xen/Kconfig b/xen/Kconfig
index 65d491d776..5ed2c9c390 100644
--- a/xen/Kconfig
+++ b/xen/Kconfig
@@ -38,4 +38,8 @@ config LTO
 
  If unsure, say N.
 
+config TRACE_PC
+bool "Enable tracing e.g. for fuzzing"
+default false
+
 source "Kconfig.debug"
diff --git a/xen/Rules.mk b/xen/Rules.mk
index 77bcd44922..dde14e3228 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -170,6 +170,10 @@ clean:: $(addprefix _clean_, $(subdir-all))
 _clean_%/: FORCE
$(MAKE) -f $(BASEDIR)/Rules.mk -C $* clean
 
+ifeq ($(CONFIG_TRACE_PC),y)
+$(objs-need-tracing): CFLAGS += -fsanitize-coverage=trace-pc
+endif
+
 %.o: %.c Makefile
$(CC) $(CFLAGS) -c $< -o $@
 
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index c07999b518..a4d36517f9 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1419,6 +1419,7 @@ static arm_hypercall_t arm_hypercall_table[] = {
 HYPERCALL(platform_op, 1),
 HYPERCALL_ARM(vcpu_op, 3),
 HYPERCALL(vm_assist, 2),
+HYPERCALL(edge_trace, 4),
 };
 
 #ifndef NDEBUG
diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
index e7238ce293..fed8363d8a 100644
--- a/xen/arch/x86/hvm/hypercall.c
+++ b/xen/arch/x86/hvm/hypercall.c
@@ -132,6 +132,7 @@ static const hypercall_table_t hvm_hypercall_table[] = {
 COMPAT_CALL(mmuext_op),
 HYPERCALL(xenpmu_op),
 COMPAT_CALL(dm_op),
+   

Re: [Xen-devel] [PATCH 2/2] xen/input: add multi-touch support

2017-06-21 Thread Dmitry Torokhov
On Thu, Jun 08, 2017 at 09:45:18AM +0300, Oleksandr Andrushchenko wrote:
> Hi, Dmitry!
> 
> On 06/07/2017 07:56 PM, Dmitry Torokhov wrote:
> >On Wed, May 31, 2017 at 12:06:56PM +0300, Oleksandr Andrushchenko wrote:
> >>Hi, Dmitry!
> >>
> >>On 05/30/2017 07:37 PM, Dmitry Torokhov wrote:
> >>>On Tue, May 30, 2017 at 03:50:20PM +0300, Oleksandr Andrushchenko wrote:
> Hi, Dmitry!
> 
> On 05/30/2017 08:51 AM, Dmitry Torokhov wrote:
> >On Fri, Apr 21, 2017 at 09:40:36AM +0300, Oleksandr Andrushchenko wrote:
> >>Hi, Dmitry!
> >>
> >>On 04/21/2017 05:10 AM, Dmitry Torokhov wrote:
> >>>Hi Oleksandr,
> >>>
> >>>On Thu, Apr 13, 2017 at 02:38:04PM +0300, Oleksandr Andrushchenko 
> >>>wrote:
> From: Oleksandr Andrushchenko 
> 
> Extend xen_kbdfront to provide multi-touch support
> to unprivileged domains.
> 
> Signed-off-by: Oleksandr Andrushchenko 
> 
> ---
>   drivers/input/misc/xen-kbdfront.c | 142 
>  +-
>   1 file changed, 140 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/input/misc/xen-kbdfront.c 
> b/drivers/input/misc/xen-kbdfront.c
> index 01c27b4c3288..e5d064aaa237 100644
> --- a/drivers/input/misc/xen-kbdfront.c
> +++ b/drivers/input/misc/xen-kbdfront.c
> @@ -17,6 +17,7 @@
>   #include 
>   #include 
>   #include 
> +#include 
>   #include 
>   #include 
> @@ -34,11 +35,14 @@
>   struct xenkbd_info {
>   struct input_dev *kbd;
>   struct input_dev *ptr;
> + struct input_dev *mtouch;
>   struct xenkbd_page *page;
>   int gref;
>   int irq;
>   struct xenbus_device *xbdev;
>   char phys[32];
> + /* current MT slot/contact ID we are injecting events in */
> + int mtouch_cur_contact_id;
>   };
>   enum { KPARAM_X, KPARAM_Y, KPARAM_CNT };
> @@ -47,6 +51,12 @@ module_param_array(ptr_size, int, NULL, 0444);
>   MODULE_PARM_DESC(ptr_size,
>   "Pointing device width, height in pixels (default 800,600)");
> +enum { KPARAM_MT_X, KPARAM_MT_Y, KPARAM_MT_CNT };
> +static int mtouch_size[KPARAM_MT_CNT] = { XENFB_WIDTH, XENFB_HEIGHT 
> };
> +module_param_array(mtouch_size, int, NULL, 0444);
> +MODULE_PARM_DESC(ptr_size,
> + "Multi-touch device width, height in pixels (default 800,600)");
> +
> >>>Why do you need separate module parameters for multi-touch device?
> >>please see below
>   static int xenkbd_remove(struct xenbus_device *);
>   static int xenkbd_connect_backend(struct xenbus_device *, struct 
>  xenkbd_info *);
>   static void xenkbd_disconnect_backend(struct xenkbd_info *);
> @@ -100,6 +110,60 @@ static irqreturn_t input_handler(int rq, void 
> *dev_id)
>   input_report_rel(dev, REL_WHEEL,
>    -event->pos.rel_z);
>   break;
> + case XENKBD_TYPE_MTOUCH:
> + dev = info->mtouch;
> + if (unlikely(!dev))
> + break;
> + if (unlikely(event->mtouch.contact_id !=
> + info->mtouch_cur_contact_id)) {
> >>>Why is this unlikely? Does contact ID changes once in 1000 packets or
> >>>even less?
> >>My assumption was that regardless of the fact that we are multi-touch
> >>device still single touches will come in more frequently
> >>But I can remove *unlikely* if my assumption is not correct
> >I think the normal expectation is that "unlikely" is supposed for
> >something that happens once in a blue moon, so I'd rather remove it.
> >
> agree, removed "unlikely"
> + info->mtouch_cur_contact_id =
> + event->mtouch.contact_id;
> + input_mt_slot(dev, 
> event->mtouch.contact_id);
> + }
> + switch (event->mtouch.event_type) {
> + case XENKBD_MT_EV_DOWN:
> + input_mt_report_slot_state(dev, 
> MT_TOOL_FINGER,
> +true);
> >Should we establish tool event? We have MT_TOOL_PEN, etc.
> I think that for multi-touch MT_TOOL_FINGER is enough
> any reason we would also want MT_TOOL_PEN here?
> >>>Why would not you? Let's say you have a drawing application running in
> >>>guest that can make use of tool types. Why would not yo

Re: [Xen-devel] [PATCH v7 07/36] x86/mm: Don't use phys_to_virt in ioremap() if SME is active

2017-06-21 Thread Thomas Gleixner
On Fri, 16 Jun 2017, Tom Lendacky wrote:
> Currently there is a check if the address being mapped is in the ISA
> range (is_ISA_range()), and if it is then phys_to_virt() is used to
> perform the mapping.  When SME is active, however, this will result
> in the mapping having the encryption bit set when it is expected that
> an ioremap() should not have the encryption bit set. So only use the
> phys_to_virt() function if SME is not active
> 
> Reviewed-by: Borislav Petkov 
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/mm/ioremap.c |7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index 4c1b5fd..a382ba9 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -13,6 +13,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -106,9 +107,11 @@ static void __iomem *__ioremap_caller(resource_size_t 
> phys_addr,
>   }
>  
>   /*
> -  * Don't remap the low PCI/ISA area, it's always mapped..
> +  * Don't remap the low PCI/ISA area, it's always mapped.
> +  *   But if SME is active, skip this so that the encryption bit
> +  *   doesn't get set.
>*/
> - if (is_ISA_range(phys_addr, last_addr))
> + if (is_ISA_range(phys_addr, last_addr) && !sme_active())
>   return (__force void __iomem *)phys_to_virt(phys_addr);

More thoughts about that.

Making this conditional on !sme_active() is not the best idea. I'd rather
remove that whole thing and make it unconditional so the code pathes get
always exercised and any subtle wreckage is detected on a broader base and
not only on that hard to access and debug SME capable machine owned by Joe
User.

Thanks,

tglx

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/2] efi: Process MEMATTR table only if EFI_MEMMAP

2017-06-21 Thread Ard Biesheuvel
On 20 June 2017 at 22:14, Daniel Kiper  wrote:
> Otherwise e.g. Xen dom0 on x86_64 EFI platforms crashes.
>
> In theory we can check EFI_PARAVIRT too, however,
> EFI_MEMMAP looks more generic and covers more cases.
>
> Signed-off-by: Daniel Kiper 

Reviewed-by: Ard Biesheuvel 

> ---
>  drivers/firmware/efi/efi.c |3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
> index b372aad..045d6d3 100644
> --- a/drivers/firmware/efi/efi.c
> +++ b/drivers/firmware/efi/efi.c
> @@ -528,7 +528,8 @@ int __init efi_config_parse_tables(void *config_tables, 
> int count, int sz,
> }
> }
>
> -   efi_memattr_init();
> +   if (efi_enabled(EFI_MEMMAP))
> +   efi_memattr_init();
>
> /* Parse the EFI Properties table if it exists */
> if (efi.properties_table != EFI_INVALID_TABLE_ADDR) {
> --
> 1.7.10.4
>

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] x86/xen/efi: Init only efi struct members used by Xen

2017-06-21 Thread Ard Biesheuvel
On 20 June 2017 at 22:14, Daniel Kiper  wrote:
> Current approach, wholesale efi struct initialization from efi_xen, is not
> good. Usually if new member is defined then it is properly initialized in
> drivers/firmware/efi/efi.c but not in arch/x86/xen/efi.c. As I saw it happened
> a few times until now. So, let's initialize only efi struct members used by
> Xen to avoid such issues in the future.
>
> Signed-off-by: Daniel Kiper 

Acked-by: Ard Biesheuvel 

> ---
>  arch/x86/xen/efi.c |   45 -
>  1 file changed, 12 insertions(+), 33 deletions(-)
>
> diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
> index 30bb2e8..01b9faf 100644
> --- a/arch/x86/xen/efi.c
> +++ b/arch/x86/xen/efi.c
> @@ -54,38 +54,6 @@
> .tables = EFI_INVALID_TABLE_ADDR  /* Initialized later. */
>  };
>
> -static const struct efi efi_xen __initconst = {
> -   .systab   = NULL, /* Initialized later. */
> -   .runtime_version  = 0,/* Initialized later. */
> -   .mps  = EFI_INVALID_TABLE_ADDR,
> -   .acpi = EFI_INVALID_TABLE_ADDR,
> -   .acpi20   = EFI_INVALID_TABLE_ADDR,
> -   .smbios   = EFI_INVALID_TABLE_ADDR,
> -   .smbios3  = EFI_INVALID_TABLE_ADDR,
> -   .sal_systab   = EFI_INVALID_TABLE_ADDR,
> -   .boot_info= EFI_INVALID_TABLE_ADDR,
> -   .hcdp = EFI_INVALID_TABLE_ADDR,
> -   .uga  = EFI_INVALID_TABLE_ADDR,
> -   .uv_systab= EFI_INVALID_TABLE_ADDR,
> -   .fw_vendor= EFI_INVALID_TABLE_ADDR,
> -   .runtime  = EFI_INVALID_TABLE_ADDR,
> -   .config_table = EFI_INVALID_TABLE_ADDR,
> -   .get_time = xen_efi_get_time,
> -   .set_time = xen_efi_set_time,
> -   .get_wakeup_time  = xen_efi_get_wakeup_time,
> -   .set_wakeup_time  = xen_efi_set_wakeup_time,
> -   .get_variable = xen_efi_get_variable,
> -   .get_next_variable= xen_efi_get_next_variable,
> -   .set_variable = xen_efi_set_variable,
> -   .query_variable_info  = xen_efi_query_variable_info,
> -   .update_capsule   = xen_efi_update_capsule,
> -   .query_capsule_caps   = xen_efi_query_capsule_caps,
> -   .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
> -   .reset_system = xen_efi_reset_system,
> -   .set_virtual_address_map  = NULL, /* Not used under Xen. */
> -   .flags= 0 /* Initialized later. */
> -};
> -
>  static efi_system_table_t __init *xen_efi_probe(void)
>  {
> struct xen_platform_op op = {
> @@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
>
> /* Here we know that Xen runs on EFI platform. */
>
> -   efi = efi_xen;
> +   efi.get_time = xen_efi_get_time;
> +   efi.set_time = xen_efi_set_time;
> +   efi.get_wakeup_time = xen_efi_get_wakeup_time;
> +   efi.set_wakeup_time = xen_efi_set_wakeup_time;
> +   efi.get_variable = xen_efi_get_variable;
> +   efi.get_next_variable = xen_efi_get_next_variable;
> +   efi.set_variable = xen_efi_set_variable;
> +   efi.query_variable_info = xen_efi_query_variable_info;
> +   efi.update_capsule = xen_efi_update_capsule;
> +   efi.query_capsule_caps = xen_efi_query_capsule_caps;
> +   efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
> +   efi.reset_system = xen_efi_reset_system;
>
> efi_systab_xen.tables = info->cfg.addr;
> efi_systab_xen.nr_tables = info->cfg.nent;
> --
> 1.7.10.4
>

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] x86/xen/efi: Init only efi struct members used by Xen

2017-06-21 Thread Andrew Cooper
On 20/06/2017 21:14, Daniel Kiper wrote:
> Current approach, wholesale efi struct initialization from efi_xen, is not
> good. Usually if new member is defined then it is properly initialized in
> drivers/firmware/efi/efi.c but not in arch/x86/xen/efi.c. As I saw it happened
> a few times until now. So, let's initialize only efi struct members used by
> Xen to avoid such issues in the future.
>
> Signed-off-by: Daniel Kiper 
> ---
>  arch/x86/xen/efi.c |   45 -
>  1 file changed, 12 insertions(+), 33 deletions(-)
>
> diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
> index 30bb2e8..01b9faf 100644
> --- a/arch/x86/xen/efi.c
> +++ b/arch/x86/xen/efi.c
> @@ -54,38 +54,6 @@
>   .tables = EFI_INVALID_TABLE_ADDR  /* Initialized later. */
>  };
>  
> -static const struct efi efi_xen __initconst = {
> - .systab   = NULL, /* Initialized later. */
> - .runtime_version  = 0,/* Initialized later. */
> - .mps  = EFI_INVALID_TABLE_ADDR,
> - .acpi = EFI_INVALID_TABLE_ADDR,
> - .acpi20   = EFI_INVALID_TABLE_ADDR,
> - .smbios   = EFI_INVALID_TABLE_ADDR,
> - .smbios3  = EFI_INVALID_TABLE_ADDR,
> - .sal_systab   = EFI_INVALID_TABLE_ADDR,
> - .boot_info= EFI_INVALID_TABLE_ADDR,
> - .hcdp = EFI_INVALID_TABLE_ADDR,
> - .uga  = EFI_INVALID_TABLE_ADDR,
> - .uv_systab= EFI_INVALID_TABLE_ADDR,
> - .fw_vendor= EFI_INVALID_TABLE_ADDR,
> - .runtime  = EFI_INVALID_TABLE_ADDR,
> - .config_table = EFI_INVALID_TABLE_ADDR,
> - .get_time = xen_efi_get_time,
> - .set_time = xen_efi_set_time,
> - .get_wakeup_time  = xen_efi_get_wakeup_time,
> - .set_wakeup_time  = xen_efi_set_wakeup_time,
> - .get_variable = xen_efi_get_variable,
> - .get_next_variable= xen_efi_get_next_variable,
> - .set_variable = xen_efi_set_variable,
> - .query_variable_info  = xen_efi_query_variable_info,
> - .update_capsule   = xen_efi_update_capsule,
> - .query_capsule_caps   = xen_efi_query_capsule_caps,
> - .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
> - .reset_system = xen_efi_reset_system,
> - .set_virtual_address_map  = NULL, /* Not used under Xen. */
> - .flags= 0 /* Initialized later. */
> -};
> -
>  static efi_system_table_t __init *xen_efi_probe(void)
>  {
>   struct xen_platform_op op = {
> @@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
>  
>   /* Here we know that Xen runs on EFI platform. */
>  
> - efi = efi_xen;
> + efi.get_time = xen_efi_get_time;
> + efi.set_time = xen_efi_set_time;
> + efi.get_wakeup_time = xen_efi_get_wakeup_time;
> + efi.set_wakeup_time = xen_efi_set_wakeup_time;
> + efi.get_variable = xen_efi_get_variable;
> + efi.get_next_variable = xen_efi_get_next_variable;
> + efi.set_variable = xen_efi_set_variable;
> + efi.query_variable_info = xen_efi_query_variable_info;
> + efi.update_capsule = xen_efi_update_capsule;
> + efi.query_capsule_caps = xen_efi_query_capsule_caps;
> + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
> + efi.reset_system = xen_efi_reset_system;

This presumably means that the system default values are already present
in efi at the point that we overwrite some Xen specifics?

If so, surely you need to retain the clobbering of set_virtual_address_map ?

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v7 10/36] x86/mm: Provide general kernel support for memory encryption

2017-06-21 Thread Borislav Petkov
On Wed, Jun 21, 2017 at 09:18:59AM +0200, Thomas Gleixner wrote:
> That looks wrong. It's not decrypted it's rather unencrypted, right?

Yeah, it previous versions of the patchset, "decrypted" and
"unencrypted" were both present so we settled on "decrypted" for the
nomenclature.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Lockup/High ksoftirqd when rate-limiting is enabled

2017-06-21 Thread Jean-Louis Dupond

Thanks for this quick patch.
I was able to test it today, and the high ksoftirqd cpu usage is gone.

Great!

Is there a chance this can get pushed into stable kernel versions 
(3.18.x, 4.4.x, etc)?
There is not really any backport work involved, as the netback driver hasn't 
changed a lot recently.



Tested-by: Jean-Louis Dupond 


Op 2017-06-20 13:18, schreef Wei Liu:

On Tue, Jun 20, 2017 at 11:31:02AM +0200, Jean-Louis Dupond wrote:

Hi,

As requested via IRC i'm sending this to xen-devel & netback 
maintainers.


We are using Xen 4.4.4-23.el6 with kernel 3.18.44-20.el6.x86_64.
Now recently we're having issues with rate-limiting enabled.

When we enable rate limiting in Xen, and then do a lot of outbound 
traffic on

the domU, we notice a high ksoftirqd load.
But in some cases the system locks up completely.



Can you give this patch a try?

---8<--
From a242d4a74cc4ec46c5e3d43dd07eb146be4ca233 Mon Sep 17 00:00:00 2001
From: Wei Liu 
Date: Tue, 20 Jun 2017 11:49:28 +0100
Subject: [PATCH] xen-netback: correctly schedule rate-limited queues

Add a flag to indicate if a queue is rate-limited. Test the flag in
NAPI poll handler and avoid rescheduling the queue if true, otherwise
we risk locking up the host. The rescheduling shall be done when
replenishing credit.

Reported-by: Jean-Louis Dupond 
Signed-off-by: Wei Liu 
---
 drivers/net/xen-netback/common.h| 1 +
 drivers/net/xen-netback/interface.c | 6 +-
 drivers/net/xen-netback/netback.c   | 6 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netback/common.h 
b/drivers/net/xen-netback/common.h

index 530586be05b4..5b1d2e8402d9 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif 
*/

unsigned long   remaining_credit;
struct timer_list credit_timeout;
u64 credit_window_start;
+   bool rate_limited;

/* Statistics */
struct xenvif_stats stats;
diff --git a/drivers/net/xen-netback/interface.c
b/drivers/net/xen-netback/interface.c
index 8397f6c92451..e322a862ddfe 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi,
int budget)

if (work_done < budget) {
napi_complete_done(napi, work_done);
-   xenvif_napi_schedule_or_enable_events(queue);
+   /* If the queue is rate-limited, it shall be
+* rescheduled in the timer callback.
+*/
+   if (likely(!queue->rate_limited))
+   xenvif_napi_schedule_or_enable_events(queue);
}

return work_done;
diff --git a/drivers/net/xen-netback/netback.c
b/drivers/net/xen-netback/netback.c
index 602d408fa25e..5042ff8d449a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue 
*queue)

max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

queue->remaining_credit = min(max_credit, max_burst);
+   queue->rate_limited = false;
 }

 void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct
xenvif_queue *queue, unsigned size)
msecs_to_jiffies(queue->credit_usec / 1000);

/* Timer could already be pending in rare cases. */
-   if (timer_pending(&queue->credit_timeout))
+   if (timer_pending(&queue->credit_timeout)) {
+   queue->rate_limited = true;
return true;
+   }

/* Passed the point where we can replenish credit? */
if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue
*queue, unsigned size)
mod_timer(&queue->credit_timeout,
  next_credit);
queue->credit_window_start = next_credit;
+   queue->rate_limited = true;

return true;
}


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v7 20/36] x86, mpparse: Use memremap to map the mpf and mpc data

2017-06-21 Thread Borislav Petkov
On Fri, Jun 16, 2017 at 01:53:38PM -0500, Tom Lendacky wrote:
> The SMP MP-table is built by UEFI and placed in memory in a decrypted
> state. These tables are accessed using a mix of early_memremap(),
> early_memunmap(), phys_to_virt() and virt_to_phys(). Change all accesses
> to use early_memremap()/early_memunmap(). This allows for proper setting
> of the encryption mask so that the data can be successfully accessed when
> SME is active.
> 
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/kernel/mpparse.c |   98 
> -
>  1 file changed, 70 insertions(+), 28 deletions(-)

Reviewed-by: Borislav Petkov 

Please put the conversion to pr_fmt() on the TODO list for later.

Thanks.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] xen/input: add multi-touch support

2017-06-21 Thread Oleksandr Andrushchenko

Hi, Dmitry!

On 06/21/2017 10:24 AM, Dmitry Torokhov wrote:

On Thu, Jun 08, 2017 at 09:45:18AM +0300, Oleksandr Andrushchenko wrote:

Hi, Dmitry!

On 06/07/2017 07:56 PM, Dmitry Torokhov wrote:

On Wed, May 31, 2017 at 12:06:56PM +0300, Oleksandr Andrushchenko wrote:

Hi, Dmitry!

On 05/30/2017 07:37 PM, Dmitry Torokhov wrote:

On Tue, May 30, 2017 at 03:50:20PM +0300, Oleksandr Andrushchenko wrote:

Hi, Dmitry!

On 05/30/2017 08:51 AM, Dmitry Torokhov wrote:

On Fri, Apr 21, 2017 at 09:40:36AM +0300, Oleksandr Andrushchenko wrote:

Hi, Dmitry!

On 04/21/2017 05:10 AM, Dmitry Torokhov wrote:

Hi Oleksandr,

On Thu, Apr 13, 2017 at 02:38:04PM +0300, Oleksandr Andrushchenko wrote:

From: Oleksandr Andrushchenko 

Extend xen_kbdfront to provide multi-touch support
to unprivileged domains.

Signed-off-by: Oleksandr Andrushchenko 
---
  drivers/input/misc/xen-kbdfront.c | 142 +-
  1 file changed, 140 insertions(+), 2 deletions(-)

diff --git a/drivers/input/misc/xen-kbdfront.c 
b/drivers/input/misc/xen-kbdfront.c
index 01c27b4c3288..e5d064aaa237 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -17,6 +17,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
@@ -34,11 +35,14 @@
  struct xenkbd_info {
struct input_dev *kbd;
struct input_dev *ptr;
+   struct input_dev *mtouch;
struct xenkbd_page *page;
int gref;
int irq;
struct xenbus_device *xbdev;
char phys[32];
+   /* current MT slot/contact ID we are injecting events in */
+   int mtouch_cur_contact_id;
  };
  enum { KPARAM_X, KPARAM_Y, KPARAM_CNT };
@@ -47,6 +51,12 @@ module_param_array(ptr_size, int, NULL, 0444);
  MODULE_PARM_DESC(ptr_size,
"Pointing device width, height in pixels (default 800,600)");
+enum { KPARAM_MT_X, KPARAM_MT_Y, KPARAM_MT_CNT };
+static int mtouch_size[KPARAM_MT_CNT] = { XENFB_WIDTH, XENFB_HEIGHT };
+module_param_array(mtouch_size, int, NULL, 0444);
+MODULE_PARM_DESC(ptr_size,
+   "Multi-touch device width, height in pixels (default 800,600)");
+

Why do you need separate module parameters for multi-touch device?

please see below

  static int xenkbd_remove(struct xenbus_device *);
  static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info 
*);
  static void xenkbd_disconnect_backend(struct xenkbd_info *);
@@ -100,6 +110,60 @@ static irqreturn_t input_handler(int rq, void *dev_id)
input_report_rel(dev, REL_WHEEL,
 -event->pos.rel_z);
break;
+   case XENKBD_TYPE_MTOUCH:
+   dev = info->mtouch;
+   if (unlikely(!dev))
+   break;
+   if (unlikely(event->mtouch.contact_id !=
+   info->mtouch_cur_contact_id)) {

Why is this unlikely? Does contact ID changes once in 1000 packets or
even less?

My assumption was that regardless of the fact that we are multi-touch
device still single touches will come in more frequently
But I can remove *unlikely* if my assumption is not correct

I think the normal expectation is that "unlikely" is supposed for
something that happens once in a blue moon, so I'd rather remove it.


agree, removed "unlikely"

+   info->mtouch_cur_contact_id =
+   event->mtouch.contact_id;
+   input_mt_slot(dev, event->mtouch.contact_id);
+   }
+   switch (event->mtouch.event_type) {
+   case XENKBD_MT_EV_DOWN:
+   input_mt_report_slot_state(dev, MT_TOOL_FINGER,
+  true);

Should we establish tool event? We have MT_TOOL_PEN, etc.

I think that for multi-touch MT_TOOL_FINGER is enough
any reason we would also want MT_TOOL_PEN here?

Why would not you? Let's say you have a drawing application running in
guest that can make use of tool types. Why would not you want to tell it
that the tool user is currently using is in fact a pen and not finger?

But it is a finger :) we are multi-touch, not multi pen

So for tablets that support both touch and stylus you would export them
as 2 separate devices?

this could be done in different ways, but please see on
pen support below

Besides, that, if I am about to implement pen support
(which I still not convinced we really need), how will I
do that?

I do not know what you have on the backend side, but roughly speaking if
you detect a pen/stylus you let your guest know that the contact is not
a finger, but pen. How you plumb it through is up to you.

we do not detect pen, only finger at the moment
and the existing protocol has no means to tell
type of the tool used, everything is supposed to
be "finger", so front-end has no possi

Re: [Xen-devel] [PATCH v4 04/27] x86: move PV invalid op emulation code

2017-06-21 Thread Wei Liu
On Wed, Jun 21, 2017 at 12:15:46AM -0600, Jan Beulich wrote:
> >>> On 20.06.17 at 18:25,  wrote:
> > On Tue, Jun 20, 2017 at 10:21:27AM -0600, Jan Beulich wrote:
> >> >>> On 08.06.17 at 19:11,  wrote:
> >> > @@ -1053,8 +982,8 @@ void do_invalid_op(struct cpu_user_regs *regs)
> >> >  
> >> >  if ( likely(guest_mode(regs)) )
> >> >  {
> >> > -if ( !emulate_invalid_rdtscp(regs) &&
> >> > - !emulate_forced_invalid_op(regs) )
> >> > +if ( !pv_emulate_invalid_rdtscp(regs) &&
> >> > + !pv_emulate_forced_invalid_op(regs) )
> >> 
> >> I wonder if the first couldn't be called by the second, making it
> >> unnecessary to export both. Or maybe have a wrapper
> >> pv_emulate_invalid_op() around both.
> >> 
> > 
> > Do you want me to refactor and move code in the same patch? Wouldn't
> > that make it hard for you to review?
> 
> Why - especially in the wrapper variant you'd move both functions
> unchanged (perhaps even with the names left as they are), and
> merely add the wrapper (and of course use it in the code fragment
> above). That'll make review rather simple, as you'll still be able to
> state that you left both existing functions unchanged.

OK

---8<---
From 50dfe1fe116c28a3953f0b72acc7b1dee4136e2b Mon Sep 17 00:00:00 2001
From: Wei Liu 
Date: Mon, 5 Jun 2017 13:07:16 +0100
Subject: [PATCH] x86: move PV invalid op emulation code

Move the code to pv/emul-inv-op.c. Both functions are unchanged.
Provide pv_emulate_invalid_op and use it in traps.c.

Signed-off-by: Wei Liu 
---
 xen/arch/x86/pv/Makefile   |   1 +
 xen/arch/x86/pv/emul-inv-op.c  | 128 +
 xen/arch/x86/traps.c   |  74 +---
 xen/include/asm-x86/pv/traps.h |   2 +
 4 files changed, 132 insertions(+), 73 deletions(-)
 create mode 100644 xen/arch/x86/pv/emul-inv-op.c

diff --git a/xen/arch/x86/pv/Makefile b/xen/arch/x86/pv/Makefile
index 1f6fbd3f5c..42ca64dc9e 100644
--- a/xen/arch/x86/pv/Makefile
+++ b/xen/arch/x86/pv/Makefile
@@ -5,5 +5,6 @@ obj-bin-y += dom0_build.init.o
 obj-y += domain.o
 obj-y += emulate.o
 obj-y += emul-gate-op.o
+obj-y += emul-inv-op.o
 obj-y += emul-priv-op.o
 obj-bin-y += gpr_switch.o
diff --git a/xen/arch/x86/pv/emul-inv-op.c b/xen/arch/x86/pv/emul-inv-op.c
new file mode 100644
index 00..a1c56da171
--- /dev/null
+++ b/xen/arch/x86/pv/emul-inv-op.c
@@ -0,0 +1,128 @@
+/**
+ * arch/x86/pv/emul-inv-op.c
+ *
+ * Emulate invalid op for PV guests
+ *
+ * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see .
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "emulate.h"
+
+static int emulate_invalid_rdtscp(struct cpu_user_regs *regs)
+{
+char opcode[3];
+unsigned long eip, rc;
+struct vcpu *v = current;
+
+eip = regs->rip;
+if ( (rc = copy_from_user(opcode, (char *)eip, sizeof(opcode))) != 0 )
+{
+pv_inject_page_fault(0, eip + sizeof(opcode) - rc);
+return EXCRET_fault_fixed;
+}
+if ( memcmp(opcode, "\xf\x1\xf9", sizeof(opcode)) )
+return 0;
+eip += sizeof(opcode);
+pv_soft_rdtsc(v, regs, 1);
+pv_emul_instruction_done(regs, eip);
+return EXCRET_fault_fixed;
+}
+
+static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
+{
+char sig[5], instr[2];
+unsigned long eip, rc;
+struct cpuid_leaf res;
+
+eip = regs->rip;
+
+/* Check for forced emulation signature: ud2 ; .ascii "xen". */
+if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
+{
+pv_inject_page_fault(0, eip + sizeof(sig) - rc);
+return EXCRET_fault_fixed;
+}
+if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
+return 0;
+eip += sizeof(sig);
+
+/* We only emulate CPUID. */
+if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
+{
+pv_inject_page_fault(0, eip + sizeof(instr) - rc);
+return EXCRET_fault_fixed;
+}
+if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
+return 0;
+
+/* If cpuid faulting is enabled and CPL>0 inject a #GP in place of #UD. */
+  

Re: [Xen-devel] [PATCH v4 04/27] x86: move PV invalid op emulation code

2017-06-21 Thread Jan Beulich
>>> On 21.06.17 at 10:57,  wrote:
> +int pv_emulate_invalid_op(struct cpu_user_regs *regs)
> +{
> +return !emulate_invalid_rdtscp(regs) && !emulate_forced_invalid_op(regs);
> +}

This way you want to make the function return bool. Alternatively
you would want to preserve the EXCRET_* return value here, and
handle it accordingly in the caller.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Lockup/High ksoftirqd when rate-limiting is enabled

2017-06-21 Thread Wei Liu
On Wed, Jun 21, 2017 at 10:35:11AM +0200, Jean-Louis Dupond wrote:
> Thanks for this quick patch.
> I was able to test it today, and the high ksoftirqd cpu usage is gone.
> 
> Great!
> 
> Is there a chance this can get pushed into stable kernel versions (3.18.x,
> 4.4.x, etc)?
> There is not really a backport work, as the netback driver hasn't changed
> a lot recently.

3.18 is EOL. I think it will eventually trickle down to all 4.X longterm
kernels.

> 
> 
> Tested-by: Jean-Louis Dupond 
> 

Thanks. I will submit this to netdev soon.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 04/27] x86: move PV invalid op emulation code

2017-06-21 Thread Wei Liu
On Wed, Jun 21, 2017 at 03:09:41AM -0600, Jan Beulich wrote:
> >>> On 21.06.17 at 10:57,  wrote:
> > +int pv_emulate_invalid_op(struct cpu_user_regs *regs)
> > +{
> > +return !emulate_invalid_rdtscp(regs) && 
> > !emulate_forced_invalid_op(regs);
> > +}
> 
> This way you want to make the function return bool. Alternatively
> you would want to preserve the EXCRET_* return value here, and
> handle it accordingly in the caller.
> 

I will just make it return bool. Do you want me to send another version?

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/3] xen-disk: only advertize feature-persistent if grant copy is not available

2017-06-21 Thread Roger Pau Monné
On Tue, Jun 20, 2017 at 03:19:33PM -0700, Stefano Stabellini wrote:
> On Tue, 20 Jun 2017, Paul Durrant wrote:
> > If grant copy is available then it will always be used in preference to
> > persistent maps. In this case feature-persistent should not be advertized
> > to the frontend, otherwise it may needlessly copy data into persistently
> > granted buffers.
> > 
> > Signed-off-by: Paul Durrant 
> 
> CC'ing Roger.
> 
> It is true that using feature-persistent together with grant copies is
> a very bad idea.
> 
> But this change establishes an explicit preference of
> feature_grant_copy over feature-persistent in the xen_disk backend. It
> is not obvious to me that it should be the case.
> 
> Why is feature_grant_copy (without feature-persistent) better than
> feature-persistent (without feature_grant_copy)? Shouldn't we simply
> avoid grant copies to copy data to persistent grants?

When using persistent grants the frontend must always copy data from
the buffer to the persistent grant, there's no way to avoid this.

Using grant_copy we move the copy from the frontend to the backend,
which means the CPU time of the copy is accounted to the backend. This
is not ideal, but IMHO it's better than persistent grants because it
avoids keeping a pool of mapped grants that consume memory and make
the code more complex.

Do you have some performance data showing the difference between
persistent grants vs grant copy?

Roger.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH net] xen-netback: correctly schedule rate-limited queues

2017-06-21 Thread Wei Liu
Add a flag to indicate if a queue is rate-limited. Test the flag in
NAPI poll handler and avoid rescheduling the queue if true, otherwise
we risk locking up the host. The rescheduling will be done in the
timer callback function.

Reported-by: Jean-Louis Dupond 
Signed-off-by: Wei Liu 
Tested-by: Jean-Louis Dupond 
---
 drivers/net/xen-netback/common.h| 1 +
 drivers/net/xen-netback/interface.c | 6 +-
 drivers/net/xen-netback/netback.c   | 6 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 530586be05b4..5b1d2e8402d9 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
unsigned long   remaining_credit;
struct timer_list credit_timeout;
u64 credit_window_start;
+   bool rate_limited;
 
/* Statistics */
struct xenvif_stats stats;
diff --git a/drivers/net/xen-netback/interface.c 
b/drivers/net/xen-netback/interface.c
index 8397f6c92451..e322a862ddfe 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int 
budget)
 
if (work_done < budget) {
napi_complete_done(napi, work_done);
-   xenvif_napi_schedule_or_enable_events(queue);
+   /* If the queue is rate-limited, it shall be
+* rescheduled in the timer callback.
+*/
+   if (likely(!queue->rate_limited))
+   xenvif_napi_schedule_or_enable_events(queue);
}
 
return work_done;
diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index 602d408fa25e..5042ff8d449a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
 
queue->remaining_credit = min(max_credit, max_burst);
+   queue->rate_limited = false;
 }
 
 void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, 
unsigned size)
msecs_to_jiffies(queue->credit_usec / 1000);
 
/* Timer could already be pending in rare cases. */
-   if (timer_pending(&queue->credit_timeout))
+   if (timer_pending(&queue->credit_timeout)) {
+   queue->rate_limited = true;
return true;
+   }
 
/* Passed the point where we can replenish credit? */
if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, 
unsigned size)
mod_timer(&queue->credit_timeout,
  next_credit);
queue->credit_window_start = next_credit;
+   queue->rate_limited = true;
 
return true;
}
-- 
2.11.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] x86/xen/efi: Init only efi struct members used by Xen

2017-06-21 Thread Ingo Molnar

* Daniel Kiper  wrote:

> -static const struct efi efi_xen __initconst = {
> - .systab   = NULL, /* Initialized later. */
> - .runtime_version  = 0,/* Initialized later. */
> - .mps  = EFI_INVALID_TABLE_ADDR,
> - .acpi = EFI_INVALID_TABLE_ADDR,
> - .acpi20   = EFI_INVALID_TABLE_ADDR,
> - .smbios   = EFI_INVALID_TABLE_ADDR,
> - .smbios3  = EFI_INVALID_TABLE_ADDR,
> - .sal_systab   = EFI_INVALID_TABLE_ADDR,
> - .boot_info= EFI_INVALID_TABLE_ADDR,
> - .hcdp = EFI_INVALID_TABLE_ADDR,
> - .uga  = EFI_INVALID_TABLE_ADDR,
> - .uv_systab= EFI_INVALID_TABLE_ADDR,
> - .fw_vendor= EFI_INVALID_TABLE_ADDR,
> - .runtime  = EFI_INVALID_TABLE_ADDR,
> - .config_table = EFI_INVALID_TABLE_ADDR,
> - .get_time = xen_efi_get_time,
> - .set_time = xen_efi_set_time,
> - .get_wakeup_time  = xen_efi_get_wakeup_time,
> - .set_wakeup_time  = xen_efi_set_wakeup_time,
> - .get_variable = xen_efi_get_variable,
> - .get_next_variable= xen_efi_get_next_variable,
> - .set_variable = xen_efi_set_variable,
> - .query_variable_info  = xen_efi_query_variable_info,
> - .update_capsule   = xen_efi_update_capsule,
> - .query_capsule_caps   = xen_efi_query_capsule_caps,
> - .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
> - .reset_system = xen_efi_reset_system,
> - .set_virtual_address_map  = NULL, /* Not used under Xen. */
> - .flags= 0 /* Initialized later. */
> -};
> -
>  static efi_system_table_t __init *xen_efi_probe(void)
>  {
>   struct xen_platform_op op = {
> @@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
>  
>   /* Here we know that Xen runs on EFI platform. */
>  
> - efi = efi_xen;
> + efi.get_time = xen_efi_get_time;
> + efi.set_time = xen_efi_set_time;
> + efi.get_wakeup_time = xen_efi_get_wakeup_time;
> + efi.set_wakeup_time = xen_efi_set_wakeup_time;
> + efi.get_variable = xen_efi_get_variable;
> + efi.get_next_variable = xen_efi_get_next_variable;
> + efi.set_variable = xen_efi_set_variable;
> + efi.query_variable_info = xen_efi_query_variable_info;
> + efi.update_capsule = xen_efi_update_capsule;
> + efi.query_capsule_caps = xen_efi_query_capsule_caps;
> + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
> + efi.reset_system = xen_efi_reset_system;

This is a step back stylistically, as you lost the nice vertical tabulation of 
the 
original initializer ...

Thanks,

Ingo

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 00/11] assorted follow-ups to recent XSAs

2017-06-21 Thread Jan Beulich
01: public: adjust documentation following XSA-217
02: gnttab: remove redundant xenheap check from gnttab_transfer()
03: make steal_page() return a proper error value
04: domctl: restrict DOMCTL_set_target to HVM domains
05: evtchn: convert evtchn_port_is_*() to plain bool
06: ARM: simplify page type handling
07: x86: fold identical error paths in xenmem_add_to_physmap_one()
08: gnttab: remove host map in the event of a grant_map failure
09: gnttab: avoid spurious maptrack handle allocation failures
10: gnttab: limit mapkind()'s iteration count
11: gnttab: drop useless locking

Signed-off-by: Jan Beulich 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 04/27] x86: move PV invalid op emulation code

2017-06-21 Thread Jan Beulich
>>> On 21.06.17 at 11:14,  wrote:
> On Wed, Jun 21, 2017 at 03:09:41AM -0600, Jan Beulich wrote:
>> >>> On 21.06.17 at 10:57,  wrote:
>> > +int pv_emulate_invalid_op(struct cpu_user_regs *regs)
>> > +{
>> > +return !emulate_invalid_rdtscp(regs) && 
>> > !emulate_forced_invalid_op(regs);
>> > +}
>> 
>> This way you want to make the function return bool. Alternatively
>> you would want to preserve the EXCRET_* return value here, and
>> handle it accordingly in the caller.
>> 
> 
> I will just make it return bool. Do you want me to send another version?

No need to I think - feel free to add my ack.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 04/27] x86: move PV invalid op emulation code

2017-06-21 Thread Wei Liu
On Wed, Jun 21, 2017 at 03:26:12AM -0600, Jan Beulich wrote:
> >>> On 21.06.17 at 11:14,  wrote:
> > On Wed, Jun 21, 2017 at 03:09:41AM -0600, Jan Beulich wrote:
> >> >>> On 21.06.17 at 10:57,  wrote:
> >> > +int pv_emulate_invalid_op(struct cpu_user_regs *regs)
> >> > +{
> >> > +return !emulate_invalid_rdtscp(regs) && 
> >> > !emulate_forced_invalid_op(regs);
> >> > +}
> >> 
> >> This way you want to make the function return bool. Alternatively
> >> you would want to preserve the EXCRET_* return value here, and
> >> handle it accordingly in the caller.
> >> 
> > 
> > I will just make it return bool. Do you want me to send another version?
> 
> No need to I think - feel free to add my ack.
> 

Cool. Thank you.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/2] x86/xen/efi: Init only efi struct members used by Xen

2017-06-21 Thread Daniel Kiper
On Wed, Jun 21, 2017 at 09:10:51AM +0100, Andrew Cooper wrote:
> On 20/06/2017 21:14, Daniel Kiper wrote:
> > Current approach, wholesale efi struct initialization from efi_xen, is not
> > good. Usually if new member is defined then it is properly initialized in
> > drivers/firmware/efi/efi.c but not in arch/x86/xen/efi.c. As I saw it 
> > happened
> > a few times until now. So, let's initialize only efi struct members used by
> > Xen to avoid such issues in the future.
> >
> > Signed-off-by: Daniel Kiper 
> > ---
> >  arch/x86/xen/efi.c |   45 -
> >  1 file changed, 12 insertions(+), 33 deletions(-)
> >
> > diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
> > index 30bb2e8..01b9faf 100644
> > --- a/arch/x86/xen/efi.c
> > +++ b/arch/x86/xen/efi.c
> > @@ -54,38 +54,6 @@
> > .tables = EFI_INVALID_TABLE_ADDR  /* Initialized later. */
> >  };
> >
> > -static const struct efi efi_xen __initconst = {
> > -   .systab   = NULL, /* Initialized later. */
> > -   .runtime_version  = 0,/* Initialized later. */
> > -   .mps  = EFI_INVALID_TABLE_ADDR,
> > -   .acpi = EFI_INVALID_TABLE_ADDR,
> > -   .acpi20   = EFI_INVALID_TABLE_ADDR,
> > -   .smbios   = EFI_INVALID_TABLE_ADDR,
> > -   .smbios3  = EFI_INVALID_TABLE_ADDR,
> > -   .sal_systab   = EFI_INVALID_TABLE_ADDR,
> > -   .boot_info= EFI_INVALID_TABLE_ADDR,
> > -   .hcdp = EFI_INVALID_TABLE_ADDR,
> > -   .uga  = EFI_INVALID_TABLE_ADDR,
> > -   .uv_systab= EFI_INVALID_TABLE_ADDR,
> > -   .fw_vendor= EFI_INVALID_TABLE_ADDR,
> > -   .runtime  = EFI_INVALID_TABLE_ADDR,
> > -   .config_table = EFI_INVALID_TABLE_ADDR,
> > -   .get_time = xen_efi_get_time,
> > -   .set_time = xen_efi_set_time,
> > -   .get_wakeup_time  = xen_efi_get_wakeup_time,
> > -   .set_wakeup_time  = xen_efi_set_wakeup_time,
> > -   .get_variable = xen_efi_get_variable,
> > -   .get_next_variable= xen_efi_get_next_variable,
> > -   .set_variable = xen_efi_set_variable,
> > -   .query_variable_info  = xen_efi_query_variable_info,
> > -   .update_capsule   = xen_efi_update_capsule,
> > -   .query_capsule_caps   = xen_efi_query_capsule_caps,
> > -   .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
> > -   .reset_system = xen_efi_reset_system,
> > -   .set_virtual_address_map  = NULL, /* Not used under Xen. */
> > -   .flags= 0 /* Initialized later. */
> > -};
> > -
> >  static efi_system_table_t __init *xen_efi_probe(void)
> >  {
> > struct xen_platform_op op = {
> > @@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
> >
> > /* Here we know that Xen runs on EFI platform. */
> >
> > -   efi = efi_xen;
> > +   efi.get_time = xen_efi_get_time;
> > +   efi.set_time = xen_efi_set_time;
> > +   efi.get_wakeup_time = xen_efi_get_wakeup_time;
> > +   efi.set_wakeup_time = xen_efi_set_wakeup_time;
> > +   efi.get_variable = xen_efi_get_variable;
> > +   efi.get_next_variable = xen_efi_get_next_variable;
> > +   efi.set_variable = xen_efi_set_variable;
> > +   efi.query_variable_info = xen_efi_query_variable_info;
> > +   efi.update_capsule = xen_efi_update_capsule;
> > +   efi.query_capsule_caps = xen_efi_query_capsule_caps;
> > +   efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
> > +   efi.reset_system = xen_efi_reset_system;
>
> This presumably means that the system default values are already present
> in efi at the point that we overwrite some Xen specifics?

More or less.

> If so, surely you need to retain the clobbering of set_virtual_address_map ?

Nope, by default efi.set_virtual_address_map is NULL (please take a look
at efi struct initialization in drivers/firmware/efi/efi.c). And it is
not touched if efi_enabled(EFI_PARAVIRT).

Daniel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 01/11] public: adjust documentation following XSA-217

2017-06-21 Thread Jan Beulich
Signed-off-by: Jan Beulich 

--- a/xen/include/public/grant_table.h
+++ b/xen/include/public/grant_table.h
@@ -411,12 +411,13 @@ typedef struct gnttab_dump_table gnttab_
 DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
 
 /*
- * GNTTABOP_transfer_grant_ref: Transfer  to a foreign domain. The
- * foreign domain has previously registered its interest in the transfer via
- * .
+ * GNTTABOP_transfer: Transfer  to a foreign domain. The foreign domain
+ * has previously registered its interest in the transfer via .
  *
  * Note that, even if the transfer fails, the specified page no longer belongs
  * to the calling domain *unless* the error is GNTST_bad_page.
+ *
+ * Note further that only PV guests can use this operation.
  */
 struct gnttab_transfer {
 /* IN parameters. */
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -102,6 +102,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reser
  * Returns zero on complete success, otherwise a negative error code.
  * On complete success then always @nr_exchanged == @in.nr_extents.
  * On partial success @nr_exchanged indicates how much work was done.
+ *
+ * Note that only PV guests can use this operation.
  */
 #define XENMEM_exchange 11
 struct xen_memory_exchange {



public: adjust documentation following XSA-217

Signed-off-by: Jan Beulich 

--- a/xen/include/public/grant_table.h
+++ b/xen/include/public/grant_table.h
@@ -411,12 +411,13 @@ typedef struct gnttab_dump_table gnttab_
 DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
 
 /*
- * GNTTABOP_transfer_grant_ref: Transfer  to a foreign domain. The
- * foreign domain has previously registered its interest in the transfer via
- * .
+ * GNTTABOP_transfer: Transfer  to a foreign domain. The foreign domain
+ * has previously registered its interest in the transfer via .
  *
  * Note that, even if the transfer fails, the specified page no longer belongs
  * to the calling domain *unless* the error is GNTST_bad_page.
+ *
+ * Note further that only PV guests can use this operation.
  */
 struct gnttab_transfer {
 /* IN parameters. */
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -102,6 +102,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reser
  * Returns zero on complete success, otherwise a negative error code.
  * On complete success then always @nr_exchanged == @in.nr_extents.
  * On partial success @nr_exchanged indicates how much work was done.
+ *
+ * Note that only PV guests can use this operation.
  */
 #define XENMEM_exchange 11
 struct xen_memory_exchange {
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 02/11] gnttab: remove redundant xenheap check from gnttab_transfer()

2017-06-21 Thread Jan Beulich
The message isn't very useful, and the check is being done by
steal_page() anyway.

Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1843,15 +1843,6 @@ gnttab_transfer(
 }
 
 page = mfn_to_page(mfn);
-if ( unlikely(is_xen_heap_page(page)) )
-{ 
-put_gfn(d, gop.mfn);
-gdprintk(XENLOG_INFO, "gnttab_transfer: xen frame %lx\n",
-(unsigned long)gop.mfn);
-gop.status = GNTST_bad_page;
-goto copyback;
-}
-
 if ( steal_page(d, page, 0) < 0 )
 {
 put_gfn(d, gop.mfn);



gnttab: remove redundant xenheap check from gnttab_transfer()

The message isn't very useful, and the check is being done by
steal_page() anyway.

Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1843,15 +1843,6 @@ gnttab_transfer(
 }
 
 page = mfn_to_page(mfn);
-if ( unlikely(is_xen_heap_page(page)) )
-{ 
-put_gfn(d, gop.mfn);
-gdprintk(XENLOG_INFO, "gnttab_transfer: xen frame %lx\n",
-(unsigned long)gop.mfn);
-gop.status = GNTST_bad_page;
-goto copyback;
-}
-
 if ( steal_page(d, page, 0) < 0 )
 {
 put_gfn(d, gop.mfn);
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 03/11] make steal_page() return a proper error value

2017-06-21 Thread Jan Beulich
... and use it where suitable (the tmem caller doesn't propagate an
error code). While it doesn't matter as much, also make donate_page()
follow suit on x86 (on ARM it already returns -ENOSYS).

Also move their declarations to common code and add __must_check.

Signed-off-by: Jan Beulich 

--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1090,7 +1090,7 @@ int donate_page(struct domain *d, struct
 int steal_page(
 struct domain *d, struct page_info *page, unsigned int memflags)
 {
-return -1;
+return -EOPNOTSUPP;
 }
 
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4424,7 +4424,7 @@ int donate_page(
  page_to_mfn(page), d->domain_id,
  owner ? owner->domain_id : DOMID_INVALID,
  page->count_info, page->u.inuse.type_info);
-return -1;
+return -EINVAL;
 }
 
 int steal_page(
@@ -4435,7 +4435,7 @@ int steal_page(
 const struct domain *owner = dom_xen;
 
 if ( paging_mode_external(d) )
-return -1;
+return -EOPNOTSUPP;
 
 spin_lock(&d->page_alloc_lock);
 
@@ -4490,7 +4490,7 @@ int steal_page(
  page_to_mfn(page), d->domain_id,
  owner ? owner->domain_id : DOMID_INVALID,
  page->count_info, page->u.inuse.type_info);
-return -1;
+return -EINVAL;
 }
 
 static int __do_update_va_mapping(
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1843,10 +1843,10 @@ gnttab_transfer(
 }
 
 page = mfn_to_page(mfn);
-if ( steal_page(d, page, 0) < 0 )
+if ( (rc = steal_page(d, page, 0)) < 0 )
 {
 put_gfn(d, gop.mfn);
-gop.status = GNTST_bad_page;
+gop.status = rc == -EINVAL ? GNTST_bad_page : GNTST_general_error;
 goto copyback;
 }
 
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -566,10 +566,10 @@ static long memory_exchange(XEN_GUEST_HA
 
 page = mfn_to_page(mfn);
 
-if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
+rc = steal_page(d, page, MEMF_no_refcount);
+if ( unlikely(rc) )
 {
 put_gfn(d, gmfn + k);
-rc = -EINVAL;
 goto fail;
 }
 
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -322,11 +322,6 @@ static inline int relinquish_shared_page
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg);
 
-int steal_page(
-struct domain *d, struct page_info *page, unsigned int memflags);
-int donate_page(
-struct domain *d, struct page_info *page, unsigned int memflags);
-
 #define domain_set_alloc_bitsize(d) ((void)0)
 #define domain_clamp_alloc_bitsize(d, b) (b)
 
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -550,11 +550,6 @@ long subarch_memory_op(unsigned long cmd
 int compat_arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void));
 int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void));
 
-int steal_page(
-struct domain *d, struct page_info *page, unsigned int memflags);
-int donate_page(
-struct domain *d, struct page_info *page, unsigned int memflags);
-
 int map_ldt_shadow_page(unsigned int);
 
 #define NIL(type) ((type *)-sizeof(type))
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -567,8 +567,12 @@ int xenmem_add_to_physmap_one(struct dom
   union xen_add_to_physmap_batch_extra extra,
   unsigned long idx, gfn_t gfn);
 
-/* Returns 0 on success, or negative on error. */
+/* Return 0 on success, or negative on error. */
 int __must_check guest_remove_page(struct domain *d, unsigned long gmfn);
+int __must_check steal_page(struct domain *d, struct page_info *page,
+unsigned int memflags);
+int __must_check donate_page(struct domain *d, struct page_info *page,
+ unsigned int memflags);
 
 #define RAM_TYPE_CONVENTIONAL 0x0001
 #define RAM_TYPE_RESERVED 0x0002


make steal_page() return a proper error value

... and use it where suitable (the tmem caller doesn't propagate an
error code). While it doesn't matter as much, also make donate_page()
follow suit on x86 (on ARM it already returns -ENOSYS).

Also move their declarations to common code and add __must_check.

Signed-off-by: Jan Beulich 

--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1090,7 +1090,7 @@ int donate_page(struct domain *d, struct
 int steal_page(
 struct domain *d, struct page_info *page, unsigned int memflags)
 {
-return -1;
+return -EOPNOTSUPP;
 }
 
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4424,7 +4424,7 @@ int donate_page(
  page_to_mfn(page), d->domain_id,
  owner ? owner->domain_id : DOMID_INVALID,
 

Re: [Xen-devel] [PATCH 2/2] x86/xen/efi: Init only efi struct members used by Xen

2017-06-21 Thread Daniel Kiper
On Wed, Jun 21, 2017 at 11:24:06AM +0200, Ingo Molnar wrote:
>
> * Daniel Kiper  wrote:
>
> > -static const struct efi efi_xen __initconst = {
> > -   .systab   = NULL, /* Initialized later. */
> > -   .runtime_version  = 0,/* Initialized later. */
> > -   .mps  = EFI_INVALID_TABLE_ADDR,
> > -   .acpi = EFI_INVALID_TABLE_ADDR,
> > -   .acpi20   = EFI_INVALID_TABLE_ADDR,
> > -   .smbios   = EFI_INVALID_TABLE_ADDR,
> > -   .smbios3  = EFI_INVALID_TABLE_ADDR,
> > -   .sal_systab   = EFI_INVALID_TABLE_ADDR,
> > -   .boot_info= EFI_INVALID_TABLE_ADDR,
> > -   .hcdp = EFI_INVALID_TABLE_ADDR,
> > -   .uga  = EFI_INVALID_TABLE_ADDR,
> > -   .uv_systab= EFI_INVALID_TABLE_ADDR,
> > -   .fw_vendor= EFI_INVALID_TABLE_ADDR,
> > -   .runtime  = EFI_INVALID_TABLE_ADDR,
> > -   .config_table = EFI_INVALID_TABLE_ADDR,
> > -   .get_time = xen_efi_get_time,
> > -   .set_time = xen_efi_set_time,
> > -   .get_wakeup_time  = xen_efi_get_wakeup_time,
> > -   .set_wakeup_time  = xen_efi_set_wakeup_time,
> > -   .get_variable = xen_efi_get_variable,
> > -   .get_next_variable= xen_efi_get_next_variable,
> > -   .set_variable = xen_efi_set_variable,
> > -   .query_variable_info  = xen_efi_query_variable_info,
> > -   .update_capsule   = xen_efi_update_capsule,
> > -   .query_capsule_caps   = xen_efi_query_capsule_caps,
> > -   .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
> > -   .reset_system = xen_efi_reset_system,
> > -   .set_virtual_address_map  = NULL, /* Not used under Xen. */
> > -   .flags= 0 /* Initialized later. */
> > -};
> > -
> >  static efi_system_table_t __init *xen_efi_probe(void)
> >  {
> > struct xen_platform_op op = {
> > @@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
> >
> > /* Here we know that Xen runs on EFI platform. */
> >
> > -   efi = efi_xen;
> > +   efi.get_time = xen_efi_get_time;
> > +   efi.set_time = xen_efi_set_time;
> > +   efi.get_wakeup_time = xen_efi_get_wakeup_time;
> > +   efi.set_wakeup_time = xen_efi_set_wakeup_time;
> > +   efi.get_variable = xen_efi_get_variable;
> > +   efi.get_next_variable = xen_efi_get_next_variable;
> > +   efi.set_variable = xen_efi_set_variable;
> > +   efi.query_variable_info = xen_efi_query_variable_info;
> > +   efi.update_capsule = xen_efi_update_capsule;
> > +   efi.query_capsule_caps = xen_efi_query_capsule_caps;
> > +   efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
> > +   efi.reset_system = xen_efi_reset_system;
>
> This is a step back stylistically, as you lost the nice vertical tabulation 
> of the
> original initializer ...

If you wish and others do not object I can realign it back.

Daniel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 04/11] domctl: restrict DOMCTL_set_target to HVM domains

2017-06-21 Thread Jan Beulich
Both the XSA-217 fix and
lists.xenproject.org/archives/html/xen-devel/2017-04/msg02945.html
make this assumption, so let's enforce it.

Signed-off-by: Jan Beulich 

--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -1071,7 +1071,9 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
 break;
 }
 
-ret = xsm_set_target(XSM_HOOK, d, e);
+ret = -EOPNOTSUPP;
+if ( is_hvm_domain(e) )
+ret = xsm_set_target(XSM_HOOK, d, e);
 if ( ret ) {
 put_domain(e);
 break;



domctl: restrict DOMCTL_set_target to HVM domains

Both the XSA-217 fix and
lists.xenproject.org/archives/html/xen-devel/2017-04/msg02945.html
make this assumption, so let's enforce it.

Signed-off-by: Jan Beulich 

--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -1071,7 +1071,9 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
 break;
 }
 
-ret = xsm_set_target(XSM_HOOK, d, e);
+ret = -EOPNOTSUPP;
+if ( is_hvm_domain(e) )
+ret = xsm_set_target(XSM_HOOK, d, e);
 if ( ret ) {
 put_domain(e);
 break;
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 05/11] evtchn: convert evtchn_port_is_*() to plain bool

2017-06-21 Thread Jan Beulich
... at once reducing overall source size by combining some statements
and constifying a few pointers.

Signed-off-by: Jan Beulich 

--- a/xen/common/event_2l.c
+++ b/xen/common/event_2l.c
@@ -61,7 +61,7 @@ static void evtchn_2l_unmask(struct doma
 }
 }
 
-static bool_t evtchn_2l_is_pending(struct domain *d, evtchn_port_t port)
+static bool evtchn_2l_is_pending(const struct domain *d, evtchn_port_t port)
 {
 unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
 
@@ -69,7 +69,7 @@ static bool_t evtchn_2l_is_pending(struc
 return port < max_ports && test_bit(port, &shared_info(d, evtchn_pending));
 }
 
-static bool_t evtchn_2l_is_masked(struct domain *d, evtchn_port_t port)
+static bool evtchn_2l_is_masked(const struct domain *d, evtchn_port_t port)
 {
 unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
 
--- a/xen/common/event_fifo.c
+++ b/xen/common/event_fifo.c
@@ -19,7 +19,7 @@
 
 #include 
 
-static inline event_word_t *evtchn_fifo_word_from_port(struct domain *d,
+static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d,
unsigned int port)
 {
 unsigned int p, w;
@@ -293,37 +293,25 @@ static void evtchn_fifo_unmask(struct do
 evtchn_fifo_set_pending(v, evtchn);
 }
 
-static bool_t evtchn_fifo_is_pending(struct domain *d, evtchn_port_t port)
+static bool evtchn_fifo_is_pending(const struct domain *d, evtchn_port_t port)
 {
-event_word_t *word;
-
-word = evtchn_fifo_word_from_port(d, port);
-if ( unlikely(!word) )
-return 0;
+const event_word_t *word = evtchn_fifo_word_from_port(d, port);
 
-return test_bit(EVTCHN_FIFO_PENDING, word);
+return word && test_bit(EVTCHN_FIFO_PENDING, word);
 }
 
-static bool_t evtchn_fifo_is_masked(struct domain *d, evtchn_port_t port)
+static bool_t evtchn_fifo_is_masked(const struct domain *d, evtchn_port_t port)
 {
-event_word_t *word;
+const event_word_t *word = evtchn_fifo_word_from_port(d, port);
 
-word = evtchn_fifo_word_from_port(d, port);
-if ( unlikely(!word) )
-return 1;
-
-return test_bit(EVTCHN_FIFO_MASKED, word);
+return !word || test_bit(EVTCHN_FIFO_MASKED, word);
 }
 
-static bool_t evtchn_fifo_is_busy(struct domain *d, evtchn_port_t port)
+static bool_t evtchn_fifo_is_busy(const struct domain *d, evtchn_port_t port)
 {
-event_word_t *word;
-
-word = evtchn_fifo_word_from_port(d, port);
-if ( unlikely(!word) )
-return 0;
+const event_word_t *word = evtchn_fifo_word_from_port(d, port);
 
-return test_bit(EVTCHN_FIFO_LINKED, word);
+return word && test_bit(EVTCHN_FIFO_LINKED, word);
 }
 
 static int evtchn_fifo_set_priority(struct domain *d, struct evtchn *evtchn,
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -137,13 +137,13 @@ struct evtchn_port_ops {
 void (*set_pending)(struct vcpu *v, struct evtchn *evtchn);
 void (*clear_pending)(struct domain *d, struct evtchn *evtchn);
 void (*unmask)(struct domain *d, struct evtchn *evtchn);
-bool_t (*is_pending)(struct domain *d, evtchn_port_t port);
-bool_t (*is_masked)(struct domain *d, evtchn_port_t port);
+bool (*is_pending)(const struct domain *d, evtchn_port_t port);
+bool (*is_masked)(const struct domain *d, evtchn_port_t port);
 /*
  * Is the port unavailable because it's still being cleaned up
  * after being closed?
  */
-bool_t (*is_busy)(struct domain *d, evtchn_port_t port);
+bool (*is_busy)(const struct domain *d, evtchn_port_t port);
 int (*set_priority)(struct domain *d, struct evtchn *evtchn,
 unsigned int priority);
 void (*print_state)(struct domain *d, const struct evtchn *evtchn);
@@ -174,23 +174,23 @@ static inline void evtchn_port_unmask(st
 d->evtchn_port_ops->unmask(d, evtchn);
 }
 
-static inline bool_t evtchn_port_is_pending(struct domain *d,
-evtchn_port_t port)
+static inline bool evtchn_port_is_pending(const struct domain *d,
+  evtchn_port_t port)
 {
 return d->evtchn_port_ops->is_pending(d, port);
 }
 
-static inline bool_t evtchn_port_is_masked(struct domain *d,
-   evtchn_port_t port)
+static inline bool evtchn_port_is_masked(const struct domain *d,
+ evtchn_port_t port)
 {
 return d->evtchn_port_ops->is_masked(d, port);
 }
 
-static inline bool_t evtchn_port_is_busy(struct domain *d, evtchn_port_t port)
+static inline bool evtchn_port_is_busy(const struct domain *d,
+   evtchn_port_t port)
 {
-if ( d->evtchn_port_ops->is_busy )
-return d->evtchn_port_ops->is_busy(d, port);
-return 0;
+return d->evtchn_port_ops->is_busy &&
+   d->evtchn_port_ops->is_busy(d, port);
 }
 
 static inline int evtchn_port_set_priority

[Xen-devel] [PATCH 06/11] ARM: simplify page type handling

2017-06-21 Thread Jan Beulich
There's no need to have anything here on ARM other than the distinction
between writable and non-writable pages (and even that could likely be
eliminated, but with a more intrusive change). Limit type to a single
bit and drop pinned and validated flags altogether.

Signed-off-by: Jan Beulich 
---
Note: Compile tested only.

--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1113,8 +1113,7 @@ void share_xen_page_with_guest(struct pa
 spin_lock(&d->page_alloc_lock);
 
 /* The incremented type count pins as writable or read-only. */
-page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
-page->u.inuse.type_info |= PGT_validated | 1;
+page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page) | 1;
 
 page_set_owner(page, d);
 smp_wmb(); /* install valid domain ptr before updating refcnt. */
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -354,8 +354,10 @@ int guest_remove_page(struct domain *d,
 
 rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
 
+#ifdef _PGT_pinned
 if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
 put_page_and_type(page);
+#endif
 
 /*
  * With the lack of an IOMMU on some platforms, domains with DMA-capable
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -77,20 +77,12 @@ struct page_info
 #define PG_shift(idx)   (BITS_PER_LONG - (idx))
 #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
 
-#define PGT_none  PG_mask(0, 4)  /* no special uses of this page   */
-#define PGT_writable_page PG_mask(7, 4)  /* has writable mappings? */
-#define PGT_type_mask PG_mask(15, 4) /* Bits 28-31 or 60-63.   */
-
- /* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned   PG_shift(5)
-#define PGT_pinnedPG_mask(1, 5)
-
- /* Has this page been validated for use as its current type? */
-#define _PGT_validatedPG_shift(6)
-#define PGT_validated PG_mask(1, 6)
+#define PGT_none  PG_mask(0, 1)  /* no special uses of this page   */
+#define PGT_writable_page PG_mask(1, 1)  /* has writable mappings? */
+#define PGT_type_mask PG_mask(1, 1)  /* Bits 31 or 63. */
 
  /* Count of uses of this frame as its current type. */
-#define PGT_count_width   PG_shift(9)
+#define PGT_count_width   PG_shift(2)
 #define PGT_count_mask((1UL<<PGT_count_width)-1)

 spin_lock(&d->page_alloc_lock);
 
 /* The incremented type count pins as writable or read-only. */
-page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
-page->u.inuse.type_info |= PGT_validated | 1;
+page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page) | 1;
 
 page_set_owner(page, d);
 smp_wmb(); /* install valid domain ptr before updating refcnt. */
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -354,8 +354,10 @@ int guest_remove_page(struct domain *d,
 
 rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
 
+#ifdef _PGT_pinned
 if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
 put_page_and_type(page);
+#endif
 
 /*
  * With the lack of an IOMMU on some platforms, domains with DMA-capable
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -77,20 +77,12 @@ struct page_info
 #define PG_shift(idx)   (BITS_PER_LONG - (idx))
 #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
 
-#define PGT_none  PG_mask(0, 4)  /* no special uses of this page   */
-#define PGT_writable_page PG_mask(7, 4)  /* has writable mappings? */
-#define PGT_type_mask PG_mask(15, 4) /* Bits 28-31 or 60-63.   */
-
- /* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned   PG_shift(5)
-#define PGT_pinnedPG_mask(1, 5)
-
- /* Has this page been validated for use as its current type? */
-#define _PGT_validatedPG_shift(6)
-#define PGT_validated PG_mask(1, 6)
+#define PGT_none  PG_mask(0, 1)  /* no special uses of this page   */
+#define PGT_writable_page PG_mask(1, 1)  /* has writable mappings? */
+#define PGT_type_mask PG_mask(1, 1)  /* Bits 31 or 63. */
 
  /* Count of uses of this frame as its current type. */
-#define PGT_count_width   PG_shift(9)
+#define PGT_count_width   PG_shift(2)
 #define PGT_count_mask((1UL<<PGT_count_width)-1)
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/3] xen-disk: only advertize feature-persistent if grant copy is not available

2017-06-21 Thread Paul Durrant
> -Original Message-
> From: Roger Pau Monne
> Sent: 21 June 2017 10:18
> To: Stefano Stabellini 
> Cc: Paul Durrant ; xen-de...@lists.xenproject.org;
> qemu-de...@nongnu.org; qemu-bl...@nongnu.org; Anthony Perard
> ; Kevin Wolf ; Max Reitz
> 
> Subject: Re: [PATCH 1/3] xen-disk: only advertize feature-persistent if grant
> copy is not available
> 
> On Tue, Jun 20, 2017 at 03:19:33PM -0700, Stefano Stabellini wrote:
> > On Tue, 20 Jun 2017, Paul Durrant wrote:
> > > If grant copy is available then it will always be used in preference to
> > > persistent maps. In this case feature-persistent should not be advertized
> > > to the frontend, otherwise it may needlessly copy data into persistently
> > > granted buffers.
> > >
> > > Signed-off-by: Paul Durrant 
> >
> > CC'ing Roger.
> >
> > It is true that using feature-persistent together with grant copies is a
> > a very bad idea.
> >
> > But this change enstablishes an explicit preference of
> > feature_grant_copy over feature-persistent in the xen_disk backend. It
> > is not obvious to me that it should be the case.
> >
> > Why is feature_grant_copy (without feature-persistent) better than
> > feature-persistent (without feature_grant_copy)? Shouldn't we simply
> > avoid grant copies to copy data to persistent grants?
> 
> When using persistent grants the frontend must always copy data from
> the buffer to the persistent grant, there's no way to avoid this.
> 
> Using grant_copy we move the copy from the frontend to the backend,
> which means the CPU time of the copy is accounted to the backend. This
> is not ideal, but IMHO it's better than persistent grants because it
> avoids keeping a pool of mapped grants that consume memory and make
> the code more complex.
> 
> Do you have some performance data showing the difference between
> persistent grants vs grant copy?
> 

No, but I can get some :-)

For a little background... I've been trying to push throughput of fio running 
in a debian stretch guest on my skull canyon NUC. When I started out, I was 
getting ~100MBbs. When I finished, with this patch, the IOThreads one, the 
multi-page ring one and a bit of hackery to turn off all the aio flushes that 
seem to occur even if the image is opened with O_DIRECT, I was getting 
~960Mbps... which is about line rate for the SSD in the in NUC.

So, I'll force use of persistent grants on and see what sort of throughput I 
get.

Cheers,

  Paul

> Roger.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 07/11] x86: fold identical error paths in xenmem_add_to_physmap_one()

2017-06-21 Thread Jan Beulich
Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4899,11 +4899,8 @@ int xenmem_add_to_physmap_one(
 
 if ( !paging_mode_translate(d) || (mfn == 0) )
 {
-if ( page )
-put_page(page);
-if ( space == XENMAPSPACE_gmfn || space == XENMAPSPACE_gmfn_range )
-put_gfn(d, gfn);
-return -EINVAL;
+rc = -EINVAL;
+goto put_both;
 }
 
 /* Remove previously mapped page if it was present. */



x86: fold identical error paths in xenmem_add_to_physmap_one()

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4899,11 +4899,8 @@ int xenmem_add_to_physmap_one(
 
 if ( !paging_mode_translate(d) || (mfn == 0) )
 {
-if ( page )
-put_page(page);
-if ( space == XENMAPSPACE_gmfn || space == XENMAPSPACE_gmfn_range )
-put_gfn(d, gfn);
-return -EINVAL;
+rc = -EINVAL;
+goto put_both;
 }
 
 /* Remove previously mapped page if it was present. */
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 08/11] gnttab: remove host map in the event of a grant_map failure

2017-06-21 Thread Jan Beulich
From: George Dunlap 

The current code appropriately removes the reference and type counts
on failure, but leaves the mapping set up. As the only path which can
trigger this is failure from IOMMU manipulation, and as unprivileged
domains are being crashed in that case, this is not by itself a
security issue.

Reported-by: Jan Beulich 
Signed-off-by: George Dunlap 
Reviewed-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -764,6 +764,7 @@ __gnttab_map_grant_ref(
 u32old_pin;
 u32act_pin;
 unsigned int   cache_flags, refcnt = 0, typecnt = 0;
+bool   host_map_created = false;
 struct active_grant_entry *act = NULL;
 struct grant_mapping *mt;
 grant_entry_header_t *shah;
@@ -923,6 +924,8 @@ __gnttab_map_grant_ref(
cache_flags);
 if ( rc != GNTST_okay )
 goto undo_out;
+
+host_map_created = true;
 }
 }
 else if ( owner == rd || owner == dom_cow )
@@ -960,6 +963,8 @@ __gnttab_map_grant_ref(
 rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
 if ( rc != GNTST_okay )
 goto undo_out;
+
+host_map_created = true;
 }
 }
 else
@@ -1030,6 +1035,12 @@ __gnttab_map_grant_ref(
 return;
 
  undo_out:
+if ( host_map_created )
+{
+replace_grant_host_mapping(op->host_addr, frame, 0, op->flags);
+gnttab_flush_tlb(ld);
+}
+
 while ( typecnt-- )
 put_page_type(pg);
 



gnttab: remove host map in the event of a grant_map failure

From: George Dunlap 

The current code appropriately removes the reference and type counts
on failure, but leaves the mapping set up. As the only path which can
trigger this is failure from IOMMU manipulation, and as unprivileged
domains are being crashed in that case, this is not by itself a
security issue.

Reported-by: Jan Beulich 
Signed-off-by: George Dunlap 
Reviewed-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -764,6 +764,7 @@ __gnttab_map_grant_ref(
 u32old_pin;
 u32act_pin;
 unsigned int   cache_flags, refcnt = 0, typecnt = 0;
+bool   host_map_created = false;
 struct active_grant_entry *act = NULL;
 struct grant_mapping *mt;
 grant_entry_header_t *shah;
@@ -923,6 +924,8 @@ __gnttab_map_grant_ref(
cache_flags);
 if ( rc != GNTST_okay )
 goto undo_out;
+
+host_map_created = true;
 }
 }
 else if ( owner == rd || owner == dom_cow )
@@ -960,6 +963,8 @@ __gnttab_map_grant_ref(
 rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
 if ( rc != GNTST_okay )
 goto undo_out;
+
+host_map_created = true;
 }
 }
 else
@@ -1030,6 +1035,12 @@ __gnttab_map_grant_ref(
 return;
 
  undo_out:
+if ( host_map_created )
+{
+replace_grant_host_mapping(op->host_addr, frame, 0, op->flags);
+gnttab_flush_tlb(ld);
+}
+
 while ( typecnt-- )
 put_page_type(pg);
 
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 09/11] gnttab: avoid spurious maptrack handle allocation failures

2017-06-21 Thread Jan Beulich
When no memory is available in the hypervisor, rather than immediately
failing the request try to steal a handle from another vCPU.

Reported-by: George Dunlap 
Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -397,7 +397,7 @@ get_maptrack_handle(
 struct vcpu  *curr = current;
 unsigned int  i, head;
 grant_handle_thandle;
-struct grant_mapping *new_mt;
+struct grant_mapping *new_mt = NULL;
 
 handle = __get_maptrack_handle(lgt, curr);
 if ( likely(handle != -1) )
@@ -408,8 +408,13 @@ get_maptrack_handle(
 /*
  * If we've run out of frames, try stealing an entry from another
  * VCPU (in case the guest isn't mapping across its VCPUs evenly).
+ * Also use this path in case we're out of memory, to avoid spurious
+ * failures.
  */
-if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
+if ( nr_maptrack_frames(lgt) < max_maptrack_frames )
+new_mt = alloc_xenheap_page();
+
+if ( !new_mt )
 {
 /*
  * Can drop the lock since no other VCPU can be adding a new
@@ -432,12 +437,6 @@ get_maptrack_handle(
 return steal_maptrack_handle(lgt, curr);
 }
 
-new_mt = alloc_xenheap_page();
-if ( !new_mt )
-{
-spin_unlock(&lgt->maptrack_lock);
-return -1;
-}
 clear_page(new_mt);
 
 /*



gnttab: avoid spurious maptrack handle allocation failures

When no memory is available in the hypervisor, rather than immediately
failing the request try to steal a handle from another vCPU.

Reported-by: George Dunlap 
Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -397,7 +397,7 @@ get_maptrack_handle(
 struct vcpu  *curr = current;
 unsigned int  i, head;
 grant_handle_thandle;
-struct grant_mapping *new_mt;
+struct grant_mapping *new_mt = NULL;
 
 handle = __get_maptrack_handle(lgt, curr);
 if ( likely(handle != -1) )
@@ -408,8 +408,13 @@ get_maptrack_handle(
 /*
  * If we've run out of frames, try stealing an entry from another
  * VCPU (in case the guest isn't mapping across its VCPUs evenly).
+ * Also use this path in case we're out of memory, to avoid spurious
+ * failures.
  */
-if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
+if ( nr_maptrack_frames(lgt) < max_maptrack_frames )
+new_mt = alloc_xenheap_page();
+
+if ( !new_mt )
 {
 /*
  * Can drop the lock since no other VCPU can be adding a new
@@ -432,12 +437,6 @@ get_maptrack_handle(
 return steal_maptrack_handle(lgt, curr);
 }
 
-new_mt = alloc_xenheap_page();
-if ( !new_mt )
-{
-spin_unlock(&lgt->maptrack_lock);
-return -1;
-}
 clear_page(new_mt);
 
 /*
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 10/11] gnttab: limit mapkind()'s iteration count

2017-06-21 Thread Jan Beulich
There's no need for the function to observe increases of the maptrack
table (which can occur as the maptrack lock isn't being held) - actual
population of maptrack entries is excluded while we're here (by way of
holding the respective grant table lock for writing, while code
populating entries acquires it for reading). Latch the limit ahead of
the loop, allowing for the barrier to move out, too.

Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -712,7 +712,7 @@ static unsigned int mapkind(
 struct grant_table *lgt, const struct domain *rd, unsigned long mfn)
 {
 struct grant_mapping *map;
-grant_handle_t handle;
+grant_handle_t handle, limit = lgt->maptrack_limit;
 unsigned int kind = 0;
 
 /*
@@ -726,10 +726,10 @@ static unsigned int mapkind(
  */
 ASSERT(percpu_rw_is_write_locked(&rd->grant_table->lock));
 
-for ( handle = 0; !(kind & MAPKIND_WRITE) &&
-  handle < lgt->maptrack_limit; handle++ )
+smp_rmb();
+
+for ( handle = 0; !(kind & MAPKIND_WRITE) && handle < limit; handle++ )
 {
-smp_rmb();
 map = &maptrack_entry(lgt, handle);
 if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
  map->domid != rd->domain_id )



gnttab: limit mapkind()'s iteration count

There's no need for the function to observe increases of the maptrack
table (which can occur as the maptrack lock isn't being held) - actual
population of maptrack entries is excluded while we're here (by way of
holding the respective grant table lock for writing, while code
populating entries acquires it for reading). Latch the limit ahead of
the loop, allowing for the barrier to move out, too.

Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -712,7 +712,7 @@ static unsigned int mapkind(
 struct grant_table *lgt, const struct domain *rd, unsigned long mfn)
 {
 struct grant_mapping *map;
-grant_handle_t handle;
+grant_handle_t handle, limit = lgt->maptrack_limit;
 unsigned int kind = 0;
 
 /*
@@ -726,10 +726,10 @@ static unsigned int mapkind(
  */
 ASSERT(percpu_rw_is_write_locked(&rd->grant_table->lock));
 
-for ( handle = 0; !(kind & MAPKIND_WRITE) &&
-  handle < lgt->maptrack_limit; handle++ )
+smp_rmb();
+
+for ( handle = 0; !(kind & MAPKIND_WRITE) && handle < limit; handle++ )
 {
-smp_rmb();
 map = &maptrack_entry(lgt, handle);
 if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
  map->domid != rd->domain_id )
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 11/11] gnttab: drop useless locking

2017-06-21 Thread Jan Beulich
Holding any lock while accessing the maptrack entry fields is
pointless, as these entries are protected by their associated active
entry lock (which is being acquired later, before re-validating the
fields read without holding the lock).

Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1122,19 +1122,14 @@ __gnttab_unmap_common(
 smp_rmb();
 map = &maptrack_entry(lgt, op->handle);
 
-grant_read_lock(lgt);
-
 if ( unlikely(!read_atomic(&map->flags)) )
 {
-grant_read_unlock(lgt);
 gdprintk(XENLOG_INFO, "Zero flags for handle %#x\n", op->handle);
 op->status = GNTST_bad_handle;
 return;
 }
 
 dom = map->domid;
-grant_read_unlock(lgt);
-
 if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
 {
 /* This can happen when a grant is implicitly unmapped. */



gnttab: drop useless locking

Holding any lock while accessing the maptrack entry fields is
pointless, as these entries are protected by their associated active
entry lock (which is being acquired later, before re-validating the
fields read without holding the lock).

Signed-off-by: Jan Beulich 

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1122,19 +1122,14 @@ __gnttab_unmap_common(
 smp_rmb();
 map = &maptrack_entry(lgt, op->handle);
 
-grant_read_lock(lgt);
-
 if ( unlikely(!read_atomic(&map->flags)) )
 {
-grant_read_unlock(lgt);
 gdprintk(XENLOG_INFO, "Zero flags for handle %#x\n", op->handle);
 op->status = GNTST_bad_handle;
 return;
 }
 
 dom = map->domid;
-grant_read_unlock(lgt);
-
 if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
 {
 /* This can happen when a grant is implicitly unmapped. */
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH net] xen-netback: correctly schedule rate-limited queues

2017-06-21 Thread Paul Durrant
> -Original Message-
> From: Wei Liu [mailto:wei.l...@citrix.com]
> Sent: 21 June 2017 10:21
> To: net...@vger.kernel.org
> Cc: Xen-devel ; Paul Durrant
> ; David Miller ; jean-
> lo...@dupond.be; Wei Liu 
> Subject: [PATCH net] xen-netback: correctly schedule rate-limited queues
> 
> Add a flag to indicate if a queue is rate-limited. Test the flag in
> NAPI poll handler and avoid rescheduling the queue if true, otherwise
> we risk locking up the host. The rescheduling will be done in the
> timer callback function.
> 
> Reported-by: Jean-Louis Dupond 
> Signed-off-by: Wei Liu 
> Tested-by: Jean-Louis Dupond 

Reviewed-by: Paul Durrant 

> ---
>  drivers/net/xen-netback/common.h| 1 +
>  drivers/net/xen-netback/interface.c | 6 +-
>  drivers/net/xen-netback/netback.c   | 6 +-
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-
> netback/common.h
> index 530586be05b4..5b1d2e8402d9 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
>   unsigned long   remaining_credit;
>   struct timer_list credit_timeout;
>   u64 credit_window_start;
> + bool rate_limited;
> 
>   /* Statistics */
>   struct xenvif_stats stats;
> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-
> netback/interface.c
> index 8397f6c92451..e322a862ddfe 100644
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int
> budget)
> 
>   if (work_done < budget) {
>   napi_complete_done(napi, work_done);
> - xenvif_napi_schedule_or_enable_events(queue);
> + /* If the queue is rate-limited, it shall be
> +  * rescheduled in the timer callback.
> +  */
> + if (likely(!queue->rate_limited))
> + xenvif_napi_schedule_or_enable_events(queue);
>   }
> 
>   return work_done;
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-
> netback/netback.c
> index 602d408fa25e..5042ff8d449a 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue
> *queue)
>   max_credit = ULONG_MAX; /* wrapped: clamp to
> ULONG_MAX */
> 
>   queue->remaining_credit = min(max_credit, max_burst);
> + queue->rate_limited = false;
>  }
> 
>  void xenvif_tx_credit_callback(unsigned long data)
> @@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue
> *queue, unsigned size)
>   msecs_to_jiffies(queue->credit_usec / 1000);
> 
>   /* Timer could already be pending in rare cases. */
> - if (timer_pending(&queue->credit_timeout))
> + if (timer_pending(&queue->credit_timeout)) {
> + queue->rate_limited = true;
>   return true;
> + }
> 
>   /* Passed the point where we can replenish credit? */
>   if (time_after_eq64(now, next_credit)) {
> @@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue
> *queue, unsigned size)
>   mod_timer(&queue->credit_timeout,
> next_credit);
>   queue->credit_window_start = next_credit;
> + queue->rate_limited = true;
> 
>   return true;
>   }
> --
> 2.11.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v7 23/36] x86, realmode: Decrypt trampoline area if memory encryption is active

2017-06-21 Thread Borislav Petkov
On Fri, Jun 16, 2017 at 01:54:12PM -0500, Tom Lendacky wrote:
> When Secure Memory Encryption is enabled, the trampoline area must not
> be encrypted. A CPU running in real mode will not be able to decrypt
> memory that has been encrypted because it will not be able to use addresses
> with the memory encryption mask.
> 
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/realmode/init.c |8 
>  1 file changed, 8 insertions(+)

Subject: x86/realmode: ...

other than that:

Reviewed-by: Borislav Petkov 

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v7 24/36] x86, swiotlb: Add memory encryption support

2017-06-21 Thread Borislav Petkov
On Fri, Jun 16, 2017 at 01:54:24PM -0500, Tom Lendacky wrote:
> Since DMA addresses will effectively look like 48-bit addresses when the
> memory encryption mask is set, SWIOTLB is needed if the DMA mask of the
> device performing the DMA does not support 48-bits. SWIOTLB will be
> initialized to create decrypted bounce buffers for use by these devices.
> 
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/include/asm/dma-mapping.h |5 ++-
>  arch/x86/include/asm/mem_encrypt.h |5 +++
>  arch/x86/kernel/pci-dma.c  |   11 +--
>  arch/x86/kernel/pci-nommu.c|2 +
>  arch/x86/kernel/pci-swiotlb.c  |   15 +-
>  arch/x86/mm/mem_encrypt.c  |   22 +++
>  include/linux/swiotlb.h|1 +
>  init/main.c|   10 +++
>  lib/swiotlb.c  |   54 
> +++-
>  9 files changed, 108 insertions(+), 17 deletions(-)

Reviewed-by: Borislav Petkov 

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Fwd: VM Live Migration with Local Storage

2017-06-21 Thread Paul Durrant
> -Original Message-
> From: Xen-devel [mailto:xen-devel-boun...@lists.xen.org] On Behalf Of
> Konrad Rzeszutek Wilk
> Sent: 20 June 2017 18:57
> To: Bruno Alvisio 
> Cc: xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] Fwd: VM Live Migration with Local Storage
> 
> On Sun, Jun 11, 2017 at 08:16:04PM -0700, Bruno Alvisio wrote:
> > Hello,
> >
> > I think it would be beneficial to add local disk migration feature for
> > ‘blkback' backend since it is one of the mostly used backends. I would like
> > to start a discussion about the design of the machinery needed to achieve
> > this feature.
> >
> > ===
> > Objective
> > Add a feature to migrate VMs that have local storage and use the blkback
> > iface.
> > ===
> >
> > ===
> > User Interface
> > Add a cmd line option in “xl migrate” command to specify if local disks
> > need to be copied to the destination node.
> > ===
> >
> > ===
> > Design
> >
> >1. As part of the libxl_domain_suspend, the “disk mirroring machinery”
> >starts an asynchronous job that copies the disks blocks from source to 
> > the
> >destination.
> >2. The protocol to copy the disks should resemble the one used for
> >memory copy:
> >
> >
> >- Do first initial copy of the disk.
> >- Check of sectors that have been written since copy started. For this,
> >the blkback driver should be aware that migration of disk is happening
> and
> >in this case forward the write request to the “migration machinery” so
> that
> >a record of dirty blocks are logged.
> >- Migration machinery copies “dirty” blocks until convergence.
> >- Duplicate all the disk writes/reads to both disks in source and
> >destinations node while VM is being suspended.
> >
> >
> > Block Diagram
> >
> >+—--+
> >|  VM   |
> >+---+
> >   |
> >   | I/O Write
> >   |
> >   V
> > +--+   +---+   +-+
> > |  blkback | > |  Source   |  sectors Stream   | Destination |
> > +--+   |  mirror   |-->|   mirror|
> >   || machinery |   I/O Writes  |  machinery  |
> >   |+---+   +-+
> >   |  |
> >   |  |
> >   | To I/O block layer   |
> >   |  |
> >   V  V
> > +--+   +-+
> > |   disk   |   |   Mirrored  |
> > +--+   | Disk|
> >+-+
> >
> >
> > ==
> > Initial Questions
> >
> >1. Is it possible to leverage the current design of QEMU for drive
> >mirroring for Xen?
> 
> Yes. It has qdisk which implement blkback interface.
> 
> >2. What is the best place to implement this protocol? As part of Xen or
> >the kernel?
> 
> QEMU

Moreover QEMU can already export disk images via NBD, and even layer qcow2 
images on top of an NBD socket. It also has comprehensive support for mirroring 
block devices with copy-on-read and background copy threads. I'm sure all of 
these capabilities could be used by a libxl toolstack, so I don't see any need 
to re-invent the wheel with blkback.

  Paul

> >3. Is it possible to use the same stream currently used for migrating
> >the memory to also migrate the disk blocks?
> 
> Probably.
> >
> >
> > Any guidance/feedback for a more specific design is greatly appreciated.
> >
> > Thanks,
> >
> > Bruno
> >
> > On Wed, Feb 22, 2017 at 5:00 AM, Wei Liu  wrote:
> >
> > > Hi Bruno
> > >
> > > Thanks for your interest.
> > >
> > > On Tue, Feb 21, 2017 at 10:34:45AM -0800, Bruno Alvisio wrote:
> > > > Hello,
> > > >
> > > > I have been to doing some research and as far as I know XEN supports
> > > > Live Migration
> > > > of VMs that only have shared storage. (i.e. iSCSI) If the VM has been
> > > > booted with local storage it cannot be live migrated.
> > > > QEMU seems to support live migration with local storage (I have tested
> > > using
> > > > 'virsh migrate with the '--storage-copy-all' option)
> > > >
> > > > I am wondering if this still true in the latest XEN release. Are there
> > > plans
> > > > to add this functionality in future releases? I would be interested in
> > > > contributing to the Xen Project by adding this functionality.
> > > >
> > >
> > > No plan at the moment.
> > >
> > > Xen supports a wide variety of disk backends. QEMU is one of them. The
> > > others are blktap (not upstreamed yet) and in-kernel blkback. The latter
> > > two don't have the capability to cop

[Xen-devel] [PATCH] passthrough: give XEN_DOMCTL_test_assign_device more sane semantics

2017-06-21 Thread Jan Beulich
So far callers of the libxc interface passed in a domain ID which was
then ignored in the hypervisor. Instead, make the hypervisor honor it
(accepting DOMID_INVALID to obtain original behavior), allowing to
query whether a device is assigned to a particular domain. Ignore the
passed in domain ID at the libxc layer instead, in order to not break
existing callers. New libxc functions would need to be added if callers
wanted to leverage the new functionality.

Signed-off-by: Jan Beulich 
---
TBD: Would DOMID_IO be a better fit than DOMID_INVALID here?

--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1539,13 +1539,13 @@ int xc_get_device_group(
 
 int xc_test_assign_device(
 xc_interface *xch,
-uint32_t domid,
+uint32_t domid, /* ignored */
 uint32_t machine_sbdf)
 {
 DECLARE_DOMCTL;
 
 domctl.cmd = XEN_DOMCTL_test_assign_device;
-domctl.domain = domid;
+domctl.domain = DOMID_INVALID;
 domctl.u.assign_device.dev = XEN_DOMCTL_DEV_PCI;
 domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
 domctl.u.assign_device.flags = 0;
@@ -1603,7 +1603,7 @@ int xc_assign_dt_device(
 
 int xc_test_assign_dt_device(
 xc_interface *xch,
-uint32_t domid,
+uint32_t domid, /* ignored */
 char *path)
 {
 int rc;
@@ -1615,7 +1615,7 @@ int xc_test_assign_dt_device(
 return -1;
 
 domctl.cmd = XEN_DOMCTL_test_assign_device;
-domctl.domain = (domid_t)domid;
+domctl.domain = DOMID_INVALID;
 
 domctl.u.assign_device.dev = XEN_DOMCTL_DEV_DT;
 domctl.u.assign_device.u.dt.size = size;
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -391,11 +391,15 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
 
 switch ( op->cmd )
 {
-case XEN_DOMCTL_createdomain:
 case XEN_DOMCTL_test_assign_device:
+if ( op->domain == DOMID_INVALID )
+{
+case XEN_DOMCTL_createdomain:
 case XEN_DOMCTL_gdbsx_guestmemio:
-d = NULL;
-break;
+d = NULL;
+break;
+}
+/* fall through */
 default:
 d = rcu_lock_domain_by_id(op->domain);
 if ( !d && op->cmd != XEN_DOMCTL_getdomaininfo )
--- a/xen/drivers/passthrough/device_tree.c
+++ b/xen/drivers/passthrough/device_tree.c
@@ -93,7 +93,8 @@ fail:
 return rc;
 }
 
-static bool_t iommu_dt_device_is_assigned(const struct dt_device_node *dev)
+static bool_t iommu_dt_device_is_assigned(const struct domain *d,
+  const struct dt_device_node *dev)
 {
 bool_t assigned = 0;
 
@@ -101,7 +102,8 @@ static bool_t iommu_dt_device_is_assigne
 return 0;
 
 spin_lock(&dtdevs_lock);
-assigned = !list_empty(&dev->domain_list);
+assigned = d ? dt_device_used_by(dev) == d->domain_id
+ : !list_empty(&dev->domain_list);
 spin_unlock(&dtdevs_lock);
 
 return assigned;
@@ -209,11 +211,11 @@ int iommu_do_dt_domctl(struct xen_domctl
 if ( ret )
 break;
 
-ret = xsm_test_assign_dtdevice(XSM_HOOK, dt_node_full_name(dev));
+ret = xsm_test_assign_dtdevice(XSM_HOOK, d, dt_node_full_name(dev));
 if ( ret )
 break;
 
-if ( iommu_dt_device_is_assigned(dev) )
+if ( iommu_dt_device_is_assigned(d, dev) )
 {
 printk(XENLOG_G_ERR "%s already assigned.\n",
dt_node_full_name(dev));
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -522,7 +522,7 @@ struct pci_dev *pci_get_real_pdev(int se
 }
 
 struct pci_dev *pci_get_pdev_by_domain(
-struct domain *d, int seg, int bus, int devfn)
+const struct domain *d, int seg, int bus, int devfn)
 {
 struct pci_seg *pseg = get_pseg(seg);
 struct pci_dev *pdev = NULL;
@@ -1337,12 +1337,12 @@ int iommu_remove_device(struct pci_dev *
  * If the device isn't owned by the hardware domain, it means it already
  * has been assigned to other domain, or it doesn't exist.
  */
-static int device_assigned(u16 seg, u8 bus, u8 devfn)
+static int device_assigned(const struct domain *d, u16 seg, u8 bus, u8 devfn)
 {
-struct pci_dev *pdev;
+const struct pci_dev *pdev;
 
 pcidevs_lock();
-pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
+pdev = pci_get_pdev_by_domain(d ?: hardware_domain, seg, bus, devfn);
 pcidevs_unlock();
 
 return pdev ? 0 : -EBUSY;
@@ -1590,7 +1590,7 @@ int iommu_do_pci_domctl(
 
 machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
 
-ret = xsm_test_assign_device(XSM_HOOK, machine_sbdf);
+ret = xsm_test_assign_device(XSM_HOOK, d, machine_sbdf);
 if ( ret )
 break;
 
@@ -1598,13 +1598,12 @@ int iommu_do_pci_domctl(
 bus = PCI_BUS(machine_sbdf);
 devfn = PCI_DEVFN2(machine_sbdf);
 
-if ( device_assigned(seg, bus, devfn) )
-{
+ret = device_assigned(d, seg, bus, devfn);
+if ( ret && !d )
 printk(XENLOG_G_INFO
 

[Xen-devel] [PATCH] x86/mmuext: don't allow copying/clearing non-RAM pages

2017-06-21 Thread Jan Beulich
The two operations really aren't meant for anything else.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3229,6 +3229,7 @@ long do_mmuext_op(
 switch ( op.cmd )
 {
 struct page_info *page;
+p2m_type_t p2mt;
 
 case MMUEXT_PIN_L1_TABLE:
 type = PGT_l1_page_table;
@@ -3528,7 +3529,12 @@ long do_mmuext_op(
 }
 
 case MMUEXT_CLEAR_PAGE:
-page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC);
+page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, P2M_ALLOC);
+if ( unlikely(p2mt != p2m_ram_rw) && page )
+{
+put_page(page);
+page = NULL;
+}
 if ( !page || !get_page_type(page, PGT_writable_page) )
 {
 if ( page )
@@ -3551,8 +3557,13 @@ long do_mmuext_op(
 {
 struct page_info *src_page, *dst_page;
 
-src_page = get_page_from_gfn(pg_owner, op.arg2.src_mfn, NULL,
+src_page = get_page_from_gfn(pg_owner, op.arg2.src_mfn, &p2mt,
  P2M_ALLOC);
+if ( unlikely(p2mt != p2m_ram_rw) && src_page )
+{
+put_page(src_page);
+src_page = NULL;
+}
 if ( unlikely(!src_page) )
 {
 gdprintk(XENLOG_WARNING,
@@ -3562,8 +3573,13 @@ long do_mmuext_op(
 break;
 }
 
-dst_page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL,
+dst_page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt,
  P2M_ALLOC);
+if ( unlikely(p2mt != p2m_ram_rw) && dst_page )
+{
+put_page(dst_page);
+dst_page = NULL;
+}
 rc = (dst_page &&
   get_page_type(dst_page, PGT_writable_page)) ? 0 : -EINVAL;
 if ( unlikely(rc) )



x86/mmuext: don't allow copying/clearing non-RAM pages

The two operations really aren't meant for anything else.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3229,6 +3229,7 @@ long do_mmuext_op(
 switch ( op.cmd )
 {
 struct page_info *page;
+p2m_type_t p2mt;
 
 case MMUEXT_PIN_L1_TABLE:
 type = PGT_l1_page_table;
@@ -3528,7 +3529,12 @@ long do_mmuext_op(
 }
 
 case MMUEXT_CLEAR_PAGE:
-page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC);
+page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, P2M_ALLOC);
+if ( unlikely(p2mt != p2m_ram_rw) && page )
+{
+put_page(page);
+page = NULL;
+}
 if ( !page || !get_page_type(page, PGT_writable_page) )
 {
 if ( page )
@@ -3551,8 +3557,13 @@ long do_mmuext_op(
 {
 struct page_info *src_page, *dst_page;
 
-src_page = get_page_from_gfn(pg_owner, op.arg2.src_mfn, NULL,
+src_page = get_page_from_gfn(pg_owner, op.arg2.src_mfn, &p2mt,
  P2M_ALLOC);
+if ( unlikely(p2mt != p2m_ram_rw) && src_page )
+{
+put_page(src_page);
+src_page = NULL;
+}
 if ( unlikely(!src_page) )
 {
 gdprintk(XENLOG_WARNING,
@@ -3562,8 +3573,13 @@ long do_mmuext_op(
 break;
 }
 
-dst_page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL,
+dst_page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt,
  P2M_ALLOC);
+if ( unlikely(p2mt != p2m_ram_rw) && dst_page )
+{
+put_page(dst_page);
+dst_page = NULL;
+}
 rc = (dst_page &&
   get_page_type(dst_page, PGT_writable_page)) ? 0 : -EINVAL;
 if ( unlikely(rc) )
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 1/3] libxl: add PV display device driver interface

2017-06-21 Thread Oleksandr Grytsov
On Tue, Jun 20, 2017 at 4:54 PM, Wei Liu  wrote:
> On Thu, May 25, 2017 at 03:17:29PM +0300, Oleksandr Grytsov wrote:
>> From: Oleksandr Grytsov 
>>

Hi Wei,

Thank you for your reply.

> I'm sorry, patch like this is impossible to review because: 1. there is
> no commit message 2. it is huge.

I will separate it to small ones and will add commit message.

> I can see it is adding a lot of hooks to the device handling framework.
> Please explain why they are needed. This sort of changes (refactoring
> and extending existing code) should also be in separate patches.

Hooks in the device handling framework are needed to avoid code duplication
on new PV device adding. There were two possibilities either use macro
or extend the device handling framework. See [1] and following conversation
for more details. The patches don't refactor existing code they extend
hooks to the device handling framework and add new functionality required
to add the display device driver.

Almost all libxl__device__add functions are the same except the Xen store
parameters. So, I've moved setting the Xen store parameters to the
set_xenstore_config hook
and have created a common function to add a device (the add hook).
Also I've created the functions libxl_device__list and
libxl_device__list_free,
which take the device type as a parameter.

>
>> Signed-off-by: Oleksandr Grytsov 
>> ---
>>  tools/libxl/Makefile |   2 +-
>>  tools/libxl/libxl.h  |  21 ++
>>  tools/libxl/libxl_create.c   |   3 +
>>  tools/libxl/libxl_device.c   | 178 -
>>  tools/libxl/libxl_internal.h |  24 +++
>>  tools/libxl/libxl_types.idl  |  40 +++-
>>  tools/libxl/libxl_types_internal.idl |   1 +
>>  tools/libxl/libxl_usb.c  |   2 +
>>  tools/libxl/libxl_utils.h|   4 +
>>  tools/libxl/libxl_vdispl.c   | 372 
>> +++
>>  10 files changed, 643 insertions(+), 4 deletions(-)
>>  create mode 100644 tools/libxl/libxl_vdispl.c
>>  };
>>
>> diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
>> index 5e96676..2954800 100644
>> --- a/tools/libxl/libxl_device.c
>> +++ b/tools/libxl/libxl_device.c
>> @@ -18,7 +18,7 @@
>>
>>  #include "libxl_internal.h"
>>
>> -static char *libxl__device_frontend_path(libxl__gc *gc, libxl__device 
>> *device)
>> +char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device)
>>  {
>>  char *dom_path = libxl__xs_get_dompath(gc, device->domid);
>>
>> @@ -1776,6 +1776,182 @@ out:
>>  return AO_CREATE_FAIL(rc);
>>  }
>>
>> +static int device_add_domain_config(libxl__gc *gc, uint32_t domid,
>> +const struct libxl_device_type *dt,
>> +void *type)
> [...]
>> +
>> +void libxl__device_add(libxl__egc *egc, uint32_t domid,
>> +   const struct libxl_device_type *dt, void *type,
>> +   libxl__ao_device *aodev)
> [...]
>> +
>> +void* libxl__device_list(const struct libxl_device_type *dt,
>> + libxl_ctx *ctx, uint32_t domid, int *num)
> [...]
>> +
>> +void libxl__device_list_free(const struct libxl_device_type *dt,
>> + void *list, int num)
>>
>
> I think existing code already provides these functionalities, right?

Right, but as I mentioned before there are almost the same functions for
each device. These new functions are generic.

[1] http://marc.info/?l=xen-devel&m=149026463411873&w=2

-- 
Best Regards,
Oleksandr Grytsov.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH] x86/mm: consolidate setting of TLB flush time stamps

2017-06-21 Thread Jan Beulich
Move code and comment into a helper function instead of repeating it in
multiple places.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -602,6 +602,20 @@ static inline void guest_get_eff_kern_l1
 TOGGLE_MODE();
 }
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+/*
+ * Record TLB information for flush later. We do not stamp page tables
+ * when running in shadow mode:
+ *  1. Pointless, since it's the shadow pt's which must be tracked.
+ *  2. Shadow mode reuses this field for shadowed page tables to store
+ * flags info -- we don't want to conflict with that.
+ */
+if ( !(page->count_info & PGC_page_table) ||
+ !shadow_mode_enabled(page_get_owner(page)) )
+page->tlbflush_timestamp = tlbflush_current_time();
+}
+
 const char __section(".bss.page_aligned.const") __aligned(PAGE_SIZE)
 zero_page[PAGE_SIZE];
 
@@ -2417,16 +2431,7 @@ static int __put_final_page_type(
 /* No need for atomic update of type_info here: noone else updates it. */
 if ( rc == 0 )
 {
-/*
- * Record TLB information for flush later. We do not stamp page tables
- * when running in shadow mode:
- *  1. Pointless, since it's the shadow pt's which must be tracked.
- *  2. Shadow mode reuses this field for shadowed page tables to
- * store flags info -- we don't want to conflict with that.
- */
-if ( !(shadow_mode_enabled(page_get_owner(page)) &&
-   (page->count_info & PGC_page_table)) )
-page->tlbflush_timestamp = tlbflush_current_time();
+page_set_tlbflush_timestamp(page);
 wmb();
 page->u.inuse.type_info--;
 }
@@ -2434,9 +2439,7 @@ static int __put_final_page_type(
 {
 ASSERT((page->u.inuse.type_info &
 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
-if ( !(shadow_mode_enabled(page_get_owner(page)) &&
-   (page->count_info & PGC_page_table)) )
-page->tlbflush_timestamp = tlbflush_current_time();
+page_set_tlbflush_timestamp(page);
 wmb();
 page->u.inuse.type_info |= PGT_validated;
 }
@@ -2486,16 +2489,7 @@ static int __put_page_type(struct page_i
 break;
 }
 
-/*
- * Record TLB information for flush later. We do not stamp page
- * tables when running in shadow mode:
- *  1. Pointless, since it's the shadow pt's which must be tracked.
- *  2. Shadow mode reuses this field for shadowed page tables to
- * store flags info -- we don't want to conflict with that.
- */
-if ( !(shadow_mode_enabled(page_get_owner(page)) &&
-   (page->count_info & PGC_page_table)) )
-page->tlbflush_timestamp = tlbflush_current_time();
+page_set_tlbflush_timestamp(page);
 }
 
 if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )



x86/mm: consolidate setting of TLB flush time stamps

Move code and comment into a helper function instead of repeating it in
multiple places.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -602,6 +602,20 @@ static inline void guest_get_eff_kern_l1
 TOGGLE_MODE();
 }
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+/*
+ * Record TLB information for flush later. We do not stamp page tables
+ * when running in shadow mode:
+ *  1. Pointless, since it's the shadow pt's which must be tracked.
+ *  2. Shadow mode reuses this field for shadowed page tables to store
+ * flags info -- we don't want to conflict with that.
+ */
+if ( !(page->count_info & PGC_page_table) ||
+ !shadow_mode_enabled(page_get_owner(page)) )
+page->tlbflush_timestamp = tlbflush_current_time();
+}
+
 const char __section(".bss.page_aligned.const") __aligned(PAGE_SIZE)
 zero_page[PAGE_SIZE];
 
@@ -2417,16 +2431,7 @@ static int __put_final_page_type(
 /* No need for atomic update of type_info here: noone else updates it. */
 if ( rc == 0 )
 {
-/*
- * Record TLB information for flush later. We do not stamp page tables
- * when running in shadow mode:
- *  1. Pointless, since it's the shadow pt's which must be tracked.
- *  2. Shadow mode reuses this field for shadowed page tables to
- * store flags info -- we don't want to conflict with that.
- */
-if ( !(shadow_mode_enabled(page_get_owner(page)) &&
-   (page->count_info & PGC_page_table)) )
-page->tlbflush_timestamp = tlbflush_current_time();
+page_set_tlbflush_timestamp(page);
 wmb();
 page->u.inuse.type_info--;
 }
@@ -2434,9 +2439,7 @@ static int __put_final_page_type(
 {
 ASSERT((page->u.inuse.type_info &
  

[Xen-devel] [PATCH] x86/mm: drop redundant domain parameter from get_page_from_gfn_p2m()

2017-06-21 Thread Jan Beulich
It can always be read from the passed p2m. Take the opportunity and
also rename the function, making the "p2m" suffix a prefix, to match
other p2m functions, and convert the "gfn" parameter's type.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -55,13 +55,13 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
 void *top_map;
 p2m_type_t p2mt;
 walk_t gw;
-unsigned long top_gfn;
+gfn_t top_gfn;
 struct page_info *top_page;
 
 /* Get the top-level table's MFN */
-top_gfn = cr3 >> PAGE_SHIFT;
-top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn,
- &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE);
+top_gfn = _gfn(cr3 >> PAGE_SHIFT);
+top_page = p2m_get_page_from_gfn(p2m, top_gfn, &p2mt, NULL,
+ P2M_ALLOC | P2M_UNSHARE);
 if ( p2m_is_paging(p2mt) )
 {
 ASSERT(p2m_is_hostp2m(p2m));
@@ -100,8 +100,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
 {
 gfn_t gfn = guest_walk_to_gfn(&gw);
 struct page_info *page;
-page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt,
- NULL, P2M_ALLOC | P2M_UNSHARE);
+
+page = p2m_get_page_from_gfn(p2m, gfn, &p2mt, NULL,
+ P2M_ALLOC | P2M_UNSHARE);
 if ( page )
 put_page(page);
 if ( p2m_is_paging(p2mt) )
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -472,8 +472,8 @@ void __put_gfn(struct p2m_domain *p2m, u
 }
 
 /* Atomically look up a GFN and take a reference count on the backing page. */
-struct page_info *get_page_from_gfn_p2m(
-struct domain *d, struct p2m_domain *p2m, unsigned long gfn,
+struct page_info *p2m_get_page_from_gfn(
+struct p2m_domain *p2m, gfn_t gfn,
 p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
 {
 struct page_info *page = NULL;
@@ -489,7 +489,7 @@ struct page_info *get_page_from_gfn_p2m(
 {
 /* Fast path: look up and get out */
 p2m_read_lock(p2m);
-mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
+mfn = __get_gfn_type_access(p2m, gfn_x(gfn), t, a, 0, NULL, 0);
 if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
  && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
 {
@@ -497,11 +497,12 @@ struct page_info *get_page_from_gfn_p2m(
 if ( unlikely(p2m_is_foreign(*t)) )
 {
 struct domain *fdom = page_get_owner_and_reference(page);
-ASSERT(fdom != d);
+
+ASSERT(fdom != p2m->domain);
 if ( fdom == NULL )
 page = NULL;
 }
-else if ( !get_page(page, d) &&
+else if ( !get_page(page, p2m->domain) &&
   /* Page could be shared */
   (!p2m_is_shared(*t) || !get_page(page, dom_cow)) )
 page = NULL;
@@ -517,14 +518,14 @@ struct page_info *get_page_from_gfn_p2m(
 }
 
 /* Slow path: take the write lock and do fixups */
-mfn = get_gfn_type_access(p2m, gfn, t, a, q, NULL);
+mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
 if ( p2m_is_ram(*t) && mfn_valid(mfn) )
 {
 page = mfn_to_page(mfn);
-if ( !get_page(page, d) )
+if ( !get_page(page, p2m->domain) )
 page = NULL;
 }
-put_gfn(d, gfn);
+put_gfn(p2m->domain, gfn_x(gfn));
 
 return page;
 }
@@ -1900,7 +1901,7 @@ void *map_domain_gfn(struct p2m_domain *
 }
 
 /* Translate the gfn, unsharing if shared. */
-page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), p2mt, NULL, q);
+page = p2m_get_page_from_gfn(p2m, gfn, p2mt, NULL, q);
 if ( p2m_is_paging(*p2mt) )
 {
 ASSERT(p2m_is_hostp2m(p2m));
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -465,9 +465,7 @@ static inline mfn_t get_gfn_query_unlock
  * and should be used by any path that intends to write to the backing page.
  * Returns NULL if the page is not backed by RAM.
  * The caller is responsible for calling put_page() afterwards. */
-struct page_info *get_page_from_gfn_p2m(struct domain *d,
-struct p2m_domain *p2m,
-unsigned long gfn,
+struct page_info *p2m_get_page_from_gfn(struct p2m_domain *p2m, gfn_t gfn,
 p2m_type_t *t, p2m_access_t *a,
 p2m_query_t q);
 
@@ -477,7 +475,7 @@ static inline struct page_info *get_page
 struct page_info *page;
 
 if ( paging_mode_translate(d) )
-return get_page_from_gfn_p2m(d, p2m_get_hostp2m(d), gfn, t, NULL, q);
+return p2m_get_page_from_gfn(p2m_get_hostp2m(d), _gfn(gfn), t, NULL, 
q);
 
 /* Non-translated guests see 1-1 RAM / MMIO mappings everywhere */
 if ( t )


x86/mm: drop redundant domain parameter fr

[Xen-devel] [PATCH 0/2] x86/p2m: some code simplification

2017-06-21 Thread Jan Beulich
1: simplify p2m_next_level()
2: make p2m_alloc_ptp() return an MFN

Signed-off-by: Jan Beulich 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 06/14 v4] xen/arm: vpl011: Add a new domctl API to initialize vpl011

2017-06-21 Thread Bhupinder Thakur
Hi Julien,

On 20 June 2017 at 16:46, Julien Grall  wrote:
> On 06/19/2017 02:11 PM, Bhupinder Thakur wrote:
>>
>> Hi Wei,
>
>
> Hi Bhupinder,
>
>
>> On 19 June 2017 at 17:17, Wei Liu  wrote:
>>>
>>> On Mon, Jun 19, 2017 at 12:01:32PM +0100, Julien Grall wrote:



 On 19/06/17 11:59, Bhupinder Thakur wrote:
>
> Hi Julien,
>
> I was mistaken in my earlier mail about vpl011 init working if it is
> moved to libxl__arch_domain_create(). It is failing because as you
> have mentioned vuart_pfn is allocated later in xc_dom_build_image().
>
> Can we delay mapping of this page in Xen until the ring buffer is
> actually required by the emulation code for reading/writing data. By
> that time, the page would have been physically mapped.


 You would not be able to report an error if you fail to map it. But this
 looks like to me a workaround for a tool problem.

 Anyway, as I said, I'd like feedback from the tools maintainers to see
 how
 we can proceed.

>>>
>>> Is there a summary of the problem, is there a particular email in this
>>> thread I should look at? Sorry I'm swamped by emails and patches at the
>>> moment.
>>
>>
>> I will summarize the problem.
>>
>> It was decided to call domain_vpl011_init() from inside
>> libxl__arch_domain_create() to initialize vpl011. However,
>> domain_vpl011_init() fails to map the the vuart GFN because it has not
>> been physically mapped yet by the toolstack.
>>
>> The following call flows highlight the issue.
>>
>> libxl__domain_build() ---> libxl__build_pv() ---> libxl__build_dom()
>> ---> xc_dom_build_image() ---> alloc_magic_pages() ---> vuart GFN
>> allocated/mapped here
>>
>> libxl__domain_build() ---> libxl__build_pre() --->
>> libxl__arch_domain_create() ---> domain_vpl011_init() ---> this call
>> fails as the vuart GFN has not been physically mapped yet as shown in
>> the first call flow.
>>
>> However, libxl__build_pv() is called after libxl__build_pre(). It
>> means that the domain_vpl011_init() is called before
>> alloc_magic_pages() is called and hence the initialization fails.
>>
>> For that reason, I had introduced a new function
>> libxl__arch_domain_create_finish() which will be called from
>> libxl__build_post(). I moved the domain_vpl011_init() there. However,
>> Julien pointed out that vuart should be initialized early in
>> libxl__arch_domain_create() function.
>
>
> libxl__arch_domain_create could be a place or even
> libxl__arch_domain_finalise_hw_descriptions.
>
> My point is it looks a bit odd to create the vpl011 UART very late in the
> process as from the code you would expect all the hardware to be set up after
> libxl__arch_domain_finalise_hw_descriptions is called.
>

libxl__arch_domain_finalise_hw_descriptions() is called just before
xc_dom_build_image() and therefore the vuart gfn is still not
allocated. Maybe I can introduce a new arch specific
libxl__arch_domain_init_vpl011() function and call it from inside
libxl__build_dom() after the call to xc_dom_build_image() so that the
vuart gfn is allocated.

Regards,
Bhupinder

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [qemu-mainline test] 110901: regressions - FAIL

2017-06-21 Thread osstest service owner
flight 110901 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/110901/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-xl-credit2  16 guest-start.2fail REGR. vs. 110478

Regressions which are regarded as allowable (not blocking):
 test-amd64-amd64-xl-rtds  9 debian-install   fail REGR. vs. 110478
 test-armhf-armhf-xl-rtds 16 guest-start.2fail REGR. vs. 110478

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-libvirt-xsm 13 saverestore-support-checkfail  like 110478
 test-armhf-armhf-libvirt 13 saverestore-support-checkfail  like 110478
 test-amd64-amd64-xl-qemuu-win7-amd64 15 guest-localmigrate/x10 fail like 110478
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop fail like 110478
 test-armhf-armhf-libvirt-raw 12 saverestore-support-checkfail  like 110478
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-qemuu-ws16-amd64  9 windows-installfail never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 13 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-arm64-arm64-xl-credit2  12 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  13 saverestore-support-checkfail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  12 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  13 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl  12 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  13 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 13 saverestore-support-checkfail never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass
 test-amd64-i386-xl-qemuu-win10-i386  9 windows-install fail never pass
 test-amd64-amd64-xl-qemuu-win10-i386  9 windows-installfail never pass
 test-amd64-i386-xl-qemuu-ws16-amd64  9 windows-install fail never pass

version targeted for testing:
 qemuu8dfaf23ae1f2273a9730a9b309cc8471269bb524
baseline version:
 qemuuedf8bc98424d62035d5e4c0f39542722d72d7979

Last test of basis   110478  2017-06-16 03:24:46 Z5 days
Failing since110587  2017-06-19 17:24:01 Z1 days2 attempts
Testing same since   110901  2017-06-21 00:08:56 Z0 days1 attempts


People who touched revisions under test:
  Eric Blake 
  Fam Zheng 
  Felipe Franciosi 
  Gerd Hoffmann 
  Ian McKellar 
  Ian McKellar via Qemu-devel 
  Jeff Cody 
  Jonathon Jongsma 
  Laszlo Ersek 
  Laurent Vivier 
  Laurent Vivier 
  Marc-André Lureau 
  Max Reitz 
  Michael S. Tsirkin 
  Paolo Bonzini 
  Peter Maydell 
  Peter Xu 
  Philippe Mathieu-Daudé 
  Richard W.M. Jones 
  Roman Kagan 
  Stefan Hajnoczi 
  Thomas Huth 
  Vladimir

[Xen-devel] [PATCH 1/2] x86/p2m: simplify p2m_next_level()

2017-06-21 Thread Jan Beulich
Calculate entry PFN and flags just once, making the respective
variables (and also pg) function wide. Take the opportunity and also
make the induction variable unsigned.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -195,7 +195,9 @@ p2m_next_level(struct p2m_domain *p2m, v
 l1_pgentry_t *p2m_entry;
 l1_pgentry_t new_entry;
 void *next;
-int i;
+struct page_info *pg;
+unsigned int i, flags;
+unsigned long pfn;
 
 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
   shift, max)) )
@@ -204,8 +206,6 @@ p2m_next_level(struct p2m_domain *p2m, v
 /* PoD/paging: Not present doesn't imply empty. */
 if ( !l1e_get_flags(*p2m_entry) )
 {
-struct page_info *pg;
-
 pg = p2m_alloc_ptp(p2m, type);
 if ( pg == NULL )
 return -ENOMEM;
@@ -232,21 +232,17 @@ p2m_next_level(struct p2m_domain *p2m, v
 }
 }
 
-ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
+flags = l1e_get_flags(*p2m_entry);
+pfn = l1e_get_pfn(*p2m_entry);
+ASSERT(flags & (_PAGE_PRESENT|_PAGE_PSE));
 
 /* split 1GB pages into 2MB pages */
-if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+if ( type == PGT_l2_page_table && (flags & _PAGE_PSE) )
 {
-unsigned long flags, pfn;
-struct page_info *pg;
-
 pg = p2m_alloc_ptp(p2m, PGT_l2_page_table);
 if ( pg == NULL )
 return -ENOMEM;
 
-flags = l1e_get_flags(*p2m_entry);
-pfn = l1e_get_pfn(*p2m_entry);
-
 l1_entry = __map_domain_page(pg);
 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
 {
@@ -263,19 +259,14 @@ p2m_next_level(struct p2m_domain *p2m, v
 
 
 /* split single 2MB large page into 4KB page in P2M table */
-if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+if ( type == PGT_l1_page_table && (flags & _PAGE_PSE) )
 {
-unsigned long flags, pfn;
-struct page_info *pg;
-
 pg = p2m_alloc_ptp(p2m, PGT_l1_page_table);
 if ( pg == NULL )
 return -ENOMEM;
 
 /* New splintered mappings inherit the flags of the old superpage, 
  * with a little reorganisation for the _PAGE_PSE_PAT bit. */
-flags = l1e_get_flags(*p2m_entry);
-pfn = l1e_get_pfn(*p2m_entry);
 if ( pfn & 1 )   /* ==> _PAGE_PSE_PAT was set */
 pfn -= 1;/* Clear it; _PAGE_PSE becomes _PAGE_PAT */
 else



x86/p2m-pt: simplify p2m_next_level()

Calculate entry PFN and flags just once, making the respective
variables (and also pg) function wide. Take the opportunity and also
make the induction variable unsigned.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -195,7 +195,9 @@ p2m_next_level(struct p2m_domain *p2m, v
 l1_pgentry_t *p2m_entry;
 l1_pgentry_t new_entry;
 void *next;
-int i;
+struct page_info *pg;
+unsigned int i, flags;
+unsigned long pfn;
 
 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
   shift, max)) )
@@ -204,8 +206,6 @@ p2m_next_level(struct p2m_domain *p2m, v
 /* PoD/paging: Not present doesn't imply empty. */
 if ( !l1e_get_flags(*p2m_entry) )
 {
-struct page_info *pg;
-
 pg = p2m_alloc_ptp(p2m, type);
 if ( pg == NULL )
 return -ENOMEM;
@@ -232,21 +232,17 @@ p2m_next_level(struct p2m_domain *p2m, v
 }
 }
 
-ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
+flags = l1e_get_flags(*p2m_entry);
+pfn = l1e_get_pfn(*p2m_entry);
+ASSERT(flags & (_PAGE_PRESENT|_PAGE_PSE));
 
 /* split 1GB pages into 2MB pages */
-if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+if ( type == PGT_l2_page_table && (flags & _PAGE_PSE) )
 {
-unsigned long flags, pfn;
-struct page_info *pg;
-
 pg = p2m_alloc_ptp(p2m, PGT_l2_page_table);
 if ( pg == NULL )
 return -ENOMEM;
 
-flags = l1e_get_flags(*p2m_entry);
-pfn = l1e_get_pfn(*p2m_entry);
-
 l1_entry = __map_domain_page(pg);
 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
 {
@@ -263,19 +259,14 @@ p2m_next_level(struct p2m_domain *p2m, v
 
 
 /* split single 2MB large page into 4KB page in P2M table */
-if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+if ( type == PGT_l1_page_table && (flags & _PAGE_PSE) )
 {
-unsigned long flags, pfn;
-struct page_info *pg;
-
 pg = p2m_alloc_ptp(p2m, PGT_l1_page_table);
 if ( pg == NULL )
 return -ENOMEM;
 
 /* New splintered mappings inherit the flags of the old superpage, 
  * with a little reorganisation for the _PAGE_PSE_PAT bit. */
-

[Xen-devel] [PATCH 2/2] x86/p2m: make p2m_alloc_ptp() return an MFN

2017-06-21 Thread Jan Beulich
None of the callers really needs the struct page_info pointer.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -569,7 +569,7 @@ int p2m_set_entry(struct p2m_domain *p2m
 return rc;
 }
 
-struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
+mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
 {
 struct page_info *pg;
 
@@ -577,13 +577,13 @@ struct page_info *p2m_alloc_ptp(struct p
 ASSERT(p2m->domain);
 ASSERT(p2m->domain->arch.paging.alloc_page);
 pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
-if (pg == NULL)
-return NULL;
+if ( !pg )
+return INVALID_MFN;
 
 page_list_add_tail(pg, &p2m->pages);
 pg->u.inuse.type_info = type | 1 | PGT_validated;
 
-return pg;
+return page_to_mfn(pg);
 }
 
 void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
@@ -609,7 +609,7 @@ void p2m_free_ptp(struct p2m_domain *p2m
  */
 int p2m_alloc_table(struct p2m_domain *p2m)
 {
-struct page_info *p2m_top;
+mfn_t top_mfn;
 struct domain *d = p2m->domain;
 int rc = 0;
 
@@ -632,14 +632,14 @@ int p2m_alloc_table(struct p2m_domain *p
 
 P2M_PRINTK("allocating p2m table\n");
 
-p2m_top = p2m_alloc_ptp(p2m, PGT_l4_page_table);
-if ( p2m_top == NULL )
+top_mfn = p2m_alloc_ptp(p2m, PGT_l4_page_table);
+if ( mfn_eq(top_mfn, INVALID_MFN) )
 {
 p2m_unlock(p2m);
 return -ENOMEM;
 }
 
-p2m->phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
+p2m->phys_table = pagetable_from_mfn(top_mfn);
 
 if ( hap_enabled(d) )
 iommu_share_p2m_table(d);
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -225,16 +225,16 @@ static void ept_p2m_type_to_flags(struct
 /* Fill in middle levels of ept table */
 static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry)
 {
-struct page_info *pg;
+mfn_t mfn;
 ept_entry_t *table;
 unsigned int i;
 
-pg = p2m_alloc_ptp(p2m, 0);
-if ( pg == NULL )
+mfn = p2m_alloc_ptp(p2m, 0);
+if ( mfn_eq(mfn, INVALID_MFN) )
 return 0;
 
 ept_entry->epte = 0;
-ept_entry->mfn = page_to_mfn(pg);
+ept_entry->mfn = mfn_x(mfn);
 ept_entry->access = p2m->default_access;
 
 ept_entry->r = ept_entry->w = ept_entry->x = 1;
@@ -243,7 +243,7 @@ static int ept_set_middle_entry(struct p
 
 ept_entry->suppress_ve = 1;
 
-table = __map_domain_page(pg);
+table = map_domain_page(mfn);
 
 for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
 table[i].suppress_ve = 1;
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -195,7 +195,7 @@ p2m_next_level(struct p2m_domain *p2m, v
 l1_pgentry_t *p2m_entry;
 l1_pgentry_t new_entry;
 void *next;
-struct page_info *pg;
+mfn_t mfn;
 unsigned int i, flags;
 unsigned long pfn;
 
@@ -206,12 +206,11 @@ p2m_next_level(struct p2m_domain *p2m, v
 /* PoD/paging: Not present doesn't imply empty. */
 if ( !l1e_get_flags(*p2m_entry) )
 {
-pg = p2m_alloc_ptp(p2m, type);
-if ( pg == NULL )
+mfn = p2m_alloc_ptp(p2m, type);
+if ( mfn_eq(mfn, INVALID_MFN) )
 return -ENOMEM;
 
-new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
- P2M_BASE_FLAGS | _PAGE_RW);
+new_entry = l1e_from_pfn(mfn_x(mfn), P2M_BASE_FLAGS | _PAGE_RW);
 
 switch ( type ) {
 case PGT_l3_page_table:
@@ -239,11 +238,11 @@ p2m_next_level(struct p2m_domain *p2m, v
 /* split 1GB pages into 2MB pages */
 if ( type == PGT_l2_page_table && (flags & _PAGE_PSE) )
 {
-pg = p2m_alloc_ptp(p2m, PGT_l2_page_table);
-if ( pg == NULL )
+mfn = p2m_alloc_ptp(p2m, PGT_l2_page_table);
+if ( mfn_eq(mfn, INVALID_MFN) )
 return -ENOMEM;
 
-l1_entry = __map_domain_page(pg);
+l1_entry = map_domain_page(mfn);
 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
 {
 new_entry = l1e_from_pfn(pfn | (i * L1_PAGETABLE_ENTRIES), flags);
@@ -251,8 +250,7 @@ p2m_next_level(struct p2m_domain *p2m, v
 p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, 2);
 }
 unmap_domain_page(l1_entry);
-new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
- P2M_BASE_FLAGS | _PAGE_RW); /* disable PSE */
+new_entry = l1e_from_pfn(mfn_x(mfn), P2M_BASE_FLAGS | _PAGE_RW);
 p2m_add_iommu_flags(&new_entry, 2, IOMMUF_readable|IOMMUF_writable);
 p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, 3);
 }
@@ -261,8 +259,8 @@ p2m_next_level(struct p2m_domain *p2m, v
 /* split single 2MB large page into 4KB page in P2M table */
 if ( type == PGT_l1_page_table && (flags & _PAGE_PSE) )
 {
-pg = p2m_alloc_ptp(p2m, PGT_l1_page_table);
-if ( pg == NULL )
+mfn = p2m_alloc_ptp(p2m, PGT_l1_page_table);
+   

Re: [Xen-devel] [PATCH] x86/mm: drop redundant domain parameter from get_page_from_gfn_p2m()

2017-06-21 Thread Andrew Cooper
On 21/06/17 11:12, Jan Beulich wrote:
> It can always be read from the passed p2m. Take the opportunity and
> also rename the function, making the "p2m" suffix a prefix, to match
> other p2m functions, and convert the "gfn" parameter's type.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

One observation though.  Given its name, I'd expect this to be common
with ARM.  As it isn't, I suspect there is some API/infrastructure
de-duplication which could be worked on in due course.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86/mm: consolidate setting of TLB flush time stamps

2017-06-21 Thread Andrew Cooper
On 21/06/17 11:11, Jan Beulich wrote:
> Move code and comment into a helper function instead of repeating it in
> multiple places.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Notes on stubdoms and latency on ARM

2017-06-21 Thread Julien Grall



On 20/06/17 17:23, Volodymyr Babchuk wrote:

Hi Julien,


Hi Volodymyr,



On 20 June 2017 at 03:45, Julien Grall  wrote:

On 19 June 2017 at 10:54, Stefano Stabellini 
wrote:


But given the conversation so far, it seems likely that that is mainly
due to the fact that context switching on ARM has not been optimized.



True. However, Volodymyr took the time to demonstrate the performance of
EL0 apps vs. stubdoms with a PoC, which is much more than most Xen
contributors do. Nobody provided numbers for a faster ARM context switch
yet. I don't know on whom should fall the burden of proving that a
lighter context switch can match the EL0 app numbers. I am not sure it
would be fair to ask Volodymyr to do it.


Thanks. Actually, we discussed this topic internally today. Main
concern today is not a SMCs and OP-TEE (I will be happy to do this
right in XEN), but vcopros and GPU virtualization. Because of legal
issues, we can't put this in XEN. And because of vcpu framework nature
we will need multiple calls to vgpu driver per one vcpu context
switch.
I'm going to create worst case scenario, where multiple vcpu are
active and there are no free pcpu, to see how credit or credit2
scheduler will call my stubdom.
Also, I'm very interested in Julien's idea about stubdom without GIC.
Probably, I'll try to hack something like that to see how it will
affect overall switching latency


This can only work if your stubdomain does not require interrupt. However,
if you are dealing with devices you likely need interrupts, am I correct?

Ah yes, you are correct. I thought about OP-TEE use case, when there
are no interrupts. In case of co-processor virtualization we probably
will need interrupts.


The problem would be the same with an EL0 app.

In case of EL0 there will be no problem, because EL0 can't handle
interrupts :) XEN should receive interrupt and invoke app. Yes, this
is another problem with apps, if we want to use them as devices
drivers.


Well, this is a bit more complex than that. When you receive an 
interrupt Xen may run a vCPU that will not use that app. So you have to 
ensure the time will not get accounted for it.


The more I read the discussion, the more I think we should look at 
optimizing the stubdom case. Xen EL0 should only be used for tiny 
emulation for a given domain. Otherwise you end up to re-invent the domain.


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [Qemu-devel] [PATCH 1/3] xen-disk: only advertize feature-persistent if grant copy is not available

2017-06-21 Thread Paul Durrant
> -Original Message-
> From: Qemu-devel [mailto:qemu-devel-
> bounces+paul.durrant=citrix@nongnu.org] On Behalf Of Paul Durrant
> Sent: 21 June 2017 10:36
> To: Roger Pau Monne ; Stefano Stabellini
> 
> Cc: Kevin Wolf ; qemu-bl...@nongnu.org; qemu-
> de...@nongnu.org; Max Reitz ; Anthony Perard
> ; xen-de...@lists.xenproject.org
> Subject: Re: [Qemu-devel] [PATCH 1/3] xen-disk: only advertize feature-
> persistent if grant copy is not available
> 
> > -Original Message-
> > From: Roger Pau Monne
> > Sent: 21 June 2017 10:18
> > To: Stefano Stabellini 
> > Cc: Paul Durrant ; xen-
> de...@lists.xenproject.org;
> > qemu-de...@nongnu.org; qemu-bl...@nongnu.org; Anthony Perard
> > ; Kevin Wolf ; Max
> Reitz
> > 
> > Subject: Re: [PATCH 1/3] xen-disk: only advertize feature-persistent if
> grant
> > copy is not available
> >
> > On Tue, Jun 20, 2017 at 03:19:33PM -0700, Stefano Stabellini wrote:
> > > On Tue, 20 Jun 2017, Paul Durrant wrote:
> > > > If grant copy is available then it will always be used in preference to
> > > > persistent maps. In this case feature-persistent should not be
> advertized
> > > > to the frontend, otherwise it may needlessly copy data into persistently
> > > > granted buffers.
> > > >
> > > > Signed-off-by: Paul Durrant 
> > >
> > > CC'ing Roger.
> > >
> > > It is true that using feature-persistent together with grant copies is a
> > > a very bad idea.
> > >
> > > But this change establishes an explicit preference of
> > > feature_grant_copy over feature-persistent in the xen_disk backend. It
> > > is not obvious to me that it should be the case.
> > >
> > > Why is feature_grant_copy (without feature-persistent) better than
> > > feature-persistent (without feature_grant_copy)? Shouldn't we simply
> > > avoid grant copies to copy data to persistent grants?
> >
> > When using persistent grants the frontend must always copy data from
> > the buffer to the persistent grant, there's no way to avoid this.
> >
> > Using grant_copy we move the copy from the frontend to the backend,
> > which means the CPU time of the copy is accounted to the backend. This
> > is not ideal, but IMHO it's better than persistent grants because it
> > avoids keeping a pool of mapped grants that consume memory and make
> > the code more complex.
> >
> > Do you have some performance data showing the difference between
> > persistent grants vs grant copy?
> >
> 
> No, but I can get some :-)
> 
> For a little background... I've been trying to push throughput of fio running 
> in
> a debian stretch guest on my skull canyon NUC. When I started out, I was
> getting ~100MBbs. When I finished, with this patch, the IOThreads one, the
> multi-page ring one and a bit of hackery to turn off all the aio flushes that
> seem to occur even if the image is opened with O_DIRECT, I was getting
> ~960Mbps... which is about line rate for the SSD in the in NUC.
> 
> So, I'll force use of persistent grants on and see what sort of throughput I
> get.

A quick test with grant copy forced off (causing persistent grants to be 
used)... My VM is debian stretch using a 16 page shared ring from blkfront. The 
image backing xvdb is a fully inflated 10G qcow2.

root@dhcp-237-70:~# fio --randrepeat=1 --ioengine=libaio --direct=0 
--gtod_reduce=1 --name=test --filename=/dev/xvdb --bs=512k --iodepth=64 
--size=10G --readwrite=randwrite --ramp_time=4
test: (g=0): rw=randwrite, bs=512K-512K/512K-512K/512K-512K, ioengine=libaio, 
iodepth=64
fio-2.16
Starting 1 process
Jobs: 1 (f=1): [w(1)] [70.6% done] [0KB/539.4MB/0KB /s] [0/1078/0 iops] [eta 
00m:05s]
test: (groupid=0, jobs=1): err= 0: pid=633: Wed Jun 21 06:26:06 2017
  write: io=6146.6MB, bw=795905KB/s, iops=1546, runt=  7908msec
  cpu  : usr=2.07%, sys=34.00%, ctx=4490, majf=0, minf=1
  IO depths: 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=0.3%, >=64=166.9%
 submit: 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
 complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.1%, >=64=0.0%
 issued: total=r=0/w=12230/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
 latency   : target=0, window=0, percentile=100.00%, depth=64

Run status group 0 (all jobs):
  WRITE: io=6146.6MB, aggrb=795904KB/s, minb=795904KB/s, maxb=795904KB/s, 
mint=7908msec, maxt=7908msec

Disk stats (read/write):
  xvdb: ios=54/228860, merge=0/2230616, ticks=16/5403048, in_queue=5409068, 
util=98.26%

The dom0 cpu usage for the relevant IOThread was ~60%

The same test with grant copy...

root@dhcp-237-70:~# fio --randrepeat=1 --ioengine=libaio --direct=0 
--gtod_reduce=1 --name=test --filename=/dev/xvdb --bs=512k --iodepth=64 
--size=10G --readwrite=randwrite --ramp_time=4
test: (g=0): rw=randwrite, bs=512K-512K/512K-512K/512K-512K, ioengine=libaio, 
iodepth=64
fio-2.16
Starting 1 process
Jobs: 1 (f=1): [w(1)] [70.6% done] [0KB/607.7MB/0KB /s] [0/1215/0 iops] [eta 
00m:05s]
test: (groupid=0, jobs=1): err= 0: pid=483: Wed Jun 21 06:35:14 2017
  write: io=6232.0

Re: [Xen-devel] [PATCH 2/3] xen-disk: add support for multi-page shared rings

2017-06-21 Thread Paul Durrant
> -Original Message-
> From: Stefano Stabellini [mailto:sstabell...@kernel.org]
> Sent: 20 June 2017 23:51
> To: Paul Durrant 
> Cc: xen-de...@lists.xenproject.org; qemu-de...@nongnu.org; qemu-
> bl...@nongnu.org; Stefano Stabellini ; Anthony
> Perard ; Kevin Wolf ;
> Max Reitz 
> Subject: Re: [PATCH 2/3] xen-disk: add support for multi-page shared rings
> 
> On Tue, 20 Jun 2017, Paul Durrant wrote:
> > The blkif protocol has had provision for negotiation of multi-page shared
> > rings for some time now and many guest OS have support in their frontend
> > drivers.
> >
> > This patch makes the necessary modifications to xen-disk support a shared
> > ring up to order 4 (i.e. 16 pages).
> >
> > Signed-off-by: Paul Durrant 
> 
> Thanks for the patch!
> 

You're welcome.

> > ---
> > Cc: Stefano Stabellini 
> > Cc: Anthony Perard 
> > Cc: Kevin Wolf 
> > Cc: Max Reitz 
> > ---
> >  hw/block/xen_disk.c | 141
> 
> >  1 file changed, 110 insertions(+), 31 deletions(-)
> >
> > diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> > index 9b06e3aa81..a9942d32db 100644
> > --- a/hw/block/xen_disk.c
> > +++ b/hw/block/xen_disk.c
> > @@ -36,8 +36,6 @@
> >
> >  static int batch_maps   = 0;
> >
> > -static int max_requests = 32;
> > -
> >  /* - */
> >
> >  #define BLOCK_SIZE  512
> > @@ -84,6 +82,8 @@ struct ioreq {
> >  BlockAcctCookie acct;
> >  };
> >
> > +#define MAX_RING_PAGE_ORDER 4
> > +
> >  struct XenBlkDev {
> >  struct XenDevicexendev;  /* must be first */
> >  char*params;
> > @@ -94,7 +94,8 @@ struct XenBlkDev {
> >  booldirectiosafe;
> >  const char  *fileproto;
> >  const char  *filename;
> > -int ring_ref;
> > +unsigned intring_ref[1 << MAX_RING_PAGE_ORDER];
> > +unsigned intnr_ring_ref;
> >  void*sring;
> >  int64_t file_blk;
> >  int64_t file_size;
> > @@ -110,6 +111,7 @@ struct XenBlkDev {
> >  int requests_total;
> >  int requests_inflight;
> >  int requests_finished;
> > +unsigned intmax_requests;
> >
> >  /* Persistent grants extension */
> >  gbooleanfeature_discard;
> > @@ -199,7 +201,7 @@ static struct ioreq *ioreq_start(struct XenBlkDev
> *blkdev)
> >  struct ioreq *ioreq = NULL;
> >
> >  if (QLIST_EMPTY(&blkdev->freelist)) {
> > -if (blkdev->requests_total >= max_requests) {
> > +if (blkdev->requests_total >= blkdev->max_requests) {
> >  goto out;
> >  }
> >  /* allocate new struct */
> > @@ -905,7 +907,7 @@ static void blk_handle_requests(struct XenBlkDev
> *blkdev)
> >  ioreq_runio_qemu_aio(ioreq);
> >  }
> >
> > -if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
> > +if (blkdev->more_work && blkdev->requests_inflight < blkdev-
> >max_requests) {
> >  qemu_bh_schedule(blkdev->bh);
> >  }
> >  }
> > @@ -918,15 +920,6 @@ static void blk_bh(void *opaque)
> >  blk_handle_requests(blkdev);
> >  }
> >
> > -/*
> > - * We need to account for the grant allocations requiring contiguous
> > - * chunks; the worst case number would be
> > - * max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
> > - * but in order to keep things simple just use
> > - * 2 * max_req * max_seg.
> > - */
> > -#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))
> > -
> >  static void blk_alloc(struct XenDevice *xendev)
> >  {
> >  struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev,
> xendev);
> > @@ -938,11 +931,6 @@ static void blk_alloc(struct XenDevice *xendev)
> >  if (xen_mode != XEN_EMULATE) {
> >  batch_maps = 1;
> >  }
> > -if (xengnttab_set_max_grants(xendev->gnttabdev,
> > -MAX_GRANTS(max_requests,
> BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
> > -xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
> > -  strerror(errno));
> > -}
> >  }
> >
> >  static void blk_parse_discard(struct XenBlkDev *blkdev)
> > @@ -1037,6 +1025,9 @@ static int blk_init(struct XenDevice *xendev)
> >!blkdev->feature_grant_copy);
> >  xenstore_write_be_int(&blkdev->xendev, "info", info);
> >
> > +xenstore_write_be_int(&blkdev->xendev, "max-ring-page-order",
> > +  MAX_RING_PAGE_ORDER);
> > +
> >  blk_parse_discard(blkdev);
> >
> >  g_free(directiosafe);
> > @@ -1058,12 +1049,25 @@ out_error:
> >  return -1;
> >  }
> >
> > +/*
> > + * We need to account for the grant allocations requiring contiguous
> > + * chunks; the worst case number would be
> > + * max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
> > + * but in order to keep things simple just use
> > + * 

[Xen-devel] [PATCH v2] VT-d: fix VF of RC integrated endpoint matched to wrong VT-d unit

2017-06-21 Thread Chao Gao
The problem is a VF of RC integrated PF (e.g. PF's BDF is 00:02.0),
we would wrongly use 00:00.0 to search VT-d unit.

To search VT-d unit for a VF, the BDF of the PF is used. And If the
PF is an Extended Function, the BDF of one traditional function is
used.  The following line (from acpi_find_matched_drhd_unit()):
devfn = PCI_SLOT(pdev->info.physfn.devfn) ? 0 : pdev->info.physfn.devfn;
sets 'devfn' to 0 if PF's devfn > 7. Apparently, it treats all
PFs which has devfn > 7 as extended function. However, it is wrong for
a RC integrated PF, which is not ARI-capable but may have devfn > 7.

This patch directly looks up the 'is_extfn' field of PF's struct pci_dev
to decide whether the PF is an extended function.

Reported-by: Crawford Eric R 
Signed-off-by: Chao Gao 
---
 xen/drivers/passthrough/vtd/dmar.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/vtd/dmar.c 
b/xen/drivers/passthrough/vtd/dmar.c
index 82040dd..3ba33c7 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -218,8 +218,18 @@ struct acpi_drhd_unit *acpi_find_matched_drhd_unit(const 
struct pci_dev *pdev)
 }
 else if ( pdev->info.is_virtfn )
 {
+struct pci_dev *physfn;
+
 bus = pdev->info.physfn.bus;
-devfn = PCI_SLOT(pdev->info.physfn.devfn) ? 0 : 
pdev->info.physfn.devfn;
+/*
+ * Use 0 as 'devfn' to search VT-d unit when the physical function
+ * is an Extended Function.
+ */
+pcidevs_lock();
+physfn = pci_get_pdev(pdev->seg, bus, pdev->info.physfn.devfn);
+pcidevs_unlock();
+ASSERT(physfn);
+devfn = physfn->info.is_extfn ? 0 : pdev->info.physfn.devfn;
 }
 else
 {
-- 
1.8.3.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v7 25/36] swiotlb: Add warnings for use of bounce buffers with SME

2017-06-21 Thread Borislav Petkov
On Fri, Jun 16, 2017 at 01:54:36PM -0500, Tom Lendacky wrote:
> Add warnings to let the user know when bounce buffers are being used for
> DMA when SME is active.  Since the bounce buffers are not in encrypted
> memory, these notifications are to allow the user to determine some
> appropriate action - if necessary.  Actions can range from utilizing an
> IOMMU, replacing the device with another device that can support 64-bit
> DMA, ignoring the message if the device isn't used much, etc.
> 
> Signed-off-by: Tom Lendacky 
> ---
>  include/linux/dma-mapping.h |   11 +++
>  include/linux/mem_encrypt.h |8 
>  lib/swiotlb.c   |3 +++
>  3 files changed, 22 insertions(+)
> 
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 4f3eece..ee2307e 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -10,6 +10,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /**
>   * List of possible attributes associated with a DMA mapping. The semantics
> @@ -577,6 +578,11 @@ static inline int dma_set_mask(struct device *dev, u64 
> mask)
>  
>   if (!dev->dma_mask || !dma_supported(dev, mask))
>   return -EIO;
> +
> + /* Since mask is unsigned, this can only be true if SME is active */
> + if (mask < sme_dma_mask())
> + dev_warn(dev, "SME is active, device will require DMA bounce 
> buffers\n");
> +
>   *dev->dma_mask = mask;
>   return 0;
>  }
> @@ -596,6 +602,11 @@ static inline int dma_set_coherent_mask(struct device 
> *dev, u64 mask)
>  {
>   if (!dma_supported(dev, mask))
>   return -EIO;
> +
> + /* Since mask is unsigned, this can only be true if SME is active */
> + if (mask < sme_dma_mask())
> + dev_warn(dev, "SME is active, device will require DMA bounce 
> buffers\n");

Looks to me like those two checks above need to be a:

void sme_check_mask(struct device *dev, u64 mask)
{
if (!sme_me_mask)
return;

/* Since mask is unsigned, this can only be true if SME is active */
if (mask < (((u64)sme_me_mask << 1) - 1))
dev_warn(dev, "SME is active, device will require DMA bounce 
buffers\n");
}

which gets called and sme_dma_mask() is not really needed.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [Qemu-devel] [PATCH 1/3] xen-disk: only advertize feature-persistent if grant copy is not available

2017-06-21 Thread Roger Pau Monne
On Wed, Jun 21, 2017 at 11:40:00AM +0100, Paul Durrant wrote:
> > -Original Message-
> > From: Qemu-devel [mailto:qemu-devel-
> > bounces+paul.durrant=citrix@nongnu.org] On Behalf Of Paul Durrant
> > Sent: 21 June 2017 10:36
> > To: Roger Pau Monne ; Stefano Stabellini
> > 
> > Cc: Kevin Wolf ; qemu-bl...@nongnu.org; qemu-
> > de...@nongnu.org; Max Reitz ; Anthony Perard
> > ; xen-de...@lists.xenproject.org
> > Subject: Re: [Qemu-devel] [PATCH 1/3] xen-disk: only advertize feature-
> > persistent if grant copy is not available
> > 
> > > -Original Message-
> > > From: Roger Pau Monne
> > > Sent: 21 June 2017 10:18
> > > To: Stefano Stabellini 
> > > Cc: Paul Durrant ; xen-
> > de...@lists.xenproject.org;
> > > qemu-de...@nongnu.org; qemu-bl...@nongnu.org; Anthony Perard
> > > ; Kevin Wolf ; Max
> > Reitz
> > > 
> > > Subject: Re: [PATCH 1/3] xen-disk: only advertize feature-persistent if
> > grant
> > > copy is not available
> > >
> > > On Tue, Jun 20, 2017 at 03:19:33PM -0700, Stefano Stabellini wrote:
> > > > On Tue, 20 Jun 2017, Paul Durrant wrote:
> > > > > If grant copy is available then it will always be used in preference 
> > > > > to
> > > > > persistent maps. In this case feature-persistent should not be
> > advertized
> > > > > to the frontend, otherwise it may needlessly copy data into 
> > > > > persistently
> > > > > granted buffers.
> > > > >
> > > > > Signed-off-by: Paul Durrant 
> > > >
> > > > CC'ing Roger.
> > > >
> > > > It is true that using feature-persistent together with grant copies is a
> > > > a very bad idea.
> > > >
> > > > But this change establishes an explicit preference of
> > > > feature_grant_copy over feature-persistent in the xen_disk backend. It
> > > > is not obvious to me that it should be the case.
> > > >
> > > > Why is feature_grant_copy (without feature-persistent) better than
> > > > feature-persistent (without feature_grant_copy)? Shouldn't we simply
> > > > avoid grant copies to copy data to persistent grants?
> > >
> > > When using persistent grants the frontend must always copy data from
> > > the buffer to the persistent grant, there's no way to avoid this.
> > >
> > > Using grant_copy we move the copy from the frontend to the backend,
> > > which means the CPU time of the copy is accounted to the backend. This
> > > is not ideal, but IMHO it's better than persistent grants because it
> > > avoids keeping a pool of mapped grants that consume memory and make
> > > the code more complex.
> > >
> > > Do you have some performance data showing the difference between
> > > persistent grants vs grant copy?
> > >
> > 
> > No, but I can get some :-)
> > 
> > For a little background... I've been trying to push throughput of fio 
> > running in
> > a debian stretch guest on my skull canyon NUC. When I started out, I was
> > getting ~100MBbs. When I finished, with this patch, the IOThreads one, the
> > multi-page ring one and a bit of hackery to turn off all the aio flushes 
> > that
> > seem to occur even if the image is opened with O_DIRECT, I was getting
> > ~960Mbps... which is about line rate for the SSD in the in NUC.
> > 
> > So, I'll force use of persistent grants on and see what sort of throughput I
> > get.
> 
> A quick test with grant copy forced off (causing persistent grants to be 
> used)... My VM is debian stretch using a 16 page shared ring from blkfront. 
> The image backing xvdb is a fully inflated 10G qcow2.
> 
> root@dhcp-237-70:~# fio --randrepeat=1 --ioengine=libaio --direct=0 
> --gtod_reduce=1 --name=test --filename=/dev/xvdb --bs=512k --iodepth=64 
> --size=10G --readwrite=randwrite --ramp_time=4
> test: (g=0): rw=randwrite, bs=512K-512K/512K-512K/512K-512K, ioengine=libaio, 
> iodepth=64
> fio-2.16
> Starting 1 process
> Jobs: 1 (f=1): [w(1)] [70.6% done] [0KB/539.4MB/0KB /s] [0/1078/0 iops] [eta 
> 00m:05s]
> test: (groupid=0, jobs=1): err= 0: pid=633: Wed Jun 21 06:26:06 2017
>   write: io=6146.6MB, bw=795905KB/s, iops=1546, runt=  7908msec
>   cpu  : usr=2.07%, sys=34.00%, ctx=4490, majf=0, minf=1
>   IO depths: 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=0.3%, >=64=166.9%
>  submit: 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, 
> >=64=0.0%
>  complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.1%, 
> >=64=0.0%
>  issued: total=r=0/w=12230/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
>  latency   : target=0, window=0, percentile=100.00%, depth=64
> 
> Run status group 0 (all jobs):
>   WRITE: io=6146.6MB, aggrb=795904KB/s, minb=795904KB/s, maxb=795904KB/s, 
> mint=7908msec, maxt=7908msec
> 
> Disk stats (read/write):
>   xvdb: ios=54/228860, merge=0/2230616, ticks=16/5403048, in_queue=5409068, 
> util=98.26%
> 
> The dom0 cpu usage for the relevant IOThread was ~60%
> 
> The same test with grant copy...
> 
> root@dhcp-237-70:~# fio --randrepeat=1 --ioengine=libaio --direct=0 
> --gtod_reduce=1 --name=test --filename=/dev/xvdb --bs=512k --iodepth=64 
> --size=10G --readwri

Re: [Xen-devel] [PATCH] x86/mmuext: don't allow copying/clearing non-RAM pages

2017-06-21 Thread Andrew Cooper
On 21/06/17 11:10, Jan Beulich wrote:
> The two operations really aren't meant for anything else.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper , however...

>
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -3229,6 +3229,7 @@ long do_mmuext_op(
>  switch ( op.cmd )
>  {
>  struct page_info *page;
> +p2m_type_t p2mt;
>  
>  case MMUEXT_PIN_L1_TABLE:
>  type = PGT_l1_page_table;
> @@ -3528,7 +3529,12 @@ long do_mmuext_op(
>  }
>  
>  case MMUEXT_CLEAR_PAGE:
> -page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC);
> +page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, 
> P2M_ALLOC);
> +if ( unlikely(p2mt != p2m_ram_rw) && page )

... it would seem more natural to have the null pointer check before the
p2mt check.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [Qemu-devel] [PATCH 1/3] xen-disk: only advertize feature-persistent if grant copy is not available

2017-06-21 Thread Paul Durrant
> -Original Message-
> From: Roger Pau Monne
> Sent: 21 June 2017 11:51
> To: Paul Durrant 
> Cc: Stefano Stabellini ; Kevin Wolf
> ; qemu-bl...@nongnu.org; qemu-de...@nongnu.org;
> Max Reitz ; Anthony Perard
> ; xen-de...@lists.xenproject.org
> Subject: Re: [Qemu-devel] [PATCH 1/3] xen-disk: only advertize feature-
> persistent if grant copy is not available
> 
> On Wed, Jun 21, 2017 at 11:40:00AM +0100, Paul Durrant wrote:
> > > -Original Message-
> > > From: Qemu-devel [mailto:qemu-devel-
> > > bounces+paul.durrant=citrix@nongnu.org] On Behalf Of Paul
> Durrant
> > > Sent: 21 June 2017 10:36
> > > To: Roger Pau Monne ; Stefano Stabellini
> > > 
> > > Cc: Kevin Wolf ; qemu-bl...@nongnu.org; qemu-
> > > de...@nongnu.org; Max Reitz ; Anthony Perard
> > > ; xen-de...@lists.xenproject.org
> > > Subject: Re: [Qemu-devel] [PATCH 1/3] xen-disk: only advertize feature-
> > > persistent if grant copy is not available
> > >
> > > > -Original Message-
> > > > From: Roger Pau Monne
> > > > Sent: 21 June 2017 10:18
> > > > To: Stefano Stabellini 
> > > > Cc: Paul Durrant ; xen-
> > > de...@lists.xenproject.org;
> > > > qemu-de...@nongnu.org; qemu-bl...@nongnu.org; Anthony Perard
> > > > ; Kevin Wolf ; Max
> > > Reitz
> > > > 
> > > > Subject: Re: [PATCH 1/3] xen-disk: only advertize feature-persistent if
> > > grant
> > > > copy is not available
> > > >
> > > > On Tue, Jun 20, 2017 at 03:19:33PM -0700, Stefano Stabellini wrote:
> > > > > On Tue, 20 Jun 2017, Paul Durrant wrote:
> > > > > > If grant copy is available then it will always be used in 
> > > > > > preference to
> > > > > > persistent maps. In this case feature-persistent should not be
> > > advertized
> > > > > > to the frontend, otherwise it may needlessly copy data into
> persistently
> > > > > > granted buffers.
> > > > > >
> > > > > > Signed-off-by: Paul Durrant 
> > > > >
> > > > > CC'ing Roger.
> > > > >
> > > > > It is true that using feature-persistent together with grant copies 
> > > > > is a
> > > > > a very bad idea.
> > > > >
> > > > > But this change enstablishes an explicit preference of
> > > > > feature_grant_copy over feature-persistent in the xen_disk backend.
> It
> > > > > is not obvious to me that it should be the case.
> > > > >
> > > > > Why is feature_grant_copy (without feature-persistent) better than
> > > > > feature-persistent (without feature_grant_copy)? Shouldn't we
> simply
> > > > > avoid grant copies to copy data to persistent grants?
> > > >
> > > > When using persistent grants the frontend must always copy data from
> > > > the buffer to the persistent grant, there's no way to avoid this.
> > > >
> > > > Using grant_copy we move the copy from the frontend to the backend,
> > > > which means the CPU time of the copy is accounted to the backend.
> This
> > > > is not ideal, but IMHO it's better than persistent grants because it
> > > > avoids keeping a pool of mapped grants that consume memory and
> make
> > > > the code more complex.
> > > >
> > > > Do you have some performance data showing the difference between
> > > > persistent grants vs grant copy?
> > > >
> > >
> > > No, but I can get some :-)
> > >
> > > For a little background... I've been trying to push throughput of fio
> running in
> > > a debian stretch guest on my skull canyon NUC. When I started out, I was
> > > getting ~100MBbs. When I finished, with this patch, the IOThreads one,
> the
> > > multi-page ring one and a bit of hackery to turn off all the aio flushes 
> > > that
> > > seem to occur even if the image is opened with O_DIRECT, I was getting
> > > ~960Mbps... which is about line rate for the SSD in the in NUC.
> > >
> > > So, I'll force use of persistent grants on and see what sort of 
> > > throughput I
> > > get.
> >
> > A quick test with grant copy forced off (causing persistent grants to be
> used)... My VM is debian stretch using a 16 page shared ring from blkfront.
> The image backing xvdb is a fully inflated 10G qcow2.
> >
> > root@dhcp-237-70:~# fio --randrepeat=1 --ioengine=libaio --direct=0 --
> gtod_reduce=1 --name=test --filename=/dev/xvdb --bs=512k --iodepth=64 -
> -size=10G --readwrite=randwrite --ramp_time=4
> > test: (g=0): rw=randwrite, bs=512K-512K/512K-512K/512K-512K,
> ioengine=libaio, iodepth=64
> > fio-2.16
> > Starting 1 process
> > Jobs: 1 (f=1): [w(1)] [70.6% done] [0KB/539.4MB/0KB /s] [0/1078/0 iops]
> [eta 00m:05s]
> > test: (groupid=0, jobs=1): err= 0: pid=633: Wed Jun 21 06:26:06 2017
> >   write: io=6146.6MB, bw=795905KB/s, iops=1546, runt=  7908msec
> >   cpu  : usr=2.07%, sys=34.00%, ctx=4490, majf=0, minf=1
> >   IO depths: 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=0.3%,
> >=64=166.9%
> >  submit: 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%,
> >=64=0.0%
> >  complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.1%,
> >=64=0.0%
> >  issued: total=r=0/w=12230/d=0, short=r=0/w=0/d=0,
> drop=r=0/w=0/d=0
> >  latency   : target=0, window=

Re: [Xen-devel] [PATCH v3 3/9] xen/mm: move modify_identity_mmio to global file and drop __init

2017-06-21 Thread Roger Pau Monne
On Fri, May 19, 2017 at 07:35:39AM -0600, Jan Beulich wrote:
> >>> On 27.04.17 at 16:35,  wrote:
> > And also allow it to do non-identity mappings by adding a new parameter. 
> > This
> > function will be needed in other parts apart from PVH Dom0 build. While 
> > there
> > fix the function to use gfn_t and mfn_t instead of unsigned long for memory
> > addresses.
> 
> I'm afraid both title and description don't (or no longer) properly reflect
> what the patch does. I'm also afraid the reason the new parameter as
> well as the placement in common/memory.c aren't sufficiently explained.
> For example, what use is the function going to be without
> CONFIG_HAS_PCI?

It will still be needed in order to map the low 1MB for a PVH Dom0,
but anyway, see below.

> > --- a/xen/arch/x86/hvm/dom0_build.c
> > +++ b/xen/arch/x86/hvm/dom0_build.c
> > @@ -64,27 +64,7 @@ static struct acpi_madt_nmi_source __initdata *nmisrc;
> >  static int __init modify_identity_mmio(struct domain *d, unsigned long pfn,
> > unsigned long nr_pages, const bool 
> > map)
> >  {
> > -int rc;
> > -
> > -for ( ; ; )
> > -{
> > -rc = (map ? map_mmio_regions : unmap_mmio_regions)
> > - (d, _gfn(pfn), nr_pages, _mfn(pfn));
> > -if ( rc == 0 )
> > -break;
> > -if ( rc < 0 )
> > -{
> > -printk(XENLOG_WARNING
> > -   "Failed to identity %smap [%#lx,%#lx) for d%d: %d\n",
> > -   map ? "" : "un", pfn, pfn + nr_pages, d->domain_id, rc);
> > -break;
> > -}
> > -nr_pages -= rc;
> > -pfn += rc;
> > -process_pending_softirqs();
> > -}
> > -
> > -return rc;
> > +return modify_mmio(d, _gfn(pfn), _mfn(pfn), nr_pages, map);
> >  }
> 
> I don't see the value of retaining this wrapper.
> 
> > --- a/xen/common/memory.c
> > +++ b/xen/common/memory.c
> > @@ -1438,6 +1438,42 @@ int prepare_ring_for_helper(
> >  return 0;
> >  }
> >  
> > +int modify_mmio(struct domain *d, gfn_t gfn, mfn_t mfn, unsigned long 
> > nr_pages,
> > +const bool map)
> > +{
> > +int rc;
> > +
> > +/*
> > + * Make sure this function is only used by the hardware domain, 
> > because it
> > + * can take an arbitrary long time, and could DoS the whole system.
> > + */
> > +ASSERT(is_hardware_domain(d));
> 
> If that can happen arbitrarily at run time (rather than just at boot,
> as suggested by the removal of __init), it definitely can't remain as
> is and will instead need to make use of continuations. I'm therefore
> unconvinced you really want to move this code instead of simply
> calling {,un}map_mmio_regions() while taking care of preemption
> needs.

I'm not sure I know how to use continuations with non-hypercall
vmexits. Do you have any recommendations about how to do this? pause
the domain and run the mmio changes inside of a tasklet?

Roger.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH RFC] Implement hypercall for tracing of program counters

2017-06-21 Thread Wei Liu
On Wed, Jun 21, 2017 at 09:20:20AM +0200, Felix Schmoll wrote:
> This commit makes the changes to the hypervisor, the build system as
> well as libxc necessary in order to facilitate tracing of program counters.
> 
> A discussion of the design can be found in the mailing list:
> https://lists.xen.org/archives/html/xen-devel/2017-05/threads.html#02210
> 
> The list of files to be included for tracing might still be too extensive,
> resulting in indeterministic tracing output for some use cases. It is also
> not clear what other causes of indeterminism there might be.

Now you've got everything you need, you can check the IP against known
binary to gradually remove the indeterminism.

I've only skim-read your patch. It looks to be in line with what we
discussed before.

The only blocker to get this merge (not this exact patch, but the
approach in general), as I see it, is the build system.  But then I'm
not really comfortable to ask you to work on that. So if you want to
move on to the next step, that would be fine by me.

> 
> Signed-off-by: Felix Schmoll 
> ---
>  tools/libxc/include/xenctrl.h |  2 ++
>  tools/libxc/xc_private.c  | 22 +
>  tools/libxc/xc_private.h  |  8 +
>  xen/Kconfig   |  4 +++
>  xen/Rules.mk  |  4 +++
>  xen/arch/arm/traps.c  |  1 +
>  xen/arch/x86/hvm/hypercall.c  |  1 +
>  xen/arch/x86/hypercall.c  |  1 +
>  xen/arch/x86/pv/hypercall.c   |  1 +
>  xen/common/Makefile   | 13 
>  xen/common/edge_trace.c   | 77 
> +++
>  xen/common/edge_tracer.c  | 25 ++
>  xen/include/public/xen.h  |  1 +
>  xen/include/xen/edge_trace.h  | 19 +++
>  xen/include/xen/hypercall.h   |  7 
>  xen/include/xen/sched.h   |  6 
>  16 files changed, 192 insertions(+)
>  create mode 100644 xen/common/edge_trace.c
>  create mode 100644 xen/common/edge_tracer.c
>  create mode 100644 xen/include/xen/edge_trace.h
> 
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index 8c26cb4141..75e03337f9 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1572,6 +1572,8 @@ int xc_domctl(xc_interface *xch, struct xen_domctl 
> *domctl);
>  int xc_sysctl(xc_interface *xch, struct xen_sysctl *sysctl);
>  
>  int xc_version(xc_interface *xch, int cmd, void *arg);
> +int xc_edge_trace(xc_interface *xch, domid_t dom_id, int mode,
> +unsigned int size, uint64_t *buf);

Coding style. Please align the second line with "xc_interface ...".

The same comment applies to all other similar places in this patch.

>  
>  int xc_flask_op(xc_interface *xch, xen_flask_op_t *op);
>  
> diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
> index f395594a8f..97663f219b 100644
> --- a/tools/libxc/xc_private.c
> +++ b/tools/libxc/xc_private.c
> @@ -530,6 +530,28 @@ int xc_version(xc_interface *xch, int cmd, void *arg)
>  return rc;
>  }
>  
> +int xc_edge_trace(xc_interface *xch,
> +domid_t dom_id, int mode, unsigned int size, uint64_t* arg)

uint64_t *arg (note the position of '*')

> +{
> +int rc;
> +
> +DECLARE_HYPERCALL_BOUNCE(arg, size * sizeof(uint64_t),
> +XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> +
> +if ( xc_hypercall_bounce_pre(xch, arg) )
> +{
> +PERROR("Could not bounce buffer for edge_trace hypercall");
> +return -ENOMEM;
> +}
> +
> +rc = do_edge_trace(xch, dom_id, mode, size, HYPERCALL_BUFFER(arg));
> +
> +xc_hypercall_bounce_post(xch, arg);
> +
> +return rc;
> +}
> +
> +
>  unsigned long xc_make_page_below_4G(
>  xc_interface *xch, uint32_t domid, unsigned long mfn)
>  {
> diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
> index 1c27b0fded..60b0d8ebe3 100644
> --- a/tools/libxc/xc_private.h
> +++ b/tools/libxc/xc_private.h
> @@ -229,6 +229,14 @@ static inline int do_xen_version(xc_interface *xch, int 
> cmd, xc_hypercall_buffer
>  cmd, HYPERCALL_BUFFER_AS_ARG(dest));
>  }
>  
> +static inline int do_edge_trace(xc_interface *xch, domid_t dom_id, int mode,
> +unsigned int size, xc_hypercall_buffer_t 
> *buf)
> +{
> +DECLARE_HYPERCALL_BUFFER_ARGUMENT(buf);
> +return xencall4(xch->xcall, __HYPERVISOR_edge_trace, dom_id, mode,
> +size, HYPERCALL_BUFFER_AS_ARG(buf));
> +}
> +

This function can be folded into xc_edge_trace.

[...]
> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> index c07999b518..a4d36517f9 100644
> --- a/xen/arch/arm/traps.c
> +++ b/xen/arch/arm/traps.c
> @@ -1419,6 +1419,7 @@ static arm_hypercall_t arm_hypercall_table[] = {
>  HYPERCALL(platform_op, 1),
>  HYPERCALL_ARM(vcpu_op, 3),
>  HYPERCALL(vm_assist, 2),
> +HYPERCALL(edge_trace, 4),
>  };
>  
>  #ifndef NDEBUG
> diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
> index e7238ce293..fed8363d8a 100644
> --- a/xen/arch/x86/hvm/hypercall.c

[Xen-devel] [PATCH 00/17] x86: emulator enhancements

2017-06-21 Thread Jan Beulich
01: support remaining AVX insns
02: re-order cases of main switch statement
03: build SIMD tests with -Os
04: support F16C insns
05: support FMA4 insns
06: support FMA insns
07: support most remaining AVX2 insns
08: fold/eliminate some local variables
09: support AVX2 gather insns
10: add tables for XOP 08 and 09 extension spaces
11: support XOP insns
12: support 3DNow! insns
13: re-order checks in test harness
14: abstract out XCRn accesses
15: adjust_bnd() should check XCR0
16: make all FPU emulation use the stub
17: eliminate custom #MF/#XM handling

Signed-off-by: Jan Beulich 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 01/11] public: adjust documentation following XSA-217

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:30, Jan Beulich wrote:
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 02/11] gnttab: remove redundant xenheap check from gnttab_transfer()

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:31, Jan Beulich wrote:
> The message isn't very useful, and the check is being done by
> steal_page() anyway.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [xen-4.6-testing test] 110899: regressions - FAIL

2017-06-21 Thread osstest service owner
flight 110899 xen-4.6-testing real [real]
http://logs.test-lab.xenproject.org/osstest/logs/110899/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-xtf-amd64-amd64-3 45 xtf/test-hvm64-lbr-tsx-vmentry fail REGR. vs. 110386
 test-armhf-armhf-xl-xsm 15 guest-start/debian.repeat fail REGR. vs. 110386

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-xl-rtds 11 guest-start  fail REGR. vs. 110386

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-libvirt 13 saverestore-support-checkfail  like 110386
 test-armhf-armhf-libvirt-xsm 13 saverestore-support-checkfail  like 110386
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stopfail like 110386
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stopfail like 110386
 test-armhf-armhf-libvirt-raw 12 saverestore-support-checkfail  like 110386
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop fail like 110386
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop fail like 110386
 test-xtf-amd64-amd64-3   65 xtf/test-pv32pae-xsa-194 fail   never pass
 test-xtf-amd64-amd64-2   65 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail  never pass
 test-xtf-amd64-amd64-5   65 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-xtf-amd64-amd64-4   65 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-qemuu-ws16-amd64  9 windows-installfail never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-qemut-ws16-amd64  9 windows-installfail never pass
 test-xtf-amd64-amd64-1   65 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 13 saverestore-support-checkfail never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass
 test-amd64-i386-xl-qemuu-win10-i386  9 windows-install fail never pass
 test-amd64-i386-xl-qemut-win10-i386  9 windows-install fail never pass
 test-amd64-amd64-xl-qemuu-win10-i386  9 windows-installfail never pass
 test-amd64-amd64-xl-qemut-win10-i386  9 windows-installfail never pass
 test-amd64-i386-xl-qemut-ws16-amd64  9 windows-install fail never pass
 test-amd64-i386-xl-qemuu-ws16-amd64  9 windows-install fail never pass

version targeted for testing:
 xen  4efd74db51d75f22b9c65e5f1dbe5332854ffa0a
baseline version:
 xen  2893fce1b7a748fd13b0fb8cbed9e8f7b62ef07b

Last test of basis   110386  2017-06-12 22:18:54 Z8 days
Testing same since   110899  2017-06-21 00:10:20 Z0 days1 attempts


People who touched revisions under test:
  Andrew Cooper 
  George Dunlap 
  Jan Beulich 
  Julien Grall 
  Quan Xu 

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm 

[Xen-devel] [xen-unstable-coverity test] 110924: all pass - PUSHED

2017-06-21 Thread osstest service owner
flight 110924 xen-unstable-coverity real [real]
http://logs.test-lab.xenproject.org/osstest/logs/110924/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 xen  d8f1b48fd665d7aad1711de2f073540d07d2d041
baseline version:
 xen  7251b0d2b28552bf8d7287af9dc2504b4a43278b

Last test of basis   110543  2017-06-18 09:20:09 Z3 days
Testing same since   110924  2017-06-21 09:54:11 Z0 days1 attempts


People who touched revisions under test:
  Andrew Cooper 
  Andrew Morton 
  Artem Bityutskiy 
  David Woodhouse 
  George Dunlap 
  Ian Jackson 
  Jan Beulich 
  Julien Grall 
  Konrad Rzeszutek Wilk 
  Linus Torvalds 
  Peter Zijlstra 
  Petre Pircalabu 
  Praveen Kumar 
  Roger Pau Monné 
  Ross Lagerwall 
  Tamas K Lengyel 
  Tim Deegan 
  Wei Liu 
  Wolfram Strepp 
  Zhongze Liu 

jobs:
 coverity-amd64   pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

+ branch=xen-unstable-coverity
+ revision=d8f1b48fd665d7aad1711de2f073540d07d2d041
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x '!=' x/home/osstest/repos/lock ']'
++ OSSTEST_REPOS_LOCK_LOCKED=/home/osstest/repos/lock
++ exec with-lock-ex -w /home/osstest/repos/lock ./ap-push 
xen-unstable-coverity d8f1b48fd665d7aad1711de2f073540d07d2d041
+ branch=xen-unstable-coverity
+ revision=d8f1b48fd665d7aad1711de2f073540d07d2d041
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x/home/osstest/repos/lock '!=' x/home/osstest/repos/lock ']'
+ . ./cri-common
++ . ./cri-getconfig
++ umask 002
+ select_xenbranch
+ case "$branch" in
+ tree=xen
+ xenbranch=xen-unstable-coverity
+ qemuubranch=qemu-upstream-unstable-coverity
+ qemuubranch=qemu-upstream-unstable
+ '[' xxen = xlinux ']'
+ linuxbranch=
+ '[' xqemu-upstream-unstable = x ']'
+ select_prevxenbranch
++ ./cri-getprevxenbranch xen-unstable-coverity
+ prevxenbranch=xen-4.9-testing
+ '[' xd8f1b48fd665d7aad1711de2f073540d07d2d041 = x ']'
+ : tested/2.6.39.x
+ . ./ap-common
++ : osst...@xenbits.xen.org
+++ getconfig OsstestUpstream
+++ perl -e '
use Osstest;
readglobalconfig();
print $c{"OsstestUpstream"} or die $!;
'
++ :
++ : git://xenbits.xen.org/xen.git
++ : osst...@xenbits.xen.org:/home/xen/git/xen.git
++ : git://xenbits.xen.org/qemu-xen-traditional.git
++ : git://git.kernel.org
++ : git://git.kernel.org/pub/scm/linux/kernel/git
++ : git
++ : git://xenbits.xen.org/xtf.git
++ : osst...@xenbits.xen.org:/home/xen/git/xtf.git
++ : git://xenbits.xen.org/xtf.git
++ : git://xenbits.xen.org/libvirt.git
++ : osst...@xenbits.xen.org:/home/xen/git/libvirt.git
++ : git://xenbits.xen.org/libvirt.git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/rumprun.git
++ : git://git.seabios.org/seabios.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/seabios.git
++ : git://xenbits.xen.org/osstest/seabios.git
++ : https://github.com/tianocore/edk2.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/ovmf.git
++ : git://xenbits.xen.org/osstest/ovmf.git
++ : git://xenbits.xen.org/osstest/linux-firmware.git
++ : osst...@xenbits.xen.org:/home/osstest/ext/linux-firmware.git
++ : git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
++ : osst...@xenbits.xen.org:/home/xen/git/linux-pvops.git
++ : git://xenbits.xen.org/linux-pvops.git
++ : tested/linux-3.14
++ : tested/linux-arm-xen
++ '[' xgit://xenbits.xen.org/linux-pvops.git = x ']'
++ '

Re: [Xen-devel] [PATCH 03/11] make steal_page() return a proper error value

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:32, Jan Beulich wrote:
> ... and use it where suitable (the tmem caller doesn't propagate an
> error code). While it doesn't matter as much, also make donate_page()
> follow suit on x86 (on ARM it already returns -ENOSYS).
>
> Also move their declarations to common code and add __must_check.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 04/11] domctl: restrict DOMCTL_set_target to HVM domains

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:33, Jan Beulich wrote:
> Both the XSA-217 fix and
> lists.xenproject.org/archives/html/xen-devel/2017-04/msg02945.html
> make this assumption, so let's enforce it.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper , although...

>
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -1071,7 +1071,9 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
>  break;
>  }
>  
> -ret = xsm_set_target(XSM_HOOK, d, e);
> +ret = -EOPNOTSUPP;
> +if ( is_hvm_domain(e) )
> +ret = xsm_set_target(XSM_HOOK, d, e);
>  if ( ret ) {

... do you mind fixing this style while you are here?

~Andrew

>  put_domain(e);
>  break;
>
>
>


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 05/11] evtchn: convert evtchn_port_is_*() to plain bool

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:34, Jan Beulich wrote:
> ... at once reducing overall source size by combining some statements
> and constifying a few pointers.
>
> Signed-off-by: Jan Beulich 

Ah - I was planning to do precisely this.  I'm glad I hadn't started yet.

Reviewed-by: Andrew Cooper 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86/mmuext: don't allow copying/clearing non-RAM pages

2017-06-21 Thread Jan Beulich
>>> On 21.06.17 at 12:51,  wrote:
> On 21/06/17 11:10, Jan Beulich wrote:
>> The two operations really aren't meant for anything else.
>>
>> Signed-off-by: Jan Beulich 
> 
> Reviewed-by: Andrew Cooper , however...
> 
>>
>> --- a/xen/arch/x86/mm.c
>> +++ b/xen/arch/x86/mm.c
>> @@ -3229,6 +3229,7 @@ long do_mmuext_op(
>>  switch ( op.cmd )
>>  {
>>  struct page_info *page;
>> +p2m_type_t p2mt;
>>  
>>  case MMUEXT_PIN_L1_TABLE:
>>  type = PGT_l1_page_table;
>> @@ -3528,7 +3529,12 @@ long do_mmuext_op(
>>  }
>>  
>>  case MMUEXT_CLEAR_PAGE:
>> -page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, 
>> P2M_ALLOC);
>> +page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, 
>> P2M_ALLOC);
>> +if ( unlikely(p2mt != p2m_ram_rw) && page )
> 
> ... it would seem more natural to have the null pointer check before the
> p2mt check.

Since the checks are independent, the order doesn't really matter,
and with that it seemed better to put the unlikely() first (to get the
other check off the fast path).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 07/11] x86: fold identical error paths in xenmem_add_to_physmap_one()

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:36, Jan Beulich wrote:
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

I think this function is another one which could benefit from explicitly
counting the number of references it collects, as per XSA-224.

>
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -4899,11 +4899,8 @@ int xenmem_add_to_physmap_one(
>  
>  if ( !paging_mode_translate(d) || (mfn == 0) )
>  {
> -if ( page )
> -put_page(page);
> -if ( space == XENMAPSPACE_gmfn || space == XENMAPSPACE_gmfn_range )
> -put_gfn(d, gfn);
> -return -EINVAL;
> +rc = -EINVAL;
> +goto put_both;
>  }
>  
>  /* Remove previously mapped page if it was present. */
>
>
>


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 3/9] xen/mm: move modify_identity_mmio to global file and drop __init

2017-06-21 Thread Jan Beulich
>>> On 21.06.17 at 13:11,  wrote:
> On Fri, May 19, 2017 at 07:35:39AM -0600, Jan Beulich wrote:
>> >>> On 27.04.17 at 16:35,  wrote:
>> > And also allow it to do non-identity mappings by adding a new parameter. 
>> > This
>> > function will be needed in other parts apart from PVH Dom0 build. While 
>> > there
>> > fix the function to use gfn_t and mfn_t instead of unsigned long for memory
>> > addresses.
>> 
>> I'm afraid both title and description don't (or no longer) properly reflect
>> what the patch does. I'm also afraid the reason the new parameter as
>> well as the placement in common/memory.c aren't sufficiently explained.
>> For example, what use is the function going to be without
>> CONFIG_HAS_PCI?
> 
> It will still be needed in order to map the low 1MB for a PVH Dom0,
> but anyway, see below.

That's still implying CONFIG_HAS_PCI, as that's still x86. The
question was with ARM in mind.

>> > --- a/xen/common/memory.c
>> > +++ b/xen/common/memory.c
>> > @@ -1438,6 +1438,42 @@ int prepare_ring_for_helper(
>> >  return 0;
>> >  }
>> >  
>> > +int modify_mmio(struct domain *d, gfn_t gfn, mfn_t mfn, unsigned long 
>> > nr_pages,
>> > +const bool map)
>> > +{
>> > +int rc;
>> > +
>> > +/*
>> > + * Make sure this function is only used by the hardware domain, 
>> > because it
>> > + * can take an arbitrary long time, and could DoS the whole system.
>> > + */
>> > +ASSERT(is_hardware_domain(d));
>> 
>> If that can happen arbitrarily at run time (rather than just at boot,
>> as suggested by the removal of __init), it definitely can't remain as
>> is and will instead need to make use of continuations. I'm therefore
>> unconvinced you really want to move this code instead of simply
>> calling {,un}map_mmio_regions() while taking care of preemption
>> needs.
> 
> I'm not sure I know how to use continuations with non-hypercall
> vmexits. Do you have any recommendations about how to do this? pause
> the domain and run the mmio changes inside of a tasklet?

That would be one option. Or you could derive from the approach
used for waiting for a response from the device model. Even exiting
back to the guest without updating rIP may be possible, provided
you have a means to store the continuation information such that
when coming back you won't start from the beginning again.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 01/17] x86emul: support remaining AVX insns

2017-06-21 Thread Jan Beulich
I.e. those not being equivalents of SSEn ones.

There's one necessary change to generic code: Faulting behavior of
VMASKMOVP{S,D} requires us to do partial reads/writes.

Signed-off-by: Jan Beulich 

--- a/.gitignore
+++ b/.gitignore
@@ -224,7 +224,7 @@
 tools/tests/x86_emulator/*.bin
 tools/tests/x86_emulator/*.tmp
 tools/tests/x86_emulator/asm
-tools/tests/x86_emulator/avx*.h
+tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
 tools/tests/x86_emulator/sse*.[ch]
 tools/tests/x86_emulator/test_x86_emulator
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -11,8 +11,8 @@ all: $(TARGET)
 run: $(TARGET)
./$(TARGET)
 
-SIMD := sse sse2 sse4
-TESTCASES := blowfish $(SIMD) $(addsuffix -avx,$(filter sse%,$(SIMD)))
+SIMD := sse sse2 sse4 avx
+TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx
 
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
@@ -26,34 +26,36 @@ sse2-flts := 4 8
 sse4-vecs := $(sse2-vecs)
 sse4-ints := $(sse2-ints)
 sse4-flts := $(sse2-flts)
+avx-vecs := 16 32
+avx-ints :=
+avx-flts := 4 8
 
 # When converting SSE to AVX, have the compiler avoid XMM0 to widen
 # coverage of the VEX. checks in the emulator. We must not do this,
 # however, for SSE4.1 and later, as there are instructions with XMM0 as
 # an implicit operand.
-sse2avx-sse  := -ffixed-xmm0 -Wa,-msse2avx
-sse2avx-sse2 := $(sse2avx-sse)
+sse2avx-sse2 := -ffixed-xmm0 -Wa,-msse2avx
 sse2avx-sse4 := -Wa,-msse2avx
 
+# For AVX and later, have the compiler avoid XMM0 to widen coverage of
+# the VEX. checks in the emulator.
+non-sse = $(if $(filter sse%,$(1)),,-ffixed-xmm0)
+
 define simd-defs
 $(1)-cflags := \
$(foreach vec,$($(1)-vecs), \
  $(foreach int,$($(1)-ints), \
-   "-D_$(vec)i$(int) -m$(1) -O2 -DVEC_SIZE=$(vec) -DINT_SIZE=$(int)" \
-   "-D_$(vec)u$(int) -m$(1) -O2 -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)") 
\
+   "-D_$(vec)i$(int) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
+   "-D_$(vec)u$(int) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)") \
  $(foreach flt,$($(1)-flts), \
-   "-D_$(vec)f$(flt) -m$(1) -O2 -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
+   "-D_$(vec)f$(flt) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
$(foreach flt,$($(1)-flts), \
- "-D_f$(flt) -m$(1) -mfpmath=sse -O2 -DFLOAT_SIZE=$(flt)")
+ "-D_f$(flt) -m$(1) $(call non-sse,$(1)) -mfpmath=sse -O2 
-DFLOAT_SIZE=$(flt)")
 $(1)-avx-cflags := \
$(foreach vec,$($(1)-vecs), \
  $(foreach int,$($(1)-ints), \
"-D_$(vec)i$(int) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
-   "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)") \
- $(foreach flt,$($(1)-flts), \
-   "-D_$(vec)f$(flt) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
-   $(foreach flt,$($(1)-flts), \
- "-D_f$(flt) -m$(1) -mfpmath=sse $(sse2avx-$(1)) -O2 
-DFLOAT_SIZE=$(flt)")
+   "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)"))
 endef
 
 $(foreach flavor,$(SIMD),$(eval $(call simd-defs,$(flavor
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -70,7 +70,13 @@ typedef long long __attribute__((vector_
 #if VEC_SIZE == 8 && defined(__SSE__)
 # define to_bool(cmp) (__builtin_ia32_pmovmskb(cmp) == 0xff)
 #elif VEC_SIZE == 16
-# if defined(__SSE4_1__)
+# if defined(__AVX__) && defined(FLOAT_SIZE)
+#  if ELEM_SIZE == 4
+#   define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0)
+#  elif ELEM_SIZE == 8
+#   define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0)
+#  endif
+# elif defined(__SSE4_1__)
 #  define to_bool(cmp) __builtin_ia32_ptestc128(cmp, (vdi_t){} == 0)
 # elif defined(__SSE__) && ELEM_SIZE == 4
 #  define to_bool(cmp) (__builtin_ia32_movmskps(cmp) == 0xf)
@@ -81,6 +87,12 @@ typedef long long __attribute__((vector_
 #   define to_bool(cmp) (__builtin_ia32_pmovmskb128(cmp) == 0x)
 #  endif
 # endif
+#elif VEC_SIZE == 32
+# if defined(__AVX__) && ELEM_SIZE == 4
+#  define to_bool(cmp) (__builtin_ia32_movmskps256(cmp) == 0xff)
+# elif defined(__AVX__) && ELEM_SIZE == 8
+#  define to_bool(cmp) (__builtin_ia32_movmskpd256(cmp) == 0xf)
+# endif
 #endif
 
 #ifndef to_bool
@@ -105,6 +117,12 @@ static inline bool _to_bool(byte_vec_t b
 # elif FLOAT_SIZE == 8
 #  define to_int(x) __builtin_ia32_cvtdq2pd(__builtin_ia32_cvtpd2dq(x))
 # endif
+#elif VEC_SIZE == 32 && defined(__AVX__)
+# if FLOAT_SIZE == 4
+#  define to_int(x) __builtin_ia32_cvtdq2ps256(__builtin_ia32_cvtps2dq256(x))
+# elif FLOAT_SIZE == 8
+#  define to_int(x) __builtin_ia32_cvtdq2pd256(__builtin_ia32_cvtpd2dq256(x))
+# endif
 #endif
 
 #if VEC_SIZE == FLOAT_SIZE
@@ -116,7 +134,25 @@ static inline bool _to_bool(byte_vec_t b
 #endif
 

[Xen-devel] [PATCH 02/17] x86emul: re-order cases of main switch statement

2017-06-21 Thread Jan Beulich
Re-store intended numerical ordering, which has become "violated"
mostly by incremental additions where moving around bigger chunks did
not seem advisable. One exception though at the very top of the
switch(): Keeping the arithmetic ops together seems preferable over
entirely strict ordering.

Additionally move a few macro definitions before their first uses (the
placement is benign as long as those uses are themselves only macro
definitions, but that's going to change when those macros have helpers
broken out).

No (intended) functional change.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -843,6 +843,27 @@ do{ asm volatile (
 #define __emulate_1op_8byte(_op, _dst, _eflags)
 #endif /* __i386__ */
 
+#define fail_if(p)  \
+do {\
+rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY; \
+if ( rc ) goto done;\
+} while (0)
+
+static inline int mkec(uint8_t e, int32_t ec, ...)
+{
+return (e < 32 && ((1u << e) & EXC_HAS_EC)) ? ec : X86_EVENT_NO_EC;
+}
+
+#define generate_exception_if(p, e, ec...)\
+({  if ( (p) ) {  \
+x86_emul_hw_exception(e, mkec(e, ##ec, 0), ctxt); \
+rc = X86EMUL_EXCEPTION;   \
+goto done;\
+} \
+})
+
+#define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
+
 #ifdef __XEN__
 # define invoke_stub(pre, post, constraints...) do {\
 union stub_exception_token res_ = { .raw = ~0 };\
@@ -911,27 +932,6 @@ do{ asm volatile (
 # define mode_64bit() false
 #endif
 
-#define fail_if(p)  \
-do {\
-rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY; \
-if ( rc ) goto done;\
-} while (0)
-
-static inline int mkec(uint8_t e, int32_t ec, ...)
-{
-return (e < 32 && ((1u << e) & EXC_HAS_EC)) ? ec : X86_EVENT_NO_EC;
-}
-
-#define generate_exception_if(p, e, ec...)\
-({  if ( (p) ) {  \
-x86_emul_hw_exception(e, mkec(e, ##ec, 0), ctxt); \
-rc = X86EMUL_EXCEPTION;   \
-goto done;\
-} \
-})
-
-#define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
-
 /*
  * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1,
  * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only.
@@ -3596,6 +3596,11 @@ x86_emulate(
 dst.bytes = 2;
 break;
 
+case 0x8d: /* lea */
+generate_exception_if(ea.type != OP_MEM, EXC_UD);
+dst.val = ea.mem.off;
+break;
+
 case 0x8e: /* mov r/m,Sreg */
 seg = modrm_reg & 7; /* REX.R is ignored. */
 generate_exception_if(!is_x86_user_segment(seg) ||
@@ -3607,11 +3612,6 @@ x86_emulate(
 dst.type = OP_NONE;
 break;
 
-case 0x8d: /* lea */
-generate_exception_if(ea.type != OP_MEM, EXC_UD);
-dst.val = ea.mem.off;
-break;
-
 case 0x8f: /* pop (sole member of Grp1a) */
 generate_exception_if((modrm_reg & 7) != 0, EXC_UD);
 /* 64-bit mode: POP defaults to a 64-bit operand. */
@@ -5746,12 +5746,6 @@ x86_emulate(
 _regs.r(ax) = (uint32_t)msr_val;
 break;
 
-case X86EMUL_OPC(0x0f, 0x40) ... X86EMUL_OPC(0x0f, 0x4f): /* cmovcc */
-vcpu_must_have(cmov);
-if ( test_cc(b, _regs.eflags) )
-dst.val = src.val;
-break;
-
 case X86EMUL_OPC(0x0f, 0x34): /* sysenter */
 vcpu_must_have(sep);
 generate_exception_if(mode_ring0(), EXC_GP, 0);
@@ -5834,6 +5828,12 @@ x86_emulate(
 singlestep = _regs.eflags & X86_EFLAGS_TF;
 break;
 
+case X86EMUL_OPC(0x0f, 0x40) ... X86EMUL_OPC(0x0f, 0x4f): /* cmovcc */
+vcpu_must_have(cmov);
+if ( test_cc(b, _regs.eflags) )
+dst.val = src.val;
+break;
+
 CASE_SIMD_PACKED_FP(, 0x0f, 0x50): /* movmskp{s,d} xmm,reg */
 CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */
 CASE_SIMD_PACKED_INT(0x0f, 0xd7):  /* pmovmskb {,x}mm,reg */
@@ -6050,10 +6050,6 @@ x86_emulate(
 get_fpu(X86EMUL_FPU_mmx, &fic);
 goto simd_0f_common;
 
-case X86EMUL_OPC_VEX_66(0x0f38, 0x41): /* vphminposuw xmm/m128,xmm,xmm */
-generate_exception_if(vex.l, EXC_UD);
- 

[Xen-devel] [PATCH 03/17] x86emul: build SIMD tests with -Os

2017-06-21 Thread Jan Beulich
Namely in the context of putting together subsequent patches I've
noticed that together with the touch() macro using -Os further
increases the chances of the compiler using memory operands for the
instructions we actually care to test.

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -45,17 +45,17 @@ define simd-defs
 $(1)-cflags := \
$(foreach vec,$($(1)-vecs), \
  $(foreach int,$($(1)-ints), \
-   "-D_$(vec)i$(int) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
-   "-D_$(vec)u$(int) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)") \
+   "-D_$(vec)i$(int) -m$(1) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
+   "-D_$(vec)u$(int) -m$(1) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)") \
  $(foreach flt,$($(1)-flts), \
-   "-D_$(vec)f$(flt) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
+   "-D_$(vec)f$(flt) -m$(1) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
$(foreach flt,$($(1)-flts), \
- "-D_f$(flt) -m$(1) $(call non-sse,$(1)) -mfpmath=sse -O2 
-DFLOAT_SIZE=$(flt)")
+ "-D_f$(flt) -m$(1) $(call non-sse,$(1)) -mfpmath=sse -Os 
-DFLOAT_SIZE=$(flt)")
 $(1)-avx-cflags := \
$(foreach vec,$($(1)-vecs), \
  $(foreach int,$($(1)-ints), \
-   "-D_$(vec)i$(int) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
-   "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)"))
+   "-D_$(vec)i$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
+   "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)"))
 endef
 
 $(foreach flavor,$(SIMD),$(eval $(call simd-defs,$(flavor



x86emul: build SIMD tests with -Os

Namely in the context of putting together subsequent patches I've
noticed that together with the touch() macro using -Os further
increases the chances of the compiler using memory operands for the
instructions we actually care to test.

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -45,17 +45,17 @@ define simd-defs
 $(1)-cflags := \
$(foreach vec,$($(1)-vecs), \
  $(foreach int,$($(1)-ints), \
-   "-D_$(vec)i$(int) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
-   "-D_$(vec)u$(int) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)") \
+   "-D_$(vec)i$(int) -m$(1) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
+   "-D_$(vec)u$(int) -m$(1) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)") \
  $(foreach flt,$($(1)-flts), \
-   "-D_$(vec)f$(flt) -m$(1) $(call non-sse,$(1)) -O2 -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
+   "-D_$(vec)f$(flt) -m$(1) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
$(foreach flt,$($(1)-flts), \
- "-D_f$(flt) -m$(1) $(call non-sse,$(1)) -mfpmath=sse -O2 
-DFLOAT_SIZE=$(flt)")
+ "-D_f$(flt) -m$(1) $(call non-sse,$(1)) -mfpmath=sse -Os 
-DFLOAT_SIZE=$(flt)")
 $(1)-avx-cflags := \
$(foreach vec,$($(1)-vecs), \
  $(foreach int,$($(1)-ints), \
-   "-D_$(vec)i$(int) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
-   "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -O2 -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)"))
+   "-D_$(vec)i$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
+   "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)"))
 endef
 
 $(foreach flavor,$(SIMD),$(eval $(call simd-defs,$(flavor
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 04/17] x86emul: support F16C insns

2017-06-21 Thread Jan Beulich
Note that this avoids emulating the behavior of VCVTPS2PH found on at
least some Intel CPUs, which update MXCSR even when the memory write
faults.

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -3028,6 +3028,47 @@ int main(int argc, char **argv)
 printf("skipped\n");
 #endif
 
+printf("%-40s", "Testing vcvtph2ps (%ecx),%ymm1...");
+if ( stack_exec && cpu_has_f16c )
+{
+decl_insn(vcvtph2ps);
+decl_insn(vcvtps2ph);
+
+asm volatile ( "vxorps %%xmm1, %%xmm1, %%xmm1\n"
+   put_insn(vcvtph2ps, "vcvtph2ps (%0), %%ymm1")
+   :: "c" (NULL) );
+
+set_insn(vcvtph2ps);
+res[1] = 0x40003c00; /* (1.0, 2.0) */
+res[2] = 0x44004200; /* (3.0, 4.0) */
+res[3] = 0x3400b800; /* (-.5, .25) */
+res[4] = 0xbc00; /* (0.0, -1.) */
+memset(res + 5, 0xff, 16);
+regs.ecx = (unsigned long)(res + 1);
+rc = x86_emulate(&ctxt, &emulops);
+asm volatile ( "vmovups %%ymm1, %0" : "=m" (res[16]) );
+if ( rc != X86EMUL_OKAY || !check_eip(vcvtph2ps) )
+goto fail;
+printf("okay\n");
+
+printf("%-40s", "Testing vcvtps2ph $0,%ymm1,(%edx)...");
+asm volatile ( "vmovups %0, %%ymm1\n"
+   put_insn(vcvtps2ph, "vcvtps2ph $0, %%ymm1, (%1)")
+   :: "m" (res[16]), "d" (NULL) );
+
+set_insn(vcvtps2ph);
+memset(res + 7, 0, 32);
+regs.edx = (unsigned long)(res + 7);
+rc = x86_emulate(&ctxt, &emulops);
+if ( rc != X86EMUL_OKAY || !check_eip(vcvtps2ph) ||
+ memcmp(res + 1, res + 7, 16) ||
+ res[11] || res[12] || res[13] || res[14] )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -127,6 +127,14 @@ static inline uint64_t xgetbv(uint32_t x
 (res.c & (1U << 28)) != 0; \
 })
 
+#define cpu_has_f16c ({ \
+struct cpuid_leaf res; \
+emul_test_cpuid(1, 0, &res, NULL); \
+if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+res.c = 0; \
+(res.c & (1U << 29)) != 0; \
+})
+
 #define cpu_has_avx2 ({ \
 struct cpuid_leaf res; \
 emul_test_cpuid(1, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -369,6 +369,7 @@ static const struct {
 [0x00 ... 0x0b] = { .simd_size = simd_packed_int },
 [0x0c ... 0x0f] = { .simd_size = simd_packed_fp },
 [0x10] = { .simd_size = simd_packed_int },
+[0x13] = { .simd_size = simd_other, .two_op = 1 },
 [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
 [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
 [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 },
@@ -411,6 +412,7 @@ static const struct {
 [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
 [0x18] = { .simd_size = simd_128 },
 [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
+[0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
 [0x20] = { .simd_size = simd_none },
 [0x21] = { .simd_size = simd_other },
 [0x22] = { .simd_size = simd_none },
@@ -1601,6 +1603,7 @@ static bool vcpu_has(
 #define vcpu_has_popcnt()  vcpu_has( 1, ECX, 23, ctxt, ops)
 #define vcpu_has_aesni()   vcpu_has( 1, ECX, 25, ctxt, ops)
 #define vcpu_has_avx() vcpu_has( 1, ECX, 28, ctxt, ops)
+#define vcpu_has_f16c()vcpu_has( 1, ECX, 29, ctxt, ops)
 #define vcpu_has_rdrand()  vcpu_has( 1, ECX, 30, ctxt, ops)
 #define vcpu_has_mmxext() (vcpu_has(0x8001, EDX, 22, ctxt, ops) || \
vcpu_has_sse())
@@ -7216,6 +7219,12 @@ x86_emulate(
 host_and_vcpu_must_have(sse4_1);
 goto simd_0f38_common;
 
+case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */
+generate_exception_if(vex.w, EXC_UD);
+host_and_vcpu_must_have(f16c);
+op_bytes = 8 << vex.l;
+goto simd_0f_ymm;
+
 case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
 case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
 case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
@@ -7607,6 +7616,50 @@ x86_emulate(
 opc = init_prefixes(stub);
 goto pextr;
 
+case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem 
*/
+{
+uint32_t mxcsr;
+
+generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD);
+host_and_vcpu_must_have(f16c);
+fail_if(!ops->write);
+
+opc = init_prefixes(stub);
+opc[0] = b;
+opc[1] = modrm;
+if ( ea.type == OP_MEM )
+{
+

[Xen-devel] [PATCH 05/17] x86emul: support FMA4 insns

2017-06-21 Thread Jan Beulich
Signed-off-by: Jan Beulich 

--- a/.gitignore
+++ b/.gitignore
@@ -226,7 +226,7 @@
 tools/tests/x86_emulator/asm
 tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
+tools/tests/x86_emulator/fma*.[ch]
 tools/tests/x86_emulator/sse*.[ch]
 tools/tests/x86_emulator/test_x86_emulator
 tools/tests/x86_emulator/x86_emulate
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -12,7 +12,8 @@ run: $(TARGET)
./$(TARGET)
 
 SIMD := sse sse2 sse4 avx
-TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx
+FMA := fma4
+TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx $(FMA)
 
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
@@ -29,6 +30,9 @@ sse4-flts := $(sse2-flts)
 avx-vecs := 16 32
 avx-ints :=
 avx-flts := 4 8
+fma4-vecs := $(avx-vecs)
+fma4-ints :=
+fma4-flts := $(avx-flts)
 
 # When converting SSE to AVX, have the compiler avoid XMM0 to widen
 # coverage of the VEX. checks in the emulator. We must not do this,
@@ -58,7 +62,7 @@ $(1)-avx-cflags := \
"-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)"))
 endef
 
-$(foreach flavor,$(SIMD),$(eval $(call simd-defs,$(flavor
+$(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor
 
 $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
rm -f $@.new $*.bin
@@ -77,6 +81,11 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
 $(addsuffix .c,$(SIMD)) $(addsuffix -avx.c,$(filter sse%,$(SIMD))):
ln -sf simd.c $@
 
+$(addsuffix .c,$(FMA)):
+   ln -sf simd-fma.c $@
+
+$(addsuffix .o,$(SIMD) $(FMA)) $(addsuffix -avx.o,$(filter sse%,$(SIMD))): 
simd.h
+
 $(TARGET): x86_emulate.o test_x86_emulator.o
$(HOSTCC) -o $@ $^
 
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -1,71 +1,6 @@
-#include 
+#include "simd.h"
 
-asm (
-"\t.text\n"
-"\t.globl _start\n"
-"_start:\n"
-#if defined(__i386__) && VEC_SIZE == 16
-"\tpush %ebp\n"
-"\tmov %esp,%ebp\n"
-"\tand $~0xf,%esp\n"
-"\tcall simd_test\n"
-"\tleave\n"
-"\tret"
-#else
-"\tjmp simd_test"
-#endif
-);
-
-typedef
-#if defined(INT_SIZE)
-# define ELEM_SIZE INT_SIZE
-signed int
-# if INT_SIZE == 1
-#  define MODE QI
-# elif INT_SIZE == 2
-#  define MODE HI
-# elif INT_SIZE == 4
-#  define MODE SI
-# elif INT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(UINT_SIZE)
-# define ELEM_SIZE UINT_SIZE
-unsigned int
-# if UINT_SIZE == 1
-#  define MODE QI
-# elif UINT_SIZE == 2
-#  define MODE HI
-# elif UINT_SIZE == 4
-#  define MODE SI
-# elif UINT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(FLOAT_SIZE)
-float
-# define ELEM_SIZE FLOAT_SIZE
-# if FLOAT_SIZE == 4
-#  define MODE SF
-# elif FLOAT_SIZE == 8
-#  define MODE DF
-# endif
-#endif
-#ifndef VEC_SIZE
-# define VEC_SIZE ELEM_SIZE
-#endif
-__attribute__((mode(MODE), vector_size(VEC_SIZE))) vec_t;
-
-#define ELEM_COUNT (VEC_SIZE / ELEM_SIZE)
-
-typedef unsigned int __attribute__((mode(QI), vector_size(VEC_SIZE))) 
byte_vec_t;
-
-/* Various builtins want plain char / int / long long vector types ... */
-typedef char __attribute__((vector_size(VEC_SIZE))) vqi_t;
-typedef short __attribute__((vector_size(VEC_SIZE))) vhi_t;
-typedef int __attribute__((vector_size(VEC_SIZE))) vsi_t;
-#if VEC_SIZE >= 8
-typedef long long __attribute__((vector_size(VEC_SIZE))) vdi_t;
-#endif
+ENTRY(simd_test);
 
 #if VEC_SIZE == 8 && defined(__SSE__)
 # define to_bool(cmp) (__builtin_ia32_pmovmskb(cmp) == 0xff)
@@ -418,13 +353,6 @@ static inline bool _to_bool(byte_vec_t b
 # endif
 #endif
 
-/*
- * Suppress value propagation by the compiler, preventing unwanted
- * optimization. This at once makes the compiler use memory operands
- * more often, which for our purposes is the more interesting case.
- */
-#define touch(var) asm volatile ( "" : "+m" (var) )
-
 int simd_test(void)
 {
 unsigned int i, j;
--- /dev/null
+++ b/tools/tests/x86_emulator/simd.h
@@ -0,0 +1,78 @@
+#include 
+
+#if defined(__i386__) && VEC_SIZE == 16
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+  "\t.globl _start\n" \
+  "_start:\n" \
+  "\tpush %ebp\n" \
+  "\tmov %esp,%ebp\n" \
+  "\tand $~0xf,%esp\n" \
+  "\tcall " #name "\n" \
+  "\tleave\n" \
+  "\tret" )
+#else
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+  "\t.globl _start\n" \
+  "_start:\n" \
+  "\tjmp " #name )
+#endif
+
+typedef
+#if defined(INT_SIZE)
+# define ELEM_SIZE INT_SIZE
+signed int
+# if INT_SIZE == 1
+#  define MODE QI
+# elif INT_SIZE == 2
+#  define MODE HI
+# elif INT_SIZE == 4
+#  define MODE SI
+# elif INT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(UINT_SIZE)
+# define ELEM_SIZE UINT_SIZE
+unsigned int
+# if UINT_SIZE == 1
+#  define MODE QI
+# elif UINT_SIZE == 2
+#  define MODE HI
+# elif UINT_SIZE == 4
+#  define MODE SI
+# elif UINT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(FLOAT_SIZE)
+float
+# define ELEM_SIZE FLOAT_SIZE

[Xen-devel] [PATCH 06/17] x86emul: support FMA insns

2017-06-21 Thread Jan Beulich
Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -12,7 +12,7 @@ run: $(TARGET)
./$(TARGET)
 
 SIMD := sse sse2 sse4 avx
-FMA := fma4
+FMA := fma4 fma
 TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx $(FMA)
 
 blowfish-cflags := ""
@@ -33,6 +33,9 @@ avx-flts := 4 8
 fma4-vecs := $(avx-vecs)
 fma4-ints :=
 fma4-flts := $(avx-flts)
+fma-vecs := $(avx-vecs)
+fma-ints :=
+fma-flts := $(avx-flts)
 
 # When converting SSE to AVX, have the compiler avoid XMM0 to widen
 # coverage of the VEX. checks in the emulator. We must not do this,
--- a/tools/tests/x86_emulator/simd-fma.c
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -21,24 +21,24 @@ ENTRY(fma_test);
 #if VEC_SIZE == 16
 # if FLOAT_SIZE == 4
 #  define addsub(x, y) __builtin_ia32_addsubps(x, y)
-#  if defined(__FMA4__)
+#  if defined(__FMA4__) || defined(__FMA__)
 #   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps(x, y, z)
 #  endif
 # elif FLOAT_SIZE == 8
 #  define addsub(x, y) __builtin_ia32_addsubpd(x, y)
-#  if defined(__FMA4__)
+#  if defined(__FMA4__) || defined(__FMA__)
 #   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd(x, y, z)
 #  endif
 # endif
 #elif VEC_SIZE == 32
 # if FLOAT_SIZE == 4
 #  define addsub(x, y) __builtin_ia32_addsubps256(x, y)
-#  if defined(__FMA4__)
+#  if defined(__FMA4__) || defined(__FMA__)
 #   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps256(x, y, z)
 #  endif
 # elif FLOAT_SIZE == 8
 #  define addsub(x, y) __builtin_ia32_addsubpd256(x, y)
-#  if defined(__FMA4__)
+#  if defined(__FMA4__) || defined(__FMA__)
 #   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd256(x, y, z)
 #  endif
 # endif
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -12,6 +12,7 @@
 #include "sse4-avx.h"
 #include "avx.h"
 #include "fma4.h"
+#include "fma.h"
 
 #define verbose false /* Switch to true for far more logging. */
 
@@ -53,6 +54,11 @@ static bool simd_check_fma4(void)
 return cpu_has_fma4;
 }
 
+static bool simd_check_fma(void)
+{
+return cpu_has_fma;
+}
+
 static void simd_set_regs(struct cpu_user_regs *regs)
 {
 if ( cpu_has_mmx )
@@ -155,6 +161,12 @@ static const struct {
 SIMD(FMA4 scalar double, fma4,f8),
 SIMD(FMA4 128bit double, fma4,  16f8),
 SIMD(FMA4 256bit double, fma4,  32f8),
+SIMD(FMA scalar single,  fma, f4),
+SIMD(FMA 128bit single,  fma,   16f4),
+SIMD(FMA 256bit single,  fma,   32f4),
+SIMD(FMA scalar double,  fma, f8),
+SIMD(FMA 128bit double,  fma,   16f8),
+SIMD(FMA 256bit double,  fma,   32f8),
 #undef SIMD_
 #undef SIMD
 };
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -94,6 +94,14 @@ static inline uint64_t xgetbv(uint32_t x
 (res.c & (1U << 0)) != 0; \
 })
 
+#define cpu_has_fma ({ \
+struct cpuid_leaf res; \
+emul_test_cpuid(1, 0, &res, NULL); \
+if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+res.c = 0; \
+(res.c & (1U << 12)) != 0; \
+})
+
 #define cpu_has_sse4_1 ({ \
 struct cpuid_leaf res; \
 emul_test_cpuid(1, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -385,6 +385,9 @@ static const struct {
 [0x37 ... 0x3f] = { .simd_size = simd_packed_int },
 [0x40] = { .simd_size = simd_packed_int },
 [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
+[0x96 ... 0x9f] = { .simd_size = simd_packed_fp },
+[0xa6 ... 0xaf] = { .simd_size = simd_packed_fp },
+[0xb6 ... 0xbf] = { .simd_size = simd_packed_fp },
 [0xc8 ... 0xcd] = { .simd_size = simd_other },
 [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
 [0xdc ... 0xdf] = { .simd_size = simd_packed_int },
@@ -1605,6 +1608,7 @@ static bool vcpu_has(
 #define vcpu_has_sse3()vcpu_has( 1, ECX,  0, ctxt, ops)
 #define vcpu_has_pclmulqdq()   vcpu_has( 1, ECX,  1, ctxt, ops)
 #define vcpu_has_ssse3()   vcpu_has( 1, ECX,  9, ctxt, ops)
+#define vcpu_has_fma() vcpu_has( 1, ECX, 12, ctxt, ops)
 #define vcpu_has_cx16()vcpu_has( 1, ECX, 13, ctxt, ops)
 #define vcpu_has_sse4_1()  vcpu_has( 1, ECX, 19, ctxt, ops)
 #define vcpu_has_sse4_2()  vcpu_has( 1, ECX, 20, ctxt, ops)
@@ -7352,6 +7356,39 @@ x86_emulate(
 generate_exception_if(vex.l, EXC_UD);
 goto simd_0f_avx;
 
+case X86EMUL_OPC_VEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+case X86EMUL_OPC_VEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+case X86EMUL_OPC_VEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+case X86EMUL_OPC_VEX_66(0x0f38, 0x99): /* vfmadd132s{s,d} 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+case X86EMUL_OPC_VEX_66(0x0f38, 0x9a): /* vfmsub132p{s,d

[Xen-devel] [PATCH 07/17] x86emul: support most remaining AVX2 insns

2017-06-21 Thread Jan Beulich
I.e. those not being equivalents of SSEn ones, but with the exception
of the various gather operations.

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -11,9 +11,9 @@ all: $(TARGET)
 run: $(TARGET)
./$(TARGET)
 
-SIMD := sse sse2 sse4 avx
+SIMD := sse sse2 sse4 avx avx2
 FMA := fma4 fma
-TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx $(FMA)
+TESTCASES := blowfish $(SIMD) $(FMA)
 
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
@@ -36,13 +36,9 @@ fma4-flts := $(avx-flts)
 fma-vecs := $(avx-vecs)
 fma-ints :=
 fma-flts := $(avx-flts)
-
-# When converting SSE to AVX, have the compiler avoid XMM0 to widen
-# coverage of the VEX. checks in the emulator. We must not do this,
-# however, for SSE4.1 and later, as there are instructions with XMM0 as
-# an implicit operand.
-sse2avx-sse2 := -ffixed-xmm0 -Wa,-msse2avx
-sse2avx-sse4 := -Wa,-msse2avx
+avx2-vecs := $(avx-vecs)
+avx2-ints := 1 2 4 8
+avx2-flts := 4 8
 
 # For AVX and later, have the compiler avoid XMM0 to widen coverage of
 # the VEX. checks in the emulator.
@@ -58,11 +54,6 @@ $(1)-cflags := \
"-D_$(vec)f$(flt) -m$(1) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec) 
-DFLOAT_SIZE=$(flt)")) \
$(foreach flt,$($(1)-flts), \
  "-D_f$(flt) -m$(1) $(call non-sse,$(1)) -mfpmath=sse -Os 
-DFLOAT_SIZE=$(flt)")
-$(1)-avx-cflags := \
-   $(foreach vec,$($(1)-vecs), \
- $(foreach int,$($(1)-ints), \
-   "-D_$(vec)i$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) 
-DINT_SIZE=$(int)" \
-   "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) 
-DUINT_SIZE=$(int)"))
 endef
 
 $(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor
@@ -81,13 +72,13 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
)
mv $@.new $@
 
-$(addsuffix .c,$(SIMD)) $(addsuffix -avx.c,$(filter sse%,$(SIMD))):
+$(addsuffix .c,$(SIMD)):
ln -sf simd.c $@
 
 $(addsuffix .c,$(FMA)):
ln -sf simd-fma.c $@
 
-$(addsuffix .o,$(SIMD) $(FMA)) $(addsuffix -avx.o,$(filter sse%,$(SIMD))): 
simd.h
+$(addsuffix .o,$(SIMD) $(FMA)): simd.h
 
 $(TARGET): x86_emulate.o test_x86_emulator.o
$(HOSTCC) -o $@ $^
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -23,7 +23,9 @@ ENTRY(simd_test);
 #  endif
 # endif
 #elif VEC_SIZE == 32
-# if defined(__AVX__) && ELEM_SIZE == 4
+# if defined(__AVX2__)
+#  define to_bool(cmp) __builtin_ia32_ptestc256(cmp, (vdi_t){} == 0)
+# elif defined(__AVX__) && ELEM_SIZE == 4
 #  define to_bool(cmp) (__builtin_ia32_movmskps256(cmp) == 0xff)
 # elif defined(__AVX__) && ELEM_SIZE == 8
 #  define to_bool(cmp) (__builtin_ia32_movmskpd256(cmp) == 0xf)
@@ -80,10 +82,14 @@ static inline bool _to_bool(byte_vec_t b
 vec_t t_ = __builtin_ia32_vpermilps256(x, 0b00011011); \
 __builtin_ia32_vperm2f128_ps256(t_, t_, 0b0001); \
 })
-#  define swap2(x) ({ \
-vec_t t_ = __builtin_ia32_vpermilvarps256(x, 
__builtin_ia32_cvtps2dq256(inv) - 1); \
-__builtin_ia32_vperm2f128_ps256(t_, t_, 0b0001); \
+#  ifdef __AVX2__
+#   define swap2(x) __builtin_ia32_permvarsf256(x, 
__builtin_ia32_cvtps2dq256(inv) - 1)
+#  else
+#   define swap2(x) ({ \
+vec_t t_ = __builtin_ia32_vpermilvarps256(x, 
__builtin_ia32_cvtps2dq256(inv) - 1); \
+__builtin_ia32_vperm2f128_ps256(t_, t_, 0b0001); \
 })
+#  endif
 # elif VEC_SIZE == 16
 #  ifdef __AVX__
 #   define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss(&t_); })
@@ -128,6 +134,9 @@ static inline bool _to_bool(byte_vec_t b
 vec_t t_ = __builtin_ia32_vpermilpd256(x, 0b0101); \
 __builtin_ia32_vperm2f128_pd256(t_, t_, 0b0001); \
 })
+#  ifdef __AVX2__
+#   define swap2(x) __builtin_ia32_permdf256(x, 0b00011011)
+#  endif
 # elif VEC_SIZE == 16
 #  define interleave_hi(x, y) __builtin_ia32_unpckhpd(x, y)
 #  define interleave_lo(x, y) __builtin_ia32_unpcklpd(x, y)
@@ -184,6 +193,104 @@ static inline bool _to_bool(byte_vec_t b
 __builtin_ia32_maskmovdqu((vqi_t)(x),  m_, d_); \
 __builtin_ia32_maskmovdqu((vqi_t)(y), ~m_, d_); \
 })
+#elif VEC_SIZE == 32 && defined(__AVX2__)
+# define swap_lanes(x, y, func, type) ({ \
+long long __attribute__((vector_size(16))) t_ = 
__builtin_ia32_extract128i256((vdi_t)(y), 0); \
+type t1_ = (type)__builtin_ia32_insert128i256((vdi_t)(x), t_, 1), t2_; \
+t_ = __builtin_ia32_extract128i256((vdi_t)(x), 1); \
+t2_ = (type)__builtin_ia32_insert128i256((vdi_t)(y), t_, 0); \
+func(t1_, t2_); \
+})
+# if INT_SIZE == 1 || UINT_SIZE == 1
+#  define broadcast(x) ({ char s_ = (x); vec_t d_; asm ( "vpbroadcastb %1,%0" 
: "=x" (d_) : "m" (s_)); d_; })
+#  define copysignz(x, y) ((vec_t)__builtin_ia32_psignb256((vqi_t)(x), 
(vqi_t)(y)))
+#  define rotr(x, n) 
((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), 
(vdi_t)(x), 0b0001), \
+   

Re: [Xen-devel] [PATCH 09/11] gnttab: avoid spurious maptrack handle allocation failures

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:37, Jan Beulich wrote:
> When no memory is available in the hypervisor, rather than immediately
> failing the request try to steal a handle from another vCPU.

"request, try"

>
> Reported-by: George Dunlap 
> Signed-off-by: Jan Beulich 
>
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -397,7 +397,7 @@ get_maptrack_handle(
>  struct vcpu  *curr = current;
>  unsigned int  i, head;
>  grant_handle_thandle;
> -struct grant_mapping *new_mt;
> +struct grant_mapping *new_mt = NULL;
>  
>  handle = __get_maptrack_handle(lgt, curr);
>  if ( likely(handle != -1) )
> @@ -408,8 +408,13 @@ get_maptrack_handle(
>  /*
>   * If we've run out of frames, try stealing an entry from another
>   * VCPU (in case the guest isn't mapping across its VCPUs evenly).
> + * Also use this path in case we're out of memory, to avoid spurious
> + * failures.
>   */
> -if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
> +if ( nr_maptrack_frames(lgt) < max_maptrack_frames )
> +new_mt = alloc_xenheap_page();
> +
> +if ( !new_mt )
>  {
>  /*
>   * Can drop the lock since no other VCPU can be adding a new

* frame once they've run out.
*/

It doesn't look like this comment is true any more, which brings the
locking correctness into question.

~Andrew

> @@ -432,12 +437,6 @@ get_maptrack_handle(
>  return steal_maptrack_handle(lgt, curr);
>  }
>  
> -new_mt = alloc_xenheap_page();
> -if ( !new_mt )
> -{
> -spin_unlock(&lgt->maptrack_lock);
> -return -1;
> -}
>  clear_page(new_mt);
>  
>  /*
>
>
>


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 08/17] x86emul: fold/eliminate some local variables

2017-06-21 Thread Jan Beulich
Make i switch-wide (at once making it unsigned, as it should have
been) and introduce n (for immediate use in enter and aam/aad
handling). Eliminate on-stack arrays in pusha/popa handling. Use ea.val
instead of a custom variable in bound handling.

No (intended) functional change.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3229,6 +3229,7 @@ x86_emulate(
 struct segment_register cs, sreg;
 struct cpuid_leaf cpuid_leaf;
 uint64_t msr_val;
+unsigned int i, n;
 unsigned long dummy;
 
 case 0x00 ... 0x05: add: /* add */
@@ -3361,47 +3362,45 @@ x86_emulate(
 goto done;
 break;
 
-case 0x60: /* pusha */ {
-int i;
-unsigned int regs[] = {
-_regs.eax, _regs.ecx, _regs.edx, _regs.ebx,
-_regs.esp, _regs.ebp, _regs.esi, _regs.edi };
-
+case 0x60: /* pusha */
 fail_if(!ops->write);
+ea.val = _regs.esp;
 for ( i = 0; i < 8; i++ )
+{
+void *reg = decode_register(i, &_regs, 0);
+
 if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
-  ®s[i], op_bytes, ctxt)) != 0 )
-goto done;
+  reg != &_regs.esp ? reg : &ea.val,
+  op_bytes, ctxt)) != 0 )
+goto done;
+}
 break;
-}
-
-case 0x61: /* popa */ {
-int i;
-unsigned int dummy_esp, *regs[] = {
-&_regs.edi, &_regs.esi, &_regs.ebp, &dummy_esp,
-&_regs.ebx, &_regs.edx, &_regs.ecx, &_regs.eax };
 
+case 0x61: /* popa */
 for ( i = 0; i < 8; i++ )
 {
+void *reg = decode_register(7 - i, &_regs, 0);
+
 if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
   &dst.val, op_bytes, ctxt, ops)) != 0 )
 goto done;
+if ( reg == &_regs.r(sp) )
+continue;
 if ( op_bytes == 2 )
-*(uint16_t *)regs[i] = (uint16_t)dst.val;
+*(uint16_t *)reg = dst.val;
 else
-*regs[i] = dst.val; /* 64b: zero-ext done by read_ulong() */
+*(unsigned long *)reg = dst.val;
 }
 break;
-}
 
 case 0x62: /* bound */ {
-unsigned long src_val2;
 int lb, ub, idx;
+
 generate_exception_if(src.type != OP_MEM, EXC_UD);
 if ( (rc = read_ulong(src.mem.seg, src.mem.off + op_bytes,
-  &src_val2, op_bytes, ctxt, ops)) )
+  &ea.val, op_bytes, ctxt, ops)) )
 goto done;
-ub  = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2;
+ub  = (op_bytes == 2) ? (int16_t)ea.val   : (int32_t)ea.val;
 lb  = (op_bytes == 2) ? (int16_t)src.val  : (int32_t)src.val;
 idx = (op_bytes == 2) ? (int16_t)dst.val  : (int32_t)dst.val;
 generate_exception_if((idx < lb) || (idx > ub), EXC_BR);
@@ -3957,10 +3956,7 @@ x86_emulate(
 dst.val = src.val;
 break;
 
-case 0xc8: /* enter imm16,imm8 */ {
-uint8_t depth = imm2 & 31;
-int i;
-
+case 0xc8: /* enter imm16,imm8 */
 dst.type = OP_REG;
 dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
 dst.reg = (unsigned long *)&_regs.r(bp);
@@ -3970,9 +3966,10 @@ x86_emulate(
 goto done;
 dst.val = _regs.r(sp);
 
-if ( depth > 0 )
+n = imm2 & 31;
+if ( n )
 {
-for ( i = 1; i < depth; i++ )
+for ( i = 1; i < n; i++ )
 {
 unsigned long ebp, temp_data;
 ebp = truncate_word(_regs.r(bp) - i*dst.bytes, 
ctxt->sp_size/8);
@@ -3989,7 +3986,6 @@ x86_emulate(
 
 sp_pre_dec(src.val);
 break;
-}
 
 case 0xc9: /* leave */
 /* First writeback, to %%esp. */
@@ -4084,28 +4080,21 @@ x86_emulate(
 goto grp2;
 
 case 0xd4: /* aam */
-case 0xd5: /* aad */ {
-unsigned int base = (uint8_t)src.val;
-
+case 0xd5: /* aad */
+n = (uint8_t)src.val;
 if ( b & 0x01 )
-{
-uint16_t ax = _regs.ax;
-
-_regs.ax = (uint8_t)(ax + ((ax >> 8) * base));
-}
+_regs.ax = (uint8_t)(_regs.al + (_regs.ah * n));
 else
 {
-uint8_t al = _regs.al;
-
-generate_exception_if(!base, EXC_DE);
-_regs.ax = ((al / base) << 8) | (al % base);
+generate_exception_if(!n, EXC_DE);
+_regs.al = _regs.al % n;
+_regs.ah = _regs.al / n;
 }
 _regs.eflags &= ~(X86_EFLAGS_SF | X86_EFLAGS_ZF | X86_EFLAGS_PF);
 _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0;
 _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0;
 _regs.eflags |= even_parity(

[Xen-devel] [PATCH 09/17] x86emul: support AVX2 gather insns

2017-06-21 Thread Jan Beulich
Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -13,7 +13,8 @@ run: $(TARGET)
 
 SIMD := sse sse2 sse4 avx avx2
 FMA := fma4 fma
-TESTCASES := blowfish $(SIMD) $(FMA)
+SG := avx2-sg
+TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
 
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
@@ -39,6 +40,10 @@ fma-flts := $(avx-flts)
 avx2-vecs := $(avx-vecs)
 avx2-ints := 1 2 4 8
 avx2-flts := 4 8
+avx2-sg-vecs := $(avx2-vecs)
+avx2-sg-idxs := 4 8
+avx2-sg-ints := 4 8
+avx2-sg-flts := 4 8
 
 # For AVX and later, have the compiler avoid XMM0 to widen coverage of
 # the VEX. checks in the emulator.
@@ -55,8 +60,18 @@ $(1)-cflags := \
$(foreach flt,$($(1)-flts), \
  "-D_f$(flt) -m$(1) $(call non-sse,$(1)) -mfpmath=sse -Os 
-DFLOAT_SIZE=$(flt)")
 endef
+define simd-sg-defs
+$(1)-cflags := \
+   $(foreach vec,$($(1)-vecs), \
+ $(foreach idx,$($(1)-idxs), \
+  $(foreach int,$($(1)-ints), \
+"-D_$(vec)x$(idx)i$(int) -m$(1:-sg=) $(call non-sse,$(1)) -Os 
-DVEC_MAX=$(vec) -DIDX_SIZE=$(idx) -DINT_SIZE=$(int)") \
+  $(foreach flt,$($(1)-flts), \
+"-D_$(vec)x$(idx)f$(flt) -m$(1:-sg=) $(call non-sse,$(1)) -Os 
-DVEC_MAX=$(vec) -DIDX_SIZE=$(idx) -DFLOAT_SIZE=$(flt)")))
+endef
 
 $(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor
+$(foreach flavor,$(SG),$(eval $(call simd-sg-defs,$(flavor
 
 $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
rm -f $@.new $*.bin
@@ -78,7 +93,10 @@ $(addsuffix .c,$(SIMD)):
 $(addsuffix .c,$(FMA)):
ln -sf simd-fma.c $@
 
-$(addsuffix .o,$(SIMD) $(FMA)): simd.h
+$(addsuffix .c,$(SG)):
+   ln -sf simd-sg.c $@
+
+$(addsuffix .o,$(SIMD) $(FMA) $(SG)): simd.h
 
 $(TARGET): x86_emulate.o test_x86_emulator.o
$(HOSTCC) -o $@ $^
--- /dev/null
+++ b/tools/tests/x86_emulator/simd-sg.c
@@ -0,0 +1,209 @@
+#ifdef INT_SIZE
+# define ELEM_SIZE INT_SIZE
+#else
+# define ELEM_SIZE FLOAT_SIZE
+#endif
+
+#define VEC_SIZE (IDX_SIZE <= ELEM_SIZE ? VEC_MAX \
+: VEC_MAX * ELEM_SIZE / IDX_SIZE)
+#if VEC_SIZE < 16
+# undef VEC_SIZE
+# define VEC_SIZE 16
+#endif
+
+#include "simd.h"
+
+ENTRY(sg_test);
+
+#undef MODE
+#if IDX_SIZE == 4
+# define MODE SI
+#elif IDX_SIZE == 8
+# define MODE DI
+#endif
+
+#define IVEC_SIZE (ELEM_SIZE <= IDX_SIZE ? VEC_MAX \
+ : VEC_MAX * IDX_SIZE / ELEM_SIZE)
+#if IVEC_SIZE < 16
+# undef IVEC_SIZE
+# define IVEC_SIZE 16
+#endif
+
+typedef signed int __attribute__((mode(MODE), vector_size(IVEC_SIZE))) idx_t;
+typedef long long __attribute__((vector_size(IVEC_SIZE))) idi_t;
+
+#define ITEM_COUNT (VEC_SIZE / ELEM_SIZE < IVEC_SIZE / IDX_SIZE ? \
+VEC_SIZE / ELEM_SIZE : IVEC_SIZE / IDX_SIZE)
+
+#if VEC_SIZE == 16
+# define to_bool(cmp) __builtin_ia32_ptestc128(cmp, (vec_t){} == 0)
+#else
+# define to_bool(cmp) __builtin_ia32_ptestc256(cmp, (vec_t){} == 0)
+#endif
+
+#if defined(__AVX2__)
+# if VEC_MAX == 16
+#  if IDX_SIZE == 4
+#   if INT_SIZE == 4
+#define gather __builtin_ia32_gathersiv4si
+#   elif INT_SIZE == 8
+#define gather(reg, mem, idx, msk, scl) \
+(vec_t)(__builtin_ia32_gathersiv2di((vdi_t)(reg), \
+(const void *)(mem), \
+idx, (vdi_t)(msk), scl))
+#   elif FLOAT_SIZE == 4
+#define gather __builtin_ia32_gathersiv4sf
+#   elif FLOAT_SIZE == 8
+#define gather __builtin_ia32_gathersiv2df
+#   endif
+#  elif IDX_SIZE == 8
+#   if INT_SIZE == 4
+#define gather(reg, mem, idx, msk, scl) \
+__builtin_ia32_gatherdiv4si(reg, mem, (vdi_t)(idx), msk, scl)
+#   elif INT_SIZE == 8
+#define gather(reg, mem, idx, msk, scl) \
+(vec_t)(__builtin_ia32_gatherdiv2di((vdi_t)(reg), \
+(const void *)(mem), \
+(vdi_t)(idx), (vdi_t)(msk), \
+scl))
+#   elif FLOAT_SIZE == 4
+#define gather(reg, mem, idx, msk, scl) \
+__builtin_ia32_gatherdiv4sf(reg, mem, (vdi_t)(idx), msk, scl)
+#   elif FLOAT_SIZE == 8
+#define gather(reg, mem, idx, msk, scl) \
+__builtin_ia32_gatherdiv2df(reg, mem, (vdi_t)(idx), msk, scl)
+#   endif
+#  endif
+# elif VEC_MAX == 32
+#  if IDX_SIZE == 4
+#   if INT_SIZE == 4
+#define gather __builtin_ia32_gathersiv8si
+#   elif INT_SIZE == 8
+#define gather(reg, mem, idx, msk, scl) \
+(vec_t)(__builtin_ia32_gathersiv4di((vdi_t)(reg), \
+(const void *)(mem), \
+idx, (vdi_t)(msk), scl))
+
+#   elif FLOAT_SIZE == 4
+#define gather __builtin_ia32_gathersiv8sf
+#   elif FLOAT_SIZE == 8
+#define gather __builtin_ia32_gathersiv4df
+#

[Xen-devel] [PATCH 10/17] x86emul: add tables for XOP 08 and 09 extension spaces

2017-06-21 Thread Jan Beulich
Convert the few existing opcodes so far supported.

Also adjust two vex_* case labels to better be ext_* (the values are
identical).

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -458,6 +458,20 @@ static const opcode_desc_t xop_table[] =
 DstReg|SrcImm|ModRM,
 };
 
+static const struct {
+uint8_t simd_size:5;
+uint8_t two_op:1;
+uint8_t four_op:1;
+} ext8f08_table[256] = {
+};
+
+static const struct {
+uint8_t simd_size:5;
+uint8_t two_op:1;
+} ext8f09_table[256] = {
+[0x01 ... 0x02] = { .two_op = 1 },
+};
+
 #define REX_PREFIX 0x40
 #define REX_B 0x01
 #define REX_X 0x02
@@ -2716,7 +2730,7 @@ x86_decode(
 }
 break;
 
-case vex_0f38:
+case ext_0f38:
 d = ext0f38_table[b].to_mem ? DstMem | SrcReg
 : DstReg | SrcMem;
 if ( ext0f38_table[b].two_op )
@@ -2726,7 +2740,14 @@ x86_decode(
 state->simd_size = ext0f38_table[b].simd_size;
 break;
 
-case vex_0f3a:
+case ext_8f09:
+if ( ext8f09_table[b].two_op )
+d |= TwoOp;
+state->simd_size = ext8f09_table[b].simd_size;
+break;
+
+case ext_0f3a:
+case ext_8f08:
 /*
  * Cannot update d here yet, as the immediate operand still
  * needs fetching.
@@ -2919,6 +2940,15 @@ x86_decode(
 break;
 
 case ext_8f08:
+d = DstReg | SrcMem;
+if ( ext8f08_table[b].two_op )
+d |= TwoOp;
+else if ( ext8f08_table[b].four_op && !mode_64bit() )
+imm1 &= 0x7f;
+state->desc = d;
+state->simd_size = ext8f08_table[b].simd_size;
+break;
+
 case ext_8f09:
 case ext_8f0a:
 break;



x86emul: add tables for XOP 08 and 09 extension spaces

Convert the few existing opcodes so far supported.

Also adjust two vex_* case labels to better be ext_* (the values are
identical).

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -458,6 +458,20 @@ static const opcode_desc_t xop_table[] =
 DstReg|SrcImm|ModRM,
 };
 
+static const struct {
+uint8_t simd_size:5;
+uint8_t two_op:1;
+uint8_t four_op:1;
+} ext8f08_table[256] = {
+};
+
+static const struct {
+uint8_t simd_size:5;
+uint8_t two_op:1;
+} ext8f09_table[256] = {
+[0x01 ... 0x02] = { .two_op = 1 },
+};
+
 #define REX_PREFIX 0x40
 #define REX_B 0x01
 #define REX_X 0x02
@@ -2716,7 +2730,7 @@ x86_decode(
 }
 break;
 
-case vex_0f38:
+case ext_0f38:
 d = ext0f38_table[b].to_mem ? DstMem | SrcReg
 : DstReg | SrcMem;
 if ( ext0f38_table[b].two_op )
@@ -2726,7 +2740,14 @@ x86_decode(
 state->simd_size = ext0f38_table[b].simd_size;
 break;
 
-case vex_0f3a:
+case ext_8f09:
+if ( ext8f09_table[b].two_op )
+d |= TwoOp;
+state->simd_size = ext8f09_table[b].simd_size;
+break;
+
+case ext_0f3a:
+case ext_8f08:
 /*
  * Cannot update d here yet, as the immediate operand still
  * needs fetching.
@@ -2919,6 +2940,15 @@ x86_decode(
 break;
 
 case ext_8f08:
+d = DstReg | SrcMem;
+if ( ext8f08_table[b].two_op )
+d |= TwoOp;
+else if ( ext8f08_table[b].four_op && !mode_64bit() )
+imm1 &= 0x7f;
+state->desc = d;
+state->simd_size = ext8f08_table[b].simd_size;
+break;
+
 case ext_8f09:
 case ext_8f0a:
 break;
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 11/17] x86emul: support XOP insns

2017-06-21 Thread Jan Beulich
Signed-off-by: Jan Beulich 

--- a/.gitignore
+++ b/.gitignore
@@ -230,7 +230,7 @@
 tools/tests/x86_emulator/sse*.[ch]
 tools/tests/x86_emulator/test_x86_emulator
 tools/tests/x86_emulator/x86_emulate
+tools/tests/x86_emulator/xop*.[ch]
 tools/tests/xen-access/xen-access
 tools/tests/xenstore/xs-test
 tools/tests/regression/installed/*
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -11,7 +11,7 @@ all: $(TARGET)
 run: $(TARGET)
./$(TARGET)
 
-SIMD := sse sse2 sse4 avx avx2
+SIMD := sse sse2 sse4 avx avx2 xop
 FMA := fma4 fma
 SG := avx2-sg
 TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
@@ -44,6 +44,9 @@ avx2-sg-vecs := $(avx2-vecs)
 avx2-sg-idxs := 4 8
 avx2-sg-ints := 4 8
 avx2-sg-flts := 4 8
+xop-vecs := $(avx-vecs)
+xop-ints := 1 2 4 8
+xop-flts := $(avx-flts)
 
 # For AVX and later, have the compiler avoid XMM0 to widen coverage of
 # the VEX. checks in the emulator.
@@ -98,6 +101,8 @@ $(addsuffix .c,$(SG)):
 
 $(addsuffix .o,$(SIMD) $(FMA) $(SG)): simd.h
 
+xop.o: simd-fma.c
+
 $(TARGET): x86_emulate.o test_x86_emulator.o
$(HOSTCC) -o $@ $^
 
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -471,6 +471,86 @@ static inline bool _to_bool(byte_vec_t b
 #  endif
 # endif
 #endif
+#ifdef __XOP__
+# undef select
+# if VEC_SIZE == 16
+#  if INT_SIZE == 2 || INT_SIZE == 4
+#   include "simd-fma.c"
+#  endif
+#  define select(d, x, y, m) \
+(*(d) = (vec_t)__builtin_ia32_vpcmov((vdi_t)(x), (vdi_t)(y), (vdi_t)(m)))
+#  if INT_SIZE == 1 || UINT_SIZE == 1
+#   define swap2(x) ((vec_t)__builtin_ia32_vpperm((vqi_t)(x), (vqi_t)(x), 
(vqi_t)inv - 1))
+#  elif INT_SIZE == 2 || UINT_SIZE == 2
+#   define swap2(x) \
+((vec_t)__builtin_ia32_vpperm((vqi_t)(x), (vqi_t)(x), \
+  (vqi_t)(__builtin_ia32_vprotwi(2 * 
(vhi_t)inv - 1, 8) | \
+  (2 * inv - 2
+#  elif FLOAT_SIZE == 4
+#   define frac(x) __builtin_ia32_vfrczps(x)
+#   undef swap2
+#   define swap2(x) ({ \
+/* Buggy in gcc 7.1.0 and earlier. */ \
+/* __builtin_ia32_vpermil2ps((vec_t){}, x, __builtin_ia32_cvtps2dq(inv) + 
3, 0) */ \
+vec_t t_; \
+asm ( "vpermil2ps $0, %3, %2, %1, %0" : \
+  "=x" (t_) : \
+  "x" ((vec_t){}), "m" (x), "x" (__builtin_ia32_cvtps2dq(inv) + 3) ); \
+t_; \
+})
+#  elif FLOAT_SIZE == 8
+#   define frac(x) __builtin_ia32_vfrczpd(x)
+#   undef swap2
+#   define swap2(x) ({ \
+/* Buggy in gcc 7.1.0 and earlier. */ \
+/* __builtin_ia32_vpermil2pd((vec_t){}, x, */ \
+/*__builtin_ia32_pmovsxdq128( */ \
+/*__builtin_ia32_cvtpd2dq(inv) + 1) << 1, 
0) */ \
+vdi_t s_ = __builtin_ia32_pmovsxdq128( \
+   __builtin_ia32_cvtpd2dq(inv) + 1) << 1; \
+vec_t t_; \
+asm ( "vpermil2pd $0, %3, %2, %1, %0" : \
+  "=x" (t_) : "x" ((vec_t){}), "x" (x), "m" (s_) ); \
+t_; \
+})
+#  endif
+#  if INT_SIZE == 1
+#   define hadd(x, y) 
((vec_t)__builtin_ia32_packsswb128(__builtin_ia32_vphaddbw((vqi_t)(x)), \
+ 
__builtin_ia32_vphaddbw((vqi_t)(y
+#   define hsub(x, y) 
((vec_t)__builtin_ia32_packsswb128(__builtin_ia32_vphsubbw((vqi_t)(x)), \
+ 
__builtin_ia32_vphsubbw((vqi_t)(y
+#  elif UINT_SIZE == 1
+#   define hadd(x, y) 
((vec_t)__builtin_ia32_packuswb128(__builtin_ia32_vphaddubw((vqi_t)(x)), \
+ 
__builtin_ia32_vphaddubw((vqi_t)(y
+#  elif INT_SIZE == 2
+#   undef hadd
+#   define hadd(x, y) __builtin_ia32_packssdw128(__builtin_ia32_vphaddwd(x), \
+ __builtin_ia32_vphaddwd(y))
+#   undef hsub
+#   define hsub(x, y) __builtin_ia32_packssdw128(__builtin_ia32_vphsubwd(x), \
+ __builtin_ia32_vphsubwd(y))
+#  elif UINT_SIZE == 2
+#   undef hadd
+#   define hadd(x, y) 
((vec_t)__builtin_ia32_packusdw128(__builtin_ia32_vphadduwd((vhi_t)(x)), \
+ 
__builtin_ia32_vphadduwd((vhi_t)(y
+#   undef hsub
+#  endif
+# elif VEC_SIZE == 32
+#  define select(d, x, y, m) \
+(*(d) = (vec_t)__builtin_ia32_vpcmov256((vdi_t)(x), (vdi_t)(y), 
(vdi_t)(m)))
+#  if FLOAT_SIZE == 4
+#   define frac(x) __builtin_ia32_vfrczps256(x)
+#  elif FLOAT_SIZE == 8
+#   define frac(x) __builtin_ia32_vfrczpd256(x)
+#  endif
+# elif VEC_SIZE == FLOAT_SIZE
+#  if VEC_SIZE == 4
+#   define frac(x) scalar_1op(x, "vfrczss %[in], %[out]")
+#  elif VEC_SIZE == 8
+#   define frac(x) scalar_1op(x, "vfrczsd %[in], %[out]")
+#  endif
+# endif
+#endif
 
 int simd_test(void)
 {
@@ -576,6 +656,29 @@ int simd_test(void)
 if ( !to_bool(y == z) ) return __LINE__;
 # endif
 
+# ifdef frac
+touch(src);
+x = frac(src);
+touch(src);
+if ( !to_bool(x == 0) ) return __LINE__;

[Xen-devel] [PATCH 12/17] x86emul: support 3DNow! insns

2017-06-21 Thread Jan Beulich
Yes, recent AMD CPUs don't support them anymore, but I think we should
nevertheless cope.

Signed-off-by: Jan Beulich 

--- a/.gitignore
+++ b/.gitignore
@@ -223,7 +223,7 @@
 tools/security/xensec_tool
 tools/tests/x86_emulator/*.bin
 tools/tests/x86_emulator/*.tmp
+tools/tests/x86_emulator/3dnow*.[ch]
 tools/tests/x86_emulator/asm
 tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -11,7 +11,7 @@ all: $(TARGET)
 run: $(TARGET)
./$(TARGET)
 
-SIMD := sse sse2 sse4 avx avx2 xop
+SIMD := 3dnow sse sse2 sse4 avx avx2 xop
 FMA := fma4 fma
 SG := avx2-sg
 TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
@@ -19,6 +19,9 @@ TESTCASES := blowfish $(SIMD) $(FMA) $(S
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
 
+3dnow-vecs := 8
+3dnow-ints :=
+3dnow-flts := 4
 sse-vecs := 16
 sse-ints :=
 sse-flts := 4
@@ -49,8 +52,13 @@ xop-ints := 1 2 4 8
 xop-flts := $(avx-flts)
 
 # For AVX and later, have the compiler avoid XMM0 to widen coverage of
-# the VEX. checks in the emulator.
-non-sse = $(if $(filter sse%,$(1)),,-ffixed-xmm0)
+# the VEX. checks in the emulator.  For 3DNow!, however, force SSE
+# use for floating point operations, to avoid mixing MMX and FPU register
+# uses.  Also enable 3DNow! extensions, but note that we can't use 3dnowa
+# as the test flavor right away since -m3dnowa is being understood only
+# by gcc 7.x and newer (older ones want a specific machine model instead).
+3dnowa := $(call cc-option,$(CC),-m3dnowa,-march=k8)
+non-sse = $(if $(filter sse%,$(1)),,$(if $(filter 3dnow%,$(1)),-msse 
-mfpmath=sse $(3dnowa),-ffixed-xmm0))
 
 define simd-defs
 $(1)-cflags := \
@@ -81,8 +89,9 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
$(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) 
$(XEN_COMPILE_ARCH), \
for cflags in $($*-cflags) $($*-cflags-$(arch)); do \
$(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) 
$*-cflags="$$cflags" all; \
+   prefix=$(shell echo $(subst -,_,$*) | sed -e 
's,^\([0-9]\),_\1,'); \
flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \
-   (echo "static const unsigned int $(subst 
-,_,$*)_$(arch)$${flavor}[] = {"; \
+   (echo "static const unsigned int $${prefix}_$(arch)$${flavor}[] 
= {"; \
 od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 
's/$$/,/'; \
 echo "};") >>$@.new; \
rm -f $*.bin; \
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -48,6 +48,8 @@ static inline bool _to_bool(byte_vec_t b
 
 #if VEC_SIZE == FLOAT_SIZE
 # define to_int(x) ((vec_t){ (int)(x)[0] })
+#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
+# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
 #elif VEC_SIZE == 16 && defined(__SSE2__)
 # if FLOAT_SIZE == 4
 #  define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))
@@ -70,7 +72,24 @@ static inline bool _to_bool(byte_vec_t b
 })
 #endif
 
-#if FLOAT_SIZE == 4 && defined(__SSE__)
+#if VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW_A__)
+# define max __builtin_ia32_pfmax
+# define min __builtin_ia32_pfmin
+# define recip(x) ({ \
+vec_t t_ = __builtin_ia32_pfrcp(x); \
+touch(x); \
+t_[1] = __builtin_ia32_pfrcp(__builtin_ia32_pswapdsf(x))[0]; \
+touch(x); \
+__builtin_ia32_pfrcpit2(__builtin_ia32_pfrcpit1(t_, x), t_); \
+})
+# define rsqrt(x) ({ \
+vec_t t_ = __builtin_ia32_pfrsqrt(x); \
+touch(x); \
+t_[1] = __builtin_ia32_pfrsqrt(__builtin_ia32_pswapdsf(x))[0]; \
+touch(x); \
+__builtin_ia32_pfrcpit2(__builtin_ia32_pfrsqit1(__builtin_ia32_pfmul(t_, 
t_), x), t_); \
+})
+#elif FLOAT_SIZE == 4 && defined(__SSE__)
 # if VEC_SIZE == 32 && defined(__AVX__)
 #  define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); 
})
 #  define max(x, y) __builtin_ia32_maxps256(x, y)
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -5,6 +5,7 @@
 
 #include "x86_emulate.h"
 #include "blowfish.h"
+#include "3dnow.h"
 #include "sse.h"
 #include "sse2.h"
 #include "sse4.h"
@@ -28,6 +29,11 @@ static bool blowfish_check_regs(const st
 return regs->eax == 2 && regs->edx == 1;
 }
 
+static bool simd_check__3dnow(void)
+{
+return cpu_has_3dnow_ext && cpu_has_sse;
+}
+
 static bool simd_check_sse(void)
 {
 return cpu_has_sse;
@@ -117,6 +123,7 @@ static const struct {
 #else
 # define SIMD(desc, feat, form) SIMD_(32, desc, feat, form)
 #endif
+SIMD(3DNow! single,  _3dnow, 8f4),
 SIMD(SSE scalar single,  sse, f4),
 SIMD(SSE packed single,  sse,   16f4),
 SIMD(SSE2 scalar single, sse2,f4),
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -166,6 +166,12 @@ static 

[Xen-devel] [PATCH 13/17] x86emul: re-order checks in test harness

2017-06-21 Thread Jan Beulich
On older systems printing the "n/a" messages (resulting from the
compiler not being new enough to deal with some of the test code) isn't
very useful: If both CPU and compiler are too old for a certain test,
we can as well omit those messages, as those tests wouldn't be run even
if the compiler did produce code. (This has become obvious with the
3DNow! tests, which I had to run on an older system still supporting
those insns, and that system naturally also had an older compiler.)

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -3267,15 +3267,15 @@ int main(int argc, char **argv)
 
 for ( j = 0; j < ARRAY_SIZE(blobs); j++ )
 {
+if ( blobs[j].check_cpu && !blobs[j].check_cpu() )
+continue;
+
 if ( !blobs[j].size )
 {
 printf("%-39s n/a\n", blobs[j].name);
 continue;
 }
 
-if ( blobs[j].check_cpu && !blobs[j].check_cpu() )
-continue;
-
 memcpy(res, blobs[j].code, blobs[j].size);
 ctxt.lma = blobs[j].bitness == 64;
 ctxt.addr_size = ctxt.sp_size = blobs[j].bitness;



x86emul: re-order checks in test harness

On older systems printing the "n/a" messages (resulting from the
compiler not being new enough to deal with some of the test code) isn't
very useful: If both CPU and compiler are too old for a certain test,
we can as well omit those messages, as those tests wouldn't be run even
if the compiler did produce code. (This has become obvious with the
3DNow! tests, which I had to run on an older system still supporting
those insns, and that system naturally also had an older compiler.)

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -3267,15 +3267,15 @@ int main(int argc, char **argv)
 
 for ( j = 0; j < ARRAY_SIZE(blobs); j++ )
 {
+if ( blobs[j].check_cpu && !blobs[j].check_cpu() )
+continue;
+
 if ( !blobs[j].size )
 {
 printf("%-39s n/a\n", blobs[j].name);
 continue;
 }
 
-if ( blobs[j].check_cpu && !blobs[j].check_cpu() )
-continue;
-
 memcpy(res, blobs[j].code, blobs[j].size);
 ctxt.lma = blobs[j].bitness == 64;
 ctxt.addr_size = ctxt.sp_size = blobs[j].bitness;
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 14/17] x86emul: abstract out XCRn accesses

2017-06-21 Thread Jan Beulich
Use hooks, just like done for other special purpose registers.

This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator
itself as well as adding support for XGETBV emulation.

For now fuzzer reads will obtain the real values (minus the fuzzing of
the hook pointer itself).

Signed-off-by: Jan Beulich 

--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -409,6 +409,8 @@ static int fuzz_write_cr(
 return X86EMUL_OKAY;
 }
 
+#define fuzz_read_xcr emul_test_read_xcr
+
 enum {
 MSRI_IA32_SYSENTER_CS,
 MSRI_IA32_SYSENTER_ESP,
@@ -527,6 +529,7 @@ static const struct x86_emulate_ops all_
 SET(write_io),
 SET(read_cr),
 SET(write_cr),
+SET(read_xcr),
 SET(read_msr),
 SET(write_msr),
 SET(wbinvd),
@@ -635,6 +638,7 @@ enum {
 HOOK_write_cr,
 HOOK_read_dr,
 HOOK_write_dr,
+HOOK_read_xcr,
 HOOK_read_msr,
 HOOK_write_msr,
 HOOK_wbinvd,
@@ -679,6 +683,7 @@ static void disable_hooks(struct x86_emu
 MAYBE_DISABLE_HOOK(write_io);
 MAYBE_DISABLE_HOOK(read_cr);
 MAYBE_DISABLE_HOOK(write_cr);
+MAYBE_DISABLE_HOOK(read_xcr);
 MAYBE_DISABLE_HOOK(read_msr);
 MAYBE_DISABLE_HOOK(write_msr);
 MAYBE_DISABLE_HOOK(wbinvd);
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -368,6 +368,7 @@ static struct x86_emulate_ops emulops =
 .read_segment = read_segment,
 .cpuid  = emul_test_cpuid,
 .read_cr= emul_test_read_cr,
+.read_xcr   = emul_test_read_xcr,
 .read_msr   = read_msr,
 .get_fpu= emul_test_get_fpu,
 .put_fpu= emul_test_put_fpu,
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -120,6 +120,19 @@ int emul_test_read_cr(
 return X86EMUL_UNHANDLEABLE;
 }
 
+int emul_test_read_xcr(
+unsigned int reg,
+uint64_t *val,
+struct x86_emulate_ctxt *ctxt)
+{
+uint32_t lo, hi;
+
+asm ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg) );
+*val = lo | ((uint64_t)hi << 32);
+
+return X86EMUL_OKAY;
+}
+
 int emul_test_get_fpu(
 void (*exception_callback)(void *, struct cpu_user_regs *),
 void *exception_callback_arg,
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -215,6 +215,11 @@ int emul_test_read_cr(
 unsigned long *val,
 struct x86_emulate_ctxt *ctxt);
 
+int emul_test_read_xcr(
+unsigned int reg,
+uint64_t *val,
+struct x86_emulate_ctxt *ctxt);
+
 int emul_test_get_fpu(
 void (*exception_callback)(void *, struct cpu_user_regs *),
 void *exception_callback_arg,
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1643,6 +1643,49 @@ static int hvmemul_write_cr(
 return rc;
 }
 
+static int hvmemul_read_xcr(
+unsigned int reg,
+uint64_t *val,
+struct x86_emulate_ctxt *ctxt)
+{
+uint32_t lo, hi;
+
+switch ( reg )
+{
+case 0:
+*val = current->arch.xcr0;
+return X86EMUL_OKAY;
+
+case 1:
+if ( !cpu_has_xgetbv1 )
+return X86EMUL_UNHANDLEABLE;
+break;
+
+default:
+return X86EMUL_UNHANDLEABLE;
+}
+
+asm ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
+  : "=a" (lo), "=d" (hi) : "c" (reg) );
+*val = lo | ((uint64_t)hi << 32);
+HVMTRACE_LONG_2D(XCR_READ, reg, TRC_PAR_LONG(*val));
+
+return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_xcr(
+unsigned int reg,
+uint64_t val,
+struct x86_emulate_ctxt *ctxt)
+{
+HVMTRACE_LONG_2D(XCR_WRITE, reg, TRC_PAR_LONG(val));
+if ( likely(handle_xsetbv(reg, val) == 0) )
+return X86EMUL_OKAY;
+
+x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+return X86EMUL_EXCEPTION;
+}
+
 static int hvmemul_read_msr(
 unsigned int reg,
 uint64_t *val,
@@ -1691,22 +1734,6 @@ static int hvmemul_get_fpu(
 {
 struct vcpu *curr = current;
 
-switch ( type )
-{
-case X86EMUL_FPU_fpu:
-case X86EMUL_FPU_wait:
-case X86EMUL_FPU_mmx:
-case X86EMUL_FPU_xmm:
-break;
-case X86EMUL_FPU_ymm:
-if ( !(curr->arch.xcr0 & XSTATE_SSE) ||
- !(curr->arch.xcr0 & XSTATE_YMM) )
-return X86EMUL_UNHANDLEABLE;
-break;
-default:
-return X86EMUL_UNHANDLEABLE;
-}
-
 if ( !curr->fpu_dirtied )
 hvm_funcs.fpu_dirty_intercept();
 else if ( type == X86EMUL_FPU_fpu )
@@ -1890,6 +1917,8 @@ static const struct x86_emulate_ops hvm_
 .write_io  = hvmemul_write_io,
 .read_cr   = hvmemul_read_cr,
 .write_cr  = hvmemul_write_cr,
+.read_xcr  = hvmemul_read_xcr,
+.write_xcr = hvmemul_write_xcr,
 .read_msr  = hvmemul_read_msr,
 .write_msr = hvmemul_write_msr,
 .wbinvd= hvmemul_wbinvd,
@@ -1915,6 +1944,8 @@ static const struct x86_emulate_ops hvm_
 .write_io  = hvmemul_write_io_discard,
 .read_cr   = hvmemul_read_cr,

[Xen-devel] [PATCH 15/17] x86emul: adjust_bnd() should check XCR0

2017-06-21 Thread Jan Beulich
Experimentally MPX instructions have been confirmed to behave as NOPs
unless both related XCR0 bits are set to 1. By implication branches
then also don't clear BNDn.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2141,12 +2141,16 @@ static bool umip_active(struct x86_emula
 static void adjust_bnd(struct x86_emulate_ctxt *ctxt,
const struct x86_emulate_ops *ops, enum vex_pfx pfx)
 {
-uint64_t bndcfg;
+uint64_t xcr0, bndcfg;
 int rc;
 
 if ( pfx == vex_f2 || !cpu_has_mpx || !vcpu_has_mpx() )
 return;
 
+if ( !ops->read_xcr || ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY ||
+ !(xcr0 & XSTATE_BNDREGS) || !(xcr0 & XSTATE_BNDCSR) )
+return;
+
 if ( !mode_ring0() )
 bndcfg = read_bndcfgu();
 else if ( !ops->read_msr ||



x86emul: adjust_bnd() should check XCR0

Experimentally MPX instructions have been confirmed to behave as NOPs
unless both related XCR0 bits are set to 1. By implication branches
then also don't clear BNDn.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2141,12 +2141,16 @@ static bool umip_active(struct x86_emula
 static void adjust_bnd(struct x86_emulate_ctxt *ctxt,
const struct x86_emulate_ops *ops, enum vex_pfx pfx)
 {
-uint64_t bndcfg;
+uint64_t xcr0, bndcfg;
 int rc;
 
 if ( pfx == vex_f2 || !cpu_has_mpx || !vcpu_has_mpx() )
 return;
 
+if ( !ops->read_xcr || ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY ||
+ !(xcr0 & XSTATE_BNDREGS) || !(xcr0 & XSTATE_BNDCSR) )
+return;
+
 if ( !mode_ring0() )
 bndcfg = read_bndcfgu();
 else if ( !ops->read_msr ||
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 16/17] x86emul: make all FPU emulation use the stub

2017-06-21 Thread Jan Beulich
While this means quite some reduction of (source) code, the main
purpose is to no longer have exceptions raised from other than stubs.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1261,28 +1261,25 @@ static inline bool fpu_check_write(void)
 return !(fsw & FSW_ES);
 }
 
-#define emulate_fpu_insn(_op)   \
-asm volatile (  \
-"movb $2f-1f,%0 \n" \
-"1: " _op " \n" \
-"2: \n" \
-: "=m" (fic.insn_bytes) : : "memory" )
-
-#define emulate_fpu_insn_memdst(_op, _arg)  \
-asm volatile (  \
-"movb $2f-1f,%0 \n" \
-"1: " _op " %1  \n" \
-"2: \n" \
-: "=m" (fic.insn_bytes), "=m" (_arg)\
-: : "memory" )
-
-#define emulate_fpu_insn_memsrc(_op, _arg)  \
-asm volatile (  \
-"movb $2f-1f,%0 \n" \
-"1: " _op " %1  \n" \
-"2: \n" \
-: "=m" (fic.insn_bytes) \
-: "m" (_arg) : "memory" )
+#define emulate_fpu_insn_memdst(opc, ext, arg)  \
+do {\
+/* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */\
+fic.insn_bytes = 2; \
+memcpy(get_stub(stub),  \
+   ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);\
+invoke_stub("", "", "+m" (fic), "+m" (arg) : "a" (&(arg))); \
+put_stub(stub); \
+} while (0)
+
+#define emulate_fpu_insn_memsrc(opc, ext, arg)  \
+do {\
+/* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */\
+fic.insn_bytes = 2; \
+memcpy(get_stub(stub),  \
+   ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);\
+invoke_stub("", "", "+m" (fic) : "m" (arg), "a" (&(arg)));  \
+put_stub(stub); \
+} while (0)
 
 #define emulate_fpu_insn_stub(bytes...) \
 do {\
@@ -3834,8 +3831,7 @@ x86_emulate(
 case 0x9b:  /* wait/fwait */
 host_and_vcpu_must_have(fpu);
 get_fpu(X86EMUL_FPU_wait, &fic);
-fic.insn_bytes = 1;
-asm volatile ( "fwait" ::: "memory" );
+emulate_fpu_insn_stub(b);
 check_fpu_exn(&fic);
 break;
 
@@ -4253,37 +4249,13 @@ x86_emulate(
 emulate_fpu_insn_stub(0xd8, modrm);
 break;
 default:
+fpu_memsrc32:
 ASSERT(ea.type == OP_MEM);
 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
  4, ctxt)) != X86EMUL_OKAY )
 goto done;
-switch ( modrm_reg & 7 )
-{
-case 0: /* fadd */
-emulate_fpu_insn_memsrc("fadds", src.val);
-break;
-case 1: /* fmul */
-emulate_fpu_insn_memsrc("fmuls", src.val);
-break;
-case 2: /* fcom */
-emulate_fpu_insn_memsrc("fcoms", src.val);
-break;
-case 3: /* fcomp */
-emulate_fpu_insn_memsrc("fcomps", src.val);
-break;
-case 4: /* fsub */
-emulate_fpu_insn_memsrc("fsubs", src.val);
-break;
-case 5: /* fsubr */
-emulate_fpu_insn_memsrc("fsubrs", src.val);
-break;
-case 6: /* fdiv */
-emulate_fpu_insn_memsrc("fdivs", src.val);
-break;
-case 7: /* fdivr */
-emulate_fpu_insn_memsrc("fdivrs", src.val);
-break;
-}
+emulate_fpu_insn_memsrc(b, modrm_reg, src.val);
+break;
 }
 check_fpu_exn(&fic);
 break;
@@ -4330,52 +4302,46 @@ x86_emulate(
 break;
 default:
 generate_exception_if(ea.type != OP_MEM, EXC_UD);
-dst = ea;
 switch ( modrm_reg & 7 )
 {
 case 0: /* fld m32fp */
-if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
-

[Xen-devel] [PATCH 17/17] x86/HVM: eliminate custom #MF/#XM handling

2017-06-21 Thread Jan Beulich
Use the generic stub exception handling instead.

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -134,8 +134,6 @@ int emul_test_read_xcr(
 }
 
 int emul_test_get_fpu(
-void (*exception_callback)(void *, struct cpu_user_regs *),
-void *exception_callback_arg,
 enum x86_emulate_fpu_type type,
 struct x86_emulate_ctxt *ctxt)
 {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -221,8 +221,6 @@ int emul_test_read_xcr(
 struct x86_emulate_ctxt *ctxt);
 
 int emul_test_get_fpu(
-void (*exception_callback)(void *, struct cpu_user_regs *),
-void *exception_callback_arg,
 enum x86_emulate_fpu_type type,
 struct x86_emulate_ctxt *ctxt);
 
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1727,8 +1727,6 @@ int hvmemul_cpuid(uint32_t leaf, uint32_
 }
 
 static int hvmemul_get_fpu(
-void (*exception_callback)(void *, struct cpu_user_regs *),
-void *exception_callback_arg,
 enum x86_emulate_fpu_type type,
 struct x86_emulate_ctxt *ctxt)
 {
@@ -1766,9 +1764,6 @@ static int hvmemul_get_fpu(
 }
 }
 
-curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
-curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
-
 return X86EMUL_OKAY;
 }
 
@@ -1779,8 +1774,6 @@ static void hvmemul_put_fpu(
 {
 struct vcpu *curr = current;
 
-curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
-
 if ( aux )
 {
 typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -780,7 +780,6 @@ void do_reserved_trap(struct cpu_user_re
 
 void do_trap(struct cpu_user_regs *regs)
 {
-struct vcpu *curr = current;
 unsigned int trapnr = regs->entry_vector;
 unsigned long fixup;
 
@@ -800,15 +799,6 @@ void do_trap(struct cpu_user_regs *regs)
 return;
 }
 
-if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
- system_state >= SYS_STATE_active && is_hvm_vcpu(curr) &&
- curr->arch.hvm_vcpu.fpu_exception_callback )
-{
-curr->arch.hvm_vcpu.fpu_exception_callback(
-curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs);
-return;
-}
-
 if ( likely((fixup = search_exception_table(regs)) != 0) )
 {
 dprintk(XENLOG_ERR, "Trap %u: %p [%ps] -> %p\n",
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -959,6 +959,33 @@ static inline int mkec(uint8_t e, int32_
 #define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
 
 #ifdef __XEN__
+static int exception_from_stub(union stub_exception_token res,
+   void *stub, unsigned int line,
+   struct x86_emulate_ctxt *ctxt,
+   const struct x86_emulate_ops *ops)
+{
+int rc = X86EMUL_UNHANDLEABLE;
+
+generate_exception_if(res.fields.trapnr == EXC_MF, EXC_MF);
+if ( res.fields.trapnr == EXC_XM )
+{
+unsigned long cr4;
+
+if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )
+cr4 = X86_CR4_OSXMMEXCPT;
+generate_exception(cr4 & X86_CR4_OSXMMEXCPT ? EXC_XM : EXC_UD);
+}
+gprintk(XENLOG_WARNING,
+"exception %u (ec=%04x) in emulation stub (line %u)\n",
+res.fields.trapnr, res.fields.ec, line);
+gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n",  stub);
+generate_exception_if(res.fields.trapnr == EXC_UD, EXC_UD);
+domain_crash(current->domain);
+
+ done:
+return rc;
+}
+
 # define invoke_stub(pre, post, constraints...) do {\
 union stub_exception_token res_ = { .raw = ~0 };\
 asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n"\
@@ -974,14 +1001,8 @@ static inline int mkec(uint8_t e, int32_
  "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) );   \
 if ( unlikely(~res_.raw) )  \
 {   \
-gprintk(XENLOG_WARNING, \
-"exception %u (ec=%04x) in emulation stub (line %u)\n", \
-res_.fields.trapnr, res_.fields.ec, __LINE__);  \
-gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n",  \
-stub.func); \
-generate_exception_if(res_.fields.trapnr == EXC_UD, EXC_UD);\
-domain_crash(current->domain);  \
-goto cannot_emulate;\
+rc = exception_from_stub(res_, stub.func, __LINE__, ctxt, ops); \
+goto done;  \
 }

Re: [Xen-devel] [PATCH 10/11] gnttab: limit mapkind()'s iteration count

2017-06-21 Thread Andrew Cooper
On 21/06/17 10:38, Jan Beulich wrote:
> There's no need for the function to observe increases of the maptrack
> table (which can occur as the maptrack lock isn't being held) - actual
> population of maptrack entries is excluded while we're here (by way of
> holding the respective grant table lock for writing, while code
> populating entries acquires it for reading). Latch the limit ahead of
> the loop, allowing for the barrier to move out, too.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 09/11] gnttab: avoid spurious maptrack handle allocation failures

2017-06-21 Thread Jan Beulich
>>> On 21.06.17 at 14:02,  wrote:
> On 21/06/17 10:37, Jan Beulich wrote:
>> --- a/xen/common/grant_table.c
>> +++ b/xen/common/grant_table.c
>> @@ -397,7 +397,7 @@ get_maptrack_handle(
>>  struct vcpu  *curr = current;
>>  unsigned int  i, head;
>>  grant_handle_thandle;
>> -struct grant_mapping *new_mt;
>> +struct grant_mapping *new_mt = NULL;
>>  
>>  handle = __get_maptrack_handle(lgt, curr);
>>  if ( likely(handle != -1) )
>> @@ -408,8 +408,13 @@ get_maptrack_handle(
>>  /*
>>   * If we've run out of frames, try stealing an entry from another
>>   * VCPU (in case the guest isn't mapping across its VCPUs evenly).
>> + * Also use this path in case we're out of memory, to avoid spurious
>> + * failures.
>>   */
>> -if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
>> +if ( nr_maptrack_frames(lgt) < max_maptrack_frames )
>> +new_mt = alloc_xenheap_page();
>> +
>> +if ( !new_mt )
>>  {
>>  /*
>>   * Can drop the lock since no other VCPU can be adding a new
> 
> * frame once they've run out.
> */
> 
> It doesn't look like this comment is true any more, which brings the
> locking correctness into question.

Oh, indeed. I'll need to revive the locking change I had done here
and then dropped because we did realize we didn't need it for
XSA-218.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH] xen: Replace ASSERT(0) with ASSERT_UNREACHABLE()

2017-06-21 Thread Andrew Cooper
No functional change, but the result is more informative both in the code and
error messages if the assertions do get hit.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Konrad Rzeszutek Wilk 
CC: Stefano Stabellini 
CC: Julien Grall 
---
 xen/arch/arm/mm.c| 4 ++--
 xen/common/tmem_xen.c| 4 ++--
 xen/drivers/passthrough/amd/iommu_acpi.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 341bacf..5475557 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1154,7 +1154,7 @@ void arch_dump_shared_mem_info(void)
 
 int donate_page(struct domain *d, struct page_info *page, unsigned int 
memflags)
 {
-ASSERT(0);
+ASSERT_UNREACHABLE();
 return -ENOSYS;
 }
 
@@ -1166,7 +1166,7 @@ int steal_page(
 
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
 {
-ASSERT(0);
+ASSERT_UNREACHABLE();
 return 0;
 }
 
diff --git a/xen/common/tmem_xen.c b/xen/common/tmem_xen.c
index 06ce3ef..725ae93 100644
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -34,14 +34,14 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *, scratch_page);
 static inline void *cli_get_page(xen_pfn_t cmfn, unsigned long *pcli_mfn,
  struct page_info **pcli_pfp, bool_t cli_write)
 {
-ASSERT(0);
+ASSERT_UNREACHABLE();
 return NULL;
 }
 
 static inline void cli_put_page(void *cli_va, struct page_info *cli_pfp,
 unsigned long cli_mfn, bool_t mark_dirty)
 {
-ASSERT(0);
+ASSERT_UNREACHABLE();
 }
 #else
 #include 
diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c 
b/xen/drivers/passthrough/amd/iommu_acpi.c
index d8a9205..f4c7206 100644
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -818,7 +818,7 @@ static u16 __init parse_ivhd_device_special(
 hpet_sbdf.init = HPET_IVHD;
 break;
 default:
-ASSERT(0);
+ASSERT_UNREACHABLE();
 break;
 }
 break;
-- 
2.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 3/9] xen/mm: move modify_identity_mmio to global file and drop __init

2017-06-21 Thread Roger Pau Monne
On Wed, Jun 21, 2017 at 05:57:19AM -0600, Jan Beulich wrote:
> >>> On 21.06.17 at 13:11,  wrote:
> > On Fri, May 19, 2017 at 07:35:39AM -0600, Jan Beulich wrote:
> >> >>> On 27.04.17 at 16:35,  wrote:
> >> > +int modify_mmio(struct domain *d, gfn_t gfn, mfn_t mfn, unsigned long 
> >> > nr_pages,
> >> > +const bool map)
> >> > +{
> >> > +int rc;
> >> > +
> >> > +/*
> >> > + * Make sure this function is only used by the hardware domain, 
> >> > because it
> >> > + * can take an arbitrary long time, and could DoS the whole system.
> >> > + */
> >> > +ASSERT(is_hardware_domain(d));
> >> 
> >> If that can happen arbitrarily at run time (rather than just at boot,
> >> as suggested by the removal of __init), it definitely can't remain as
> >> is and will instead need to make use of continuations. I'm therefore
> >> unconvinced you really want to move this code instead of simply
> >> calling {,un}map_mmio_regions() while taking care of preemption
> >> needs.
> > 
> > I'm not sure I know how to use continuations with non-hypercall
> > vmexits. Do you have any recommendations about how to do this? pause
> > the domain and run the mmio changes inside of a tasklet?
> 
> That would be one option. Or you could derive from the approach
> used for waiting for a response from the device model.

AFAICT the ioreq code pauses the domain and waits for a reply from the
dm, but in that case I would still need the tasklet in order to perform
the work (since there's no dm here).

> Even exiting
> back to the guest without updating rIP may be possible, provided
> you have a means to store the continuation information such that
> when coming back you won't start from the beginning again.

I don't really fancy this since it would mean wasting a lot of time in
vmexits/vmenters.

Roger.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH] x86emul: correct CF output of SHLD/SHRD

2017-06-21 Thread Jan Beulich
CF reflects the last bit shifted out, i.e. can't possibly be derived
from the result value.

Signed-off-by: Jan Beulich 
---
This will only apply cleanly on top of
https://lists.xenproject.org/archives/html/xen-devel/2017-06/msg02137.html 

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -783,6 +783,29 @@ int main(int argc, char **argv)
 printf("okay\n");
 #endif
 
+printf("%-40s", "Testing shld $1,%ecx,(%edx)...");
+res[0]  = 0x12345678;
+regs.edx= (unsigned long)res;
+regs.ecx= 0x9abcdef0;
+instr[0] = 0x0f; instr[1] = 0xa4; instr[2] = 0x0a; instr[3] = 0x01;
+for ( i = 0; i < 0x20; ++i )
+{
+uint32_t r = res[0], f, m = X86_EFLAGS_ARITH_MASK & ~X86_EFLAGS_AF;
+
+asm ( "shld $1,%2,%0; pushf; pop %1"
+  : "+rm" (r), "=rm" (f) : "r" (regs.ecx) );
+regs.eflags = f ^ m;
+regs.eip= (unsigned long)&instr[0];
+rc = x86_emulate(&ctxt, &emulops);
+if ( (rc != X86EMUL_OKAY) ||
+ (regs.eip != (unsigned long)&instr[4]) ||
+ (res[0] != r) ||
+ ((regs.eflags ^ f) & m) )
+goto fail;
+regs.ecx <<= 1;
+}
+printf("okay\n");
+
 printf("%-40s", "Testing movbe (%ecx),%eax...");
 instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
 regs.eflags = 0x200;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -6424,7 +6424,7 @@ x86_emulate(
(src.val >> (width - shift)));
 _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF |
   X86_EFLAGS_PF | X86_EFLAGS_CF);
-if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
+if ( (dst.orig_val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
 _regs.eflags |= X86_EFLAGS_CF;
 if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
 _regs.eflags |= X86_EFLAGS_OF;



x86emul: correct CF output of SHLD/SHRD

CF reflects the last bit shifted out, i.e. can't possibly be derived
from the result value.

Signed-off-by: Jan Beulich 
---
This will only apply cleanly on top of
https://lists.xenproject.org/archives/html/xen-devel/2017-06/msg02137.html

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -783,6 +783,29 @@ int main(int argc, char **argv)
 printf("okay\n");
 #endif
 
+printf("%-40s", "Testing shld $1,%ecx,(%edx)...");
+res[0]  = 0x12345678;
+regs.edx= (unsigned long)res;
+regs.ecx= 0x9abcdef0;
+instr[0] = 0x0f; instr[1] = 0xa4; instr[2] = 0x0a; instr[3] = 0x01;
+for ( i = 0; i < 0x20; ++i )
+{
+uint32_t r = res[0], f, m = X86_EFLAGS_ARITH_MASK & ~X86_EFLAGS_AF;
+
+asm ( "shld $1,%2,%0; pushf; pop %1"
+  : "+rm" (r), "=rm" (f) : "r" (regs.ecx) );
+regs.eflags = f ^ m;
+regs.eip= (unsigned long)&instr[0];
+rc = x86_emulate(&ctxt, &emulops);
+if ( (rc != X86EMUL_OKAY) ||
+ (regs.eip != (unsigned long)&instr[4]) ||
+ (res[0] != r) ||
+ ((regs.eflags ^ f) & m) )
+goto fail;
+regs.ecx <<= 1;
+}
+printf("okay\n");
+
 printf("%-40s", "Testing movbe (%ecx),%eax...");
 instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
 regs.eflags = 0x200;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -6424,7 +6424,7 @@ x86_emulate(
(src.val >> (width - shift)));
 _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF |
   X86_EFLAGS_PF | X86_EFLAGS_CF);
-if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
+if ( (dst.orig_val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
 _regs.eflags |= X86_EFLAGS_CF;
 if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
 _regs.eflags |= X86_EFLAGS_OF;
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen: Replace ASSERT(0) with ASSERT_UNREACHABLE()

2017-06-21 Thread Jan Beulich
>>> On 21.06.17 at 14:40,  wrote:
> No functional change, but the result is more informative both in the code and
> error messages if the assertions do get hit.
> 
> Signed-off-by: Andrew Cooper 

Reviewed-by: Jan Beulich 



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 3/9] xen/mm: move modify_identity_mmio to global file and drop __init

2017-06-21 Thread Jan Beulich
>>> On 21.06.17 at 14:43,  wrote:
> On Wed, Jun 21, 2017 at 05:57:19AM -0600, Jan Beulich wrote:
>> >>> On 21.06.17 at 13:11,  wrote:
>> > On Fri, May 19, 2017 at 07:35:39AM -0600, Jan Beulich wrote:
>> >> >>> On 27.04.17 at 16:35,  wrote:
>> >> > +int modify_mmio(struct domain *d, gfn_t gfn, mfn_t mfn, unsigned long 
> nr_pages,
>> >> > +const bool map)
>> >> > +{
>> >> > +int rc;
>> >> > +
>> >> > +/*
>> >> > + * Make sure this function is only used by the hardware domain, 
> because it
>> >> > + * can take an arbitrary long time, and could DoS the whole system.
>> >> > + */
>> >> > +ASSERT(is_hardware_domain(d));
>> >> 
>> >> If that can happen arbitrarily at run time (rather than just at boot,
>> >> as suggested by the removal of __init), it definitely can't remain as
>> >> is and will instead need to make use of continuations. I'm therefore
>> >> unconvinced you really want to move this code instead of simply
>> >> calling {,un}map_mmio_regions() while taking care of preemption
>> >> needs.
>> > 
>> > I'm not sure I know how to use continuations with non-hypercall
>> > vmexits. Do you have any recommendations about how to do this? pause
>> > the domain and run the mmio changes inside of a tasklet?
>> 
>> That would be one option. Or you could derive from the approach
>> used for waiting for a response from the device model.
> 
> AFAICT the ioreq code pauses the domain and waits for a reply from the
> dm, but in that case I would still need the tasklet in order to perform
> the work (since there's no dm here).

Well, that's kind of pausing (it's not an explicit domain_pause(),
and you really would mean to pause just the vCPU here). Otoh
to prevent hangs we simply call process_pending_softirqs()
every once in a while in a few other cases, so maybe doing that
would already suffice here.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 1/3] xen-disk: only advertize feature-persistent if grant copy is not available

2017-06-21 Thread Paul Durrant
If grant copy is available then it will always be used in preference to
persistent maps. In this case feature-persistent should not be advertized
to the frontend, otherwise it may needlessly copy data into persistently
granted buffers.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Kevin Wolf 
Cc: Max Reitz 
---
 hw/block/xen_disk.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 3a22805fbc..9b06e3aa81 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -1023,11 +1023,18 @@ static int blk_init(struct XenDevice *xendev)
 
 blkdev->file_blk  = BLOCK_SIZE;
 
+blkdev->feature_grant_copy =
+(xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
+
+xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
+  blkdev->feature_grant_copy ? "enabled" : "disabled");
+
 /* fill info
  * blk_connect supplies sector-size and sectors
  */
 xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
-xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
+xenstore_write_be_int(&blkdev->xendev, "feature-persistent",
+  !blkdev->feature_grant_copy);
 xenstore_write_be_int(&blkdev->xendev, "info", info);
 
 blk_parse_discard(blkdev);
@@ -1202,12 +1209,6 @@ static int blk_connect(struct XenDevice *xendev)
 
 xen_be_bind_evtchn(&blkdev->xendev);
 
-blkdev->feature_grant_copy =
-(xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
-
-xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
-  blkdev->feature_grant_copy ? "enabled" : "disabled");
-
 xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
   "remote port %d, local port %d\n",
   blkdev->xendev.protocol, blkdev->ring_ref,
-- 
2.11.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 0/3] xen-disk: performance improvements

2017-06-21 Thread Paul Durrant
Paul Durrant (3):
  xen-disk: only advertize feature-persistent if grant copy is not
available
  xen-disk: add support for multi-page shared rings
  xen-disk: use an IOThread per instance

 hw/block/trace-events |   7 ++
 hw/block/xen_disk.c   | 228 +++---
 2 files changed, 188 insertions(+), 47 deletions(-)

-- 
2.11.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


  1   2   3   >